diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
new file mode 100644
index 00000000..c387ff5b
--- /dev/null
+++ b/.pre-commit-config.yaml
@@ -0,0 +1,18 @@
+fail_fast: false
+repos:
+- repo: https://github.com/pre-commit/pre-commit-hooks
+  rev: v4.5.0
+  hooks:
+  - id: check-yaml
+    exclude: "docs"
+  - id: end-of-file-fixer
+    exclude: "docs"
+  - id: trailing-whitespace
+    exclude: "docs"
+
+- repo: https://github.com/psf/black
+  rev: 23.10.0
+  hooks:
+  - id: black
+    args: [ '--config', 'pyproject.toml' ]
+    verbose: true
diff --git a/Dockerfile b/Dockerfile
index 5ab8844e..e5e38efd 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -17,4 +17,4 @@ RUN cd pomdp-py/ && pip install -e.
 WORKDIR /app/pomdp-py

 # activate 'pomdp' environment by default
-RUN echo "conda activate pomdp" >> ~/.bashrc
\ No newline at end of file
+RUN echo "conda activate pomdp" >> ~/.bashrc
diff --git a/LICENSE b/LICENSE
index 48e75b68..a76ea6b0 100644
--- a/LICENSE
+++ b/LICENSE
@@ -6,4 +6,4 @@ Permission is hereby granted, free of charge, to any person obtaining a copy of
 The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.

-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
\ No newline at end of file
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
diff --git a/Makefile b/Makefile
index a263dfd4..22339d43 100644
--- a/Makefile
+++ b/Makefile
@@ -4,6 +4,3 @@ clean:
 .PHONY: build
 build:
 	python setup.py build_ext --inplace
-
-
-
diff --git a/pomdp_py/__init__.py b/pomdp_py/__init__.py
index b2ce26e2..057c552e 100644
--- a/pomdp_py/__init__.py
+++ b/pomdp_py/__init__.py
@@ -11,7 +11,12 @@ from pomdp_py.representations.distribution.gaussian import Gaussian
 from pomdp_py.representations.belief.histogram import update_histogram_belief
 from pomdp_py.representations.belief.particles import update_particles_belief
-from pomdp_py.utils.interfaces.conversion import to_pomdp_file, to_pomdpx_file, AlphaVectorPolicy, PolicyGraph
+from pomdp_py.utils.interfaces.conversion import (
+    to_pomdp_file,
+    to_pomdpx_file,
+    AlphaVectorPolicy,
+    PolicyGraph,
+)
 from pomdp_py.utils.interfaces.solvers import vi_pruning, sarsop

 # Algorithms
@@ -19,8 +24,15 @@ from pomdp_py.algorithms.value_function import value, qvalue, belief_update
 from pomdp_py.algorithms.pomcp import POMCP
 from pomdp_py.algorithms.po_rollout import PORollout
-from pomdp_py.algorithms.po_uct import POUCT, QNode, VNode, RootVNode,\
-    RolloutPolicy, RandomRollout, ActionPrior
+from pomdp_py.algorithms.po_uct import (
+    POUCT,
+    QNode,
+    VNode,
+    RootVNode,
+    RolloutPolicy,
+    RandomRollout,
+    ActionPrior,
+)
 from pomdp_py.algorithms.bsp.blqr import BLQR

 # Templates & Utilities
diff --git a/pomdp_py/__main__.py b/pomdp_py/__main__.py
index 832dd93d..7ca8e469 100644
--- a/pomdp_py/__main__.py
+++ b/pomdp_py/__main__.py
@@ -1,42 +1,47 @@
 import argparse

-available_problems = [
-    'tiger',
-    'rocksample',
-    'mos',
-    'tag',
-    'load_unload'
-]
+available_problems = ["tiger", "rocksample", "mos", "tag", "load_unload"]
+

 def parse_args():
     parser = argparse.ArgumentParser(description="pomdp_py CLI")
-    parser.add_argument("-r", "--run", type=str,
-                        help="run a pomdp under pomdp_py.problems."
-                        "Available options: {}".format(available_problems))
+    parser.add_argument(
+        "-r",
+        "--run",
+        type=str,
+        help="run a pomdp under pomdp_py.problems. "
+ "Available options: {}".format(available_problems), + ) args = parser.parse_args() return parser, args + if __name__ == "__main__": parser, args = parse_args() if args.run: if args.run.lower() == "tiger": from pomdp_py.problems.tiger.tiger_problem import main + main() elif args.run.lower() == "rocksample": from pomdp_py.problems.rocksample.rocksample_problem import main + main() elif args.run.lower() == "mos": from pomdp_py.problems.multi_object_search.problem import unittest + unittest() elif args.run.lower() == "tag": from pomdp_py.problems.tag.problem import main + main() elif args.run.lower() == "load_unload": from pomdp_py.problems.load_unload.load_unload import main + main() else: diff --git a/pomdp_py/algorithms/bsp/blqr.py b/pomdp_py/algorithms/bsp/blqr.py index c39b6cc7..6059c959 100644 --- a/pomdp_py/algorithms/bsp/blqr.py +++ b/pomdp_py/algorithms/bsp/blqr.py @@ -6,21 +6,31 @@ import numpy as np from scipy import optimize -class BLQR(pomdp_py.Planner): - def __init__(self, - func_sysd, func_obs, jac_sysd, jac_obs, jac_sysd_u, - noise_obs, noise_sysd, - Qlarge, L, Q, R, - planning_horizon=15): +class BLQR(pomdp_py.Planner): + def __init__( + self, + func_sysd, + func_obs, + jac_sysd, + jac_obs, + jac_sysd_u, + noise_obs, + noise_sysd, + Qlarge, + L, + Q, + R, + planning_horizon=15, + ): """To initialize the planenr of B-LQR, one needs to supply parameters that describe the underlying linear gaussian system, and also matrices that describe the cost function. Note that math symbols - (e.g. xt, ut) are vectors or matrices represented as np.arrays. + (e.g. xt, ut) are vectors or matrices represented as np.arrays. The B-LQR cost function (Eq.14 in the paper): :math:`J(b_{t:T},u_{t:T}) = \bar{m}_T^TQ_{large}\bar{m}_T+\bar{s}_T^T\Lambda\bar{s}_T + \sum_{k=t}^{T-1}\bar{m}_k^TQ\bar{m}_k+\bar{u}_t^TR\bar{u}_t` - + Args: func_sysd (function): f: (xt, ut) -> xt+1 func_obs (function): g: (xt) -> zt @@ -49,7 +59,7 @@ def __init__(self, self._jac_obs = jac_obs self._jac_sysd = jac_sysd_u self._noise_obs = noise_obs - self._noise_sysd = noise_sysd + self._noise_sysd = noise_sysd self._Qlarge = Qlarge self._L = L # Lambda self._Q = Q @@ -58,7 +68,6 @@ def __init__(self, self._dim_state = self._Q.shape[0] self._dim_control = self._R.shape[0] - def ekf_update_mlo(self, bt, ut): """ Performs the ekf belief update assuming maximum likelihood observation. @@ -79,13 +88,15 @@ def ekf_update_mlo(self, bt, ut): observation function. This corresponds to Wt in equation 13. 
""" # TODO: FIX - mt, Cov_t = bt + mt, Cov_t = bt At = self._jac_sysd(mt, ut) Ct = self._jac_obs(mt) # based on maximum likelihood observation Wt = self._noise_obs(mt).cov Gat = np.dot(np.dot(At, Cov_t), At.transpose()) # Gamma_t = At*Cov_t*At^T CGC_W_inv = np.linalg.inv(np.dot(np.dot(Ct, Gat), Ct.transpose()) + Wt) - Cov_next = Gat - np.dot(np.dot(np.dot(np.dot(Gat, Ct.transpose()), CGC_W_inv), Ct), Gat) + Cov_next = Gat - np.dot( + np.dot(np.dot(np.dot(Gat, Ct.transpose()), CGC_W_inv), Ct), Gat + ) m_next = self._func_sysd(mt, ut) + self._noise_sysd(mt).random() return (m_next, Cov_next) @@ -94,13 +105,13 @@ def _b_u_seg(self, x, i): as a tuple (m_i, Covi, u_i)""" d = self._dim_state c = self._dim_control - start = i*(d + (d*d) + c) - end = (i+1)*(d + (d*d) + c) - m_i_end = i*(d + (d*d) + c) + d - Covi_end = i*(d + (d*d) + c) + d + d*d - u_end = i*(d + (d*d) + c) + d + d*d + c + start = i * (d + (d * d) + c) + end = (i + 1) * (d + (d * d) + c) + m_i_end = i * (d + (d * d) + c) + d + Covi_end = i * (d + (d * d) + c) + d + d * d + u_end = i * (d + (d * d) + c) + d + d * d + c return x[start:m_i_end], x[m_i_end:Covi_end], x[Covi_end:u_end] - + def _control_max_constraint(self, x, i, max_val): _, _, u_i = self._b_u_seg(x, i) return max_val - u_i @@ -110,9 +121,9 @@ def _control_min_constraint(self, x, i, min_val): return u_i - min_val def _mean_final_constraint(self, x, m_des, num_segments): - m_k, _, _ = self._b_u_seg(x, num_segments-1) + m_k, _, _ = self._b_u_seg(x, num_segments - 1) return m_k - m_des - + def _mean_start_constraint(self, x, m_0): m_k, _, _ = self._b_u_seg(x, 0) return m_k - m_0 @@ -121,14 +132,16 @@ def _belief_constraint(self, x, i, num_segments): m_i, s_i, u_i = self._b_u_seg(x, i) Cov_i = s_i.reshape(self._dim_state, self._dim_state).transpose() m_ip1, Cov_ip1 = self.integrate_belief_segment((m_i, Cov_i), u_i, num_segments) - s_ip1 = Cov_ip1.transpose().reshape(-1,) + s_ip1 = Cov_ip1.transpose().reshape( + -1, + ) b_i_vec = np.append(m_i, s_i) b_ip1_vec = np.append(m_ip1, s_ip1) return b_i_vec - b_ip1_vec def integrate_belief_segment(self, b_i, u_i, num_segments): """This is to represent equation 18. - + phi(b_i', u_i') = F(b_i', u_i') + sum F(b_{t+1}, u_i) - F(b_t, u_i) t in seg @@ -137,16 +150,15 @@ def integrate_belief_segment(self, b_i, u_i, num_segments): m_i, Cov_i = b_i b_seg = [self.ekf_update_mlo((m_i, Cov_i), u_i)] for t in range(1, self._planning_horizon // num_segments): - b_seg.append(self.ekf_update_mlo(b_seg[t-1], u_i)) + b_seg.append(self.ekf_update_mlo(b_seg[t - 1], u_i)) sum_mean = b_seg[0][0] sum_Cov = b_seg[0][1] for t in range(len(b_seg)): if t + 1 < len(b_seg) - 1: - sum_mean += (b_seg[t+1][0] - b_seg[t][0]) - sum_Cov += (b_seg[t+1][1] - b_seg[t][1]) + sum_mean += b_seg[t + 1][0] - b_seg[t][0] + sum_Cov += b_seg[t + 1][1] - b_seg[t][1] return sum_mean, sum_Cov - def _opt_cost_func_seg(self, x, b_des, u_des, num_segments): """This will be used as part of the objective function for scipy optimizer. Therefore we only take in one input vector, x. We require @@ -164,7 +176,7 @@ def _opt_cost_func_seg(self, x, b_des, u_des, num_segments): return self.segmented_cost_function(bu_traj, b_des, u_des, num_segments) def segmented_cost_function(self, bu_traj, b_des, u_des, num_segments): - """The cost function in eq 17. + """The cost function in eq 17. Args: b_des (tuple): The desired belief (mT, CovT). This is needed to compute the cost function. 
@@ -172,13 +184,17 @@ def segmented_cost_function(self, bu_traj, b_des, u_des, num_segments):
             If this information is not available, pass in an empty list.
         """
         if len(u_des) > 0 and len(u_des) != num_segments:
-            raise ValueError("The list of desired controls, if provided, must have one control"\
-                             "per segment")
+            raise ValueError(
+                "The list of desired controls, if provided, must have one control "
+                "per segment"
+            )
         b_T, u_T = bu_traj[-1]
         m_T, Cov_T = b_T
-        s_T = Cov_T.transpose().reshape(-1,)
-
+        s_T = Cov_T.transpose().reshape(
+            -1,
+        )
+
         sLs = np.dot(np.dot(s_T.transpose(), self._L), s_T)
         Summation = 0
         for i in range(num_segments):
@@ -190,12 +206,21 @@ def segmented_cost_function(self, bu_traj, b_des, u_des, num_segments):
             else:
                 u_i_des = np.zeros(self._dim_control)
             u_i_diff = u_i - u_i_des
-            Summation += np.dot(np.dot(m_i_diff.transpose(), self._Q), m_i_diff)\
-                + np.dot(np.dot(u_i_diff.transpose(), self._R), u_i_diff)
+            Summation += np.dot(
+                np.dot(m_i_diff.transpose(), self._Q), m_i_diff
+            ) + np.dot(np.dot(u_i_diff.transpose(), self._R), u_i_diff)
         return sLs + Summation
-
-    def create_plan(self, b_0, b_des, u_init, num_segments=5, control_bounds=None,
-                    opt_options={}, opt_callback=None):
+
+    def create_plan(
+        self,
+        b_0,
+        b_des,
+        u_init,
+        num_segments=5,
+        control_bounds=None,
+        opt_options={},
+        opt_callback=None,
+    ):
         """Solves the SQP problem using direct transcription,
         and produce belief points and controls at segments.
         Reference: https://docs.scipy.org/doc/scipy/reference/tutorial/optimize.html"""
@@ -204,51 +229,66 @@ def create_plan(self, b_0, b_des, u_init, num_segments=5, control_bounds=None,
         b_i = b_0
         for i in range(num_segments):
             m_i, Cov_i = b_i
-            s_i = Cov_i.transpose().reshape(-1,)
+            s_i = Cov_i.transpose().reshape(
+                -1,
+            )
             u_i = u_init[i]
             x_0.extend(m_i)
             x_0.extend(s_i)
             x_0.extend(u_i)
             b_i = self.integrate_belief_segment(b_i, u_i, num_segments)

-        # constraints
+        # constraints
         constraints = []
         ## initial belief constraint
-        cons_start = {'type': 'eq',
-                      'fun': self._mean_start_constraint,
-                      'args': [b_0[0]]}
+        cons_start = {
+            "type": "eq",
+            "fun": self._mean_start_constraint,
+            "args": [b_0[0]],
+        }
         constraints.append(cons_start)
         ## belief dynamics constraints and control bounds
         for i in range(num_segments):
-            if i + 1 < num_segments-1:
-                cons_belief = {'type': 'eq',
-                               'fun': self._belief_constraint,
-                               'args': [i, num_segments]}
+            if i + 1 < num_segments - 1:
+                cons_belief = {
+                    "type": "eq",
+                    "fun": self._belief_constraint,
+                    "args": [i, num_segments],
+                }
                 constraints.append(cons_belief)
             # control bounds
             if control_bounds is not None:
-                cons_control_min = {'type': 'ineq',
-                                    'fun': self._control_min_constraint,
-                                    'args': [i, control_bounds[0]]}
-                cons_control_max = {'type': 'ineq',
-                                    'fun': self._control_max_constraint,
-                                    'args': [i, control_bounds[1]]}
+                cons_control_min = {
+                    "type": "ineq",
+                    "fun": self._control_min_constraint,
+                    "args": [i, control_bounds[0]],
+                }
+                cons_control_max = {
+                    "type": "ineq",
+                    "fun": self._control_max_constraint,
+                    "args": [i, control_bounds[1]],
+                }
                 constraints.append(cons_control_min)
                 constraints.append(cons_control_max)
         # final belief constraint
-        cons_final = {'type': 'eq',
-                      'fun': self._mean_final_constraint,
-                      'args': [b_des[0], num_segments]}
+        cons_final = {
+            "type": "eq",
+            "fun": self._mean_final_constraint,
+            "args": [b_des[0], num_segments],
+        }
         constraints.append(cons_final)
-        opt_res = optimize.minimize(self._opt_cost_func_seg, x_0,
-                                    method="SLSQP",
-                                    args=(b_des, u_init, num_segments),
-                                    constraints=constraints,
-                                    options=opt_options,
-                                    callback=opt_callback)
+        opt_res = optimize.minimize(
+            self._opt_cost_func_seg,
+            x_0,
+            method="SLSQP",
+            args=(b_des, u_init, num_segments),
+            constraints=constraints,
+            options=opt_options,
+            callback=opt_callback,
+        )
         return opt_res

     def interpret_sqp_plan(self, opt_res, num_segments):
diff --git a/pomdp_py/algorithms/pomcp.pxd b/pomdp_py/algorithms/pomcp.pxd
index 74212a9b..b3d48fe9 100644
--- a/pomdp_py/algorithms/pomcp.pxd
+++ b/pomdp_py/algorithms/pomcp.pxd
@@ -5,4 +5,3 @@ cdef class VNodeParticles(VNode):
     cdef public Particles belief
 cdef class RootVNodeParticles(RootVNode):
     cdef public Particles belief
-
diff --git a/pomdp_py/algorithms/value_function.py b/pomdp_py/algorithms/value_function.py
index f300b805..7b96c88e 100644
--- a/pomdp_py/algorithms/value_function.py
+++ b/pomdp_py/algorithms/value_function.py
@@ -2,8 +2,10 @@
 for small POMDPs with enumerable S, A, O spaces. This can also be used
 as an exact value iteration algorithm.
 """
+
 import pomdp_py

+
 def value(b, *args, horizon=1):
     """
     Computes the value of a POMDP at belief state b
@@ -18,9 +20,18 @@ def value(b, *args, horizon=1):
         return _value(b, *args, horizon=horizon)
     else:
         pomdp = args[0]
-        return _value(b, pomdp["S"], pomdp["A"], pomdp["Z"],
-                      pomdp["T"], pomdp["O"], pomdp["R"], pomdp["gamma"],
-                      horizon=horizon)
+        return _value(
+            b,
+            pomdp["S"],
+            pomdp["A"],
+            pomdp["Z"],
+            pomdp["T"],
+            pomdp["O"],
+            pomdp["R"],
+            pomdp["gamma"],
+            horizon=horizon,
+        )
+

 def _value(b, S, A, Z, T, O, R, gamma, horizon=1):
     """
@@ -44,7 +55,7 @@ def _value(b, S, A, Z, T, O, R, gamma, horizon=1):
     assert type(S) == list, "S must be a list"
     assert type(A) == list, "A must be a list"
     assert type(Z) == list, "Z must be a list"
-    max_qvalue = float('-inf')
+    max_qvalue = float("-inf")
     for a in A:
         # Compute Q(b,a)
         qv = qvalue(b, a, S, A, Z, T, O, R, gamma, horizon=horizon)
@@ -52,6 +63,7 @@ def _value(b, S, A, Z, T, O, R, gamma, horizon=1):
             max_qvalue = qv
     return max_qvalue

+
 def qvalue(b, a, S, A, Z, T, O, R, gamma, horizon=1):
     """Compute Q(b,a)"""
     r = expected_reward(b, R, a, T)
@@ -64,13 +76,13 @@ def qvalue(b, a, S, A, Z, T, O, R, gamma, horizon=1):
         # If o has non-zero probability
         if prob_o > 0:
             next_b = belief_update(b, a, o, T, O)
-            next_value = value(next_b, S, A, Z, T, O, R, gamma,
-                               horizon=horizon-1)
+            next_value = value(next_b, S, A, Z, T, O, R, gamma, horizon=horizon - 1)
             value_o = prob_o * next_value
             expected_future_value += value_o

     return r + gamma * expected_future_value

+
 def expected_reward(b, R, a, T=None):
     """Returns the expected reward at a given belief"""
     r = 0.0
@@ -82,6 +94,7 @@ def expected_reward(b, R, a, T=None):
             r += b[s] * R.sample(s, a, None)
     return r

+
 def belief_observation_model(o, b, a, T, O):
     """Returns the probability of Pr(o|b,a)"""
     prob = 0.0
@@ -92,6 +105,7 @@ def belief_observation_model(o, b, a, T, O):
             prob += obsrv_prob * trans_prob * b[s]
     return prob

+
 def belief_update(b, a, o, T, O):
     """Returns the updated belief of `b` given
     action `a` and observation `o`."""
@@ -116,9 +130,14 @@ def belief_update(b, a, o, T, O):
 def _create_case(noise=0.15, init_state="tiger-left"):
     """Create a tiger problem instance with uniform belief"""
     from pomdp_problems.tiger.tiger_problem import TigerProblem, TigerState
-    tiger = TigerProblem(noise, TigerState(init_state),
-                         pomdp_py.Histogram({TigerState("tiger-left"): 0.5,
-                                             TigerState("tiger-right"): 0.5}))
+
+    tiger = TigerProblem(
+        noise,
+        TigerState(init_state),
+        pomdp_py.Histogram(
{TigerState("tiger-left"): 0.5, TigerState("tiger-right"): 0.5} + ), + ) T = tiger.agent.transition_model O = tiger.agent.observation_model S = list(T.get_all_states()) @@ -131,9 +150,9 @@ def _create_case(noise=0.15, init_state="tiger-left"): s0 = tiger.env.state return b0, s0, S, A, Z, T, O, R, gamma + def _test_basic(): - b0, s0, S, A, Z, T, O, R, gamma = _create_case(noise=0.15, - init_state="tiger-left") + b0, s0, S, A, Z, T, O, R, gamma = _create_case(noise=0.15, init_state="tiger-left") horizon = 3 qvs = {} for a in A: @@ -145,8 +164,7 @@ def _test_basic(): def _test_planning(): - b0, s0, S, A, Z, T, O, R, gamma = _create_case(noise=0.15, - init_state="tiger-left") + b0, s0, S, A, Z, T, O, R, gamma = _create_case(noise=0.15, init_state="tiger-left") horizon = 3 # Do planning with qvalue @@ -156,7 +174,7 @@ def _test_planning(): _actions = [] for step in range(10): a_best = None - maxq = float('-inf') + maxq = float("-inf") for a in A: q = qvalue(b, a, S, A, Z, T, O, R, gamma, horizon=horizon) if maxq < q: @@ -165,8 +183,11 @@ def _test_planning(): sp = T.sample(s, a_best) o = O.sample(sp, a_best) b = belief_update(b, a_best, o, T, O) - print("[Step {}] action={}, value={}, observation={}, belief={}"\ - .format(step, a_best, maxq, o, b)) + print( + "[Step {}] action={}, value={}, observation={}, belief={}".format( + step, a_best, maxq, o, b + ) + ) _actions.append(a_best.name) assert "open-right" in _actions print("Pass.") diff --git a/pomdp_py/framework/__init__.py b/pomdp_py/framework/__init__.py index 8b137891..e69de29b 100644 --- a/pomdp_py/framework/__init__.py +++ b/pomdp_py/framework/__init__.py @@ -1 +0,0 @@ - diff --git a/pomdp_py/framework/planner.pxd b/pomdp_py/framework/planner.pxd index f46a296f..86e56397 100644 --- a/pomdp_py/framework/planner.pxd +++ b/pomdp_py/framework/planner.pxd @@ -4,4 +4,3 @@ cdef class Planner: cpdef public plan(self, Agent agent) cpdef public update(self, Agent agent, Action real_action, Observation real_observation) - diff --git a/pomdp_py/problems/light_dark/agent/belief.py b/pomdp_py/problems/light_dark/agent/belief.py index 689de8a9..409dacec 100644 --- a/pomdp_py/problems/light_dark/agent/belief.py +++ b/pomdp_py/problems/light_dark/agent/belief.py @@ -30,7 +30,7 @@ # def __init__(self, mean, sigma): # """initialize isotropic Gaussian distribution. - + # Args: # mean (list): 2d vector of robot position # sigma (float): sigma defining the standard deviation of the Gaussian. @@ -41,12 +41,12 @@ # super().__init__(mean, # [[sigma**2, 0, # 0, sigma**2]]) - + # @property # def sigma(self): # return self._sigma - + # class BeliefState(pomdp_py.State): # """Belief state; @@ -68,19 +68,18 @@ # @property # def belief(self): # return self._belief - + # def __hash__(self): # return self._hashcode - + # def __eq__(self, other): # if isinstance(other, BeliefState): # return self._belief == other.belief # else: # return False - + # def __str__(self): # return self.__repr__() - + # def __repr__(self): # return "BeliefState(%s)" % (str(self._belief.mean, self._belief.sigma)) - diff --git a/pomdp_py/problems/light_dark/domain/action.py b/pomdp_py/problems/light_dark/domain/action.py index 575e478e..7c511161 100644 --- a/pomdp_py/problems/light_dark/domain/action.py +++ b/pomdp_py/problems/light_dark/domain/action.py @@ -2,16 +2,19 @@ Origin: Belief space planning assuming maximum likelihood observations -Action space: +Action space: :math:`U\subseteq\mathbb{R}^2`. 
 Quote from the paper:

     "The robot is modeled as a first-order system such that the robot velocity is determined by the control actions, :math:`u\in\mathbb{R}^2`.
 """
+
 import pomdp_py

+
 class Action(pomdp_py.Action):
     """The action is a vector of velocities"""
+
     def __init__(self, control):
         """
         Initializes a state in light dark domain.
@@ -20,21 +23,20 @@ def __init__(self, control):
             control (tuple): velocity
         """
         if len(control) != 2:
-            raise ValueError("Action control must be a vector of length 2")
+            raise ValueError("Action control must be a vector of length 2")
         self.control = control

     def __hash__(self):
         return hash(self.control)
-
+
     def __eq__(self, other):
         if isinstance(other, Action):
             return self.control == other.control
         else:
             return False
-
+
     def __str__(self):
         return self.__repr__()
-
+
     def __repr__(self):
         return "Action(%s)" % (str(self.control))
-
diff --git a/pomdp_py/problems/light_dark/domain/observation.py b/pomdp_py/problems/light_dark/domain/observation.py
index 79157252..5ebfd469 100644
--- a/pomdp_py/problems/light_dark/domain/observation.py
+++ b/pomdp_py/problems/light_dark/domain/observation.py
@@ -1,17 +1,19 @@
 import pomdp_py

+
 class Observation(pomdp_py.Observation):
     """Defines the Observation for the continuous light-dark domain;

-    Observation space:
+    Observation space:

         :math:`\Omega\subseteq\mathbb{R}^2` the observation of the robot is
             an estimate of the robot position :math:`g(x_t)\in\Omega`.

     """
+
     # the number of decimals to round up an observation when it is discrete.
-    PRECISION=2
-
+    PRECISION = 2
+
     def __init__(self, position, discrete=False):
         """
         Initializes an observation in light dark domain.
@@ -25,23 +27,25 @@ def __init__(self, position, discrete=False):
         if self._discrete:
             self.position = position
         else:
-            self.position = (round(position[0], Observation.PRECISION),
-                             round(position[1], Observation.PRECISION))
+            self.position = (
+                round(position[0], Observation.PRECISION),
+                round(position[1], Observation.PRECISION),
+            )

     def discretize(self):
         return Observation(self.position, discrete=True)

     def __hash__(self):
         return hash(self.position)
-
+
     def __eq__(self, other):
         if isinstance(other, Observation):
             return self.position == other.position
         else:
             return False
-
+
     def __str__(self):
         return self.__repr__()
-
+
     def __repr__(self):
         return "Observation(%s)" % (str(self.position))
diff --git a/pomdp_py/problems/light_dark/domain/state.py b/pomdp_py/problems/light_dark/domain/state.py
index 07e29e4b..a73d3cb3 100644
--- a/pomdp_py/problems/light_dark/domain/state.py
+++ b/pomdp_py/problems/light_dark/domain/state.py
@@ -2,15 +2,18 @@

 Origin: Belief space planning assuming maximum likelihood observations

-State space:
+State space:

     :math:`X\subseteq\mathbb{R}^2` the state of the robot
 """
+
 import pomdp_py
 import numpy as np

+
 class State(pomdp_py.State):
     """The state of the problem is just the robot position"""
+
     def __init__(self, position):
         """
         Initializes a state in light dark domain.
@@ -24,15 +27,15 @@ def __init__(self, position):

     def __hash__(self):
         return hash(self.position)
-
+
     def __eq__(self, other):
         if isinstance(other, State):
             return self.position == other.position
         else:
             return False
-
+
     def __str__(self):
         return self.__repr__()
-
+
     def __repr__(self):
         return "State(%s)" % (str(self.position))
diff --git a/pomdp_py/problems/light_dark/env/env.py b/pomdp_py/problems/light_dark/env/env.py
index da4930dd..d88942bc 100644
--- a/pomdp_py/problems/light_dark/env/env.py
+++ b/pomdp_py/problems/light_dark/env/env.py
@@ -7,13 +7,9 @@ import pomdp_py.problems.light_dark as ld
 import numpy as np

-class LightDarkEnvironment(pomdp_py.Environment):
-    def __init__(self,
-                 init_state,
-                 light,
-                 const,
-                 reward_model=None):
+
+class LightDarkEnvironment(pomdp_py.Environment):
+    def __init__(self, init_state, light, const, reward_model=None):
         """
         Args:
             init_state (light_dark.domain.State or np.ndarray):
@@ -33,9 +29,7 @@ def __init__(self,
             transition_model = ld.TransitionModel()
         if type(init_state) == np.ndarray:
             init_state = ld.State(init_state)
-        super().__init__(init_state,
-                         transition_model,
-                         reward_model)
+        super().__init__(init_state, transition_model, reward_model)

     @property
     def light(self):
diff --git a/pomdp_py/problems/light_dark/env/plotting.py b/pomdp_py/problems/light_dark/env/plotting.py
index f347dac0..4728b9b1 100644
--- a/pomdp_py/problems/light_dark/env/plotting.py
+++ b/pomdp_py/problems/light_dark/env/plotting.py
@@ -1,9 +1,21 @@
 """Plotting utilties"""
+
 import matplotlib.pyplot as plt

-def plot_points(xvals, yvals, color=None,
-                size=1.5, label=None, connected=True, style="--", linewidth=1.5,
-                xlabel='x', ylabel='f(x)', loc="lower right"):
+
+def plot_points(
+    xvals,
+    yvals,
+    color=None,
+    size=1.5,
+    label=None,
+    connected=True,
+    style="--",
+    linewidth=1.5,
+    xlabel="x",
+    ylabel="f(x)",
+    loc="lower right",
+):
     if not connected:
         plt.scatter(xvals, yvals, s=size, c=color, label=label)
     else:
@@ -27,34 +39,71 @@ def plot_polygons(verts, colors, ax=None, edgecolor=None):
     Creates a PolygonCollection object in the axis `ax`."""
     if ax is None:
         fig = plt.gcf()
-        ax = fig.add_subplot(1,1,1)
+        ax = fig.add_subplot(1, 1, 1)
     pc = PolyCollection(verts)
     pc.set_edgecolor(edgecolor)
     pc.set_facecolor(colors)
     ax.add_collection(pc)
-    ax.set_xlabel('X axis')
-    ax.set_ylabel('Y axis')
+    ax.set_xlabel("X axis")
+    ax.set_ylabel("Y axis")


-def plot_line(ax, p1, p2,
-              linewidth=1, color='black', zorder=0, alpha=1.0, linestyle="-"):
+def plot_line(
+    ax, p1, p2, linewidth=1, color="black", zorder=0, alpha=1.0, linestyle="-"
+):
     p1x, p1y = p1
     p2x, p2y = p2
-    line = lines.Line2D([p1x, p2x], [p1y, p2y],
-                        linewidth=linewidth, color=color, zorder=zorder,
-                        alpha=alpha, linestyle=linestyle)
+    line = lines.Line2D(
+        [p1x, p2x],
+        [p1y, p2y],
+        linewidth=linewidth,
+        color=color,
+        zorder=zorder,
+        alpha=alpha,
+        linestyle=linestyle,
+    )
     ax.add_line(line)

-def plot_circle(ax, center, radius, color="blue",
-                fill=False, zorder=0, linewidth=0,
-                edgecolor=None, label_text=None,
-                alpha=1.0, text_color="white"):
+
+def plot_circle(
+    ax,
+    center,
+    radius,
+    color="blue",
+    fill=False,
+    zorder=0,
+    linewidth=0,
+    edgecolor=None,
+    label_text=None,
+    alpha=1.0,
+    text_color="white",
+):
     px, py = center
-    circ = plt.Circle((px, py), radius, facecolor=color, fill=fill,
-                      zorder=zorder, linewidth=linewidth, edgecolor=edgecolor, alpha=alpha)
+    circ = plt.Circle(
+        (px, py),
+        radius,
+        facecolor=color,
+        fill=fill,
+        zorder=zorder,
+        linewidth=linewidth,
+        edgecolor=edgecolor,
+        alpha=alpha,
+    )
     ax.add_artist(circ)
     if label_text:
-        text = ax.text(px, py, label_text, color=text_color,
-                       ha='center', va='center', size=7, weight='bold')
-        text.set_path_effects([path_effects.Stroke(linewidth=1, foreground='black'),
-                               path_effects.Normal()])
+        text = ax.text(
+            px,
+            py,
+            label_text,
+            color=text_color,
+            ha="center",
+            va="center",
+            size=7,
+            weight="bold",
+        )
+        text.set_path_effects(
+            [
+                path_effects.Stroke(linewidth=1, foreground="black"),
+                path_effects.Normal(),
+            ]
+        )
diff --git a/pomdp_py/problems/light_dark/env/visual.py b/pomdp_py/problems/light_dark/env/visual.py
index 0e497a5d..625e1ff2 100644
--- a/pomdp_py/problems/light_dark/env/visual.py
+++ b/pomdp_py/problems/light_dark/env/visual.py
@@ -1,10 +1,12 @@
 """Plot the light dark environment"""
+
 import matplotlib.pyplot as plt
 from matplotlib.collections import PolyCollection
 import pomdp_py.problems.light_dark as ld
 from pomdp_py.utils import plotting, colors
 from pomdp_py.utils.misc import remap

+
 class LightDarkViz:
     """This class deals with visualizing a light dark domain"""

@@ -22,7 +24,7 @@ def __init__(self, env, x_range, y_range, res):
         self._x_range = x_range
         self._y_range = y_range
         fig = plt.gcf()
-        self._ax = fig.add_subplot(1,1,1)
+        self._ax = fig.add_subplot(1, 1, 1)
         self._goal_pos = None
         self._m_0 = None  # initial belief pose

@@ -40,10 +42,12 @@ def set_goal(self, goal_pos):
     def set_initial_belief_pos(self, m_0):
         self._m_0 = m_0

-    def plot(self,
-             path_colors={0: [(0,0,0), (0,0,254)]},
-             path_styles={0: "--"},
-             path_widths={0: 1}):
+    def plot(
+        self,
+        path_colors={0: [(0, 0, 0), (0, 0, 254)]},
+        path_styles={0: "--"},
+        path_widths={0: 1},
+    ):
         self._plot_gradient()
         self._plot_path(path_colors, path_styles, path_widths)
         self._plot_robot()
@@ -52,40 +56,53 @@ def plot(self,

     def _plot_robot(self):
         cur_pos = self._env.state.position
-        plotting.plot_circle(self._ax, cur_pos,
-                             0.25,  # tentative
-                             color="black", fill=False,
-                             linewidth=1, edgecolor="black",
-                             zorder=3)
+        plotting.plot_circle(
+            self._ax,
+            cur_pos,
+            0.25,  # tentative
+            color="black",
+            fill=False,
+            linewidth=1,
+            edgecolor="black",
+            zorder=3,
+        )

     def _plot_initial_belief_pos(self):
         if self._m_0 is not None:
-            plotting.plot_circle(self._ax, self._m_0,
-                                 0.25,  # tentative
-                                 color="black", fill=False,
-                                 linewidth=1, edgecolor="black",
-                                 zorder=3)
+            plotting.plot_circle(
+                self._ax,
+                self._m_0,
+                0.25,  # tentative
+                color="black",
+                fill=False,
+                linewidth=1,
+                edgecolor="black",
+                zorder=3,
+            )

     def _plot_goal(self):
         if self._goal_pos is not None:
-            plotting.plot_circle(self._ax,
-                                 self._goal_pos,
-                                 0.25,  # tentative
-                                 linewidth=1, edgecolor="blue",
-                                 zorder=3)
+            plotting.plot_circle(
+                self._ax,
+                self._goal_pos,
+                0.25,  # tentative
+                linewidth=1,
+                edgecolor="blue",
+                zorder=3,
+            )

     def _plot_path(self, colors, styles, linewidths):
         """Plot robot path"""
         # Plot line segments for path
         for path in self._log_paths:
             if path not in colors:
-                path_color = [(0,0,0)] * len(self._log_paths[path])
+                path_color = [(0, 0, 0)] * len(self._log_paths[path])
             else:
                 if len(colors[path]) == 2:
                     c1, c2 = colors[path]
-                    path_color = colors.linear_color_gradient(c1, c2,
-                                                              len(self._log_paths[path]),
-                                                              normalize=True)
+                    path_color = colors.linear_color_gradient(
+                        c1, c2, len(self._log_paths[path]), normalize=True
+                    )
                 else:
                     path_color = [colors[path]] * len(self._log_paths[path])

@@ -100,13 +117,22 @@ def _plot_path(self, colors, styles, linewidths):
                 path_width = linewidths[path]

             for i in range(1, len(self._log_paths[path])):
-                p1 = self._log_paths[path][i-1]
+                p1 = self._log_paths[path][i - 1]
                 p2 = self._log_paths[path][i]
                 try:
-                    plotting.plot_line(self._ax, p1, p2, color=path_color[i],
-                                       linestyle=path_style, zorder=2, linewidth=path_width)
+                    plotting.plot_line(
+                        self._ax,
+                        p1,
+                        p2,
+                        color=path_color[i],
+                        linestyle=path_style,
+                        zorder=2,
+                        linewidth=path_width,
+                    )
                 except Exception:
-                    import pdb; pdb.set_trace()
+                    import pdb
+
+                    pdb.set_trace()

     def _plot_gradient(self):
         """display the light dark domain."""
@@ -114,8 +140,10 @@ def _plot_gradient(self):
         ymin, ymax = self._y_range
         # Note that higher brightness has lower brightness value
         hi_brightness = self._env.const
-        lo_brightness = max(0.5 * (self._env.light - xmin)**2 + self._env.const,
-                            0.5 * (self._env.light - xmax)**2 + self._env.const)
+        lo_brightness = max(
+            0.5 * (self._env.light - xmin) ** 2 + self._env.const,
+            0.5 * (self._env.light - xmax) ** 2 + self._env.const,
+        )
         # Plot a bunch of rectangular strips along the x axis
         # Check out: https://stackoverflow.com/questions/10550477
         x = xmin
@@ -125,9 +153,11 @@ def _plot_gradient(self):
             x_next = x + self._res
             verts.append([(x, ymin), (x_next, ymin), (x_next, ymax), (x, ymax)])
             # compute brightness based on equation in the paper
-            brightness = 0.5 * (self._env.light - x)**2 + self._env.const
+            brightness = 0.5 * (self._env.light - x) ** 2 + self._env.const
             # map brightness to a grayscale color
-            grayscale = int(round(remap(brightness, hi_brightness, lo_brightness, 255, 0)))
+            grayscale = int(
+                round(remap(brightness, hi_brightness, lo_brightness, 255, 0))
+            )
             grayscale_hex = colors.rgb_to_hex((grayscale, grayscale, grayscale))
             colors.append(grayscale_hex)
             x = x_next
@@ -135,15 +165,15 @@ def _plot_gradient(self):
         self._ax.set_xlim(xmin, xmax)
         self._ax.set_ylim(ymin, ymax)

+
 if __name__ == "__main__":
-    env = ld.LightDarkEnvironment(ld.State((0.5, 2.5)),  # init state
-                                  (1.5, -1),  # goal pose
-                                  5,  # light
-                                  1)  # const
+    env = ld.LightDarkEnvironment(
+        ld.State((0.5, 2.5)), (1.5, -1), 5, 1  # init state  # goal pose  # light
+    )  # const
     viz = LightDarkViz(env, (-1, 7), (-2, 4), 0.1)
-    viz.log_position((5,2))
-    viz.log_position((5,0))
-    viz.log_position((4,-1))
+    viz.log_position((5, 2))
+    viz.log_position((5, 0))
+    viz.log_position((4, -1))
     viz.plot()
     plt.show()
diff --git a/pomdp_py/problems/light_dark/models/observation_model.py b/pomdp_py/problems/light_dark/models/observation_model.py
index ce181252..e393eece 100644
--- a/pomdp_py/problems/light_dark/models/observation_model.py
+++ b/pomdp_py/problems/light_dark/models/observation_model.py
@@ -14,13 +14,14 @@
 state (i.e. robot position). The number 5 indicates the x-coordinate
 of the light bar as shown in the figure (Fig.1 of the paper).
""" + import pomdp_py import copy import numpy as np from ..domain.observation import * -class ObservationModel(pomdp_py.ObservationModel): +class ObservationModel(pomdp_py.ObservationModel): def __init__(self, light, const): """ `light` and `const` are parameters in @@ -34,12 +35,11 @@ def __init__(self, light, const): self._const = const def _compute_variance(self, pos): - return 0.5 * (self._light - pos[0])**2 + self._const + return 0.5 * (self._light - pos[0]) ** 2 + self._const def noise_covariance(self, pos): variance = self._compute_variance(pos) - return np.array([[variance, 0], - [0, variance]]) + return np.array([[variance, 0], [0, variance]]) def probability(self, observation, next_state, action): """ @@ -50,11 +50,11 @@ def probability(self, observation, next_state, action): if self._discrete: observation = observation.discretize() variance = self._compute_variance(next_state.position) - gaussian_noise = pomdp_py.Gaussian([0,0], - [[variance, 0], - [0, variance]]) - omega = (observation.position[0] - next_state.position[0], - observation.position[1] - next_state.position[1]) + gaussian_noise = pomdp_py.Gaussian([0, 0], [[variance, 0], [0, variance]]) + omega = ( + observation.position[0] - next_state.position[0], + observation.position[1] - next_state.position[1], + ) return gaussian_noise[omega] def sample(self, next_state, action, argmax=False): @@ -62,43 +62,40 @@ def sample(self, next_state, action, argmax=False): # Sample a position shift according to the gaussian noise. obs_pos = self.func(next_state.position, mpe=argmax) return Observation(tuple(obs_pos)) - + def argmax(self, next_state, action): return self.sample(next_state, action, argmax=True) def func(self): def g(xt, mpe=False): variance = self._compute_variance(xt) - gaussian_noise = pomdp_py.Gaussian([0,0], - [[variance, 0], - [0, variance]]) + gaussian_noise = pomdp_py.Gaussian([0, 0], [[variance, 0], [0, variance]]) if mpe: omega = gaussian_noise.mpe() else: omega = gaussian_noise.random() - return np.array([xt[0] + omega[0], - xt[1] + omega[1]]) + return np.array([xt[0] + omega[0], xt[1] + omega[1]]) + return g def jac_dx(self): def dgdx(mt): variance = self._compute_variance(mt) - gaussian_noise = pomdp_py.Gaussian([0,0], - [[variance, 0], - [0, variance]]) + gaussian_noise = pomdp_py.Gaussian([0, 0], [[variance, 0], [0, variance]]) omega = gaussian_noise.random() # manually compute the jacobian of d(x + omega)/dx - return np.array([[omega[0], mt[1] + omega[1]], - [mt[0] + omega[0], omega[1]]]) + return np.array( + [[omega[0], mt[1] + omega[1]], [mt[0] + omega[0], omega[1]]] + ) + return dgdx def func_noise(self): """Returns a function that returns a state-dependent Gaussian noise.""" + def fn(mt): variance = self._compute_variance(mt) - gaussian_noise = pomdp_py.Gaussian([0,0], - [[variance, 0], - [0, variance]]) + gaussian_noise = pomdp_py.Gaussian([0, 0], [[variance, 0], [0, variance]]) return gaussian_noise + return fn - diff --git a/pomdp_py/problems/light_dark/models/transition_model.py b/pomdp_py/problems/light_dark/models/transition_model.py index bbf52288..081c523b 100644 --- a/pomdp_py/problems/light_dark/models/transition_model.py +++ b/pomdp_py/problems/light_dark/models/transition_model.py @@ -8,6 +8,7 @@ :math:`f(x_t,u_t)=x_t+u`. This means the transition dynamics is deterministic. 
""" + import pomdp_py import copy import numpy as np @@ -17,6 +18,7 @@ class TransitionModel(pomdp_py.TransitionModel): """ The underlying deterministic system dynamics """ + def __init__(self, epsilon=1e-9): self._epsilon = epsilon @@ -43,34 +45,37 @@ def func(self): """Returns the function of the underlying system dynamics. The function is: (xt, ut) -> xt+1 where xt, ut, xt+1 are all numpy arrays.""" + def f(xt, ut): - return np.array([xt[0] + ut[0], - xt[1] + ut[1]]) + return np.array([xt[0] + ut[0], xt[1] + ut[1]]) + return f def jac_dx(self): """Returns the function of the jacobian of the system dynamics function with respect to the state vector mt: (mt, ut) -> At""" + def dfdx(mt, ut): # The result of computing the jacobian by hand - return np.array([[ut[0], mt[1] + ut[1]], - [mt[0] + ut[0], ut[1]]]) + return np.array([[ut[0], mt[1] + ut[1]], [mt[0] + ut[0], ut[1]]]) + return dfdx def jac_du(self): """Returns the function of the jacobian of the system dynamics function with respect to the state vector mt: (mt, ut) -> Bt""" + def dfdu(mt, ut): # The result of computing the jacobian by hand - return np.array([[mt[0], mt[1] + ut[1]], - [mt[0] + ut[0], mt[1]]]) + return np.array([[mt[0], mt[1] + ut[1]], [mt[0] + ut[0], mt[1]]]) + return dfdu - + def func_noise(self, var_sysd=1e-9): """Returns a function that returns a state-dependent Gaussian noise.""" + def fn(mt): - gaussian_noise = pomdp_py.Gaussian([0,0], - [[var_sysd, 0], - [0, var_sysd]]) + gaussian_noise = pomdp_py.Gaussian([0, 0], [[var_sysd, 0], [0, var_sysd]]) return gaussian_noise + return fn diff --git a/pomdp_py/problems/light_dark/problem_discrete.py b/pomdp_py/problems/light_dark/problem_discrete.py index 08275a3f..051b527f 100644 --- a/pomdp_py/problems/light_dark/problem_discrete.py +++ b/pomdp_py/problems/light_dark/problem_discrete.py @@ -1,4 +1,5 @@ import pomdp_py + class DiscreteLightDark(pomdp_py.POMDP): pass diff --git a/pomdp_py/problems/light_dark/test.py b/pomdp_py/problems/light_dark/test.py index 48137926..99210de1 100644 --- a/pomdp_py/problems/light_dark/test.py +++ b/pomdp_py/problems/light_dark/test.py @@ -23,9 +23,7 @@ var_sysd = 1e-9 # Environment. -env = ld.LightDarkEnvironment(x_0, # init state - light, # light - const) # const +env = ld.LightDarkEnvironment(x_0, light, const) # init state # light # const obsmodel = ld.ObservationModel(light, const) func_sysd = env.transition_model.func() @@ -37,28 +35,32 @@ noise_sysd = env.transition_model.func_noise(var_sysd) L = 200 -Q = np.array([[0.5, 0], - [0, 0.5]]) -R = np.array([[0.5, 0], - [0, 0.5]]) - -b_des = (goal_pos, - np.array([[1e-6, 0.0], - [0.0, 1e-6]])) +Q = np.array([[0.5, 0], [0, 0.5]]) +R = np.array([[0.5, 0], [0, 0.5]]) + +b_des = (goal_pos, np.array([[1e-6, 0.0], [0.0, 1e-6]])) # u_des = [0.5*np.random.rand(2) for _ in range(num_segments)] u_des = [[0.0, 0.0] for _ in range(num_segments)] -blqr = pomdp_py.BLQR(func_sysd, func_obs, jac_sysd, jac_obs, jac_sysd_u, - noise_obs, noise_sysd, - None, L, Q, R, - planning_horizon=planning_horizon) +blqr = pomdp_py.BLQR( + func_sysd, + func_obs, + jac_sysd, + jac_obs, + jac_sysd_u, + noise_obs, + noise_sysd, + None, + L, + Q, + R, + planning_horizon=planning_horizon, +) def manual_test(blqr): # Initial and final belief states. 
-    b_0 = (np.array([2.0, 2.0]),
-           np.array([[5.0, 0.0],
-                     [0.0, 5.0]]))
+    b_0 = (np.array([2.0, 2.0]), np.array([[5.0, 0.0], [0.0, 5.0]]))
     print("Path through the light:")
     print(b_0[0])
     print(b_0[1])
@@ -91,11 +93,8 @@ def manual_test(blqr):
     print(b_1[0])
     print(b_1[1])

-
     # Initial and final belief states.
-    b_0 = (np.array([2.0, 2.0]),
-           np.array([[5.0, 0.0],
-                     [0.0, 5.0]]))
+    b_0 = (np.array([2.0, 2.0]), np.array([[5.0, 0.0], [0.0, 5.0]]))
     print("Path directly to goal")
     print(b_0[0])
     print(b_0[1])
@@ -109,15 +108,17 @@ def manual_test(blqr):

     ####### FOR DEBUGGING
     # traj through light
-    u_traj_light = [[1.0, 0.0],
-                    [1.0, 0.0],
-                    [0.0, -1.0],
-                    [0.0, -1.0],
-                    [-1.0, 0.0],
-                    [-1.0, 0.0],
-                    [-1.0, 0.0],
-                    [-1.0, 0.0],
-                    [-0.99, 0.09]]
+    u_traj_light = [
+        [1.0, 0.0],
+        [1.0, 0.0],
+        [0.0, -1.0],
+        [0.0, -1.0],
+        [-1.0, 0.0],
+        [-1.0, 0.0],
+        [-1.0, 0.0],
+        [-1.0, 0.0],
+        [-0.99, 0.09],
+    ]
     b_t = b_0
     b_traj_light = [b_t]
     for t in range(len(u_traj_light)):
@@ -125,10 +126,12 @@ def manual_test(blqr):
         b_tp1 = blqr.ekf_update_mlo(b_t, u_t)
         b_traj_light.append(b_tp1)
         b_t = b_tp1
-    bu_traj_light = [(b_traj_light[t], np.array(u_traj_light[t])) for t in range(len(u_traj_light))]
+    bu_traj_light = [
+        (b_traj_light[t], np.array(u_traj_light[t])) for t in range(len(u_traj_light))
+    ]

     # traj not through light
-    u_traj_dark = [[-1., -1.], [-1., -1.], [-1., -1.]]
+    u_traj_dark = [[-1.0, -1.0], [-1.0, -1.0], [-1.0, -1.0]]
     b_t = b_0
     b_traj_dark = [b_t]
     for t in range(len(u_traj_dark)):
@@ -136,18 +139,23 @@ def manual_test(blqr):
         b_tp1 = blqr.ekf_update_mlo(b_t, u_t)
         b_traj_dark.append(b_tp1)
         b_t = b_tp1
-    bu_traj_dark = [(b_traj_dark[t], np.array(u_traj_dark[t])) for t in range(len(u_traj_dark))]
-
+    bu_traj_dark = [
+        (b_traj_dark[t], np.array(u_traj_dark[t])) for t in range(len(u_traj_dark))
+    ]

     total_light = 0
     total_dark = 0
     for i in range(1000):
-        cost_light = blqr.segmented_cost_function(bu_traj_light, b_des, [], len(bu_traj_light))
-        cost_dark = blqr.segmented_cost_function(bu_traj_dark, b_des, [], len(bu_traj_dark))
+        cost_light = blqr.segmented_cost_function(
+            bu_traj_light, b_des, [], len(bu_traj_light)
+        )
+        cost_dark = blqr.segmented_cost_function(
+            bu_traj_dark, b_des, [], len(bu_traj_dark)
+        )
         total_light += cost_light
         total_dark += cost_dark
-    print("avg cost light: %.3f" % (total_light/1000.0))
-    print("avg cost dark: %.3f" % (total_dark/1000.0))
+    print("avg cost light: %.3f" % (total_light / 1000.0))
+    print("avg cost dark: %.3f" % (total_dark / 1000.0))

     x_range = (-1, 7)
     y_range = (-2, 4)
@@ -173,18 +181,18 @@ def manual_test(blqr):
         viz.log_position(tuple(sysd_b_dark[-1][0]), path=5)
         sysd_b_dark.append((func_sysd(sysd_b_dark[-1][0], u_t), 0))

-    viz.plot(path_colors={2: [(0,0,0), (0,255,0)],
-                          3: [(0,0,0), (0,255,255)],
-                          4: [(0,0,0), (255,255,0)],
-                          5: [(0,0,0), (255,0,255)]},
-             path_styles={2: "--",
-                          3: "-",
-                          4: "--",
-                          5: "-"})
+    viz.plot(
+        path_colors={
+            2: [(0, 0, 0), (0, 255, 0)],
+            3: [(0, 0, 0), (0, 255, 255)],
+            4: [(0, 0, 0), (255, 255, 0)],
+            5: [(0, 0, 0), (255, 0, 255)],
+        },
+        path_styles={2: "--", 3: "-", 4: "--", 5: "-"},
+    )
     plt.show()

-
 def opt_callback(xk, *args):
     global count
     print("Iteration %d" % count)
@@ -195,14 +203,16 @@ def opt_callback(xk, *args):

 def test(blqr):
     ############
-    b_0 = (np.array([2.0, 2.0]),
-           np.array([[5.0, 0.0],
-                     [0.0, 5.0]]))
-    x_sol = blqr.create_plan(b_0, b_des, u_des,
-                             num_segments=num_segments,
-                             opt_options={"maxiter": 30},
-                             opt_callback=opt_callback,
-                             control_bounds=(-0.1, 0.1))
+    b_0 = (np.array([2.0, 2.0]), np.array([[5.0, 0.0], [0.0, 5.0]]))
+    x_sol = blqr.create_plan(
+        b_0,
+        b_des,
+        u_des,
+        num_segments=num_segments,
+        opt_options={"maxiter": 30},
+        opt_callback=opt_callback,
+        control_bounds=(-0.1, 0.1),
+    )
     with np.printoptions(precision=3, suppress=True):
         print("SLSQP solution:")
         print(x_sol)
@@ -221,17 +231,14 @@ def test(blqr):
     for m_i, _, _ in plan:
         viz.log_position(tuple(m_i), path=0)

-    viz.plot(path_colors={0: [(0,0,0), (0,255,0)],
-                          1: [(0,0,0), (255,0,0)]},
-             path_styles={0: "--",
-                          1: "-"},
-             path_widths={0: 4,
-                          1: 1})
+    viz.plot(
+        path_colors={0: [(0, 0, 0), (0, 255, 0)], 1: [(0, 0, 0), (255, 0, 0)]},
+        path_styles={0: "--", 1: "-"},
+        path_widths={0: 4, 1: 1},
+    )
     plt.show()

-
-
 if __name__ == "__main__":
     test(blqr)
     # bt = (np.array([1.3, 2.0]), np.array([[0.5, 0.0], [0.0, 0.5]]))
diff --git a/pomdp_py/problems/load_unload/load_unload.py b/pomdp_py/problems/load_unload/load_unload.py
index b420e195..197ea5db 100644
--- a/pomdp_py/problems/load_unload/load_unload.py
+++ b/pomdp_py/problems/load_unload/load_unload.py
@@ -25,14 +25,17 @@
 EPSILON = 1e-3
 LOAD_LOCATION = 10

+
 class LUState(pomdp_py.State):
     def __init__(self, x, loaded):
         if type(x) != int or x < 0:
-            raise ValueError("Invalid state: {}\n".format((x, loaded)) +
-                             "x must be an integer > 0")
+            raise ValueError(
+                "Invalid state: {}\n".format((x, loaded)) + "x must be an integer > 0"
+            )
         if type(loaded) != bool:
-            raise ValueError("Invalid state: {}\n".format((x, loaded)) +
-                             "loaded must be a boolean")
+            raise ValueError(
+                "Invalid state: {}\n".format((x, loaded)) + "loaded must be a boolean"
+            )
         if x == 0 and loaded == True:
             raise ValueError("Agent can not be loaded in the 0th position")
         if x == LOAD_LOCATION and loaded == False:
@@ -40,58 +43,76 @@ def __init__(self, x, loaded):

         self.x = x
         self.loaded = loaded
+
     def __hash__(self):
         return hash((self.x, self.loaded))
+
     def __eq__(self, other):
         if isinstance(other, LUState):
             return self.x == other.x and self.loaded == other.loaded
         elif type(other) == tuple:
             return self.x == other[0] and self.loaded == other[1]
+
     def __str__(self):
         return str((self.x, self.loaded))
+
     def __repr__(self):
         return "State({})".format(self)

+
 class LUAction(pomdp_py.Action):
     def __init__(self, name):
         if name not in ["move-left", "move-right"]:
             raise ValueError("Invalid action: %s" % name)

         self.name = name
+
     def __hash__(self):
         return hash(self.name)
+
     def __eq__(self, other):
         if isinstance(other, LUAction):
             return self.name == other.name
         elif type(other) == str:
             return self.name == other
+
     def __str__(self):
         return self.name
+
     def __repr__(self):
         return "Action(%s)" % self.name

+
 class LUObservation(pomdp_py.Observation):
     def __init__(self, obs):
         if obs not in ["load", "unload", "middle"]:
-            raise ValueError("Invalid observation: {}\n".format(name) +
-                             "Observation must be an integer > 0")
+            raise ValueError(
+                "Invalid observation: {}\n".format(obs)
+                + "Observation must be an integer > 0"
+            )
         self.name = obs
+
     def __hash__(self):
         return hash(self.name)
+
     def __eq__(self, other):
         if isinstance(other, LUObservation):
             return self.name == other.name
         elif type(other) == str:
             return self.name == other
+
     def __str__(self):
         return str(self.name)
+
     def __repr__(self):
         return "Observation(%s)" % str(self.name)

+
 # Observation model
 class LUObservationModel(pomdp_py.ObservationModel):
     """This problem is small enough for the probabilities to be directly given
     externally"""
+
     def probability(self, observation, next_state, action, normalized=False, **kwargs):
         if observation != self.sample(next_state, action):
             # return EPSILON to avoid degradation of particles
@@ -115,7 +136,8 @@ def argmax(self, next_state, action, normalized=False, **kwargs):
 # Transition Model
 class LUTransitionModel(pomdp_py.TransitionModel):
     """This problem is small enough for the probabilities to be directly given
-    externally"""
+    externally"""
+
     def probability(self, next_state, state, action, normalized=False, **kwargs):
         if next_state != self.sample(state, action):
             return EPSILON
@@ -123,8 +145,9 @@ def probability(self, next_state, state, action, normalized=False, **kwargs):
         return 1 - EPSILON

     def sample(self, state, action, normalized=False, **kwargs):
-        if ((state.x == LOAD_LOCATION and action == "move-right") or
-            (state.x == 0 and action == "move-left")):
+        if (state.x == LOAD_LOCATION and action == "move-right") or (
+            state.x == 0 and action == "move-left"
+        ):
             # trying to make invalid move, stay in the same place
             return state

@@ -144,9 +167,12 @@ def argmax(self, state, action, normalized=False, **kwargs):
         """Returns the most likely next state"""
         return self.sample(state, action)

+
 # Reward Model
 class LURewardModel(pomdp_py.RewardModel):
-    def probability(self, reward, state, action, next_state, normalized=False, **kwargs):
+    def probability(
+        self, reward, state, action, next_state, normalized=False, **kwargs
+    ):
         if reward == self.sample(state, action):
             return 1.0
         else:
@@ -163,10 +189,12 @@ def argmax(self, state, action, next_state, normalized=False, **kwargs):
         """Returns the most likely reward"""
         return self.sample(state, action)

+
 # Policy Model
 class LUPolicyModel(pomdp_py.RandomRollout):
     """This is an extremely dumb policy model; To keep consistent
     with the framework."""
+
     def __init__(self):
         self._all_actions = {LUAction("move-right"), LUAction("move-left")}

@@ -185,21 +213,21 @@ def get_all_actions(self, **kwargs):


 class LoadUnloadProblem(pomdp_py.POMDP):
-
     def __init__(self, init_state, init_belief):
         """init_belief is a Distribution."""
-        agent = pomdp_py.Agent(init_belief,
-                               LUPolicyModel(),
-                               LUTransitionModel(),
-                               LUObservationModel(),
-                               LURewardModel())
+        agent = pomdp_py.Agent(
+            init_belief,
+            LUPolicyModel(),
+            LUTransitionModel(),
+            LUObservationModel(),
+            LURewardModel(),
+        )

-        env = pomdp_py.Environment(init_state,
-                                   LUTransitionModel(),
-                                   LURewardModel())
+        env = pomdp_py.Environment(init_state, LUTransitionModel(), LURewardModel())

         super().__init__(agent, env, name="LoadUnloadProblem")

+
 def generate_random_state():
     # Flip a coin to determine if we are loaded
     loaded = np.random.rand() > 0.5
@@ -210,6 +238,7 @@ def generate_random_state():
         loaded = True
     return LUState(location, loaded)

+
 def generate_init_belief(num_particles):
     particles = []
     for _ in range(num_particles):
@@ -217,6 +246,7 @@ def generate_init_belief(num_particles):

     return pomdp_py.Particles(particles)

+
 def test_planner(load_unload_problem, planner, nsteps=3, discount=0.95):
     gamma = 1.0
     total_reward = 0
@@ -227,21 +257,22 @@ def test_planner(load_unload_problem, planner, nsteps=3, discount=0.95):
     plt.xlabel("Position")

     ax = fig.add_subplot(111)
-    ax.set_xlim(-1, LOAD_LOCATION+1)
+    ax.set_xlim(-1, LOAD_LOCATION + 1)
     ax.set_ylim(0, 2)
     x, y = [], []
-    scat, = ax.plot(x, y, marker="x", markersize=20, ls=" ", color="black")
+    (scat,) = ax.plot(x, y, marker="x", markersize=20, ls=" ", color="black")

     def update(t):
         nonlocal gamma, total_reward, total_discounted_reward
-        print("==== Step %d ====" % (t+1))
+        print("==== Step %d ====" % (t + 1))
         action = planner.plan(load_unload_problem.agent)

         env_reward = load_unload_problem.env.state_transition(action, execute=True)
         true_state = copy.deepcopy(load_unload_problem.env.state)

         real_observation = load_unload_problem.env.provide_observation(
-            load_unload_problem.agent.observation_model, action)
+            load_unload_problem.agent.observation_model, action
+        )
         load_unload_problem.agent.update_history(action, real_observation)
         planner.update(load_unload_problem.agent, action, real_observation)
         total_reward += env_reward
@@ -263,21 +294,27 @@ def update(t):
         new_x, new_y = [true_state.x], [1]
         scat.set_data(new_x, new_y)
         scat.set_color("b" if true_state.loaded else "r")
-        return scat,
+        return (scat,)

     ani = FuncAnimation(fig, update, frames=nsteps, interval=500)
     plt.show()

+
 def main():
     init_state = generate_random_state()
     init_belief = generate_init_belief(num_particles=100)
     load_unload_problem = LoadUnloadProblem(init_state, init_belief)

     print("** Testing POMCP **")
-    pomcp = pomdp_py.POMCP(max_depth=100, discount_factor=0.95,
-                           num_sims=100, exploration_const=110,
-                           rollout_policy=load_unload_problem.agent.policy_model)
+    pomcp = pomdp_py.POMCP(
+        max_depth=100,
+        discount_factor=0.95,
+        num_sims=100,
+        exploration_const=110,
+        rollout_policy=load_unload_problem.agent.policy_model,
+    )
     test_planner(load_unload_problem, pomcp, nsteps=100)

-if __name__ == '__main__':
+
+if __name__ == "__main__":
     main()
diff --git a/pomdp_py/problems/maze/README.rst b/pomdp_py/problems/maze/README.rst
index 8fc77335..9eae2a29 100644
--- a/pomdp_py/problems/maze/README.rst
+++ b/pomdp_py/problems/maze/README.rst
@@ -9,7 +9,7 @@ Problem originally introduced in `Solving POMDPs by Searching the Space of Finit

 .. figure:: https://i.imgur.com/i1RDsrL.png
    :alt: Figure from the paper
-
+
    Maze POMDP

diff --git a/pomdp_py/problems/maze/__init__.py b/pomdp_py/problems/maze/__init__.py
index d1c45628..fd56c91b 100644
--- a/pomdp_py/problems/maze/__init__.py
+++ b/pomdp_py/problems/maze/__init__.py
@@ -10,7 +10,7 @@

 .. figure:: https://i.imgur.com/i1RDsrL.png
    :alt: Figure from the paper
-
+
    Maze POMDP

diff --git a/pomdp_py/problems/maze/domain/action.py b/pomdp_py/problems/maze/domain/action.py
index b96a2ca4..300a3aad 100644
--- a/pomdp_py/problems/maze/domain/action.py
+++ b/pomdp_py/problems/maze/domain/action.py
@@ -4,5 +4,11 @@

 # Reuses the actions in the multi object search domain
 import pomdp_py
-from pomdp_py.problems.multi_object_search.domain.action\
-    import MotionAction, MoveForward, MoveBackward, MoveLeft, MoveRight, LookAction
+from pomdp_py.problems.multi_object_search.domain.action import (
+    MotionAction,
+    MoveForward,
+    MoveBackward,
+    MoveLeft,
+    MoveRight,
+    LookAction,
+)
diff --git a/pomdp_py/problems/maze/domain/observation.py b/pomdp_py/problems/maze/domain/observation.py
index 3a3fa956..64aff2d0 100644
--- a/pomdp_py/problems/maze/domain/observation.py
+++ b/pomdp_py/problems/maze/domain/observation.py
@@ -1,16 +1,11 @@
-
 import pomdp_py

 # we index the walls around a grid cell in
 # clockwise fashion: top wall (0), right wall (1),
 # bottom wall (2), left wall (3).
-WALL = {
-    0: "top",
-    1: "right",
-    2: "bottom",
-    3: "left"
-}
+WALL = {0: "top", 1: "right", 2: "bottom", 3: "left"}
+

 class Observation(pomdp_py.Observation):
     def __init__(self, walls, orientation):
@@ -30,5 +25,4 @@ def __eq__(self, other):
         if not isinstance(other, Observation):
             return False
         else:
-            return self.walls == other.walls\
-                and self.orientation == other.orientation
+            return self.walls == other.walls and self.orientation == other.orientation
diff --git a/pomdp_py/problems/maze/domain/state.py b/pomdp_py/problems/maze/domain/state.py
index dc1dc351..3568c1a8 100644
--- a/pomdp_py/problems/maze/domain/state.py
+++ b/pomdp_py/problems/maze/domain/state.py
@@ -1,10 +1,13 @@
 """Defines the State for the maze domain, which is the position of the robot and its orientation.
 """
+
 import pomdp_py
 import numpy as np

+
 class State(pomdp_py.State):
     """The state of the problem is just the robot position"""
+
     def __init__(self, positition, orientation):
         """
         Initializes a state in light dark domain.
@@ -19,15 +22,15 @@ def __init__(self, positition, orientation):

     def __hash__(self):
         return hash(self.position, self.orientation)
-
+
     def __eq__(self, other):
         if isinstance(other, State):
             return self.position == other.position
         else:
             return False
-
+
     def __str__(self):
         return self.__repr__()
-
+
     def __repr__(self):
         return "State(%s)" % (str(self.position, self.orientation))
diff --git a/pomdp_py/problems/maze/env/env.py b/pomdp_py/problems/maze/env/env.py
index 93211263..ccc11472 100644
--- a/pomdp_py/problems/maze/env/env.py
+++ b/pomdp_py/problems/maze/env/env.py
@@ -1,5 +1,6 @@
 import pomdp_py

+
 class MazeEnvironment(pomdp_py.Environment):
     def __init__(self, init_state):
         pass
diff --git a/pomdp_py/problems/maze/models/components/map.py b/pomdp_py/problems/maze/models/components/map.py
index c79f289a..79de3fe6 100644
--- a/pomdp_py/problems/maze/models/components/map.py
+++ b/pomdp_py/problems/maze/models/components/map.py
@@ -1,5 +1,6 @@
 import numpy as np

+
 class MazeMap:
     def __init__(self):
         pass
@@ -12,27 +13,26 @@ def from_dict(self, mapdict):
     "start": (5, 6),
     "goal": (5, 3),
     "walls": {
-        "top": [[(0,0), (10,0)],
-                [(2,1), (8,1)],
-                [(4,2), (6,2)]],
-        "bottom": [[(0,6), (10,6)],
-                   [(1,5), (9,5)]
-                   [(3,4), (7,4)],
-                   [(5,3), (5,3)]],
-        "left": [[(0,0), (0,6)],
-                 [(1,1), (5,5)],
-                 [(2,1), (2,4)],
-                 [(3,2), (3,4)],
-                 [(4,2), (4,3)],
-                 [(5,3), (5,3)]],
-        "right": [[(5,3), (5,3)]
-                  [(6,2), (6,3)],
-                  [(7,2), (7,4)],
-                  [(8,1), (8,4)],
-                  [(9,1), (9,5)],
-                  [(10,0), (10,6)]]
-    }
-}
-
-
-
+        "top": [[(0, 0), (10, 0)], [(2, 1), (8, 1)], [(4, 2), (6, 2)]],
+        "bottom": [
+            [(0, 6), (10, 6)],
+            [(1, 5), (9, 5)],
+            [(3, 4), (7, 4)],
+            [(5, 3), (5, 3)],
+        ],
+        "left": [
+            [(0, 0), (0, 6)],
+            [(1, 1), (5, 5)],
+            [(2, 1), (2, 4)],
+            [(3, 2), (3, 4)],
+            [(4, 2), (4, 3)],
+            [(5, 3), (5, 3)],
+        ],
+        "right": [
+            [(5, 3), (5, 3)],
+            [(6, 2), (6, 3)],
+            [(7, 2), (7, 4)],
+            [(8, 1), (8, 4)],
+            [(9, 1), (9, 5)],
+            [(10, 0), (10, 6)],
+        ],
+    },
+}
diff --git a/pomdp_py/problems/multi_object_search/agent/agent.py b/pomdp_py/problems/multi_object_search/agent/agent.py
index bf64c6be..b1525706 100644
--- a/pomdp_py/problems/multi_object_search/agent/agent.py
+++ b/pomdp_py/problems/multi_object_search/agent/agent.py
@@ -10,20 +10,24 @@
 from ..models.reward_model import *
 from ..models.policy_model import *

+
 class MosAgent(pomdp_py.Agent):
     """One agent is one robot."""
-    def __init__(self,
-                 robot_id,
-                 init_robot_state,  # initial robot state (assuming robot state is observable perfectly)
-                 object_ids,  # target object ids
dim, # tuple (w,l) of the width (w) and length (l) of the gridworld search space. - sensor, # Sensor equipped on the robot - sigma=0.01, # parameter for observation model - epsilon=1, # parameter for observation model - belief_rep="histogram", # belief representation, either "histogram" or "particles". - prior={}, # prior belief, as defined in belief.py:initialize_belief - num_particles=100, # used if the belief representation is particles - grid_map=None): # GridMap used to avoid collision with obstacles (None if not provided) + + def __init__( + self, + robot_id, + init_robot_state, # initial robot state (assuming robot state is observable perfectly) + object_ids, # target object ids + dim, # tuple (w,l) of the width (w) and length (l) of the gridworld search space. + sensor, # Sensor equipped on the robot + sigma=0.01, # parameter for observation model + epsilon=1, # parameter for observation model + belief_rep="histogram", # belief representation, either "histogram" or "particles". + prior={}, # prior belief, as defined in belief.py:initialize_belief + num_particles=100, # used if the belief representation is particles + grid_map=None, + ): # GridMap used to avoid collision with obstacles (None if not provided) self.robot_id = robot_id self._object_ids = object_ids self.sensor = sensor @@ -34,29 +38,31 @@ def __init__(self, rth = init_robot_state.pose[2] # initialize belief - init_belief = initialize_belief(dim, - self.robot_id, - self._object_ids, - prior=prior, - representation=belief_rep, - robot_orientations={self.robot_id:rth}, - num_particles=num_particles) - transition_model = MosTransitionModel(dim, - {self.robot_id: self.sensor}, - self._object_ids) - observation_model = MosObservationModel(dim, - self.sensor, - self._object_ids, - sigma=sigma, - epsilon=epsilon) + init_belief = initialize_belief( + dim, + self.robot_id, + self._object_ids, + prior=prior, + representation=belief_rep, + robot_orientations={self.robot_id: rth}, + num_particles=num_particles, + ) + transition_model = MosTransitionModel( + dim, {self.robot_id: self.sensor}, self._object_ids + ) + observation_model = MosObservationModel( + dim, self.sensor, self._object_ids, sigma=sigma, epsilon=epsilon + ) reward_model = GoalRewardModel(self._object_ids, robot_id=self.robot_id) policy_model = PolicyModel(self.robot_id, grid_map=grid_map) - super().__init__(init_belief, policy_model, - transition_model=transition_model, - observation_model=observation_model, - reward_model=reward_model) + super().__init__( + init_belief, + policy_model, + transition_model=transition_model, + observation_model=observation_model, + reward_model=reward_model, + ) def clear_history(self): """Custum function; clear history""" self._history = None - diff --git a/pomdp_py/problems/multi_object_search/agent/belief.py b/pomdp_py/problems/multi_object_search/agent/belief.py index fb79299e..f27161df 100644 --- a/pomdp_py/problems/multi_object_search/agent/belief.py +++ b/pomdp_py/problems/multi_object_search/agent/belief.py @@ -14,9 +14,11 @@ import copy from ..domain.state import * + class MosOOBelief(pomdp_py.OOBelief): """This is needed to make sure the belief is sampling the right type of State for this problem.""" + def __init__(self, robot_id, object_beliefs): """ robot_id (int): The id of the robot that has this belief. 
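For reference while reading the belief hunks below: `initialize_belief` takes `prior` as a mapping {objid -> {(x, y) -> prob}}, with the robot's own entry keyed by robot_id and mapping its full (x, y, th) pose to probability 1.0. A minimal sketch — the ids, poses, and probabilities here are hypothetical, not taken from this diff; any object id omitted from `prior` falls back to a uniform belief per the docstring in the next hunk:

    # Hypothetical prior for initialize_belief: target object 10 is believed
    # to be at (2, 3) or (2, 4); the robot's pose is assumed perfectly known.
    prior = {
        10: {(2, 3): 0.7, (2, 4): 0.3},  # objid -> {(x, y) -> prob}
        -114: {(0, 0, 0): 1.0},          # robot_id -> {(x, y, th): 1.0}
    }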
@@ -33,8 +35,15 @@ def random(self, **kwargs): return MosOOState(pomdp_py.OOBelief.random(self, **kwargs).object_states) -def initialize_belief(dim, robot_id, object_ids, prior={}, - representation="histogram", robot_orientations={}, num_particles=100): +def initialize_belief( + dim, + robot_id, + object_ids, + prior={}, + representation="histogram", + robot_orientations={}, + num_particles=100, +): """ Returns a GenerativeDistribution that is the belief representation for the multi-object search problem. @@ -46,7 +55,7 @@ def initialize_belief(dim, robot_id, object_ids, prior={}, over; They are `assumed` to be the target objects, not obstacles, because the robot doesn't really care about obstacle locations and modeling them just adds computation cost. - prior (dict): A mapping {(objid|robot_id) -> {(x,y) -> [0,1]}}. If used, then + prior (dict): A mapping {(objid|robot_id) -> {(x,y) -> [0,1]}}. If used, then all locations not included in the prior will be treated to have 0 probability. If unspecified for an object, then the belief over that object is assumed to be a uniform distribution. @@ -58,14 +67,17 @@ def initialize_belief(dim, robot_id, object_ids, prior={}, GenerativeDistribution: the initial belief representation. """ if representation == "histogram": - return _initialize_histogram_belief(dim, robot_id, object_ids, prior, robot_orientations) + return _initialize_histogram_belief( + dim, robot_id, object_ids, prior, robot_orientations + ) elif representation == "particles": - return _initialize_particles_belief(dim, robot_id, object_ids, - robot_orientations, num_particles=num_particles) + return _initialize_particles_belief( + dim, robot_id, object_ids, robot_orientations, num_particles=num_particles + ) else: raise ValueError("Unsupported belief representation %s" % representation) - + def _initialize_histogram_belief(dim, robot_id, object_ids, prior, robot_orientations): """ Returns the belief distribution represented as a histogram @@ -85,7 +97,7 @@ def _initialize_histogram_belief(dim, robot_id, object_ids, prior, robot_orienta # no prior knowledge. So uniform. for x in range(width): for y in range(length): - state = ObjectState(objid, "target", (x,y)) + state = ObjectState(objid, "target", (x, y)) hist[state] = 1.0 total_prob += hist[state] @@ -100,14 +112,16 @@ def _initialize_histogram_belief(dim, robot_id, object_ids, prior, robot_orienta # Its pose must have been provided in the `prior`. assert robot_id in prior, "Missing initial robot pose in prior." init_robot_pose = list(prior[robot_id].keys())[0] - oo_hists[robot_id] =\ - pomdp_py.Histogram({RobotState(robot_id, init_robot_pose, (), None): 1.0}) - + oo_hists[robot_id] = pomdp_py.Histogram( + {RobotState(robot_id, init_robot_pose, (), None): 1.0} + ) + return MosOOBelief(robot_id, oo_hists) -def _initialize_particles_belief(dim, robot_id, object_ids, prior, - robot_orientations, num_particles=100): +def _initialize_particles_belief( + dim, robot_id, object_ids, prior, robot_orientations, num_particles=100 +): """This returns a single set of particles that represent the distribution over a joint state space of all objects. @@ -123,11 +137,13 @@ def _initialize_particles_belief(dim, robot_id, object_ids, prior, # Its pose must have been provided in the `prior`. assert robot_id in prior, "Missing initial robot pose in prior." 
init_robot_pose = list(prior[robot_id].keys())[0] - + oo_particles = {} # objid -> Particageles width, length = dim for objid in object_ids: - particles = [RobotState(robot_id, init_robot_pose, (), None)] # list of states; Starting the observable robot state. + particles = [ + RobotState(robot_id, init_robot_pose, (), None) + ] # list of states; Starting the observable robot state. if objid in prior: # prior knowledge provided. Just use the prior knowledge prior_sum = sum(prior[objid][pose] for pose in prior[objid]) @@ -141,12 +157,12 @@ def _initialize_particles_belief(dim, robot_id, object_ids, prior, for _ in range(num_particles): x = random.randrange(0, width) y = random.randrange(0, length) - state = ObjectState(objid, "target", (x,y)) + state = ObjectState(objid, "target", (x, y)) particles.append(state) particles_belief = pomdp_py.Particles(particles) oo_particles[objid] = particles_belief - + # Return Particles distribution which contains particles # that represent joint object states particles = [] diff --git a/pomdp_py/problems/multi_object_search/domain/action.py b/pomdp_py/problems/multi_object_search/domain/action.py index 772f55b5..bbb41d16 100644 --- a/pomdp_py/problems/multi_object_search/domain/action.py +++ b/pomdp_py/problems/multi_object_search/domain/action.py @@ -1,7 +1,7 @@ """ Defines the Action for the 2D Multi-Object Search domain; -Action space: +Action space: Motion :math:`\cup` Look :math:`\cup` Find @@ -14,42 +14,52 @@ It is possible to force "Look" after every N/S/E/W action; then the Look action could be dropped. This is optional behavior. """ + import pomdp_py import math + ###### Actions ###### class Action(pomdp_py.Action): """Mos action; Simple named action.""" + def __init__(self, name): self.name = name + def __hash__(self): return hash(self.name) + def __eq__(self, other): if isinstance(other, Action): return self.name == other.name elif type(other) == str: return self.name == other + def __str__(self): return self.name + def __repr__(self): return "Action(%s)" % self.name -MOTION_SCHEME="xy" # can be either xy or vw -STEP_SIZE=1 + +MOTION_SCHEME = "xy" # can be either xy or vw +STEP_SIZE = 1 + + class MotionAction(Action): # scheme 1 (vx,vy,th) SCHEME_XYTH = "xyth" EAST = (STEP_SIZE, 0, 0) # x is horizontal; x+ is right. y is vertical; y+ is down. WEST = (-STEP_SIZE, 0, math.pi) - NORTH = (0, -STEP_SIZE, 3*math.pi/2) - SOUTH = (0, STEP_SIZE, math.pi/2) - + NORTH = (0, -STEP_SIZE, 3 * math.pi / 2) + SOUTH = (0, STEP_SIZE, math.pi / 2) + # scheme 2 (vt, vw) translational, rotational velocities. 
- SCHEME_VW = "vw" + SCHEME_VW = "vw" FORWARD = (STEP_SIZE, 0) BACKWARD = (-STEP_SIZE, 0) - LEFT = (0, -math.pi/4) # left 45 deg - RIGHT = (0, math.pi/4) # right 45 deg + LEFT = (0, -math.pi / 4) # left 45 deg + RIGHT = (0, math.pi / 4) # right 45 deg # scheme 3 (vx,vy) SCHEME_XY = "xy" @@ -60,9 +70,7 @@ class MotionAction(Action): SCHEMES = {"xyth", "xy", "vw"} - def __init__(self, motion, - scheme=MOTION_SCHEME, distance_cost=1, - motion_name=None): + def __init__(self, motion, scheme=MOTION_SCHEME, distance_cost=1, motion_name=None): """ motion (tuple): a tuple of floats that describes the motion; scheme (str): description of the motion scheme; Either @@ -72,51 +80,90 @@ def __init__(self, motion, raise ValueError("Invalid motion scheme %s" % scheme) if scheme == MotionAction.SCHEME_XYTH: - if motion not in {MotionAction.EAST, MotionAction.WEST, - MotionAction.NORTH, MotionAction.SOUTH}: + if motion not in { + MotionAction.EAST, + MotionAction.WEST, + MotionAction.NORTH, + MotionAction.SOUTH, + }: raise ValueError("Invalid move motion %s" % str(motion)) elif scheme == MotionAction.SCHEME_VW: - if motion not in {MotionAction.FORWARD, MotionAction.BACKWARD, - MotionAction.LEFT, MotionAction.RIGHT}: + if motion not in { + MotionAction.FORWARD, + MotionAction.BACKWARD, + MotionAction.LEFT, + MotionAction.RIGHT, + }: raise ValueError("Invalid move motion %s" % str(motion)) elif scheme == MotionAction.SCHEME_XY: - if motion not in {MotionAction.EAST2D, MotionAction.WEST2D, - MotionAction.NORTH2D, MotionAction.SOUTH2D}: + if motion not in { + MotionAction.EAST2D, + MotionAction.WEST2D, + MotionAction.NORTH2D, + MotionAction.SOUTH2D, + }: raise ValueError("Invalid move motion %s" % str(motion)) - + self.motion = motion self.scheme = scheme self.distance_cost = distance_cost if motion_name is None: motion_name = str(motion) super().__init__("move-%s-%s" % (scheme, motion_name)) - -# Define some constant actions -MoveEast = MotionAction(MotionAction.EAST, scheme=MotionAction.SCHEME_XYTH, motion_name="East") -MoveWest = MotionAction(MotionAction.WEST, scheme=MotionAction.SCHEME_XYTH, motion_name="West") -MoveNorth = MotionAction(MotionAction.NORTH, scheme=MotionAction.SCHEME_XYTH, motion_name="North") -MoveSouth = MotionAction(MotionAction.SOUTH, scheme=MotionAction.SCHEME_XYTH, motion_name="South") -MoveForward = MotionAction(MotionAction.FORWARD, scheme=MotionAction.SCHEME_VW, motion_name="Forward") -MoveBackward = MotionAction(MotionAction.BACKWARD, scheme=MotionAction.SCHEME_VW, motion_name="Backward") -MoveLeft = MotionAction(MotionAction.LEFT, scheme=MotionAction.SCHEME_VW, motion_name="TurnLeft") -MoveRight = MotionAction(MotionAction.RIGHT, scheme=MotionAction.SCHEME_VW, motion_name="TurnRight") -MoveEast2D = MotionAction(MotionAction.EAST2D, scheme=MotionAction.SCHEME_XY, motion_name="East2D") -MoveWest2D = MotionAction(MotionAction.WEST2D, scheme=MotionAction.SCHEME_XY, motion_name="West2D") -MoveNorth2D = MotionAction(MotionAction.NORTH2D, scheme=MotionAction.SCHEME_XY, motion_name="North2D") -MoveSouth2D = MotionAction(MotionAction.SOUTH2D, scheme=MotionAction.SCHEME_XY, motion_name="South2D") +# Define some constant actions +MoveEast = MotionAction( + MotionAction.EAST, scheme=MotionAction.SCHEME_XYTH, motion_name="East" +) +MoveWest = MotionAction( + MotionAction.WEST, scheme=MotionAction.SCHEME_XYTH, motion_name="West" +) +MoveNorth = MotionAction( + MotionAction.NORTH, scheme=MotionAction.SCHEME_XYTH, motion_name="North" +) +MoveSouth = MotionAction( + MotionAction.SOUTH, 
scheme=MotionAction.SCHEME_XYTH, motion_name="South" +) + +MoveForward = MotionAction( + MotionAction.FORWARD, scheme=MotionAction.SCHEME_VW, motion_name="Forward" +) +MoveBackward = MotionAction( + MotionAction.BACKWARD, scheme=MotionAction.SCHEME_VW, motion_name="Backward" +) +MoveLeft = MotionAction( + MotionAction.LEFT, scheme=MotionAction.SCHEME_VW, motion_name="TurnLeft" +) +MoveRight = MotionAction( + MotionAction.RIGHT, scheme=MotionAction.SCHEME_VW, motion_name="TurnRight" +) + +MoveEast2D = MotionAction( + MotionAction.EAST2D, scheme=MotionAction.SCHEME_XY, motion_name="East2D" +) +MoveWest2D = MotionAction( + MotionAction.WEST2D, scheme=MotionAction.SCHEME_XY, motion_name="West2D" +) +MoveNorth2D = MotionAction( + MotionAction.NORTH2D, scheme=MotionAction.SCHEME_XY, motion_name="North2D" +) +MoveSouth2D = MotionAction( + MotionAction.SOUTH2D, scheme=MotionAction.SCHEME_XY, motion_name="South2D" +) class LookAction(Action): # For simplicity, this LookAction is not parameterized by direction def __init__(self): super().__init__("look") - + + class FindAction(Action): def __init__(self): super().__init__("find") + Look = LookAction() Find = FindAction() diff --git a/pomdp_py/problems/multi_object_search/domain/observation.py b/pomdp_py/problems/multi_object_search/domain/observation.py index 1be5da68..8ff256e3 100644 --- a/pomdp_py/problems/multi_object_search/domain/observation.py +++ b/pomdp_py/problems/multi_object_search/domain/observation.py @@ -12,32 +12,37 @@ it could be something else. But the resulting observation should be a map from object id to observed pose or NULL (not observed). """ + import pomdp_py + ###### Observation ###### class ObjectObservation(pomdp_py.Observation): """The xy pose of the object is observed; or NULL if not observed""" + NULL = None + def __init__(self, objid, pose): self.objid = objid - if type(pose) == tuple and len(pose) == 2\ - or pose == ObjectObservation.NULL: + if type(pose) == tuple and len(pose) == 2 or pose == ObjectObservation.NULL: self.pose = pose else: - raise ValueError("Invalid observation %s for object" - % (str(pose), objid)) + raise ValueError("Invalid observation %s for object %s" % (str(pose), str(objid))) + def __hash__(self): return hash((self.objid, self.pose)) + def __eq__(self, other): if not isinstance(other, ObjectObservation): return False else: - return self.objid == other.objid\ - and self.pose == other.pose + return self.objid == other.objid and self.pose == other.pose + class MosOOObservation(pomdp_py.OOObservation): """Observation for Mos that can be factored by objects; thus this is an OOObservation.""" + def __init__(self, objposes): """ objposes (dict): map from objid to 2d pose or NULL (not ObjectObservation!).
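To make the `objposes` convention concrete, a minimal sketch with made-up object ids — object 10 detected at grid cell (2, 3), object 12 not observed:

    z = MosOOObservation({10: (2, 3), 12: ObjectObservation.NULL})

`z.for_obj(12)` then wraps the NULL entry back into an ObjectObservation; see the for_obj hunk that follows.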
@@ -50,10 +55,10 @@ def for_obj(self, objid): return ObjectObservation(objid, self.objposes[objid]) else: return ObjectObservation(objid, ObjectObservation.NULL) - + def __hash__(self): return self._hashcode - + def __eq__(self, other): if not isinstance(other, MosOOObservation): return False @@ -68,15 +73,21 @@ def __repr__(self): def factor(self, next_state, *params, **kwargs): """Factor this OO-observation by objects""" - return {objid: ObjectObservation(objid, self.objposes[objid]) - for objid in next_state.object_states - if objid != next_state.robot_id} - + return { + objid: ObjectObservation(objid, self.objposes[objid]) + for objid in next_state.object_states + if objid != next_state.robot_id + } + @classmethod def merge(cls, object_observations, next_state, *params, **kwargs): """Merge `object_observations` into a single OOObservation object; - + object_observation (dict): Maps from objid to ObjectObservation""" - return MosOOObservation({objid: object_observations[objid].pose - for objid in object_observations - if objid != next_state.object_states[objid].objclass != "robot"}) + return MosOOObservation( + { + objid: object_observations[objid].pose + for objid in object_observations + if next_state.object_states[objid].objclass != "robot" + } + ) diff --git a/pomdp_py/problems/multi_object_search/domain/state.py b/pomdp_py/problems/multi_object_search/domain/state.py index 08a03f3a..991544e6 100644 --- a/pomdp_py/problems/multi_object_search/domain/state.py +++ b/pomdp_py/problems/multi_object_search/domain/state.py @@ -6,7 +6,7 @@ Description: Multi-Object Search in a 2D grid world. -State space: +State space: :math:`S_1 \\times S_2 \\times ... S_n \\times S_r` where :math:`S_i (1\leq i\leq n)` is the object state, with attribute @@ -17,57 +17,84 @@ import pomdp_py import math + ###### States ###### class ObjectState(pomdp_py.ObjectState): def __init__(self, objid, objclass, pose): if objclass != "obstacle" and objclass != "target": - raise ValueError("Only allow object class to be"\ - "either 'target' or 'obstacle'." - "Got %s" % objclass) - super().__init__(objclass, {"pose":pose, "id":objid}) + raise ValueError( + "Only allow object class to be either 'target' or 'obstacle'. Got %s" + % objclass + ) + super().__init__(objclass, {"pose": pose, "id": objid}) + def __str__(self): - return 'ObjectState(%s,%s)' % (str(self.objclass), str(self.pose)) + return "ObjectState(%s,%s)" % (str(self.objclass), str(self.pose)) + @property def pose(self): - return self.attributes['pose'] + return self.attributes["pose"] + @property def objid(self): - return self.attributes['id'] + return self.attributes["id"] + class RobotState(pomdp_py.ObjectState): def __init__(self, robot_id, pose, objects_found, camera_direction): """Note: camera_direction is None unless the robot is looking at a direction, in which case camera_direction is the string e.g.
look+x, or 'look'""" - super().__init__("robot", {"id":robot_id, - "pose":pose, # x,y,th - "objects_found": objects_found, - "camera_direction": camera_direction}) + super().__init__( + "robot", + { + "id": robot_id, + "pose": pose, # x,y,th + "objects_found": objects_found, + "camera_direction": camera_direction, + }, + ) + def __str__(self): - return 'RobotState(%s,%s|%s)' % (str(self.objclass), str(self.pose), str(self.objects_found)) + return "RobotState(%s,%s|%s)" % ( + str(self.objclass), + str(self.pose), + str(self.objects_found), + ) + def __repr__(self): return str(self) + @property def pose(self): - return self.attributes['pose'] + return self.attributes["pose"] + @property def robot_pose(self): - return self.attributes['pose'] + return self.attributes["pose"] + @property def objects_found(self): - return self.attributes['objects_found'] + return self.attributes["objects_found"] + class MosOOState(pomdp_py.OOState): def __init__(self, object_states): super().__init__(object_states) + def object_pose(self, objid): return self.object_states[objid]["pose"] + def pose(self, objid): return self.object_pose(objid) + @property def object_poses(self): - return {objid:self.object_states[objid]['pose'] - for objid in self.object_states} + return { + objid: self.object_states[objid]["pose"] for objid in self.object_states + } + def __str__(self): - return 'MosOOState%s' % (str(self.object_states)) + return "MosOOState%s" % (str(self.object_states)) + def __repr__(self): return str(self) diff --git a/pomdp_py/problems/multi_object_search/env/env.py b/pomdp_py/problems/multi_object_search/env/env.py index e04ab387..da4fce30 100644 --- a/pomdp_py/problems/multi_object_search/env/env.py +++ b/pomdp_py/problems/multi_object_search/env/env.py @@ -7,8 +7,10 @@ from pomdp_py.problems.multi_object_search.models.components.sensor import * from pomdp_py.problems.multi_object_search.domain.state import * + class MosEnvironment(pomdp_py.Environment): """""" + def __init__(self, dim, init_state, sensors, obstacles=set({})): """ Args: @@ -21,18 +23,17 @@ def __init__(self, dim, init_state, sensors, obstacles=set({})): self.width, self.length = dim self.sensors = sensors self.obstacles = obstacles - transition_model = MosTransitionModel(dim, - sensors, - set(init_state.object_states.keys())) + transition_model = MosTransitionModel( + dim, sensors, set(init_state.object_states.keys()) + ) # Target objects, a set of ids, are not robot nor obstacles - self.target_objects = \ - {objid - for objid in set(init_state.object_states.keys()) - self.obstacles - if not isinstance(init_state.object_states[objid], RobotState)} + self.target_objects = { + objid + for objid in set(init_state.object_states.keys()) - self.obstacles + if not isinstance(init_state.object_states[objid], RobotState) + } reward_model = GoalRewardModel(self.target_objects) - super().__init__(init_state, - transition_model, - reward_model) + super().__init__(init_state, transition_model, reward_model) @property def robot_ids(self): @@ -56,20 +57,25 @@ def state_transition(self, action, execute=True, robot_id=None): is False. 
""" - assert robot_id is not None, "state transition should happen for a specific robot" + assert ( + robot_id is not None + ), "state transition should happen for a specific robot" next_state = copy.deepcopy(self.state) - next_state.object_states[robot_id] =\ - self.transition_model[robot_id].sample(self.state, action) + next_state.object_states[robot_id] = self.transition_model[robot_id].sample( + self.state, action + ) - reward = self.reward_model.sample(self.state, action, next_state, - robot_id=robot_id) + reward = self.reward_model.sample( + self.state, action, next_state, robot_id=robot_id + ) if execute: self.apply_transition(next_state) return reward else: return next_state, reward + #### Interpret string as an initial world state #### def interpret(worldstr): """ @@ -116,11 +122,10 @@ def interpret(worldstr): if mode == "sensor": sensorlines.append(line) - lines = [line for line in worldlines - if len(line) > 0] + lines = [line for line in worldlines if len(line) > 0] w, l = len(worldlines[0]), len(worldlines) - objects = {} # objid -> object_state(pose) + objects = {} # objid -> object_state(pose) obstacles = set({}) # objid robots = {} # robot_id -> robot_state(pose) sensors = {} # robot_id -> Sensor @@ -128,25 +133,26 @@ def interpret(worldstr): # Parse world for y, line in enumerate(worldlines): if len(line) != w: - raise ValueError("World size inconsistent."\ - "Expected width: %d; Actual Width: %d" - % (w, len(line))) + raise ValueError( + "World size inconsistent.Expected width: %d; Actual Width: %d" + % (w, len(line)) + ) for x, c in enumerate(line): if c == "x": # obstacle objid = 1000 + len(obstacles) # obstacle id - objects[objid] = ObjectState(objid, "obstacle", (x,y)) + objects[objid] = ObjectState(objid, "obstacle", (x, y)) obstacles.add(objid) elif c.isupper(): # target object objid = len(objects) - objects[objid] = ObjectState(objid, "target", (x,y)) + objects[objid] = ObjectState(objid, "target", (x, y)) elif c.islower(): # robot robot_id = interpret_robot_id(c) - robots[robot_id] = RobotState(robot_id, (x,y,0), (), None) + robots[robot_id] = RobotState(robot_id, (x, y, 0), (), None) else: assert c == ".", "Unrecognized character %s in worldstr" % c @@ -158,10 +164,14 @@ def interpret(worldstr): # Parse sensors for line in sensorlines: if "," in line: - raise ValueError("Wrong Fromat. SHould not have ','. Separate tokens with space.") + raise ValueError( + "Wrong Fromat. SHould not have ','. Separate tokens with space." + ) robot_name = line.split(":")[0].strip() robot_id = interpret_robot_id(robot_name) - assert robot_id in robots, "Sensor specified for unknown robot %s" % (robot_name) + assert robot_id in robots, "Sensor specified for unknown robot %s" % ( + robot_name + ) sensor_setting = line.split(":")[1].strip() sensor_type = sensor_setting.split(" ")[0].strip() @@ -179,7 +189,8 @@ def interpret(worldstr): raise ValueError("Unknown sensor type %s" % sensor_type) sensors[robot_id] = sensor - return (w,l), robots, objects, obstacles, sensors + return (w, l), robots, objects, obstacles, sensors + def interpret_robot_id(robot_name): return -ord(robot_name) @@ -202,6 +213,7 @@ def equip_sensors(worldmap, sensors): worldmap += "%s: %s\n" % (robot_char, sensors[robot_char]) return worldmap + def make_laser_sensor(fov, dist_range, angle_increment, occlusion): """ Returns string representation of the laser scanner configuration. 
@@ -222,6 +234,7 @@ def make_laser_sensor(fov, dist_range, angle_increment, occlusion): occstr = "occlusion_enabled=%s" % str(occlusion) return "laser %s %s %s %s" % (fovstr, rangestr, angicstr, occstr) + def make_proximity_sensor(radius, occlusion): """ Returns string representation of the proximity sensor configuration. diff --git a/pomdp_py/problems/multi_object_search/env/visual.py b/pomdp_py/problems/multi_object_search/env/visual.py index 40a3d6e3..6983bf83 100644 --- a/pomdp_py/problems/multi_object_search/env/visual.py +++ b/pomdp_py/problems/multi_object_search/env/visual.py @@ -18,25 +18,25 @@ from pomdp_py.problems.multi_object_search.domain.state import * from pomdp_py.problems.multi_object_search.example_worlds import * + # Deterministic way to get object color def object_color(objid, count): color = [107, 107, 107] if count % 3 == 0: - color[0] += 100 + (3 * (objid*5 % 11)) + color[0] += 100 + (3 * (objid * 5 % 11)) color[0] = max(12, min(222, color[0])) elif count % 3 == 1: - color[1] += 100 + (3 * (objid*5 % 11)) + color[1] += 100 + (3 * (objid * 5 % 11)) color[1] = max(12, min(222, color[1])) else: - color[2] += 100 + (3 * (objid*5 % 11)) + color[2] += 100 + (3 * (objid * 5 % 11)) color[2] = max(12, min(222, color[2])) return tuple(color) + #### Visualization through pygame #### class MosViz: - - def __init__(self, env, - res=30, fps=30, controllable=False): + def __init__(self, env, res=30, fps=30, controllable=False): self._env = env self._res = res @@ -60,8 +60,7 @@ def __init__(self, env, def _make_gridworld_image(self, r): # Preparing 2d array w, l = self._env.width, self._env.length - arr2d = np.full((self._env.width, - self._env.length), 0) # free grids + arr2d = np.full((self._env.width, self._env.length), 0) # free grids state = self._env.state for objid in state.object_states: pose = state.object_states[objid]["pose"] @@ -73,20 +72,24 @@ def _make_gridworld_image(self, r): arr2d[pose[0], pose[1]] = 2 # target # Creating image - img = np.full((w*r,l*r,3), 255, dtype=np.int32) + img = np.full((w * r, l * r, 3), 255, dtype=np.int32) for x in range(w): for y in range(l): - if arr2d[x,y] == 0: # free - cv2.rectangle(img, (y*r, x*r), (y*r+r, x*r+r), - (255, 255, 255), -1) - elif arr2d[x,y] == 1: # obstacle - cv2.rectangle(img, (y*r, x*r), (y*r+r, x*r+r), - (40, 31, 3), -1) - elif arr2d[x,y] == 2: # target - cv2.rectangle(img, (y*r, x*r), (y*r+r, x*r+r), - (255, 165, 0), -1) - cv2.rectangle(img, (y*r, x*r), (y*r+r, x*r+r), - (0, 0, 0), 1, 8) + if arr2d[x, y] == 0: # free + cv2.rectangle( + img, (y * r, x * r), (y * r + r, x * r + r), (255, 255, 255), -1 + ) + elif arr2d[x, y] == 1: # obstacle + cv2.rectangle( + img, (y * r, x * r), (y * r + r, x * r + r), (40, 31, 3), -1 + ) + elif arr2d[x, y] == 2: # target + cv2.rectangle( + img, (y * r, x * r), (y * r + r, x * r + r), (255, 165, 0), -1 + ) + cv2.rectangle( + img, (y * r, x * r), (y * r + r, x * r + r), (0, 0, 0), 1, 8 + ) return img @property @@ -117,23 +120,26 @@ def update(self, robot_id, action, observation, viz_observation, belief): self._last_belief[robot_id] = belief @staticmethod - def draw_robot(img, x, y, th, size, color=(255,12,12)): + def draw_robot(img, x, y, th, size, color=(255, 12, 12)): radius = int(round(size / 2)) - cv2.circle(img, (y+radius, x+radius), radius, color, thickness=2) + cv2.circle(img, (y + radius, x + radius), radius, color, thickness=2) - endpoint = (y+radius + int(round(radius*math.sin(th))), - x+radius + int(round(radius*math.cos(th)))) - cv2.line(img, (y+radius,x+radius), endpoint, 
color, 2) + endpoint = ( + y + radius + int(round(radius * math.sin(th))), + x + radius + int(round(radius * math.cos(th))), + ) + cv2.line(img, (y + radius, x + radius), endpoint, color, 2) @staticmethod - def draw_observation(img, z, rx, ry, rth, r, size, color=(12,12,255)): + def draw_observation(img, z, rx, ry, rth, r, size, color=(12, 12, 255)): assert type(z) == MosOOObservation, "%s != MosOOObservation" % (str(type(z))) radius = int(round(r / 2)) for objid in z.objposes: if z.for_obj(objid).pose != ObjectObservation.NULL: lx, ly = z.for_obj(objid).pose - cv2.circle(img, (ly*r+radius, - lx*r+radius), size, color, thickness=-1) + cv2.circle( + img, (ly * r + radius, lx * r + radius), size, color, thickness=-1 + ) @staticmethod def draw_belief(img, belief, r, size, target_colors): @@ -151,20 +157,25 @@ def draw_belief(img, belief, r, size, target_colors): last_val = -1 count = 0 for state in reversed(sorted(hist, key=hist.get)): - if state.objclass == 'target': + if state.objclass == "target": if last_val != -1: - color = util.lighter(color, 1-hist[state]/last_val) + color = util.lighter(color, 1 - hist[state] / last_val) if np.mean(np.array(color) / np.array([255, 255, 255])) < 0.99: - tx, ty = state['pose'] - if (tx,ty) not in circle_drawn: - circle_drawn[(tx,ty)] = 0 - circle_drawn[(tx,ty)] += 1 - - cv2.circle(img, (ty*r+radius, - tx*r+radius), size//circle_drawn[(tx,ty)], color, thickness=-1) + tx, ty = state["pose"] + if (tx, ty) not in circle_drawn: + circle_drawn[(tx, ty)] = 0 + circle_drawn[(tx, ty)] += 1 + + cv2.circle( + img, + (ty * r + radius, tx * r + radius), + size // circle_drawn[(tx, ty)], + color, + thickness=-1, + ) last_val = hist[state] - count +=1 + count += 1 if last_val <= 0: break @@ -173,14 +184,14 @@ def on_init(self): """pygame init""" pygame.init() # calls pygame.font.init() # init main screen and background - self._display_surf = pygame.display.set_mode((self.img_width, - self.img_height), - pygame.HWSURFACE) + self._display_surf = pygame.display.set_mode( + (self.img_width, self.img_height), pygame.HWSURFACE + ) self._background = pygame.Surface(self._display_surf.get_size()).convert() self._clock = pygame.time.Clock() # Font - self._myfont = pygame.font.SysFont('Comic Sans MS', 30) + self._myfont = pygame.font.SysFont("Comic Sans MS", 30) self._running = True def on_event(self, event): @@ -221,15 +232,18 @@ def on_event(self, event): if self._controllable: if isinstance(action, MotionAction): - reward = self._env.state_transition(action, execute=True, robot_id=robot_id) + reward = self._env.state_transition( + action, execute=True, robot_id=robot_id + ) z = None elif isinstance(action, LookAction) or isinstance(action, FindAction): robot_pose = self._env.state.pose(robot_id) - z = self._env.sensors[robot_id].observe(robot_pose, - self._env.state) + z = self._env.sensors[robot_id].observe(robot_pose, self._env.state) self._last_observation[robot_id] = z self._last_viz_observation[robot_id] = z - reward = self._env.state_transition(action, execute=True, robot_id=robot_id) + reward = self._env.state_transition( + action, execute=True, robot_id=robot_id + ) print("robot state: %s" % str(self._env.state.object_states[robot_id])) print(" action: %s" % str(action.name)) print(" observation: %s" % str(z)) @@ -248,10 +262,18 @@ def on_render(self): fps_text = "FPS: {0:.2f}".format(self._clock.get_fps()) last_action = self._last_action.get(robot_id, None) last_action_str = "no_action" if last_action is None else str(last_action) - pygame.display.set_caption("%s | 
Robot%d(%.2f,%.2f,%.2f) | %s | %s" % - (last_action_str, robot_id, rx, ry, rth*180/math.pi, - str(self._env.state.object_states[robot_id]["objects_found"]), - fps_text)) + pygame.display.set_caption( + "%s | Robot%d(%.2f,%.2f,%.2f) | %s | %s" + % ( + last_action_str, + robot_id, + rx, + ry, + rth * 180 / math.pi, + str(self._env.state.object_states[robot_id]["objects_found"]), + fps_text, + ) + ) pygame.display.flip() def on_cleanup(self): @@ -261,7 +283,7 @@ def on_execute(self): if self.on_init() == False: self._running = False - while( self._running ): + while self._running: for event in pygame.event.get(): self.on_event(event) self.on_loop() @@ -278,17 +300,29 @@ def render_env(self, display_surf): last_viz_observation = self._last_viz_observation.get(robot_id, None) last_belief = self._last_belief.get(robot_id, None) if last_belief is not None: - MosViz.draw_belief(img, last_belief, r, r//3, self._target_colors) + MosViz.draw_belief(img, last_belief, r, r // 3, self._target_colors) if last_viz_observation is not None: - MosViz.draw_observation(img, last_viz_observation, - rx, ry, rth, r, r//4, color=(200, 200, 12)) + MosViz.draw_observation( + img, + last_viz_observation, + rx, + ry, + rth, + r, + r // 4, + color=(200, 200, 12), + ) if last_observation is not None: - MosViz.draw_observation(img, last_observation, - rx, ry, rth, r, r//8, color=(20, 20, 180)) + MosViz.draw_observation( + img, last_observation, rx, ry, rth, r, r // 8, color=(20, 20, 180) + ) - MosViz.draw_robot(img, rx*r, ry*r, rth, r, color=(12, 255*(0.8*(i+1)), 12)) + MosViz.draw_robot( + img, rx * r, ry * r, rth, r, color=(12, 255 * (0.8 * (i + 1)), 12) + ) pygame.surfarray.blit_array(display_surf, img) + def unittest(): # If you don't want occlusion, use this: laserstr = make_laser_sensor(90, (1, 8), 0.5, False) @@ -307,11 +341,10 @@ def unittest(): dim, robots, objects, obstacles, sensors = interpret(worldstr) init_state = MosOOState({**objects, **robots}) - env = MosEnvironment(dim, - init_state, sensors, - obstacles=obstacles) + env = MosEnvironment(dim, init_state, sensors, obstacles=obstacles) viz = MosViz(env, controllable=True) viz.on_execute() -if __name__ == '__main__': + +if __name__ == "__main__": unittest() diff --git a/pomdp_py/problems/multi_object_search/example_worlds.py b/pomdp_py/problems/multi_object_search/example_worlds.py index 6bb58a1b..7fd02bd4 100644 --- a/pomdp_py/problems/multi_object_search/example_worlds.py +++ b/pomdp_py/problems/multi_object_search/example_worlds.py @@ -1,4 +1,5 @@ """This file has some examples of world string.""" + import random ############# Example Worlds ########### @@ -6,14 +7,16 @@ # the format world0 = ( -""" + """ rx... .x.xT ..... -""", "r") +""", + "r", +) world1 = ( -""" + """ rx.T... .x..... ...xx.. @@ -21,11 +24,13 @@ .xxx.T. .xxx... ....... -""", "r") +""", + "r", +) # Used to test the shape of the sensor world2 = ( -""" + """ ................. ................. ..xxxxxxxxxxxxx.. @@ -47,11 +52,13 @@ ..xxxxxxxxxxxxx.. ................. ................. -""", "r") +""", + "r", +) # Used to test sensor occlusion world3 = ( -""" + """ ................. ................. ..xxxxxxxxxxxxx.. @@ -73,11 +80,13 @@ ..xxxxxxxxxxxxx.. ................. ................. -""", "r") +""", + "r", +) + -def random_world(width, length, num_obj, num_obstacles, - robot_char="r"): - worldstr = [[ "." for i in range(width)] for j in range(length)] +def random_world(width, length, num_obj, num_obstacles, robot_char="r"): + worldstr = [["." 
for i in range(width)] for j in range(length)] # First place obstacles num_obstacles_placed = 0 while num_obstacles_placed < num_obstacles: @@ -86,7 +95,7 @@ def random_world(width, length, num_obj, num_obstacles, if worldstr[y][x] == ".": worldstr[y][x] = "x" num_obstacles_placed += 1 - + num_obj_placed = 0 while num_obj_placed < num_obj: x = random.randrange(0, width) diff --git a/pomdp_py/problems/multi_object_search/models/components/grid_map.py b/pomdp_py/problems/multi_object_search/models/components/grid_map.py index 38093b48..847a2d15 100644 --- a/pomdp_py/problems/multi_object_search/models/components/grid_map.py +++ b/pomdp_py/problems/multi_object_search/models/components/grid_map.py @@ -1,14 +1,18 @@ """Optional grid map to assist collision avoidance during planning.""" -from pomdp_py.problems.multi_object_search.models.transition_model import RobotTransitionModel +from pomdp_py.problems.multi_object_search.models.transition_model import ( + RobotTransitionModel, +) from pomdp_py.problems.multi_object_search.domain.action import * from pomdp_py.problems.multi_object_search.domain.state import * + class GridMap: """This map assists the agent to avoid planning invalid actions that will run into obstacles. Used if we assume the agent has a map. This map does not contain information about the object locations.""" + def __init__(self, width, length, obstacles): """ Args: @@ -28,8 +32,7 @@ def __init__(self, width, length, obstacles): for objid in self._obstacles } # set of obstacle poses - self.obstacle_poses = set({self._obstacles[objid] - for objid in self._obstacles}) + self.obstacle_poses = set({self._obstacles[objid] for objid in self._obstacles}) def valid_motions(self, robot_id, robot_pose, all_motion_actions): """ @@ -39,16 +42,18 @@ def valid_motions(self, robot_id, robot_pose, all_motion_actions): the assumption that the robot dynamics is deterministic. """ state = MosOOState(self._obstacle_states) - state.set_object_state(robot_id, - RobotState(robot_id, robot_pose, None, None)) + state.set_object_state(robot_id, RobotState(robot_id, robot_pose, None, None)) valid = set({}) for motion_action in all_motion_actions: if not isinstance(motion_action, MotionAction): - raise ValueError("This (%s) is not a motion action" % str(motion_action)) + raise ValueError( + "This (%s) is not a motion action" % str(motion_action) + ) - next_pose = RobotTransitionModel.if_move_by(robot_id, state, - motion_action, (self.width, self.length)) + next_pose = RobotTransitionModel.if_move_by( + robot_id, state, motion_action, (self.width, self.length) + ) if next_pose != robot_pose: # robot moved --> valid motion valid.add(motion_action) diff --git a/pomdp_py/problems/multi_object_search/models/components/sensor.py b/pomdp_py/problems/multi_object_search/models/components/sensor.py index 033956f2..91241837 100644 --- a/pomdp_py/problems/multi_object_search/models/components/sensor.py +++ b/pomdp_py/problems/multi_object_search/models/components/sensor.py @@ -13,21 +13,26 @@ # not receive observation for some regions in the field of view # making it a more challenging situation to deal with. + # Utility functions def euclidean_dist(p1, p2): - return math.sqrt(sum([(a - b)** 2 for a, b in zip(p1, p2)])) + return math.sqrt(sum([(a - b) ** 2 for a, b in zip(p1, p2)])) + def to_rad(deg): return deg * math.pi / 180.0 + def in_range(val, rang): # Returns True if val is in range (a,b); Inclusive. 
return val >= rang[0] and val <= rang[1] + #### Sensors #### class Sensor: LASER = "laser" PROXIMITY = "proximity" + def observe(self, robot_pose, env_state): """ Returns an Observation with this sensor model. @@ -48,13 +53,19 @@ def robot_id(self): # id of the robot equipped with this sensor return self._robot_id + class Laser2DSensor: """Fan shaped 2D laser sensor""" - def __init__(self, robot_id, - fov=90, min_range=1, max_range=5, - angle_increment=5, - occlusion_enabled=False): + def __init__( + self, + robot_id, + fov=90, + min_range=1, + max_range=5, + angle_increment=5, + occlusion_enabled=False, + ): """ fov (float): angle between the start and end beams of one scan (degree). min_range (int or float) @@ -72,26 +83,44 @@ def __init__(self, robot_id, # For example, the fov=pi, means the range scanner scans 180 degrees # in front of the robot. By our angle convention, 180 degrees maps to [0,90] and [270, 360].""" self._fov_left = (0, self.fov / 2) - self._fov_right = (2*math.pi - self.fov/2, 2*math.pi) + self._fov_right = (2 * math.pi - self.fov / 2, 2 * math.pi) # beams that are actually within the fov (set of angles) - self._beams = {round(th, 2) - for th in np.linspace(self._fov_left[0], - self._fov_left[1], - int(round((self._fov_left[1] - self._fov_left[0]) / self.angle_increment)))}\ - | {round(th, 2) - for th in np.linspace(self._fov_right[0], - self._fov_right[1], - int(round((self._fov_right[1] - self._fov_right[0]) / self.angle_increment)))} + self._beams = { + round(th, 2) + for th in np.linspace( + self._fov_left[0], + self._fov_left[1], + int( + round( + (self._fov_left[1] - self._fov_left[0]) / self.angle_increment + ) + ), + ) + } | { + round(th, 2) + for th in np.linspace( + self._fov_right[0], + self._fov_right[1], + int( + round( + (self._fov_right[1] - self._fov_right[0]) / self.angle_increment + ) + ), + ) + } # The size of the sensing region here is the area covered by the fan - self._sensing_region_size = self.fov / (2*math.pi) * math.pi * (max_range - min_range)**2 + self._sensing_region_size = ( + self.fov / (2 * math.pi) * math.pi * (max_range - min_range) ** 2 + ) def in_field_of_view(th, view_angles): """Determines if the beame at angle `th` is in a field of view of size `view_angles`. For example, the view_angles=180, means the range scanner scans 180 degrees - in front of the robot. By our angle convention, 180 degrees maps to [0,90] and [270, 360].""" + in front of the robot. By our angle convention, 180 degrees maps to [0,90] and [270, 360]. + """ fov_right = (0, view_angles / 2) - fov_left = (2*math.pi - view_angles/2, 2*math.pi) + fov_left = (2 * math.pi - view_angles / 2, 2 * math.pi) def within_range(self, robot_pose, point): """Returns true if the point is within range of the sensor; but the point might not @@ -99,8 +128,9 @@ def within_range(self, robot_pose, point): dist, bearing = self.shoot_beam(robot_pose, point) if not in_range(dist, (self.min_range, self.max_range)): return False - if (not in_range(bearing, self._fov_left))\ - and (not in_range(bearing, self._fov_right)): + if (not in_range(bearing, self._fov_left)) and ( + not in_range(bearing, self._fov_right) + ): return False return True @@ -108,16 +138,21 @@ def shoot_beam(self, robot_pose, point): """Shoots a beam from robot_pose at point. Returns the distance and bearing of the beame (i.e. 
the length and orientation of the beame)""" rx, ry, rth = robot_pose - dist = euclidean_dist(point, (rx,ry)) - bearing = (math.atan2(point[1] - ry, point[0] - rx) - rth) % (2*math.pi) # bearing (i.e. orientation) + dist = euclidean_dist(point, (rx, ry)) + bearing = (math.atan2(point[1] - ry, point[0] - rx) - rth) % ( + 2 * math.pi + ) # bearing (i.e. orientation) return (dist, bearing) def valid_beam(self, dist, bearing): """Returns true beam length (i.e. `dist`) is within range and its angle `bearing` is valid, that is, it is within the fov range and in accordance with the angle increment.""" - return dist >= self.min_range and dist <= self.max_range\ + return ( + dist >= self.min_range + and dist <= self.max_range and round(bearing, 2) in self._beams + ) def _build_beam_map(self, beam, point, beam_map={}): """beam_map (dict): Maps from bearing to (dist, point)""" @@ -125,7 +160,7 @@ def _build_beam_map(self, beam, point, beam_map={}): valid = self.valid_beam(dist, bearing) if not valid: return - bearing_key = round(bearing,2) + bearing_key = round(bearing, 2) if bearing_key in beam_map: # There's an object covered by this beame already. # see if this beame is closer @@ -180,9 +215,8 @@ def sensing_region_size(self): class ProximitySensor(Laser2DSensor): """This is a simple sensor; Observes a region centered at the robot.""" - def __init__(self, robot_id, - radius=5, - occlusion_enabled=False): + + def __init__(self, robot_id, radius=5, occlusion_enabled=False): """ radius (int or float) radius of the sensing region. """ @@ -197,9 +231,11 @@ def __init__(self, robot_id, angle_increment = 5 else: angle_increment = 0.25 - super().__init__(robot_id, - fov=360, - min_range=0.1, - max_range=radius, - angle_increment=angle_increment, - occlusion_enabled=occlusion_enabled) + super().__init__( + robot_id, + fov=360, + min_range=0.1, + max_range=radius, + angle_increment=angle_increment, + occlusion_enabled=occlusion_enabled, + ) diff --git a/pomdp_py/problems/multi_object_search/models/observation_model.py b/pomdp_py/problems/multi_object_search/models/observation_model.py index 424118e9..0ab962d2 100644 --- a/pomdp_py/problems/multi_object_search/models/observation_model.py +++ b/pomdp_py/problems/multi_object_search/models/observation_model.py @@ -25,20 +25,20 @@ from pomdp_py.problems.multi_object_search.domain.action import * from pomdp_py.problems.multi_object_search.domain.observation import * + #### Observation Models #### class MosObservationModel(pomdp_py.OOObservationModel): """Object-oriented transition model""" - def __init__(self, - dim, - sensor, - object_ids, - sigma=0.01, - epsilon=1): + + def __init__(self, dim, sensor, object_ids, sigma=0.01, epsilon=1): self.sigma = sigma self.epsilon = epsilon - observation_models = {objid: ObjectObservationModel(objid, sensor, dim, - sigma=sigma, epsilon=epsilon) - for objid in object_ids} + observation_models = { + objid: ObjectObservationModel( + objid, sensor, dim, sigma=sigma, epsilon=epsilon + ) + for objid in object_ids + } pomdp_py.OOObservationModel.__init__(self, observation_models) def sample(self, next_state, action, argmax=False, **kwargs): @@ -51,6 +51,7 @@ def sample(self, next_state, action, argmax=False, **kwargs): factored_observations = super().sample(next_state, action, argmax=argmax) return MosOOObservation.merge(factored_observations, next_state) + class ObjectObservationModel(pomdp_py.ObservationModel): def __init__(self, objid, sensor, dim, sigma=0, epsilon=1): """ @@ -97,13 +98,15 @@ def probability(self, observation, 
next_state, action, **kwargs): # The (funny) business of allowing histogram belief update using O(oi|si',sr',a). next_robot_state = kwargs.get("next_robot_state", None) if next_robot_state is not None: - assert next_robot_state["id"] == self._sensor.robot_id,\ - "Robot id of observation model mismatch with given state" + assert ( + next_robot_state["id"] == self._sensor.robot_id + ), "Robot id of observation model mismatch with given state" robot_pose = next_robot_state.pose if isinstance(next_state, ObjectState): - assert next_state["id"] == self._objid,\ - "Object id of observation model mismatch with given state" + assert ( + next_state["id"] == self._objid + ), "Object id of observation model mismatch with given state" object_pose = next_state.pose else: object_pose = next_state.pose(self._objid) @@ -113,7 +116,9 @@ def probability(self, observation, next_state, action, **kwargs): # Compute the probability zi = observation.pose - alpha, beta, gamma = self._compute_params(self._sensor.within_range(robot_pose, object_pose)) + alpha, beta, gamma = self._compute_params( + self._sensor.within_range(robot_pose, object_pose) + ) # Requires Python >= 3.6 prob = 0.0 @@ -124,9 +129,9 @@ def probability(self, observation, next_state, action, **kwargs): # This has 0.0 probability. prob += 0.0 * alpha else: - gaussian = pomdp_py.Gaussian(list(object_pose), - [[self.sigma**2, 0], - [0, self.sigma**2]]) + gaussian = pomdp_py.Gaussian( + list(object_pose), [[self.sigma**2, 0], [0, self.sigma**2]] + ) prob += gaussian[zi] * alpha # Event B @@ -137,7 +142,6 @@ def probability(self, observation, next_state, action, **kwargs): prob += pr_c * gamma return prob - def sample(self, next_state, action, **kwargs): """Returns observation""" if not isinstance(action, LookAction): @@ -148,21 +152,25 @@ def sample(self, next_state, action, **kwargs): object_pose = next_state.pose(self._objid) # Obtain observation according to distribution. - alpha, beta, gamma = self._compute_params(self._sensor.within_range(robot_pose, object_pose)) + alpha, beta, gamma = self._compute_params( + self._sensor.within_range(robot_pose, object_pose) + ) # Requires Python >= 3.6 - event_occured = random.choices(["A", "B", "C"], weights=[alpha, beta, gamma], k=1)[0] + event_occured = random.choices( + ["A", "B", "C"], weights=[alpha, beta, gamma], k=1 + )[0] zi = self._sample_zi(event_occured, next_state) return ObjectObservation(self._objid, zi) def argmax(self, next_state, action, **kwargs): # Obtain observation according to distribution. - alpha, beta, gamma = self._compute_params(self._sensor.within_range(robot_pose, object_pose)) + alpha, beta, gamma = self._compute_params( + self._sensor.within_range(robot_pose, object_pose) + ) - event_probs = {"A": alpha, - "B": beta, - "C": gamma} + event_probs = {"A": alpha, "B": beta, "C": gamma} event_occured = max(event_probs, key=lambda e: event_probs[e]) zi = self._sample_zi(event_occured, next_state, argmax=True) return ObjectObservation(self._objid, zi) @@ -170,9 +178,9 @@ def argmax(self, next_state, action, **kwargs): def _sample_zi(self, event, next_state, argmax=False): if event == "A": object_true_pose = next_state.object_pose(self._objid) - gaussian = pomdp_py.Gaussian(list(object_true_pose), - [[self.sigma**2, 0], - [0, self.sigma**2]]) + gaussian = pomdp_py.Gaussian( + list(object_true_pose), [[self.sigma**2, 0], [0, self.sigma**2]] + ) if not argmax: zi = gaussian.random() else: @@ -183,23 +191,29 @@ def _sample_zi(self, event, next_state, argmax=False): # TODO: FIX. 
zi should ONLY come from the field of view. # There is currently no easy way to sample from the field of view. width, height = self._dim - zi = (random.randint(0, width), # x axis - random.randint(0, height)) # y axis - else: # event == C + zi = ( + random.randint(0, width), # x axis + random.randint(0, height), + ) # y axis + else: # event == C zi = ObjectObservation.NULL return zi ### Unit test ### def unittest(): - from ..env.env import make_laser_sensor,\ - make_proximity_sensor, equip_sensors,\ - interpret, interpret_robot_id + from ..env.env import ( + make_laser_sensor, + make_proximity_sensor, + equip_sensors, + interpret, + interpret_robot_id, + ) + # Test within search region check, # and the observation model probability and # sampling functions. - worldmap =\ - """ + worldmap = """ .......... ....T..... ......x... @@ -208,34 +222,41 @@ def unittest(): ....T..... .......... """ - #0123456789 - # 10 x 8 - worldstr = equip_sensors(worldmap, - {"r": make_laser_sensor(90, (1,5), 0.5, False)}) + # 0123456789 + # 10 x 8 + worldstr = equip_sensors(worldmap, {"r": make_laser_sensor(90, (1, 5), 0.5, False)}) env = interpret(worldstr) robot_id = interpret_robot_id("r") robot_pose = env.state.pose(robot_id) # within_range test sensor = env.sensors[robot_id] - assert sensor.within_range(robot_pose, (4,3)) == False - assert sensor.within_range(robot_pose, (5,3)) == True - assert sensor.within_range(robot_pose, (6,3)) == True - assert sensor.within_range(robot_pose, (7,2)) == True - assert sensor.within_range(robot_pose, (7,3)) == True - assert sensor.within_range(robot_pose, (4,3)) == False - assert sensor.within_range(robot_pose, (2,4)) == False - assert sensor.within_range(robot_pose, (4,1)) == False - assert sensor.within_range(robot_pose, (4,5)) == False - assert sensor.within_range(robot_pose, (0,0)) == False + assert sensor.within_range(robot_pose, (4, 3)) == False + assert sensor.within_range(robot_pose, (5, 3)) == True + assert sensor.within_range(robot_pose, (6, 3)) == True + assert sensor.within_range(robot_pose, (7, 2)) == True + assert sensor.within_range(robot_pose, (7, 3)) == True + assert sensor.within_range(robot_pose, (4, 3)) == False + assert sensor.within_range(robot_pose, (2, 4)) == False + assert sensor.within_range(robot_pose, (4, 1)) == False + assert sensor.within_range(robot_pose, (4, 5)) == False + assert sensor.within_range(robot_pose, (0, 0)) == False print(env.state) # observation model test - O0 = ObjectObservationModel(0, sensor, (env.width, env.length), sigma=0.01, epsilon=1) - O2 = ObjectObservationModel(2, sensor, (env.width, env.length), sigma=0.01, epsilon=1) - O3 = ObjectObservationModel(3, sensor, (env.width, env.length), sigma=0.01, epsilon=1) - O5 = ObjectObservationModel(5, sensor, (env.width, env.length), sigma=0.01, epsilon=1) + O0 = ObjectObservationModel( + 0, sensor, (env.width, env.length), sigma=0.01, epsilon=1 + ) + O2 = ObjectObservationModel( + 2, sensor, (env.width, env.length), sigma=0.01, epsilon=1 + ) + O3 = ObjectObservationModel( + 3, sensor, (env.width, env.length), sigma=0.01, epsilon=1 + ) + O5 = ObjectObservationModel( + 5, sensor, (env.width, env.length), sigma=0.01, epsilon=1 + ) z0 = O0.sample(env.state, Look) assert z0.pose == ObjectObservation.NULL @@ -249,9 +270,12 @@ def unittest(): assert O0.probability(z0, env.state, Look) == 1.0 assert O2.probability(z2, env.state, Look) == 1.0 assert O3.probability(z3, env.state, Look) >= 1.0 - assert O3.probability(ObjectObservation(3, ObjectObservation.NULL), - env.state, Look) == 0.0 
+ assert ( + O3.probability(ObjectObservation(3, ObjectObservation.NULL), env.state, Look) + == 0.0 + ) assert O5.probability(z5, env.state, Look) == 1.0 + if __name__ == "__main__": unittest() diff --git a/pomdp_py/problems/multi_object_search/models/policy_model.py b/pomdp_py/problems/multi_object_search/models/policy_model.py index d0011576..f59a0118 100644 --- a/pomdp_py/problems/multi_object_search/models/policy_model.py +++ b/pomdp_py/problems/multi_object_search/models/policy_model.py @@ -7,6 +7,7 @@ import random from pomdp_py.problems.multi_object_search.domain.action import * + class PolicyModel(pomdp_py.RolloutPolicy): """Simple policy model. All actions are possible at any state.""" @@ -38,10 +39,9 @@ def get_all_actions(self, state=None, history=None): return ALL_MOTION_ACTIONS | {Look} | find_action else: if self._grid_map is not None: - valid_motions =\ - self._grid_map.valid_motions(self.robot_id, - state.pose(self.robot_id), - ALL_MOTION_ACTIONS) + valid_motions = self._grid_map.valid_motions( + self.robot_id, state.pose(self.robot_id), ALL_MOTION_ACTIONS + ) return valid_motions | {Look} | find_action else: return ALL_MOTION_ACTIONS | {Look} | find_action diff --git a/pomdp_py/problems/multi_object_search/models/reward_model.py b/pomdp_py/problems/multi_object_search/models/reward_model.py index 4adc31bc..98ad7ddb 100644 --- a/pomdp_py/problems/multi_object_search/models/reward_model.py +++ b/pomdp_py/problems/multi_object_search/models/reward_model.py @@ -3,6 +3,7 @@ import pomdp_py from pomdp_py.problems.multi_object_search.domain.action import * + class MosRewardModel(pomdp_py.RewardModel): def __init__(self, target_objects, big=1000, small=1, robot_id=None): """ @@ -15,14 +16,15 @@ def __init__(self, target_objects, big=1000, small=1, robot_id=None): self.small = small self._target_objects = target_objects - def probability(self, reward, state, action, next_state, normalized=False, **kwargs): + def probability( + self, reward, state, action, next_state, normalized=False, **kwargs + ): if reward == self._reward_func(state, action): return 1.0 else: return 0.0 - def sample(self, state, action, next_state, - normalized=False, robot_id=None): + def sample(self, state, action, next_state, normalized=False, robot_id=None): # deterministic return self._reward_func(state, action, next_state, robot_id=robot_id) @@ -30,21 +32,25 @@ def argmax(self, state, action, next_state, normalized=False, robot_id=None): """Returns the most likely reward""" return self._reward_func(state, action, next_state, robot_id=robot_id) + class GoalRewardModel(MosRewardModel): """ This is a reward where the agent gets reward only for detect-related actions. """ + def _reward_func(self, state, action, next_state, robot_id=None): if robot_id is None: - assert self._robot_id is not None,\ - "Reward must be computed with respect to one robot." + assert ( + self._robot_id is not None + ), "Reward must be computed with respect to one robot." robot_id = self._robot_id reward = 0 # If the robot has detected all objects - if len(state.object_states[robot_id]['objects_found'])\ - == len(self._target_objects): + if len(state.object_states[robot_id]["objects_found"]) == len( + self._target_objects + ): return 0 # no reward or penalty; the task is finished. 
if isinstance(action, MotionAction): @@ -52,13 +58,15 @@ def _reward_func(self, state, action, next_state, robot_id=None): elif isinstance(action, LookAction): reward = reward - self.small elif isinstance(action, FindAction): - if state.object_states[robot_id]['camera_direction'] is None: + if state.object_states[robot_id]["camera_direction"] is None: # The robot didn't look before detect. So nothing is in the field of view. reward -= self.big else: # transition function should've taken care of the detection. - new_objects_count = len(set(next_state.object_states[robot_id].objects_found)\ - - set(state.object_states[robot_id].objects_found)) + new_objects_count = len( + set(next_state.object_states[robot_id].objects_found) + - set(state.object_states[robot_id].objects_found) + ) if new_objects_count == 0: # No new detection. "detect" is a bad action. reward -= self.big diff --git a/pomdp_py/problems/multi_object_search/models/transition_model.py b/pomdp_py/problems/multi_object_search/models/transition_model.py index 4b0d28de..92e97c8a 100644 --- a/pomdp_py/problems/multi_object_search/models/transition_model.py +++ b/pomdp_py/problems/multi_object_search/models/transition_model.py @@ -8,12 +8,14 @@ Transition: deterministic """ + import pomdp_py import copy from pomdp_py.problems.multi_object_search.domain.state import * from pomdp_py.problems.multi_object_search.domain.observation import * from pomdp_py.problems.multi_object_search.domain.action import * + ####### Transition Model ####### class MosTransitionModel(pomdp_py.OOTransitionModel): """Object-oriented transition model; The transition model supports the @@ -21,22 +23,23 @@ class MosTransitionModel(pomdp_py.OOTransitionModel): multi-robot transition model should be used by the Environment, but not necessarily by each robot for planning. """ - def __init__(self, - dim, sensors, object_ids, - epsilon=1e-9): + + def __init__(self, dim, sensors, object_ids, epsilon=1e-9): """ sensors (dict): robot_id -> Sensor for_env (bool): True if this is a robot transition model used by the Environment. see RobotTransitionModel for details. 
""" self._sensors = sensors - transition_models = {objid: StaticObjectTransitionModel(objid, epsilon=epsilon) - for objid in object_ids - if objid not in sensors} + transition_models = { + objid: StaticObjectTransitionModel(objid, epsilon=epsilon) + for objid in object_ids + if objid not in sensors + } for robot_id in sensors: - transition_models[robot_id] = RobotTransitionModel(sensors[robot_id], - dim, - epsilon=epsilon) + transition_models[robot_id] = RobotTransitionModel( + sensors[robot_id], dim, epsilon=epsilon + ) super().__init__(transition_models) def sample(self, state, action, **kwargs): @@ -47,14 +50,16 @@ def argmax(self, state, action, normalized=False, **kwargs): oostate = pomdp_py.OOTransitionModel.argmax(self, state, action, **kwargs) return MosOOState(oostate.object_states) + class StaticObjectTransitionModel(pomdp_py.TransitionModel): """This model assumes the object is static.""" + def __init__(self, objid, epsilon=1e-9): self._objid = objid self._epsilon = epsilon def probability(self, next_object_state, state, action): - if next_object_state != state.object_states[next_object_state['id']]: + if next_object_state != state.object_states[next_object_state["id"]]: return self._epsilon else: return 1.0 - self._epsilon @@ -70,6 +75,7 @@ def argmax(self, state, action): class RobotTransitionModel(pomdp_py.TransitionModel): """We assume that the robot control is perfect and transitions are deterministic.""" + def __init__(self, sensor, dim, epsilon=1e-9): """ dim (tuple): a tuple (width, length) for the dimension of the world @@ -81,8 +87,7 @@ def __init__(self, sensor, dim, epsilon=1e-9): self._epsilon = epsilon @classmethod - def if_move_by(cls, robot_id, state, action, dim, - check_collision=True): + def if_move_by(cls, robot_id, state, action, dim, check_collision=True): """Defines the dynamics of robot motion; dim (tuple): the width, length of the search world.""" if not isinstance(action, MotionAction): @@ -99,15 +104,18 @@ def if_move_by(cls, robot_id, state, action, dim, # odometry motion model forward, angle = action.motion rth += angle # angle (radian) - rx = int(round(rx + forward*math.cos(rth))) - ry = int(round(ry + forward*math.sin(rth))) - rth = rth % (2*math.pi) - - if valid_pose((rx, ry, rth), - dim[0], dim[1], - state=state, - check_collision=check_collision, - pose_objid=robot_id): + rx = int(round(rx + forward * math.cos(rth))) + ry = int(round(ry + forward * math.sin(rth))) + rth = rth % (2 * math.pi) + + if valid_pose( + (rx, ry, rth), + dim[0], + dim[1], + state=state, + check_collision=check_collision, + pose_objid=robot_id, + ): return (rx, ry, rth) else: return robot_pose # no change because change results in invalid pose @@ -127,29 +135,34 @@ def argmax(self, state, action): next_robot_state = copy.deepcopy(robot_state) # camera direction is only not None when looking - next_robot_state['camera_direction'] = None + next_robot_state["camera_direction"] = None if isinstance(action, MotionAction): # motion action - next_robot_state['pose'] = \ - RobotTransitionModel.if_move_by(self._robot_id, - state, action, self._dim) + next_robot_state["pose"] = RobotTransitionModel.if_move_by( + self._robot_id, state, action, self._dim + ) elif isinstance(action, LookAction): if hasattr(action, "motion") and action.motion is not None: # rotate the robot - next_robot_state['pose'] = \ - self._if_move_by(self._robot_id, - state, action, self._dim) - next_robot_state['camera_direction'] = action.name + next_robot_state["pose"] = self._if_move_by( + self._robot_id, 
state, action, self._dim
+                )
+                next_robot_state["camera_direction"] = action.name
         elif isinstance(action, FindAction):
             robot_pose = state.pose(self._robot_id)
             z = self._sensor.observe(robot_pose, state)
             # Update "objects_found" set for target objects
-            observed_target_objects = {objid
-                                       for objid in z.objposes
-                                       if (state.object_states[objid].objclass == "target"\
-                                           and z.objposes[objid] != ObjectObservation.NULL)}
-            next_robot_state["objects_found"] = tuple(set(next_robot_state['objects_found'])\
-                                                      | set(observed_target_objects))
+            observed_target_objects = {
+                objid
+                for objid in z.objposes
+                if (
+                    state.object_states[objid].objclass == "target"
+                    and z.objposes[objid] != ObjectObservation.NULL
+                )
+            }
+            next_robot_state["objects_found"] = tuple(
+                set(next_robot_state["objects_found"]) | set(observed_target_objects)
+            )
         return next_robot_state

     def sample(self, state, action):
@@ -173,19 +186,19 @@ def valid_pose(pose, width, length, state=None, check_collision=True, pose_objid
         if state.object_states[objid].objclass.startswith("obstacle"):
             if objid == pose_objid:
                 continue
-            if (x,y) == object_poses[objid]:
+            if (x, y) == object_poses[objid]:
                 return False
     return in_boundary(pose, width, length)


 def in_boundary(pose, width, length):
     # Check if in boundary
-    x,y = pose[:2]
+    x, y = pose[:2]
     if x >= 0 and x < width:
         if y >= 0 and y < length:
             if len(pose) == 3:
                 th = pose[2]  # radian
-                if th < 0 or th > 2*math.pi:
+                if th < 0 or th > 2 * math.pi:
                     return False
             return True
     return False
diff --git a/pomdp_py/problems/multi_object_search/problem.py b/pomdp_py/problems/multi_object_search/problem.py
index c62f6582..b3926b89 100644
--- a/pomdp_py/problems/multi_object_search/problem.py
+++ b/pomdp_py/problems/multi_object_search/problem.py
@@ -2,6 +2,7 @@
 Uses the domain, models, and agent/environment
 to actually define the POMDP problem for multi-object search.
 Then, solve it using POUCT or POMCP."""
+
 import pomdp_py
 from pomdp_py.problems.multi_object_search.env.env import *
 from pomdp_py.problems.multi_object_search.env.visual import *
@@ -13,6 +14,7 @@
 import time
 import random

+
 class MosOOPOMDP(pomdp_py.OOPOMDP):
     """
     A MosOOPOMDP is instantiated given a string description
@@ -27,10 +29,20 @@ class MosOOPOMDP(pomdp_py.OOPOMDP):
     could construct an Environment object and give None to
     the object poses.
     """
-    def __init__(self, robot_id, env=None, grid_map=None,
-                 sensors=None, sigma=0.01, epsilon=1,
-                 belief_rep="histogram", prior={}, num_particles=100,
-                 agent_has_map=False):
+
+    def __init__(
+        self,
+        robot_id,
+        env=None,
+        grid_map=None,
+        sensors=None,
+        sigma=0.01,
+        epsilon=1,
+        belief_rep="histogram",
+        prior={},
+        num_particles=100,
+        agent_has_map=False,
+    ):
         """
         Args:
             robot_id (int or str): the id of the agent that will solve this MosOOPOMDP.
@@ -59,15 +71,14 @@ def __init__(self, robot_id, env=None, grid_map=None,
             num_particles (int): setting for the particle belief representation
         """
         if env is None:
-            assert grid_map is not None and sensors is not None,\
-                "Since env is not provided, you must provide string descriptions"\
+            assert grid_map is not None and sensors is not None, (
+                "Since env is not provided, you must provide string descriptions "
                 "of the world and sensors."
+            )
             worldstr = equip_sensors(grid_map, sensors)
             dim, robots, objects, obstacles, sensors = interpret(worldstr)
             init_state = MosOOState({**objects, **robots})
-            env = MosEnvironment(dim,
-                                 init_state, sensors,
-                                 obstacles=obstacles)
+            env = MosEnvironment(dim, init_state, sensors, obstacles=obstacles)

         # construct prior
         if type(prior) == str:
@@ -84,22 +95,33 @@
         # only defined over a single agent. Perhaps, MultiAgent is just a kind
         # of Agent, which will make the implementation of multi-agent POMDP cleaner.
         robot_id = robot_id if type(robot_id) == int else interpret_robot_id(robot_id)
-        grid_map = GridMap(env.width, env.length,
-                           {objid: env.state.pose(objid)
-                            for objid in env.obstacles}) if agent_has_map else None
-        agent = MosAgent(robot_id,
-                         env.state.object_states[robot_id],
-                         env.target_objects,
-                         (env.width, env.length),
-                         env.sensors[robot_id],
-                         sigma=sigma,
-                         epsilon=epsilon,
-                         belief_rep=belief_rep,
-                         prior=prior,
-                         num_particles=num_particles,
-                         grid_map=grid_map)
-        super().__init__(agent, env,
-                         name="MOS(%d,%d,%d)" % (env.width, env.length, len(env.target_objects)))
+        grid_map = (
+            GridMap(
+                env.width,
+                env.length,
+                {objid: env.state.pose(objid) for objid in env.obstacles},
+            )
+            if agent_has_map
+            else None
+        )
+        agent = MosAgent(
+            robot_id,
+            env.state.object_states[robot_id],
+            env.target_objects,
+            (env.width, env.length),
+            env.sensors[robot_id],
+            sigma=sigma,
+            epsilon=epsilon,
+            belief_rep=belief_rep,
+            prior=prior,
+            num_particles=num_particles,
+            grid_map=grid_map,
+        )
+        super().__init__(
+            agent,
+            env,
+            name="MOS(%d,%d,%d)" % (env.width, env.length, len(env.target_objects)),
+        )


 ### Belief Update ###
@@ -146,31 +168,37 @@ def belief_update(agent, real_action, real_observation, next_robot_state, planne
         #   observation model from O(oi|s',a) to O(label_i|s',a) and
         #   it becomes easy to define O(label_i=i|s',a) and O(label_i=FREE|s',a).
         #   These ideas are used in my recent 3D object search work.
-            new_belief = pomdp_py.update_histogram_belief(belief_obj,
-                                                          real_action,
-                                                          real_observation.for_obj(objid),
-                                                          agent.observation_model[objid],
-                                                          agent.transition_model[objid],
-                                                          # The agent knows the objects are static.
-                                                          static_transition=objid != agent.robot_id,
-                                                          oargs={"next_robot_state": next_robot_state})
+            new_belief = pomdp_py.update_histogram_belief(
+                belief_obj,
+                real_action,
+                real_observation.for_obj(objid),
+                agent.observation_model[objid],
+                agent.transition_model[objid],
+                # The agent knows the objects are static.
+                static_transition=objid != agent.robot_id,
+                oargs={"next_robot_state": next_robot_state},
+            )
         else:
-            raise ValueError("Unexpected program state."\
-                             "Are you using the appropriate belief representation?")
+            raise ValueError(
+                "Unexpected program state. "
+                "Are you using the appropriate belief representation?"
+            )

         agent.cur_belief.set_object_belief(objid, new_belief)


 ### Solve the problem with POUCT/POMCP planner ###
 ### This is the main online POMDP solver logic ###
-def solve(problem,
-          max_depth=10,  # planning horizon
-          discount_factor=0.99,
-          planning_time=1.,  # amount of time (s) to plan each step
-          exploration_const=1000,  # exploration constant
-          visualize=True,
-          max_time=120,  # maximum amount of time allowed to solve the problem
-          max_steps=500):  # maximum number of planning steps the agent can take.
+def solve( + problem, + max_depth=10, # planning horizon + discount_factor=0.99, + planning_time=1.0, # amount of time (s) to plan each step + exploration_const=1000, # exploration constant + visualize=True, + max_time=120, # maximum amount of time allowed to solve the problem + max_steps=500, +): # maximum number of planning steps the agent can take. """ This function terminates when: - maximum time (max_time) reached; This time includes planning and updates @@ -185,31 +213,35 @@ def solve(problem, random_object_belief = problem.agent.belief.object_beliefs[random_objid] if isinstance(random_object_belief, pomdp_py.Histogram): # Use POUCT - planner = pomdp_py.POUCT(max_depth=max_depth, - discount_factor=discount_factor, - planning_time=planning_time, - exploration_const=exploration_const, - rollout_policy=problem.agent.policy_model) # Random by default + planner = pomdp_py.POUCT( + max_depth=max_depth, + discount_factor=discount_factor, + planning_time=planning_time, + exploration_const=exploration_const, + rollout_policy=problem.agent.policy_model, + ) # Random by default elif isinstance(random_object_belief, pomdp_py.Particles): # Use POMCP - planner = pomdp_py.POMCP(max_depth=max_depth, - discount_factor=discount_factor, - planning_time=planning_time, - exploration_const=exploration_const, - rollout_policy=problem.agent.policy_model) # Random by default + planner = pomdp_py.POMCP( + max_depth=max_depth, + discount_factor=discount_factor, + planning_time=planning_time, + exploration_const=exploration_const, + rollout_policy=problem.agent.policy_model, + ) # Random by default else: - raise ValueError("Unsupported object belief type %s" % str(type(random_object_belief))) + raise ValueError( + "Unsupported object belief type %s" % str(type(random_object_belief)) + ) robot_id = problem.agent.robot_id if visualize: - viz = MosViz(problem.env, controllable=False) # controllable=False means no keyboard control. + viz = MosViz( + problem.env, controllable=False + ) # controllable=False means no keyboard control. if viz.on_init() == False: raise Exception("Environment failed to initialize") - viz.update(robot_id, - None, - None, - None, - problem.agent.cur_belief) + viz.update(robot_id, None, None, None, problem.agent.cur_belief) viz.on_render() _time_used = 0 @@ -224,32 +256,38 @@ def solve(problem, break # no more time to update. 
# Execute action - reward = problem.env.state_transition(real_action, execute=True, - robot_id=robot_id) + reward = problem.env.state_transition( + real_action, execute=True, robot_id=robot_id + ) # Receive observation _start = time.time() - real_observation = \ - problem.env.provide_observation(problem.agent.observation_model, real_action) + real_observation = problem.env.provide_observation( + problem.agent.observation_model, real_action + ) # Updates problem.agent.clear_history() # truncate history problem.agent.update_history(real_action, real_observation) - belief_update(problem.agent, real_action, real_observation, - problem.env.state.object_states[robot_id], - planner) + belief_update( + problem.agent, + real_action, + real_observation, + problem.env.state.object_states[robot_id], + planner, + ) _time_used += time.time() - _start # Info and render _total_reward += reward if isinstance(real_action, FindAction): _find_actions_count += 1 - print("==== Step %d ====" % (i+1)) + print("==== Step %d ====" % (i + 1)) print("Action: %s" % str(real_action)) print("Observation: %s" % str(real_observation)) print("Reward: %s" % str(reward)) print("Reward (Cumulative): %s" % str(_total_reward)) - print("Find Actions Count: %d" % _find_actions_count) + print("Find Actions Count: %d" % _find_actions_count) if isinstance(planner, pomdp_py.POUCT): print("__num_sims__: %d" % planner.last_num_sims) @@ -258,21 +296,27 @@ def solve(problem, # according to observation model. robot_pose = problem.env.state.object_states[robot_id].pose viz_observation = MosOOObservation({}) - if isinstance(real_action, LookAction) or isinstance(real_action, FindAction): - viz_observation = \ - problem.env.sensors[robot_id].observe(robot_pose, - problem.env.state) - viz.update(robot_id, - real_action, - real_observation, - viz_observation, - problem.agent.cur_belief) + if isinstance(real_action, LookAction) or isinstance( + real_action, FindAction + ): + viz_observation = problem.env.sensors[robot_id].observe( + robot_pose, problem.env.state + ) + viz.update( + robot_id, + real_action, + real_observation, + viz_observation, + problem.agent.cur_belief, + ) viz.on_loop() viz.on_render() # Termination check - if set(problem.env.state.object_states[robot_id].objects_found)\ - == problem.env.target_objects: + if ( + set(problem.env.state.object_states[robot_id].objects_found) + == problem.env.target_objects + ): print("Done!") break if _find_actions_count >= len(problem.env.target_objects): @@ -282,27 +326,33 @@ def solve(problem, print("Maximum time reached.") break + # Test def unittest(): # random world grid_map, robot_char = random_world(10, 10, 5, 10) laserstr = make_laser_sensor(90, (1, 4), 0.5, False) proxstr = make_proximity_sensor(4, False) - problem = MosOOPOMDP(robot_char, # r is the robot character - sigma=0.05, # observation model parameter - epsilon=0.95, # observation model parameter - grid_map=grid_map, - sensors={robot_char: proxstr}, - prior="uniform", - agent_has_map=True) - solve(problem, - max_depth=10, - discount_factor=0.99, - planning_time=1., - exploration_const=1000, - visualize=True, - max_time=120, - max_steps=500) + problem = MosOOPOMDP( + robot_char, # r is the robot character + sigma=0.05, # observation model parameter + epsilon=0.95, # observation model parameter + grid_map=grid_map, + sensors={robot_char: proxstr}, + prior="uniform", + agent_has_map=True, + ) + solve( + problem, + max_depth=10, + discount_factor=0.99, + planning_time=1.0, + exploration_const=1000, + visualize=True, + 
max_time=120, + max_steps=500, + ) + if __name__ == "__main__": unittest() diff --git a/pomdp_py/problems/rocksample/rocksample_problem.py b/pomdp_py/problems/rocksample/rocksample_problem.py index 8e8036ef..c02d5efe 100644 --- a/pomdp_py/problems/rocksample/rocksample_problem.py +++ b/pomdp_py/problems/rocksample/rocksample_problem.py @@ -31,6 +31,7 @@ Initial belief: every rock has equal probability of being Good or Bad. """ + import pomdp_py import random import math @@ -40,26 +41,31 @@ EPSILON = 1e-9 + def euclidean_dist(p1, p2): - return math.sqrt((p1[0] - p2[0])**2 + (p1[1] - p2[1])**2) + return math.sqrt((p1[0] - p2[0]) ** 2 + (p1[1] - p2[1]) ** 2) + class RockType: - GOOD = 'good' - BAD = 'bad' + GOOD = "good" + BAD = "bad" + @staticmethod def invert(rocktype): - if rocktype == 'good': - return 'bad' + if rocktype == "good": + return "bad" else: - return 'good' + return "good" # return 1 - rocktype + @staticmethod def random(p=0.5): - if random.uniform(0,1) >= p: + if random.uniform(0, 1) >= p: return RockType.GOOD else: return RockType.BAD + class State(pomdp_py.State): def __init__(self, position, rocktypes, terminal=False): """ @@ -79,11 +85,14 @@ def __init__(self, position, rocktypes, terminal=False): def __hash__(self): return hash((self.position, self.rocktypes, self.terminal)) + def __eq__(self, other): if isinstance(other, State): - return self.position == other.position\ - and self.rocktypes == other.rocktypes\ + return ( + self.position == other.position + and self.rocktypes == other.rocktypes and self.terminal == other.terminal + ) else: return False @@ -91,20 +100,29 @@ def __str__(self): return self.__repr__() def __repr__(self): - return "State(%s | %s | %s)" % (str(self.position), str(self.rocktypes), str(self.terminal)) + return "State(%s | %s | %s)" % ( + str(self.position), + str(self.rocktypes), + str(self.terminal), + ) + class Action(pomdp_py.Action): def __init__(self, name): self.name = name + def __hash__(self): return hash(self.name) + def __eq__(self, other): if isinstance(other, Action): return self.name == other.name elif type(other) == str: return self.name == other + def __str__(self): return self.name + def __repr__(self): return "Action(%s)" % self.name @@ -114,62 +132,77 @@ class MoveAction(Action): WEST = (-1, 0) NORTH = (0, -1) SOUTH = (0, 1) + def __init__(self, motion, name): - if motion not in {MoveAction.EAST, MoveAction.WEST, - MoveAction.NORTH, MoveAction.SOUTH}: + if motion not in { + MoveAction.EAST, + MoveAction.WEST, + MoveAction.NORTH, + MoveAction.SOUTH, + }: raise ValueError("Invalid move motion %s" % motion) self.motion = motion super().__init__("move-%s" % str(name)) + MoveEast = MoveAction(MoveAction.EAST, "EAST") MoveWest = MoveAction(MoveAction.WEST, "WEST") MoveNorth = MoveAction(MoveAction.NORTH, "NORTH") MoveSouth = MoveAction(MoveAction.SOUTH, "SOUTH") + class SampleAction(Action): def __init__(self): super().__init__("sample") + class CheckAction(Action): def __init__(self, rock_id): self.rock_id = rock_id super().__init__("check-%d" % self.rock_id) + class Observation(pomdp_py.Observation): def __init__(self, quality): self.quality = quality + def __hash__(self): return hash(self.quality) + def __eq__(self, other): if isinstance(other, Observation): return self.quality == other.quality elif type(other) == str: return self.quality == other + def __str__(self): return str(self.quality) + def __repr__(self): return "Observation(%s)" % str(self.quality) -class RSTransitionModel(pomdp_py.TransitionModel): - """ The model is 
deterministic """ +class RSTransitionModel(pomdp_py.TransitionModel): + """The model is deterministic""" def __init__(self, n, rock_locs, in_exit_area): """ rock_locs: a map from (x,y) location to rock_id - in_exit_area: a function (x,y) -> Bool that returns True if (x,y) is in exit area""" + in_exit_area: a function (x,y) -> Bool that returns True if (x,y) is in exit area + """ self._n = n self._rock_locs = rock_locs self._in_exit_area = in_exit_area def _move_or_exit(self, position, action): - expected = (position[0] + action.motion[0], - position[1] + action.motion[1]) + expected = (position[0] + action.motion[0], position[1] + action.motion[1]) if self._in_exit_area(expected): return expected, True else: - return (max(0, min(position[0] + action.motion[0], self._n-1)), - max(0, min(position[1] + action.motion[1], self._n-1))), False + return ( + max(0, min(position[0] + action.motion[0], self._n - 1)), + max(0, min(position[1] + action.motion[1], self._n - 1)), + ), False def probability(self, next_state, state, action, normalized=False, **kwargs): if next_state != self.sample(state, action): @@ -205,7 +238,7 @@ def argmax(self, state, action): class RSObservationModel(pomdp_py.ObservationModel): def __init__(self, rock_locs, half_efficiency_dist=20): self._half_efficiency_dist = half_efficiency_dist - self._rocks = {rock_locs[pos]:pos for pos in rock_locs} + self._rocks = {rock_locs[pos]: pos for pos in rock_locs} def probability(self, observation, next_state, action): if isinstance(action, CheckAction): @@ -283,12 +316,15 @@ def sample(self, state, action, next_state, normalized=False, **kwargs): def argmax(self, state, action, next_state, normalized=False, **kwargs): raise NotImplementedError - def probability(self, reward, state, action, next_state, normalized=False, **kwargs): + def probability( + self, reward, state, action, next_state, normalized=False, **kwargs + ): raise NotImplementedError class RSPolicyModel(pomdp_py.RolloutPolicy): """Simple policy model according to problem description.""" + def __init__(self, n, k): check_actions = set({CheckAction(rock_id) for rock_id in range(k)}) self._move_actions = {MoveEast, MoveWest, MoveNorth, MoveSouth} @@ -326,13 +362,11 @@ def rollout(self, state, history=None): class RockSampleProblem(pomdp_py.POMDP): - @staticmethod def random_free_location(n, not_free_locs): """returns a random (x,y) location in nxn grid that is free.""" while True: - loc = (random.randint(0, n-1), - random.randint(0, n-1)) + loc = (random.randint(0, n - 1), random.randint(0, n - 1)) if loc not in not_free_locs: return loc @@ -343,10 +377,12 @@ def in_exit_area(self, pos): def generate_instance(n, k): """Returns init_state and rock locations for an instance of RockSample(n,k)""" - rover_position = (0, random.randint(0, n-1)) + rover_position = (0, random.randint(0, n - 1)) rock_locs = {} # map from rock location to rock id for i in range(k): - loc = RockSampleProblem.random_free_location(n, set(rock_locs.keys()) | set({rover_position})) + loc = RockSampleProblem.random_free_location( + n, set(rock_locs.keys()) | set({rover_position}) + ) rock_locs[loc] = i # random rocktypes @@ -363,29 +399,29 @@ def print_state(self): rocktypes = self.env.state.rocktypes # Rock id map for y in range(self._n): - for x in range(self._n+1): + for x in range(self._n + 1): char = "." 
if x == self._n: char = ">" - if (x,y) in self._rock_locs: - char = str(self._rock_locs[(x,y)]) - if (x,y) == rover_position: + if (x, y) in self._rock_locs: + char = str(self._rock_locs[(x, y)]) + if (x, y) == rover_position: char = "R" string += char string += "\n" string += "_____G/B_____\n" # Good/bad map for y in range(self._n): - for x in range(self._n+1): + for x in range(self._n + 1): char = "." if x == self._n: char = ">" - if (x,y) in self._rock_locs: - if rocktypes[self._rock_locs[(x,y)]] == RockType.GOOD: + if (x, y) in self._rock_locs: + if rocktypes[self._rock_locs[(x, y)]] == RockType.GOOD: char = "$" else: char = "x" - if (x,y) == rover_position: + if (x, y) == rover_position: char = "R" string += char string += "\n" @@ -393,14 +429,18 @@ def print_state(self): def __init__(self, n, k, init_state, rock_locs, init_belief): self._n, self._k = n, k - agent = pomdp_py.Agent(init_belief, - RSPolicyModel(n,k), - RSTransitionModel(n, rock_locs, self.in_exit_area), - RSObservationModel(rock_locs), - RSRewardModel(rock_locs, self.in_exit_area)) - env = pomdp_py.Environment(init_state, - RSTransitionModel(n, rock_locs, self.in_exit_area), - RSRewardModel(rock_locs, self.in_exit_area)) + agent = pomdp_py.Agent( + init_belief, + RSPolicyModel(n, k), + RSTransitionModel(n, rock_locs, self.in_exit_area), + RSObservationModel(rock_locs), + RSRewardModel(rock_locs, self.in_exit_area), + ) + env = pomdp_py.Environment( + init_state, + RSTransitionModel(n, rock_locs, self.in_exit_area), + RSRewardModel(rock_locs, self.in_exit_area), + ) self._rock_locs = rock_locs super().__init__(agent, env, name="RockSampleProblem") @@ -410,7 +450,7 @@ def test_planner(rocksample, planner, nsteps=3, discount=0.95): total_reward = 0 total_discounted_reward = 0 for i in range(nsteps): - print("==== Step %d ====" % (i+1)) + print("==== Step %d ====" % (i + 1)) action = planner.plan(rocksample.agent) # pomdp_py.visual.visualize_pouct_search_tree(rocksample.agent.tree, # max_depth=5, anonymize=False) @@ -419,8 +459,9 @@ def test_planner(rocksample, planner, nsteps=3, discount=0.95): env_reward = rocksample.env.state_transition(action, execute=True) true_next_state = copy.deepcopy(rocksample.env.state) - real_observation = rocksample.env.provide_observation(rocksample.agent.observation_model, - action) + real_observation = rocksample.env.provide_observation( + rocksample.agent.observation_model, action + ) rocksample.agent.update_history(action, real_observation) planner.update(rocksample.agent, action, real_observation) total_reward += env_reward @@ -460,6 +501,7 @@ def init_particles_belief(k, num_particles, init_state, belief="uniform"): init_belief = pomdp_py.Particles(particles) return init_belief + def main(): n, k = 5, 5 init_state, rock_locs = RockSampleProblem.generate_instance(n, k) @@ -486,10 +528,14 @@ def main(): rocksample.print_state() print("*** Testing POMCP ***") - pomcp = pomdp_py.POMCP(max_depth=20, discount_factor=0.95, - num_sims=10000, exploration_const=20, - rollout_policy=rocksample.agent.policy_model, - num_visits_init=1) + pomcp = pomdp_py.POMCP( + max_depth=20, + discount_factor=0.95, + num_sims=10000, + exploration_const=20, + rollout_policy=rocksample.agent.policy_model, + num_visits_init=1, + ) tt, ttd = test_planner(rocksample, pomcp, nsteps=100, discount=0.95) rocksample.env.state.position = init_state_copy.position @@ -499,5 +545,5 @@ def main(): rocksample.agent.set_belief(init_belief) -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git 
a/pomdp_py/problems/tag/__init__.py b/pomdp_py/problems/tag/__init__.py index 8b137891..e69de29b 100644 --- a/pomdp_py/problems/tag/__init__.py +++ b/pomdp_py/problems/tag/__init__.py @@ -1 +0,0 @@ - diff --git a/pomdp_py/problems/tag/agent/agent.py b/pomdp_py/problems/tag/agent/agent.py index dd84a67b..1a166d9b 100644 --- a/pomdp_py/problems/tag/agent/agent.py +++ b/pomdp_py/problems/tag/agent/agent.py @@ -11,6 +11,7 @@ from pomdp_py.problems.tag.models.components.motion_policy import * from pomdp_py.problems.tag.models.components.grid_map import * + ## initialize belief def initialize_belief(grid_map, init_robot_position, prior={}): """Initialize belief. @@ -28,12 +29,12 @@ def initialize_belief(grid_map, init_robot_position, prior={}): for x in range(grid_map.width): for y in range(grid_map.length): - if (x,y) in grid_map.obstacle_poses: + if (x, y) in grid_map.obstacle_poses: # Skip obstacles continue - state = TagState(init_robot_position, (x,y), False) + state = TagState(init_robot_position, (x, y), False) if len(prior) > 0: - if (x,y) not in prior: + if (x, y) not in prior: hist[state] = 1e-9 else: hist[state] = 1.0 @@ -45,7 +46,10 @@ def initialize_belief(grid_map, init_robot_position, prior={}): hist_belief = pomdp_py.Histogram(hist) return hist_belief -def initialize_particles_belief(grid_map, init_robot_position, num_particles=100, prior={}): + +def initialize_particles_belief( + grid_map, init_robot_position, num_particles=100, prior={} +): """Initialize belief. Args: @@ -63,8 +67,10 @@ def initialize_particles_belief(grid_map, init_robot_position, num_particles=100 particles.append(state) else: while len(particles) < num_particles: - target_position = (random.randint(0, grid_map.width-1), - random.randint(0, grid_map.length-1)) + target_position = ( + random.randint(0, grid_map.width - 1), + random.randint(0, grid_map.length - 1), + ) if target_position in grid_map.obstacle_poses: # Skip obstacles continue @@ -77,7 +83,9 @@ def initialize_particles_belief(grid_map, init_robot_position, num_particles=100 def belief_update(agent, real_action, real_observation): # Update agent belief current_mpe_state = agent.cur_belief.mpe() - next_robot_position = agent.transition_model.sample(current_mpe_state, real_action).robot_position + next_robot_position = agent.transition_model.sample( + current_mpe_state, real_action + ).robot_position next_state_space = set({}) for state in agent.cur_belief: @@ -86,33 +94,32 @@ def belief_update(agent, real_action, real_observation): next_state_space.add(next_state) new_belief = pomdp_py.update_histogram_belief( - agent.cur_belief, real_action, real_observation, - agent.observation_model, agent.transition_model, - next_state_space=next_state_space) + agent.cur_belief, + real_action, + real_observation, + agent.observation_model, + agent.transition_model, + next_state_space=next_state_space, + ) agent.set_belief(new_belief) -class TagAgent(pomdp_py.Agent): - def __init__(self, - init_belief, - grid_map, - pr_stay=0.2, - small=1, - big=10): +class TagAgent(pomdp_py.Agent): + def __init__(self, init_belief, grid_map, pr_stay=0.2, small=1, big=10): self._grid_map = grid_map - target_motion_policy = TagTargetMotionPolicy(grid_map, - pr_stay) - transition_model = TagTransitionModel(grid_map, - target_motion_policy) + target_motion_policy = TagTargetMotionPolicy(grid_map, pr_stay) + transition_model = TagTransitionModel(grid_map, target_motion_policy) reward_model = TagRewardModel(small=small, big=big) observation_model = TagObservationModel() policy_model = 
TagPolicyModel(grid_map=grid_map)
-        super().__init__(init_belief,
-                         policy_model,
-                         transition_model=transition_model,
-                         observation_model=observation_model,
-                         reward_model=reward_model)
+        super().__init__(
+            init_belief,
+            policy_model,
+            transition_model=transition_model,
+            observation_model=observation_model,
+            reward_model=reward_model,
+        )

     def clear_history(self):
         """Custom function; clear history"""
diff --git a/pomdp_py/problems/tag/domain/action.py b/pomdp_py/problems/tag/domain/action.py
index 04694473..5b73eafd 100644
--- a/pomdp_py/problems/tag/domain/action.py
+++ b/pomdp_py/problems/tag/domain/action.py
@@ -6,11 +6,18 @@
 # Reuses the actions in the multi object search domain
 import pomdp_py
-from pomdp_py.problems.multi_object_search.domain.action\
-    import Action, MotionAction, MoveEast2D, MoveWest2D, MoveSouth2D, MoveNorth2D
+from pomdp_py.problems.multi_object_search.domain.action import (
+    Action,
+    MotionAction,
+    MoveEast2D,
+    MoveWest2D,
+    MoveSouth2D,
+    MoveNorth2D,
+)

 MOTION_ACTIONS = {MoveEast2D, MoveWest2D, MoveSouth2D, MoveNorth2D}

+
 class TagAction(Action):
     def __init__(self):
         super().__init__("tag")
diff --git a/pomdp_py/problems/tag/domain/observation.py b/pomdp_py/problems/tag/domain/observation.py
index 7bfe77b9..ae341f3b 100644
--- a/pomdp_py/problems/tag/domain/observation.py
+++ b/pomdp_py/problems/tag/domain/observation.py
@@ -4,8 +4,10 @@
 Observation space:
     the agent observes the target's location when the agent and the target are in the same cell.
 """
+
 import pomdp_py

+
 class TagObservation(pomdp_py.Observation):
     def __init__(self, target_position):
         self.target_position = target_position
@@ -20,5 +22,4 @@ def __eq__(self, other):
         return self.target_position == other.target_position

     def __str__(self):
-        return 'Observation(%s)' % (str(self.target_position))
-
+        return "Observation(%s)" % (str(self.target_position))
diff --git a/pomdp_py/problems/tag/domain/state.py b/pomdp_py/problems/tag/domain/state.py
index d5297a6f..91e7d1f1 100644
--- a/pomdp_py/problems/tag/domain/state.py
+++ b/pomdp_py/problems/tag/domain/state.py
@@ -4,10 +4,11 @@
 State space:
     state of the robot (x,y), state of the person (x,y), person found.
 """
+
 import pomdp_py

+
 class TagState(pomdp_py.State):
-
     def __init__(self, robot_position, target_position, target_found):
         """
         robot_position (tuple): x,y location of the robot.
@@ -25,14 +26,18 @@ def __eq__(self, other):
         if not isinstance(other, TagState):
             return False
         else:
-            return self.robot_position == other.robot_position\
-                and self.target_position == other.target_position\
+            return (
+                self.robot_position == other.robot_position
+                and self.target_position == other.target_position
                 and self.target_found == other.target_found
+            )

     def __str__(self):
-        return 'State(%s, %s | %s)' % (str(self.robot_position),
-                                       str(self.target_position),
-                                       str(self.target_found))
+        return "State(%s, %s | %s)" % (
+            str(self.robot_position),
+            str(self.target_position),
+            str(self.target_found),
+        )

     def __repr__(self):
         return str(self)
diff --git a/pomdp_py/problems/tag/env/env.py b/pomdp_py/problems/tag/env/env.py
index a66ac829..47211aff 100644
--- a/pomdp_py/problems/tag/env/env.py
+++ b/pomdp_py/problems/tag/env/env.py
@@ -7,22 +7,14 @@
 from pomdp_py.problems.multi_object_search.env.env import interpret
 from pomdp_py.problems.multi_object_search.env.visual import MosViz

-class TagEnvironment(pomdp_py.Environment):
-
-    def __init__(self,
-                 init_state,
-                 grid_map,
-                 pr_stay=0.2,
-                 small=1,
-                 big=10):
+class TagEnvironment(pomdp_py.Environment):
+    def __init__(self, init_state, grid_map, pr_stay=0.2, small=1, big=10):
         self._grid_map = grid_map
-        target_motion_policy = TagTargetMotionPolicy(grid_map,
-                                                     pr_stay)
+        target_motion_policy = TagTargetMotionPolicy(grid_map, pr_stay)
         transition_model = TagTransitionModel(grid_map, target_motion_policy)
         reward_model = TagRewardModel(small=small, big=big)
-        super().__init__(init_state,
-                         transition_model,
-                         reward_model)
+        super().__init__(init_state, transition_model, reward_model)

     @property
     def width(self):
diff --git a/pomdp_py/problems/tag/env/visual.py b/pomdp_py/problems/tag/env/visual.py
index b8ee6d05..c27c638c 100644
--- a/pomdp_py/problems/tag/env/visual.py
+++ b/pomdp_py/problems/tag/env/visual.py
@@ -1,4 +1,5 @@
 """Largely based on MosViz, except this is not an OO-POMDP"""
+
 import pygame
 import cv2
 import math
@@ -12,9 +13,9 @@
 from pomdp_py.problems.tag.example_worlds import *
 from pomdp_py.problems.tag.models.observation_model import *

+
 #### Visualization through pygame ####
 class TagViz:
-
     def __init__(self, env, res=30, fps=30, controllable=False, observation_model=None):
         self._env = env

@@ -35,22 +36,24 @@ def __init__(self, env, res=30, fps=30, controllable=False, observation_model=No
     def _make_gridworld_image(self, r):
         # Preparing 2d array
         w, l = self._env.width, self._env.length
-        arr2d = np.full((self._env.width,
-                         self._env.length), 0) # free grids
+        arr2d = np.full((self._env.width, self._env.length), 0)  # free grids
         # Creating image
-        img = np.full((w*r,l*r,3), 255, dtype=np.int32)
+        img = np.full((w * r, l * r, 3), 255, dtype=np.int32)
         for x in range(w):
             for y in range(l):
-                if (x,y) not in self._env.grid_map.obstacle_poses:
-                    arr2d[x,y] == 0 # free
-                    cv2.rectangle(img, (y*r, x*r), (y*r+r, x*r+r),
-                                  (255, 255, 255), -1)
+                if (x, y) not in self._env.grid_map.obstacle_poses:
+                    arr2d[x, y] = 0  # free (assignment; "==" was a no-op)
+                    cv2.rectangle(
+                        img, (y * r, x * r), (y * r + r, x * r + r), (255, 255, 255), -1
+                    )
                 else:
-                    arr2d[x,y] == 1 # obstacle
-                    cv2.rectangle(img, (y*r, x*r), (y*r+r, x*r+r),
-                                  (40, 31, 3), -1)
-                cv2.rectangle(img, (y*r, x*r), (y*r+r, x*r+r),
-                              (0, 0, 0), 1, 8)
+                    arr2d[x, y] = 1  # obstacle (assignment; "==" was a no-op)
+                    cv2.rectangle(
+                        img, (y * r, x * r), (y * r + r, x * r + r), (40, 31, 3), -1
+                    )
+                cv2.rectangle(
+                    img, (y * r, x * r), (y * r + r, x * r + r), (0, 0, 0), 1, 8
+                )
         return img

     @property
@@ -75,22 +78,22 @@ def update(self, action,
observation, belief): self._last_belief = belief @staticmethod - def draw_robot(img, x, y, th, size, color=(255,12,12)): + def draw_robot(img, x, y, th, size, color=(255, 12, 12)): radius = int(round(size / 2)) - cv2.circle(img, (y+radius, x+radius), radius, color, thickness=6) + cv2.circle(img, (y + radius, x + radius), radius, color, thickness=6) # endpoint = (y+radius + int(round(radius*math.sin(th))), # x+radius + int(round(radius*math.cos(th)))) # cv2.line(img, (y+radius,x+radius), endpoint, color, 2) @staticmethod - def draw_observation(img, z, rx, ry, rth, r, size, color=(12,12,255)): + def draw_observation(img, z, rx, ry, rth, r, size, color=(12, 12, 255)): assert type(z) == TagObservation, "%s != TagObservation" % (str(type(z))) radius = int(round(r / 2)) if z.target_position is not None: lx, ly = z.target_position - cv2.circle(img, (ly*r+radius, - lx*r+radius), size, color, thickness=-1) - + cv2.circle( + img, (ly * r + radius, lx * r + radius), size, color, thickness=-1 + ) # TODO! Deprecated. @staticmethod @@ -107,18 +110,23 @@ def draw_belief(img, belief, r, size, target_color): count = 0 for state in reversed(sorted(hist, key=hist.get)): if last_val != -1: - color = util.lighter(color, 1-hist[state]/last_val) + color = util.lighter(color, 1 - hist[state] / last_val) if np.mean(np.array(color) / np.array([255, 255, 255])) < 0.999: tx, ty = state.target_position - if (tx,ty) not in circle_drawn: - circle_drawn[(tx,ty)] = 0 - circle_drawn[(tx,ty)] += 1 - - cv2.circle(img, (ty*r+radius, - tx*r+radius), size//circle_drawn[(tx,ty)], color, thickness=-1) + if (tx, ty) not in circle_drawn: + circle_drawn[(tx, ty)] = 0 + circle_drawn[(tx, ty)] += 1 + + cv2.circle( + img, + (ty * r + radius, tx * r + radius), + size // circle_drawn[(tx, ty)], + color, + thickness=-1, + ) last_val = hist[state] - count +=1 + count += 1 if last_val <= 0: break @@ -127,14 +135,14 @@ def on_init(self): """pygame init""" pygame.init() # calls pygame.font.init() # init main screen and background - self._display_surf = pygame.display.set_mode((self.img_width, - self.img_height), - pygame.HWSURFACE) + self._display_surf = pygame.display.set_mode( + (self.img_width, self.img_height), pygame.HWSURFACE + ) self._background = pygame.Surface(self._display_surf.get_size()).convert() self._clock = pygame.time.Clock() # Font - self._myfont = pygame.font.SysFont('Comic Sans MS', 30) + self._myfont = pygame.font.SysFont("Comic Sans MS", 30) self._running = True def on_event(self, event): @@ -170,7 +178,12 @@ def on_event(self, event): print(" action: %s" % str(action.name)) print(" observation: %s" % str(z)) print(" reward: %s" % str(reward)) - print(" valid motions: %s" % str(self._env.grid_map.valid_motions(self._env.state.robot_position))) + print( + " valid motions: %s" + % str( + self._env.grid_map.valid_motions(self._env.state.robot_position) + ) + ) print("------------") if self._env.state.target_found: self._running = False @@ -186,10 +199,10 @@ def on_render(self): fps_text = "FPS: {0:.2f}".format(self._clock.get_fps()) last_action = self._last_action last_action_str = "no_action" if last_action is None else str(last_action) - pygame.display.set_caption("%s | Robot(%.2f,%.2f,%.2f) | %s | %s" % - (last_action_str, rx, ry, 0, - str(self._env.state.target_found), - fps_text)) + pygame.display.set_caption( + "%s | Robot(%.2f,%.2f,%.2f) | %s | %s" + % (last_action_str, rx, ry, 0, str(self._env.state.target_found), fps_text) + ) pygame.display.flip() def on_cleanup(self): @@ -199,7 +212,7 @@ def on_execute(self): if 
self.on_init() == False: self._running = False - while( self._running ): + while self._running: for event in pygame.event.get(): self.on_event(event) self.on_loop() @@ -212,8 +225,9 @@ def render_env(self, display_surf): # draw target tx, ty = self._env.state.target_position - cv2.rectangle(img, (ty*r, tx*r), (ty*r+r, tx*r+r), - (255, 165, 0), -1) + cv2.rectangle( + img, (ty * r, tx * r), (ty * r + r, tx * r + r), (255, 165, 0), -1 + ) # draw robot rx, ry = self._env.state.robot_position @@ -222,14 +236,16 @@ def render_env(self, display_surf): # last_viz_observation = self._last_viz_observation.get(robot_id, None) # last_belief = self._last_belief.get(robot_id, None) if self._last_belief is not None: - TagViz.draw_belief(img, self._last_belief, r, r//3, self._target_color) + TagViz.draw_belief(img, self._last_belief, r, r // 3, self._target_color) if self._last_observation is not None: - TagViz.draw_observation(img, self._last_observation, - rx, ry, 0, r, r//8, color=(20, 20, 180)) + TagViz.draw_observation( + img, self._last_observation, rx, ry, 0, r, r // 8, color=(20, 20, 180) + ) - TagViz.draw_robot(img, rx*r, ry*r, 0, r, color=(200, 12, 150)) + TagViz.draw_robot(img, rx * r, ry * r, 0, r, color=(200, 12, 150)) pygame.surfarray.blit_array(display_surf, img) + # TODO! DEPRECATED! def unittest(): worldmap, robot = world0 @@ -238,5 +254,6 @@ def unittest(): viz = TagViz(env, controllable=True, observation_model=observation_model) viz.on_execute() -if __name__ == '__main__': + +if __name__ == "__main__": unittest() diff --git a/pomdp_py/problems/tag/example_worlds.py b/pomdp_py/problems/tag/example_worlds.py index 3474abbf..374ad8d8 100644 --- a/pomdp_py/problems/tag/example_worlds.py +++ b/pomdp_py/problems/tag/example_worlds.py @@ -1,18 +1,20 @@ world0 = ( -""" + """ xxxxx...xx xxxxx...xx xxxxx...xx -.....T.... +.....T.... r......... -""", "r" +""", + "r", ) world1 = ( -""" + """ x.Tx .... r... 
x..x -""" -,"r") +""", + "r", +) diff --git a/pomdp_py/problems/tag/experiment.py b/pomdp_py/problems/tag/experiment.py index 647ab5fd..c23bf1b3 100644 --- a/pomdp_py/problems/tag/experiment.py +++ b/pomdp_py/problems/tag/experiment.py @@ -3,34 +3,32 @@ from pomdp_py.problems.tag.problem import * import numpy as np + def trial(worldstr, **kwargs): grid_map = GridMap.from_str(worldstr) free_cells = grid_map.free_cells() init_robot_position = random.sample(free_cells, 1)[0] init_target_position = random.sample(free_cells, 1)[0] - problem = TagProblem(init_robot_position, - init_target_position, - grid_map, **kwargs) - discounted_reward = solve(problem, - max_depth=15, - discount_factor=0.95, - planning_time=.7, - exploration_const=10, - visualize=True, - max_time=120, - max_steps=500) + problem = TagProblem(init_robot_position, init_target_position, grid_map, **kwargs) + discounted_reward = solve( + problem, + max_depth=15, + discount_factor=0.95, + planning_time=0.7, + exploration_const=10, + visualize=True, + max_time=120, + max_steps=500, + ) return discounted_reward + def main(): all_rewards = [] try: for i in range(100): - dr = trial(world0[0], - pr_stay=0.5, - small=1, - big=10, - prior="uniform") + dr = trial(world0[0], pr_stay=0.5, small=1, big=10, prior="uniform") all_rewards.append(dr) finally: print("All done!") @@ -38,5 +36,6 @@ def main(): print("Average discounted reward: %.3f" % (np.mean(all_rewards))) print("Std.dev discounted reward: %.3f" % (np.std(all_rewards))) + if __name__ == "__main__": main() diff --git a/pomdp_py/problems/tag/models/__init__.py b/pomdp_py/problems/tag/models/__init__.py index 8b137891..e69de29b 100644 --- a/pomdp_py/problems/tag/models/__init__.py +++ b/pomdp_py/problems/tag/models/__init__.py @@ -1 +0,0 @@ - diff --git a/pomdp_py/problems/tag/models/components/__init__.py b/pomdp_py/problems/tag/models/components/__init__.py index 8b137891..e69de29b 100644 --- a/pomdp_py/problems/tag/models/components/__init__.py +++ b/pomdp_py/problems/tag/models/components/__init__.py @@ -1 +0,0 @@ - diff --git a/pomdp_py/problems/tag/models/components/grid_map.py b/pomdp_py/problems/tag/models/components/grid_map.py index 462c481a..8b057efa 100644 --- a/pomdp_py/problems/tag/models/components/grid_map.py +++ b/pomdp_py/problems/tag/models/components/grid_map.py @@ -2,8 +2,8 @@ from pomdp_py.problems.tag.models.transition_model import TagTransitionModel from pomdp_py.problems.multi_object_search.env.env import interpret -class GridMap: +class GridMap: def __init__(self, width, length, obstacle_poses): self.width = width self.length = length @@ -11,15 +11,18 @@ def __init__(self, width, length, obstacle_poses): self.obstacle_poses = obstacle_poses def valid_pose(self, position): - if not (position[0] >= 0 and position[0] < self.width\ - and position[1] >= 0 and position[1] < self.length): + if not ( + position[0] >= 0 + and position[0] < self.width + and position[1] >= 0 + and position[1] < self.length + ): return False if position in self.obstacle_poses: return False return True - def valid_motions(self, position, - all_motions=MOTION_ACTIONS): + def valid_motions(self, position, all_motions=MOTION_ACTIONS): valid_motions = set({}) for motion_action in all_motions: if TagTransitionModel.if_move_by(self, position, motion_action) == position: @@ -38,8 +41,12 @@ def from_str(cls, worldstr, **kwargs): return grid_map def free_cells(self): - cells = set({(x,y) - for x in range(self.width) - for y in range(self.length) - if (x,y) not in self.obstacle_poses}) + cells = 
set( + { + (x, y) + for x in range(self.width) + for y in range(self.length) + if (x, y) not in self.obstacle_poses + } + ) return cells diff --git a/pomdp_py/problems/tag/models/components/motion_policy.py b/pomdp_py/problems/tag/models/components/motion_policy.py index 8900e423..bc20641f 100644 --- a/pomdp_py/problems/tag/models/components/motion_policy.py +++ b/pomdp_py/problems/tag/models/components/motion_policy.py @@ -6,43 +6,47 @@ class TagTargetMotionPolicy(pomdp_py.GenerativeDistribution): - def __init__(self, - grid_map, - pr_stay=0.2): # With 1.0 - pr_stay chance, the target moves away + def __init__( + self, grid_map, pr_stay=0.2 + ): # With 1.0 - pr_stay chance, the target moves away self._grid_map = grid_map self._pr_stay = pr_stay - def _compute_candidate_actions(self, - robot_position, - target_position, - valid_target_motion_actions): + def _compute_candidate_actions( + self, robot_position, target_position, valid_target_motion_actions + ): candidate_actions = set({}) cur_dist = euclidean_dist(robot_position, target_position) for action in valid_target_motion_actions: - next_target_position = TagTransitionModel.if_move_by(self._grid_map, - target_position, - action) + next_target_position = TagTransitionModel.if_move_by( + self._grid_map, target_position, action + ) next_dist = euclidean_dist(robot_position, next_target_position) if next_dist > cur_dist: candidate_actions.add(action) return candidate_actions - - def probability(self, next_target_position, - target_position, robot_position, - valid_target_motion_actions): + def probability( + self, + next_target_position, + target_position, + robot_position, + valid_target_motion_actions, + ): # If it is impossible to go from target position to the next, # then it is a zero probability event. diff_x = abs(next_target_position[0] - target_position[0]) diff_y = abs(next_target_position[1] - target_position[1]) - if not ((diff_x == 1 and diff_y == 0) - or (diff_x == 0 and diff_y == 1) - or (diff_x == 0 and diff_y == 0)): + if not ( + (diff_x == 1 and diff_y == 0) + or (diff_x == 0 and diff_y == 1) + or (diff_x == 0 and diff_y == 0) + ): return constants.EPSILON - candidate_actions = self._compute_candidate_actions(robot_position, - target_position, - valid_target_motion_actions) + candidate_actions = self._compute_candidate_actions( + robot_position, target_position, valid_target_motion_actions + ) if len(candidate_actions) == 0: # No action possible, yet next_target_position is a valid # transition from current. @@ -61,30 +65,33 @@ def probability(self, next_target_position, else: # The object has taken an adversarial action. 
for action in candidate_actions: - if (target_position[0] + action.motion[0], - target_position[1] + action.motion[1]) == next_target_position: + if ( + target_position[0] + action.motion[0], + target_position[1] + action.motion[1], + ) == next_target_position: return (1.0 - self._pr_stay) / len(candidate_actions) return constants.EPSILON - def random(self, robot_position, target_position, valid_target_motion_actions, - mpe=False): - if mpe or random.uniform(0,1) > self._pr_stay: + def random( + self, robot_position, target_position, valid_target_motion_actions, mpe=False + ): + if mpe or random.uniform(0, 1) > self._pr_stay: # Move away; Pick motion actions that makes the target moves away from the robot - candidate_actions = self._compute_candidate_actions(robot_position, - target_position, - valid_target_motion_actions) + candidate_actions = self._compute_candidate_actions( + robot_position, target_position, valid_target_motion_actions + ) if len(candidate_actions) == 0: return target_position chosen_action = random.sample(candidate_actions, 1)[0] - return TagTransitionModel.if_move_by(self._grid_map, - target_position, - chosen_action) + return TagTransitionModel.if_move_by( + self._grid_map, target_position, chosen_action + ) else: # stay return target_position def mpe(self, robot_position, target_position, valid_target_motion_actions): - return self.random(robot_position, target_position, - valid_target_motion_actions, - mpe=True) + return self.random( + robot_position, target_position, valid_target_motion_actions, mpe=True + ) diff --git a/pomdp_py/problems/tag/models/observation_model.py b/pomdp_py/problems/tag/models/observation_model.py index eda84bb6..7f5201a5 100644 --- a/pomdp_py/problems/tag/models/observation_model.py +++ b/pomdp_py/problems/tag/models/observation_model.py @@ -2,6 +2,7 @@ from pomdp_py.problems.tag.domain.observation import * import pomdp_py.problems.tag.constants as constants + class TagObservationModel(pomdp_py.ObservationModel): """In this observation model, the robot deterministically observes the target location when it is in the same grid cell diff --git a/pomdp_py/problems/tag/models/policy_model.py b/pomdp_py/problems/tag/models/policy_model.py index fcc89a61..9bc7188e 100644 --- a/pomdp_py/problems/tag/models/policy_model.py +++ b/pomdp_py/problems/tag/models/policy_model.py @@ -3,6 +3,7 @@ from pomdp_py.problems.tag.domain.action import * from pomdp_py.problems.tag.models.transition_model import * + class TagPolicyModel(pomdp_py.RolloutPolicy): def __init__(self, grid_map=None): self._grid_map = grid_map @@ -13,8 +14,9 @@ def sample(self, state, **kwargs): def get_all_actions(self, state=None, history=None): if state is not None: if self._grid_map is not None: - valid_motions = self._grid_map.valid_motions(state.robot_position, - all_motions=MOTION_ACTIONS) + valid_motions = self._grid_map.valid_motions( + state.robot_position, all_motions=MOTION_ACTIONS + ) return valid_motions | set({TagAction()}) return MOTION_ACTIONS | set({TagAction()}) diff --git a/pomdp_py/problems/tag/models/reward_model.py b/pomdp_py/problems/tag/models/reward_model.py index 36499fd9..ce84fbe0 100644 --- a/pomdp_py/problems/tag/models/reward_model.py +++ b/pomdp_py/problems/tag/models/reward_model.py @@ -1,13 +1,15 @@ import pomdp_py from pomdp_py.problems.tag.domain.action import * -class TagRewardModel(pomdp_py.RewardModel): +class TagRewardModel(pomdp_py.RewardModel): def __init__(self, small=1, big=10): self.small = small self.big = big - def probability(self, reward, 
state, action, next_state, normalized=False, **kwargs): + def probability( + self, reward, state, action, next_state, normalized=False, **kwargs + ): if reward == self._reward_func(state, action): return 1.0 else: diff --git a/pomdp_py/problems/tag/models/transition_model.py b/pomdp_py/problems/tag/models/transition_model.py index 4a524f75..cb77f226 100644 --- a/pomdp_py/problems/tag/models/transition_model.py +++ b/pomdp_py/problems/tag/models/transition_model.py @@ -6,16 +6,15 @@ and with Pr=0.2, the target stays at the same place. The target never moves closer to the robot. """ + import copy import pomdp_py import pomdp_py.problems.tag.constants as constants from pomdp_py.problems.tag.domain.action import * -class TagTransitionModel(pomdp_py.TransitionModel): - def __init__(self, - grid_map, - target_motion_policy): +class TagTransitionModel(pomdp_py.TransitionModel): + def __init__(self, grid_map, target_motion_policy): self._grid_map = grid_map self.target_motion_policy = target_motion_policy @@ -23,17 +22,16 @@ def __init__(self, def if_move_by(cls, grid_map, position, action): if isinstance(action, MotionAction): dx, dy = action.motion - next_position = (position[0] + dx, - position[1] + dy) + next_position = (position[0] + dx, position[1] + dy) if grid_map.valid_pose(next_position): return next_position return position def probability(self, next_state, state, action, **kwargs): # Robot motion - expected_robot_position = TagTransitionModel.if_move_by(self._grid_map, - state.robot_position, - action) + expected_robot_position = TagTransitionModel.if_move_by( + self._grid_map, state.robot_position, action + ) if expected_robot_position != next_state.robot_position: return constants.EPSILON @@ -50,18 +48,22 @@ def probability(self, next_state, state, action, **kwargs): return 1.0 - constants.EPSILON # Target motion - valid_target_motion_actions = self._grid_map.valid_motions(state.target_position) - return self.target_motion_policy.probability(next_state.target_position, - state.target_position, - state.robot_position, - valid_target_motion_actions) + valid_target_motion_actions = self._grid_map.valid_motions( + state.target_position + ) + return self.target_motion_policy.probability( + next_state.target_position, + state.target_position, + state.robot_position, + valid_target_motion_actions, + ) def sample(self, state, action, argmax=False): # Robot motion next_state = copy.deepcopy(state) - next_state.robot_position = TagTransitionModel.if_move_by(self._grid_map, - state.robot_position, - action) + next_state.robot_position = TagTransitionModel.if_move_by( + self._grid_map, state.robot_position, action + ) # If Tag action if isinstance(action, TagAction): @@ -71,15 +73,17 @@ def sample(self, state, action, argmax=False): return next_state # Target motion - valid_target_motion_actions = self._grid_map.valid_motions(state.target_position) + valid_target_motion_actions = self._grid_map.valid_motions( + state.target_position + ) if not argmax: - next_state.target_position = self.target_motion_policy.random(state.robot_position, - state.target_position, - valid_target_motion_actions) + next_state.target_position = self.target_motion_policy.random( + state.robot_position, state.target_position, valid_target_motion_actions + ) else: - next_state.target_position = self.target_motion_policy.mpe(state.robot_position, - state.target_position, - valid_target_motion_actions) + next_state.target_position = self.target_motion_policy.mpe( + state.robot_position, state.target_position, 
valid_target_motion_actions
+            )
         return next_state

     def argmax(self, state, action, **kwargs):
diff --git a/pomdp_py/problems/tag/problem.py b/pomdp_py/problems/tag/problem.py
index d5a9df14..a158af64 100644
--- a/pomdp_py/problems/tag/problem.py
+++ b/pomdp_py/problems/tag/problem.py
@@ -6,26 +6,22 @@
 from pomdp_py.problems.tag.example_worlds import *
 import time

-class TagProblem(pomdp_py.POMDP):
-
-    def __init__(self,
-                 init_robot_position,
-                 init_target_position,
-                 grid_map,
-                 pr_stay=0.2,
-                 small=1,
-                 big=10,
-                 prior="uniform",
-                 belief_type="hist",
-                 num_particles=6):
-        init_state = TagState(init_robot_position,
-                              init_target_position,
-                              False)
-        env = TagEnvironment(init_state,
-                             grid_map,
-                             pr_stay=pr_stay,
-                             small=1,
-                             big=10)
+class TagProblem(pomdp_py.POMDP):
+    def __init__(
+        self,
+        init_robot_position,
+        init_target_position,
+        grid_map,
+        pr_stay=0.2,
+        small=1,
+        big=10,
+        prior="uniform",
+        belief_type="hist",
+        num_particles=6,
+    ):
+        init_state = TagState(init_robot_position, init_target_position, False)
+        env = TagEnvironment(init_state, grid_map, pr_stay=pr_stay, small=small, big=big)
         if prior == "uniform":
             prior = {}
         elif prior == "informed":
@@ -34,47 +30,47 @@ def __init__(self,
             raise ValueError("Unrecognized prior type: %s" % prior)

         if belief_type == "particles":
-            init_belief = initialize_particles_belief(grid_map, init_robot_position,
-                                                      prior=prior, num_particles=num_particles)
+            init_belief = initialize_particles_belief(
+                grid_map, init_robot_position, prior=prior, num_particles=num_particles
+            )
         else:
             init_belief = initialize_belief(grid_map, init_robot_position, prior=prior)
-        agent = TagAgent(init_belief,
-                         grid_map,
-                         pr_stay=pr_stay,
-                         small=1,
-                         big=10)
-        super().__init__(agent, env,
-                         name="TagProblem")
-
-
-def solve(problem,
-          planner_type="pouct",
-          max_depth=10,  # planning horizon
-          discount_factor=0.99,
-          planning_time=1.,  # amount of time (s) to plan each step
-          exploration_const=1000,  # exploration constant
-          visualize=True,
-          max_time=120,  # maximum amount of time allowed to solve the problem
-          max_steps=500):  # maximum number of planning steps the agent can take.
+        agent = TagAgent(init_belief, grid_map, pr_stay=pr_stay, small=small, big=big)
+        super().__init__(agent, env, name="TagProblem")
+
+
+def solve(
+    problem,
+    planner_type="pouct",
+    max_depth=10,  # planning horizon
+    discount_factor=0.99,
+    planning_time=1.0,  # amount of time (s) to plan each step
+    exploration_const=1000,  # exploration constant
+    visualize=True,
+    max_time=120,  # maximum amount of time allowed to solve the problem
+    max_steps=500,
+):  # maximum number of planning steps the agent can take.
if planner_type == "pouct": - planner = pomdp_py.POUCT(max_depth=max_depth, - discount_factor=discount_factor, - planning_time=planning_time, - exploration_const=exploration_const, - rollout_policy=problem.agent.policy_model) + planner = pomdp_py.POUCT( + max_depth=max_depth, + discount_factor=discount_factor, + planning_time=planning_time, + exploration_const=exploration_const, + rollout_policy=problem.agent.policy_model, + ) else: - planner = pomdp_py.POMCP(max_depth=max_depth, - discount_factor=discount_factor, - planning_time=planning_time, - exploration_const=exploration_const, - rollout_policy=problem.agent.policy_model) + planner = pomdp_py.POMCP( + max_depth=max_depth, + discount_factor=discount_factor, + planning_time=planning_time, + exploration_const=exploration_const, + rollout_policy=problem.agent.policy_model, + ) if visualize: viz = TagViz(problem.env, controllable=False) if viz.on_init() == False: raise Exception("Environment failed to initialize") - viz.update(None, - None, - problem.agent.cur_belief) + viz.update(None, None, problem.agent.cur_belief) viz.on_render() _discount = 1.0 @@ -95,8 +91,9 @@ def solve(problem, # Receive observation _start = time.time() - real_observation = \ - problem.env.provide_observation(problem.agent.observation_model, real_action) + real_observation = problem.env.provide_observation( + problem.agent.observation_model, real_action + ) # Updates problem.agent.clear_history() # truncate history @@ -110,20 +107,18 @@ def solve(problem, _total_reward += reward _total_discounted_reward += reward * _discount _discount = _discount * discount_factor - print("==== Step %d ====" % (i+1)) + print("==== Step %d ====" % (i + 1)) print("Action: %s" % str(real_action)) print("Observation: %s" % str(real_observation)) print("Reward: %s" % str(reward)) print("Reward (Cumulative): %s" % str(_total_reward)) print("Reward (Discounted): %s" % str(_total_discounted_reward)) - print("Find Actions Count: %d" % _find_actions_count) + print("Find Actions Count: %d" % _find_actions_count) if isinstance(planner, pomdp_py.POUCT): print("__num_sims__: %d" % planner.last_num_sims) if visualize: - viz.update(real_action, - real_observation, - problem.agent.cur_belief) + viz.update(real_action, real_observation, problem.agent.cur_belief) viz.on_loop() viz.on_render() @@ -134,7 +129,7 @@ def solve(problem, if _time_used > max_time: print("Maximum time reached.") break - if _discount*10 < 1e-4: + if _discount * 10 < 1e-4: print("Discount factor already too small") break @@ -148,23 +143,27 @@ def main(): init_robot_position = random.sample(free_cells, 1)[0] init_target_position = random.sample(free_cells, 1)[0] - problem = TagProblem(init_robot_position, - init_target_position, - grid_map, - pr_stay=0.2, - small=1, - big=10, - prior="uniform", - belief_type="histogram") - solve(problem, - max_depth=15, - discount_factor=0.95, - planning_time=0.8, - exploration_const=20, - visualize=True, - max_time=360, - max_steps=251, - planner_type="pouct") + problem = TagProblem( + init_robot_position, + init_target_position, + grid_map, + pr_stay=0.2, + small=1, + big=10, + prior="uniform", + belief_type="histogram", + ) + solve( + problem, + max_depth=15, + discount_factor=0.95, + planning_time=0.8, + exploration_const=20, + visualize=True, + max_time=360, + max_steps=251, + planner_type="pouct", + ) if __name__ == "__main__": diff --git a/pomdp_py/problems/tiger/tiger_problem.py b/pomdp_py/problems/tiger/tiger_problem.py index 039f8846..67a378ba 100644 --- 
a/pomdp_py/problems/tiger/tiger_problem.py +++ b/pomdp_py/problems/tiger/tiger_problem.py @@ -41,53 +41,70 @@ import sys import copy + class TigerState(pomdp_py.State): def __init__(self, name): self.name = name + def __hash__(self): return hash(self.name) + def __eq__(self, other): if isinstance(other, TigerState): return self.name == other.name return False + def __str__(self): return self.name + def __repr__(self): return "TigerState(%s)" % self.name + def other(self): if self.name.endswith("left"): return TigerState("tiger-right") else: return TigerState("tiger-left") + class TigerAction(pomdp_py.Action): def __init__(self, name): self.name = name + def __hash__(self): return hash(self.name) + def __eq__(self, other): if isinstance(other, TigerAction): return self.name == other.name return False + def __str__(self): return self.name + def __repr__(self): return "TigerAction(%s)" % self.name + class TigerObservation(pomdp_py.Observation): def __init__(self, name): self.name = name + def __hash__(self): return hash(self.name) + def __eq__(self, other): if isinstance(other, TigerObservation): return self.name == other.name return False + def __str__(self): return self.name + def __repr__(self): return "TigerObservation(%s)" % self.name + # Observation model class ObservationModel(pomdp_py.ObservationModel): def __init__(self, noise=0.15): @@ -109,7 +126,7 @@ def sample(self, next_state, action): else: thresh = 0.5 - if random.uniform(0,1) < thresh: + if random.uniform(0, 1) < thresh: return TigerObservation(next_state.name) else: return TigerObservation(next_state.other().name) @@ -118,8 +135,8 @@ def get_all_observations(self): """Only need to implement this if you're using a solver that needs to enumerate over the observation space (e.g. value iteration)""" - return [TigerObservation(s) - for s in {"tiger-left", "tiger-right"}] + return [TigerObservation(s) for s in {"tiger-left", "tiger-right"}] + # Transition Model class TransitionModel(pomdp_py.TransitionModel): @@ -142,9 +159,11 @@ def sample(self, state, action): def get_all_states(self): """Only need to implement this if you're using - a solver that needs to enumerate over the observation space (e.g. value iteration)""" + a solver that needs to enumerate over the state space (e.g.
value iteration) + """ return [TigerState(s) for s in {"tiger-left", "tiger-right"}] + # Reward Model class RewardModel(pomdp_py.RewardModel): def _reward_func(self, state, action): @@ -158,19 +177,20 @@ def _reward_func(self, state, action): return 10 else: return -100 - else: # listen + else: # listen return -1 def sample(self, state, action, next_state): # deterministic return self._reward_func(state, action) + # Policy Model class PolicyModel(pomdp_py.RolloutPolicy): """A simple policy model with uniform prior over a - small, finite action space""" - ACTIONS = [TigerAction(s) - for s in {"open-left", "open-right", "listen"}] + small, finite action space""" + + ACTIONS = [TigerAction(s) for s in {"open-left", "open-right", "listen"}] def sample(self, state): return random.sample(self.get_all_actions(), 1)[0] @@ -192,14 +212,14 @@ class TigerProblem(pomdp_py.POMDP): def __init__(self, obs_noise, init_true_state, init_belief): """init_belief is a Distribution.""" - agent = pomdp_py.Agent(init_belief, - PolicyModel(), - TransitionModel(), - ObservationModel(obs_noise), - RewardModel()) - env = pomdp_py.Environment(init_true_state, - TransitionModel(), - RewardModel()) + agent = pomdp_py.Agent( + init_belief, + PolicyModel(), + TransitionModel(), + ObservationModel(obs_noise), + RewardModel(), + ) + env = pomdp_py.Environment(init_true_state, TransitionModel(), RewardModel()) super().__init__(agent, env, name="TigerProblem") @staticmethod @@ -214,20 +234,15 @@ def create(state="tiger-left", belief=0.5, obs_noise=0.15): model (default 0.15) """ init_true_state = TigerState(state) - init_belief = pomdp_py.Histogram({ - TigerState("tiger-left"): belief, - TigerState("tiger-right"): 1.0 - belief - }) - tiger_problem = TigerProblem(obs_noise, - init_true_state, init_belief) + init_belief = pomdp_py.Histogram( + {TigerState("tiger-left"): belief, TigerState("tiger-right"): 1.0 - belief} + ) + tiger_problem = TigerProblem(obs_noise, init_true_state, init_belief) tiger_problem.agent.set_belief(init_belief, prior=True) return tiger_problem - - -def test_planner(tiger_problem, planner, nsteps=3, - debug_tree=False): +def test_planner(tiger_problem, planner, nsteps=3, debug_tree=False): """ Runs the action-feedback loop of Tiger problem POMDP @@ -242,10 +257,8 @@ def test_planner(tiger_problem, planner, nsteps=3, action = planner.plan(tiger_problem.agent) if debug_tree: from pomdp_py.utils import TreeDebugger - dd = TreeDebugger(tiger_problem.agent.tree) - import pdb; pdb.set_trace() - print("==== Step %d ====" % (i+1)) + print("==== Step %d ====" % (i + 1)) print(f"True state: {tiger_problem.env.state}") print(f"Belief: {tiger_problem.agent.cur_belief}") print(f"Action: {action}") @@ -260,7 +273,9 @@ def test_planner(tiger_problem, planner, nsteps=3, # in real world); In that case, you could skip # the state transition and re-estimate the state # (e.g. through the perception stack on the robot). - reward = tiger_problem.env.reward_model.sample(tiger_problem.env.state, action, None) + reward = tiger_problem.env.reward_model.sample( + tiger_problem.env.state, action, None + ) print("Reward:", reward) # Let's create some simulated real observation; @@ -274,7 +289,7 @@ def test_planner(tiger_problem, planner, nsteps=3, # reading). Note that tiger_problem.env.state stores the # environment state after action execution. 
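# For reference, the histogram belief update applied a few steps below by
# pomdp_py.update_histogram_belief is the standard discrete Bayes filter,
#     b'(s') ∝ O(o | s', a) * sum_s T(s' | s, a) * b(s),
# normalized over all s' (see update_histogram_belief in
# pomdp_py/representations/belief/histogram.py later in this diff).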
real_observation = TigerObservation(tiger_problem.env.state.name) - print(">> Observation:", real_observation) + print(">> Observation:", real_observation) tiger_problem.agent.update_history(action, real_observation) # Update the belief. If the planner is POMCP, planner.update @@ -284,13 +299,14 @@ def test_planner(tiger_problem, planner, nsteps=3, print("Num sims:", planner.last_num_sims) print("Plan time: %.5f" % planner.last_planning_time) - if isinstance(tiger_problem.agent.cur_belief, - pomdp_py.Histogram): + if isinstance(tiger_problem.agent.cur_belief, pomdp_py.Histogram): new_belief = pomdp_py.update_histogram_belief( tiger_problem.agent.cur_belief, - action, real_observation, + action, + real_observation, tiger_problem.agent.observation_model, - tiger_problem.agent.transition_model) + tiger_problem.agent.transition_model, + ) tiger_problem.agent.set_belief(new_belief) if action.name.startswith("open"): @@ -298,20 +314,28 @@ def test_planner(tiger_problem, planner, nsteps=3, # until every time door is opened. print("\n") + def make_tiger(noise=0.15, init_state="tiger-left", init_belief=[0.5, 0.5]): """Convenient function to quickly build a tiger domain. Useful for testing""" - tiger = TigerProblem(noise, TigerState(init_state), - pomdp_py.Histogram({TigerState("tiger-left"): init_belief[0], - TigerState("tiger-right"): init_belief[1]})) + tiger = TigerProblem( + noise, + TigerState(init_state), + pomdp_py.Histogram( + { + TigerState("tiger-left"): init_belief[0], + TigerState("tiger-right"): init_belief[1], + } + ), + ) return tiger def main(): - init_true_state = random.choice(["tiger-left", - "tiger-right"]) - init_belief = pomdp_py.Histogram({TigerState("tiger-left"): 0.5, - TigerState("tiger-right"): 0.5}) + init_true_state = random.choice(["tiger-left", "tiger-right"]) + init_belief = pomdp_py.Histogram( + {TigerState("tiger-left"): 0.5, TigerState("tiger-right"): 0.5} + ) tiger = make_tiger(init_state=init_true_state) init_belief = tiger.agent.belief @@ -320,10 +344,14 @@ def main(): test_planner(tiger, vi, nsteps=3) print("\n** Testing POUCT **") - pouct = pomdp_py.POUCT(max_depth=3, discount_factor=0.95, - num_sims=4096, exploration_const=50, - rollout_policy=tiger.agent.policy_model, - show_progress=True) + pouct = pomdp_py.POUCT( + max_depth=3, + discount_factor=0.95, + num_sims=4096, + exploration_const=50, + rollout_policy=tiger.agent.policy_model, + show_progress=True, + ) test_planner(tiger, pouct, nsteps=10) TreeDebugger(tiger.agent.tree).pp @@ -332,13 +360,21 @@ def main(): tiger.agent.tree = None print("** Testing POMCP **") - tiger.agent.set_belief(pomdp_py.Particles.from_histogram(init_belief, num_particles=100), prior=True) - pomcp = pomdp_py.POMCP(max_depth=3, discount_factor=0.95, - num_sims=1000, exploration_const=50, - rollout_policy=tiger.agent.policy_model, - show_progress=True, pbar_update_interval=500) + tiger.agent.set_belief( + pomdp_py.Particles.from_histogram(init_belief, num_particles=100), prior=True + ) + pomcp = pomdp_py.POMCP( + max_depth=3, + discount_factor=0.95, + num_sims=1000, + exploration_const=50, + rollout_policy=tiger.agent.policy_model, + show_progress=True, + pbar_update_interval=500, + ) test_planner(tiger, pomcp, nsteps=10) TreeDebugger(tiger.agent.tree).pp -if __name__ == '__main__': + +if __name__ == "__main__": main() diff --git a/pomdp_py/representations/belief/histogram.py b/pomdp_py/representations/belief/histogram.py index 8caaf5a1..899416c2 100644 --- a/pomdp_py/representations/belief/histogram.py +++ 
b/pomdp_py/representations/belief/histogram.py @@ -2,7 +2,7 @@ def abstraction_over_histogram(current_histogram, state_mapper): - state_mappings = {s:state_mapper(s) for s in current_histogram} + state_mappings = {s: state_mapper(s) for s in current_histogram} hist = {} for s in current_histogram: a_s = state_mapper(s) @@ -11,11 +11,19 @@ def abstraction_over_histogram(current_histogram, state_mapper): hist[a_s] += current_histogram[s] return hist -def update_histogram_belief(current_histogram, - real_action, real_observation, - observation_model, transition_model, oargs={}, - targs={}, normalize=True, static_transition=False, - next_state_space=None): + +def update_histogram_belief( + current_histogram, + real_action, + real_observation, + observation_model, + transition_model, + oargs={}, + targs={}, + normalize=True, + static_transition=False, + next_state_space=None, +): """ update_histogram_belief(current_histogram, real_action, real_observation, observation_model, transition_model, oargs={}, @@ -54,20 +62,21 @@ def update_histogram_belief(current_histogram, if next_state_space is None: next_state_space = current_histogram for next_state in next_state_space: - observation_prob = observation_model.probability(real_observation, - next_state, - real_action, - **oargs) + observation_prob = observation_model.probability( + real_observation, next_state, real_action, **oargs + ) if not static_transition: transition_prob = 0 for state in current_histogram: - transition_prob += transition_model.probability(next_state, - state, - real_action, - **targs) * current_histogram[state] + transition_prob += ( + transition_model.probability( + next_state, state, real_action, **targs + ) + * current_histogram[state] + ) else: transition_prob = current_histogram[next_state] - + new_histogram[next_state] = observation_prob * transition_prob total_prob += new_histogram[next_state] diff --git a/pomdp_py/representations/belief/particles.pyx b/pomdp_py/representations/belief/particles.pyx index 7f71d46c..ecb066c6 100644 --- a/pomdp_py/representations/belief/particles.pyx +++ b/pomdp_py/representations/belief/particles.pyx @@ -30,9 +30,9 @@ cpdef particle_reinvigoration(Particles particles, if len(new_particles) > num_particles: return new_particles - + print("Particle reinvigoration for %d particles" % (num_particles - len(new_particles))) - cdef State next_state + cdef State next_state while len(new_particles) < num_particles: # need to make a copy otherwise the transform affects states in 'particles' next_state = copy.deepcopy(particles.random()) @@ -85,4 +85,3 @@ cpdef update_particles_belief(Particles current_particles, # Particle reinvigoration return particle_reinvigoration(Particles(filtered_particles), len(current_particles.particles), state_transform_func=state_transform_func) - diff --git a/pomdp_py/representations/distribution/gaussian.pyx b/pomdp_py/representations/distribution/gaussian.pyx index e1909d55..8fd18c0d 100644 --- a/pomdp_py/representations/distribution/gaussian.pyx +++ b/pomdp_py/representations/distribution/gaussian.pyx @@ -10,7 +10,7 @@ if scipy_spec is not None: else: raise ImportError("scipy not found."\ "Requires scipy.stats.multivariate_normal to use Gaussian") - + cdef class Gaussian(GenerativeDistribution): @@ -46,7 +46,7 @@ cdef class Gaussian(GenerativeDistribution): def __getitem__(self, value): """__getitem__(self, value) Evaluate the probability of given value - + Args: value (list or array like) Returns: @@ -54,8 +54,8 @@ cdef class Gaussian(GenerativeDistribution): 
return multivariate_normal.pdf(np.array(value), np.array(self._mean), np.array(self._cov)) - - + + def __setitem__(self, value, prob): """__setitem__(self, value, prob) Not Implemented; diff --git a/pomdp_py/utils/__init__.py b/pomdp_py/utils/__init__.py index 59d457f8..82e106de 100644 --- a/pomdp_py/utils/__init__.py +++ b/pomdp_py/utils/__init__.py @@ -1,10 +1,23 @@ # Assorted convenient functions -from pomdp_py.utils.math import (vec, proj, - R_x, R_y, R_z, R_between, T, - to_radians, approx_equal) -from pomdp_py.utils.misc import (remap, json_safe, safe_slice, similar, special_char) -from pomdp_py.utils.colors import (lighter, rgb_to_hex, hex_to_rgb, - inverse_color_rgb, inverse_color_hex, - random_unique_color) +from pomdp_py.utils.math import ( + vec, + proj, + R_x, + R_y, + R_z, + R_between, + T, + to_radians, + approx_equal, +) +from pomdp_py.utils.misc import remap, json_safe, safe_slice, similar, special_char +from pomdp_py.utils.colors import ( + lighter, + rgb_to_hex, + hex_to_rgb, + inverse_color_rgb, + inverse_color_hex, + random_unique_color, +) from pomdp_py.utils import typ from pomdp_py.utils.debugging import TreeDebugger diff --git a/pomdp_py/utils/colors.py b/pomdp_py/utils/colors.py index 54829c15..ef47d450 100644 --- a/pomdp_py/utils/colors.py +++ b/pomdp_py/utils/colors.py @@ -3,36 +3,42 @@ import numpy as np import random + # colors def lighter(color, percent): - '''assumes color is rgb between (0, 0, 0) and (255, 255, 255)''' + """assumes color is rgb between (0, 0, 0) and (255, 255, 255)""" color = np.array(color) white = np.array([255, 255, 255]) - vector = white-color + vector = white - color return color + vector * percent + def rgb_to_hex(rgb): - r,g,b = rgb - return '#%02x%02x%02x' % (int(r), int(g), int(b)) + r, g, b = rgb + return "#%02x%02x%02x" % (int(r), int(g), int(b)) + def hex_to_rgb(hx): """hx is a string, begins with #. ASSUME len(hx)=7.""" if len(hx) != 7: raise ValueError("Hex must be #------") hx = hx[1:] # omit the '#' - r = int('0x'+hx[:2], 16) - g = int('0x'+hx[2:4], 16) - b = int('0x'+hx[4:6], 16) - return (r,g,b) + r = int("0x" + hx[:2], 16) + g = int("0x" + hx[2:4], 16) + b = int("0x" + hx[4:6], 16) + return (r, g, b) + def inverse_color_rgb(rgb): - r,g,b = rgb - return (255-r, 255-g, 255-b) + r, g, b = rgb + return (255 - r, 255 - g, 255 - b) + def inverse_color_hex(hx): """hx is a string, begins with #. ASSUME len(hx)=7.""" return inverse_color_rgb(hex_to_rgb(hx)) + def random_unique_color(colors, ctype=1): """ ctype=1: completely random diff --git a/pomdp_py/utils/debugging.py b/pomdp_py/utils/debugging.py index a4df9b9d..3e79b2fc 100644 --- a/pomdp_py/utils/debugging.py +++ b/pomdp_py/utils/debugging.py @@ -87,6 +87,7 @@ To explore more features, browse the list of methods in the documentation. """ + import sys from pomdp_py.algorithms.po_uct import TreeNode, QNode, VNode, RootVNode from pomdp_py.utils import typ, similar, special_char @@ -95,6 +96,7 @@ DEFAULT_MARK_COLOR = "blue" MARKED = {} # tracks marked nodes on tree + def _node_pp(node, e=None, p=None, o=None): # We want to return the node, but we don't want to print it on pdb with # its default string. 
But instead, we want to print it with our own @@ -104,10 +106,10 @@ def _node_pp(node, e=None, p=None, o=None): else: return _QNodePP(node, parent_edge=e, parent=p, original=o) -class _NodePP: +class _NodePP: def __init__(self, node, parent_edge=None, parent=None, original=None): - """node: either VNode or QNode (the actual node on the tree) """ + """node: either VNode or QNode (the actual node on the tree)""" self.parent_edge = parent_edge self.parent = parent self.children = node.children @@ -137,8 +139,7 @@ def to_edge(self, key): edges = list(sorted_by_str(self.children.keys())) return edges[key] elif type(key) == str: - chosen = max(self.children.keys(), - key=lambda edge: similar(str(edge), key)) + chosen = max(self.children.keys(), key=lambda edge: similar(str(edge), key)) if similar(str(chosen), key) >= SIMILAR_THRESH: return chosen raise ValueError("Cannot access children with key {}".format(key)) @@ -186,17 +187,18 @@ def interpret_print_type(opt): def p(self, opt=None, **kwargs): if opt is None: max_depth = None - print_type_opt = kwargs.get('t', "summary") + print_type_opt = kwargs.get("t", "summary") elif type(opt) == int: max_depth = opt - print_type_opt = kwargs.get('t', "summary") + print_type_opt = kwargs.get("t", "summary") elif type(opt) == str: print_type_opt = opt - max_depth = kwargs.get('d', None) + max_depth = kwargs.get("d", None) else: raise ValueError("Cannot deal with opt of type {}".format(type(opt))) - self.print_tree(max_depth=max_depth, - print_type=_NodePP.interpret_print_type(print_type_opt)) + self.print_tree( + max_depth=max_depth, print_type=_NodePP.interpret_print_type(print_type_opt) + ) @property def pp(self): @@ -207,13 +209,15 @@ def print_tree(self, **options): _NodePP._print_tree_helper(self, 0, "", [None], -1, **options) @staticmethod - def _print_tree_helper(root, - depth, # depth of root - parent_edge, - branch_positions, # list of 'first', 'middle', 'last' for each level prior to root - child_index, # Index of the root as a child of parent - max_depth=None, - print_type="summary"): + def _print_tree_helper( + root, + depth, # depth of root + parent_edge, + branch_positions, # list of 'first', 'middle', 'last' for each level prior to root + child_index, # Index of the root as a child of parent + max_depth=None, + print_type="summary", + ): """ pos_among_children is either 'first', 'middle', or 'last' """ @@ -224,7 +228,9 @@ def _print_tree_helper(root, # Print the tree branches for all levels up to current root branches = "" - preceding_positions = branch_positions[:-1] # all positions except for current root + preceding_positions = branch_positions[ + :-1 + ] # all positions except for current root for pos in preceding_positions: if pos is None: continue @@ -243,22 +249,25 @@ def _print_tree_helper(root, root.print_children = False if child_index >= 0: - line = branches + str(child_index).translate(special_char.SUBSCRIPT) + str(root) + line = ( + branches + + str(child_index).translate(special_char.SUBSCRIPT) + + str(root) + ) else: line = branches + str(root) if isinstance(root, VNode): - line += typ.cyan("(depth="+str(depth)+")") + line += typ.cyan("(depth=" + str(depth) + ")") print(line) for i, c in enumerate(sorted_by_str(root.children)): - skip = True if root[c].marked: skip = False elif print_type == "complete": skip = False - elif (root[c].num_visits > 1): + elif root[c].num_visits > 1: skip = False if print_type == "marked-only" and not root[c].marked: skip = True @@ -276,36 +285,41 @@ def _print_tree_helper(root, else: next_pos = 
"middle" - _NodePP._print_tree_helper(root[c], - next_depth, - c, - branch_positions + [next_pos], - i, - max_depth=max_depth, - print_type=print_type) + _NodePP._print_tree_helper( + root[c], + next_depth, + c, + branch_positions + [next_pos], + i, + max_depth=max_depth, + print_type=print_type, + ) class _QNodePP(_NodePP, QNode): """QNode for better printing""" + def __init__(self, qnode, **kwargs): QNode.__init__(self, qnode.num_visits, qnode.value) _NodePP.__init__(self, qnode, **kwargs) def __str__(self): - return TreeDebugger.single_node_str(self, - parent_edge=self.parent_edge, - include_children=self.print_children) + return TreeDebugger.single_node_str( + self, parent_edge=self.parent_edge, include_children=self.print_children + ) + class _VNodePP(_NodePP, VNode): """VNode for better printing""" + def __init__(self, vnode, **kwargs): VNode.__init__(self, vnode.num_visits) _NodePP.__init__(self, vnode, **kwargs) def __str__(self): - return TreeDebugger.single_node_str(self, - parent_edge=self.parent_edge, - include_children=self.print_children) + return TreeDebugger.single_node_str( + self, parent_edge=self.parent_edge, include_children=self.print_children + ) class TreeDebugger: @@ -315,6 +329,7 @@ class TreeDebugger: QNodes (value represents Q(b,a); children are observations) and VNodes (value represents V(b); children are actions). """ + def __init__(self, tree): """ Args: @@ -323,18 +338,21 @@ def __init__(self, tree): which can be accessed by agent.tree. """ if not isinstance(tree, TreeNode): - raise ValueError("Expecting tree to be a TreeNode, but got {}".format(type(tree))) + raise ValueError( + "Expecting tree to be a TreeNode, but got {}".format(type(tree)) + ) self.tree = _node_pp(tree) - self.current = self.tree # points to the node the user is interacting with + self.current = self.tree # points to the node the user is interacting with self._stats_cache = {} def __str__(self): return str(self.current) def __repr__(self): - nodestr = TreeDebugger.single_node_str(self.current, - parent_edge=self.current.parent_edge) + nodestr = TreeDebugger.single_node_str( + self.current, parent_edge=self.current.parent_edge + ) return "TreeDebugger@\n{}".format(nodestr) def __getitem__(self, key): @@ -354,29 +372,31 @@ def _get_stats(self): self._stats_cache[id(self.current)] = stats return stats - def num_nodes(self, kind='all'): + def num_nodes(self, kind="all"): """ Returns the total number of nodes in the tree rooted at "current" """ stats = self._get_stats() res = { - 'all': stats['total_vnodes'] + stats['total_qnodes'], - 'q': stats['total_qnodes'], - 'v': stats['total_vnodes'] + "all": stats["total_vnodes"] + stats["total_qnodes"], + "q": stats["total_qnodes"], + "v": stats["total_vnodes"], } if kind in res: return res[kind] else: - raise ValueError("Invalid value for kind={}; Valid values are {}"\ - .format(kind, list(res.keys()))) - + raise ValueError( + "Invalid value for kind={}; Valid values are {}".format( + kind, list(res.keys()) + ) + ) @property def depth(self): """Tree depth starts from 0 (root node only). 
It is the largest number of edges on a path from root to leaf.""" stats = self._get_stats() - return stats['max_depth'] + return stats["max_depth"] @property def d(self): @@ -397,17 +417,17 @@ def nl(self): @property def nn(self): """Returns the total number of nodes in the tree""" - return self.num_nodes(kind='all') + return self.num_nodes(kind="all") @property def nq(self): """Returns the total number of QNodes in the tree""" - return self.num_nodes(kind='q') + return self.num_nodes(kind="q") @property def nv(self): """Returns the total number of VNodes in the tree""" - return self.num_nodes(kind='v') + return self.num_nodes(kind="v") def l(self, depth, as_debuggers=True): """alias for layer""" @@ -424,12 +444,16 @@ def layer(self, depth, as_debuggers=True): one for each tree on the layer. """ if depth < 0 or depth > self.depth: - raise ValueError("Depth {} is out of range (0-{})".format(depth, self.depth)) + raise ValueError( + "Depth {} is out of range (0-{})".format(depth, self.depth) + ) nodes = [] self._layer_helper(self.current, 0, depth, nodes) return nodes - def _layer_helper(self, root, current_depth, target_depth, nodes, as_debuggers=True): + def _layer_helper( + self, root, current_depth, target_depth, nodes, as_debuggers=True + ): if current_depth == target_depth: if isinstance(root, VNode): if as_debuggers: @@ -442,10 +466,7 @@ def _layer_helper(self, root, current_depth, target_depth, nodes, as_debuggers=T next_depth = current_depth else: next_depth = current_depth + 1 - self._layer_helper(root[c], - next_depth, - target_depth, - nodes) + self._layer_helper(root[c], next_depth, target_depth, nodes) @property def leaf(self): @@ -470,7 +491,6 @@ def step(self, key): self.current = self[edge] print("step: " + str(edge)) - def s(self, key): """alias for step""" return self.step(key) @@ -587,9 +607,9 @@ def single_node_str(node, parent_edge=None, indent=1, include_children=True): if parent_edge is not None: output += opposite_color(str(parent_edge)) + "⟶" - output += color(str(node.__class__.__name__))\ - + "(n={}, v={:.3f})".format(node.num_visits, - node.value) + output += color(str(node.__class__.__name__)) + "(n={}, v={:.3f})".format( + node.num_visits, node.value + ) if include_children: output += "\n" for i, action in enumerate(sorted_by_str(node.children)): @@ -597,9 +617,9 @@ def single_node_str(node, parent_edge=None, indent=1, include_children=True): child_info = TreeDebugger.single_node_str(child, include_children=False) spaces = " " * indent - output += "{}- [{}] {}: {}".format(spaces, i, - typ.white(str(action)), - child_info) + output += "{}- [{}] {}: {}".format( + spaces, i, typ.white(str(action)), child_info + ) if i < len(node.children) - 1: output += "\n" output += "\n" @@ -632,7 +652,7 @@ def _preferred_actions_helper(root, depth, seq, max_depth=None): equally_good = [] if isinstance(root, VNode): for c in root.children: - if not(c == best_child) and root[c].value == best_value: + if not (c == best_child) and root[c].value == best_value: equally_good.append(c) if best_child is not None and root[best_child] is not None: @@ -642,8 +662,9 @@ def _preferred_actions_helper(root, depth, seq, max_depth=None): else: next_depth = depth + 1 - TreeDebugger._preferred_actions_helper(root[best_child], next_depth, seq, - max_depth=max_depth) + TreeDebugger._preferred_actions_helper( + root[best_child], next_depth, seq, max_depth=max_depth + ) def path(self, dest): """alias for path_to; @@ -691,17 +712,17 @@ def _get_path(self, start, dest, parent): def tree_stats(root, 
max_depth=None): """Gather statistics about the tree""" stats = { - 'total_vnodes': 0, - 'total_qnodes': 0, - 'total_vnodes_children': 0, - 'total_qnodes_children': 0, - 'max_vnodes_children': 0, - 'max_qnodes_children': 0, - 'max_depth': 0 + "total_vnodes": 0, + "total_qnodes": 0, + "total_vnodes_children": 0, + "total_qnodes_children": 0, + "max_vnodes_children": 0, + "max_qnodes_children": 0, + "max_depth": 0, } TreeDebugger._tree_stats_helper(root, 0, stats, max_depth=max_depth) - stats['num_visits'] = root.num_visits - stats['value'] = root.value + stats["num_visits"] = root.num_visits + stats["value"] = root.value return stats @staticmethod @@ -710,28 +731,37 @@ def _tree_stats_helper(root, depth, stats, max_depth=None): return else: if isinstance(root, VNode): - stats['total_vnodes'] += 1 - stats['total_vnodes_children'] += len(root.children) - stats['max_vnodes_children'] = max(stats['max_vnodes_children'], len(root.children)) - stats['max_depth'] = max(stats['max_depth'], depth) + stats["total_vnodes"] += 1 + stats["total_vnodes_children"] += len(root.children) + stats["max_vnodes_children"] = max( + stats["max_vnodes_children"], len(root.children) + ) + stats["max_depth"] = max(stats["max_depth"], depth) else: - stats['total_qnodes'] += 1 - stats['total_qnodes_children'] += len(root.children) - stats['max_qnodes_children'] = max(stats['max_qnodes_children'], len(root.children)) + stats["total_qnodes"] += 1 + stats["total_qnodes_children"] += len(root.children) + stats["max_qnodes_children"] = max( + stats["max_qnodes_children"], len(root.children) + ) for c in root.children: if isinstance(root[c], QNode): next_depth = depth else: next_depth = depth + 1 - TreeDebugger._tree_stats_helper(root[c], next_depth, stats, max_depth=max_depth) + TreeDebugger._tree_stats_helper( + root[c], next_depth, stats, max_depth=max_depth + ) + def sorted_by_str(enumerable): return sorted(enumerable, key=lambda n: str(n)) + def interpret_color(colorstr): if colorstr.lower() in typ.colors: return eval("typ.{}".format(colorstr)) else: - raise ValueError("Invalid color: {};\n" - "The available ones are {}".format(colorstr, typ.colors)) + raise ValueError( + "Invalid color: {};\nThe available ones are {}".format(colorstr, typ.colors) + ) diff --git a/pomdp_py/utils/interfaces/conversion.py b/pomdp_py/utils/interfaces/conversion.py index feece47f..b5fb7339 100644 --- a/pomdp_py/utils/interfaces/conversion.py +++ b/pomdp_py/utils/interfaces/conversion.py @@ -2,14 +2,15 @@ Provides utility to convert a POMDP written in pomdp_py to a POMDP file format (.pomdp or .pomdpx). Output to a file. """ + import subprocess import os import pomdp_py import numpy as np import xml.etree.ElementTree as ET -def to_pomdp_file(agent, output_path=None, - discount_factor=0.95, float_precision=9): + +def to_pomdp_file(agent, output_path=None, discount_factor=0.95, float_precision=9): """ Pass in an Agent, and use its components to generate a .pomdp file to `output_path`. @@ -42,30 +43,41 @@ all_actions = list(agent.all_actions) all_observations = list(agent.all_observations) except NotImplementedError: - raise ValueError("S, A, O must be enumerable for a given agent to convert to .pomdp format") + raise ValueError( + "S, A, O must be enumerable for a given agent to convert to .pomdp format" + ) content = f"discount: %.{float_precision}f\n" % discount_factor - content += "values: reward\n" # We only consider reward, not cost.
+ content += "values: reward\n" # We only consider reward, not cost. list_of_states = " ".join(str(s) for s in all_states) - assert len(list_of_states.split(" ")) == len(all_states),\ - "states must be convertable to strings without blank spaces" + assert len(list_of_states.split(" ")) == len( + all_states + ), "states must be convertable to strings without blank spaces" content += "states: %s\n" % list_of_states list_of_actions = " ".join(str(a) for a in all_actions) - assert len(list_of_actions.split(" ")) == len(all_actions),\ - "actions must be convertable to strings without blank spaces" + assert len(list_of_actions.split(" ")) == len( + all_actions + ), "actions must be convertable to strings without blank spaces" content += "actions: %s\n" % list_of_actions list_of_observations = " ".join(str(o) for o in all_observations) - assert len(list_of_observations.split(" ")) == len(all_observations),\ - "observations must be convertable to strings without blank spaces" + assert len(list_of_observations.split(" ")) == len( + all_observations + ), "observations must be convertable to strings without blank spaces" content += "observations: %s\n" % list_of_observations # Starting belief state - they need to be normalized total_belief = sum(agent.belief[s] for s in all_states) - content += "start: %s\n" % (" ".join([f"%.{float_precision}f" % (agent.belief[s]/total_belief) - for s in all_states])) + content += "start: %s\n" % ( + " ".join( + [ + f"%.{float_precision}f" % (agent.belief[s] / total_belief) + for s in all_states + ] + ) + ) # State transition probabilities - they need to be normalized for s in all_states: @@ -77,7 +89,12 @@ def to_pomdp_file(agent, output_path=None, total_prob = sum(probs) for i, s_next in enumerate(all_states): prob_norm = probs[i] / total_prob - content += f'T : %s : %s : %s %.{float_precision}f\n' % (a, s, s_next, prob_norm) + content += f"T : %s : %s : %s %.{float_precision}f\n" % ( + a, + s, + s_next, + prob_norm, + ) # Observation probabilities - they need to be normalized for s_next in all_states: @@ -87,12 +104,17 @@ def to_pomdp_file(agent, output_path=None, prob = agent.observation_model.probability(o, s_next, a) probs.append(prob) total_prob = sum(probs) - assert total_prob > 0.0,\ - "No observation is probable under state={} action={}"\ - .format(s_next, a) + assert ( + total_prob > 0.0 + ), "No observation is probable under state={} action={}".format(s_next, a) for i, o in enumerate(all_observations): prob_norm = probs[i] / total_prob - content += f'O : %s : %s : %s %.{float_precision}f\n' % (a, s_next, o, prob_norm) + content += f"O : %s : %s : %s %.{float_precision}f\n" % ( + a, + s_next, + o, + prob_norm, + ) # Immediate rewards for s in all_states: @@ -100,7 +122,12 @@ def to_pomdp_file(agent, output_path=None, for s_next in all_states: # We will take the argmax reward, which works for deterministic rewards. r = agent.reward_model.sample(s, a, s_next) - content += f'R : %s : %s : %s : * %.{float_precision}f\n' % (a, s, s_next, r) + content += f"R : %s : %s : %s : * %.{float_precision}f\n" % ( + a, + s, + s_next, + r, + ) if output_path is not None: with open(output_path, "w") as f: @@ -108,10 +135,7 @@ def to_pomdp_file(agent, output_path=None, return all_states, all_actions, all_observations - -def to_pomdpx_file(agent, pomdpconvert_path, - output_path=None, - discount_factor=0.95): +def to_pomdpx_file(agent, pomdpconvert_path, output_path=None, discount_factor=0.95): """ Converts an agent to a pomdpx file. 
This works by first converting the agent into a .pomdp file and then using the :code:`pomdpconvert` utility program to convert that file to a .pomdpx file. Check out @@ -133,15 +157,14 @@ def to_pomdpx_file(agent, pomdpconvert_path, discount_factor (float): The discount factor """ pomdp_path = "./temp-pomdp.pomdp" - to_pomdp_file(agent, pomdp_path, - discount_factor=discount_factor) + to_pomdp_file(agent, pomdp_path, discount_factor=discount_factor) proc = subprocess.Popen([pomdpconvert_path, pomdp_path]) proc.wait() pomdpx_path = pomdp_path + "x" assert os.path.exists(pomdpx_path), "POMDPx conversion failed." - with open(pomdpx_path, 'r') as f: + with open(pomdpx_path, "r") as f: content = f.read() if output_path is not None: @@ -175,7 +198,7 @@ def parse_pomdp_solve_output(alpha_path, pg_path=None): policy_graph: a mapping from node number to (action_number, edges) """ alphas = [] # (alpha_vector, action_number) tuples - with open(alpha_path, 'r') as f: + with open(alpha_path, "r") as f: action_number = None alpha_vector = None mode = "action" @@ -198,19 +221,19 @@ def parse_pomdp_solve_output(alpha_path, pg_path=None): if pg_path is None: return alphas else: - with open(pg_path, 'r') as f: + with open(pg_path, "r") as f: for line in f: line = line.rstrip() if len(line) == 0: continue parts = list(map(int, line.split())) # Splits on whitespace - assert parts[0] not in policy_graph,\ + assert parts[0] not in policy_graph, ( "The node id %d already exists. Something wrong" % parts[0] + ) policy_graph[parts[0]] = (parts[1], parts[2:]) return alphas, policy_graph - class AlphaVectorPolicy(pomdp_py.Planner): """ An offline POMDP policy is specified by a collection @@ -227,6 +250,7 @@ class AlphaVectorPolicy(pomdp_py.Planner): This can be constructed using .policy file created by sarsop. """ + def __init__(self, alphas, states): """ Args: @@ -240,8 +264,7 @@ def __init__(self, alphas, states): def plan(self, agent): """Returns an action that is mapped by the agent belief, under this policy""" b = [agent.belief[s] for s in self.states] - _, action = max(self.alphas, - key=lambda va: np.dot(b, va[0])) + _, action = max(self.alphas, key=lambda va: np.dot(b, va[0])) return action def value(self, belief): @@ -251,13 +274,11 @@ def value(self, belief): :math:`V(b) = max_{a\in\Gamma} {a} \cdot b` """ b = [belief[s] for s in self.states] - alpha_vector, _ = max(self.alphas, - key=lambda va: np.dot(b, va[0])) + alpha_vector, _ = max(self.alphas, key=lambda va: np.dot(b, va[0])) return np.dot(b, alpha_vector) @classmethod - def construct(cls, policy_path, - states, actions, solver="pomdpsol"): + def construct(cls, policy_path, states, actions, solver="pomdpsol"): """ Returns an AlphaVectorPolicy, given `alphas`, which are the output of parse_appl_policy_file. 
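As a minimal standalone sketch of the alpha-vector maximization that plan() and value() above perform (the vectors, belief, and action names here are made-up illustrative numbers, not solver output):

import numpy as np

# Hypothetical (alpha_vector, action) pairs; one alpha value per state.
alphas = [
    ((-10.0, 30.0), "open-left"),
    ((30.0, -10.0), "open-right"),
    ((12.0, 12.0), "listen"),
]
b = [0.5, 0.5]  # belief over the two states

# plan() returns the action of the maximizing alpha vector;
# value() returns the maximum b . alpha itself.
alpha, action = max(alphas, key=lambda va: np.dot(b, va[0]))
print(action, np.dot(b, alpha))  # -> listen 12.0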
@@ -279,35 +300,43 @@ action = actions[int(vector.attrib["action"])] alpha_vector = tuple(map(float, vector.text.split())) alphas.append((alpha_vector, action)) - return AlphaVectorPolicy(alphas, - states) + return AlphaVectorPolicy(alphas, states) @classmethod - def construct_from_pomdp_solve(cls, alpha_path, - states, actions): + def construct_from_pomdp_solve(cls, alpha_path, states, actions): alphas_with_action_numbers = parse_pomdp_solve_output(alpha_path) - alphas = [(alpha_vector, actions[action_number]) - for alpha_vector, action_number in alphas_with_action_numbers] + alphas = [ + (alpha_vector, actions[action_number]) + for alpha_vector, action_number in alphas_with_action_numbers + ] return AlphaVectorPolicy(alphas, states) class PGNode: """A node on the policy graph""" + def __init__(self, node_id, alpha_vector, action): self.node_id = node_id self.alpha_vector = alpha_vector self.action = action + def __eq__(self, other): if isinstance(other, PGNode): return self.node_id == other.node_id return False + def __hash__(self): return hash(self.node_id) + def __str__(self): return repr(self) + def __repr__(self): - return "NodeID(%d)::AlphaVector(%s)::Action(%s)\n"\ - % (self.node_id, str(self.alpha_vector), self.action) + return "NodeID(%d)::AlphaVector(%s)::Action(%s)\n" % ( + self.node_id, + str(self.alpha_vector), + self.action, + ) class PolicyGraph(pomdp_py.Planner): @@ -323,14 +352,13 @@ def __init__(self, nodes, edges, states): edges (dict): Mapping from node_id to a dictionary {observation -> node_id} states (list): List of states, ordered as in .pomdp file """ - self.nodes = {n.node_id:n for n in nodes} + self.nodes = {n.node_id: n for n in nodes} self.edges = edges self.states = states self._current_node = None @classmethod - def construct(cls, alpha_path, pg_path, - states, actions, observations): + def construct(cls, alpha_path, pg_path, states, actions, observations): """ See parse_pomdp_solve_output for detailed definitions of alphas and pg. @@ -356,8 +384,9 @@ assert 0 <= node_id < len(nodes), "Invalid node id in policy graph" action_number, o_links = pg[node_id] - assert actions[action_number] == nodes[node_id].action,\ - "Inconsistent action mapping" + assert ( + actions[action_number] == nodes[node_id].action + ), "Inconsistent action mapping" edges[node_id] = {} @@ -374,10 +403,9 @@ def plan(self, agent): def _find_node(self, agent): """Locate the node in the policy graph corresponding to the agent's current - belief state. """ + belief state.""" b = [agent.belief[s] for s in self.states] - nid = max(self.nodes, - key=lambda nid: np.dot(b, self.nodes[nid].alpha_vector)) + nid = max(self.nodes, key=lambda nid: np.dot(b, self.nodes[nid].alpha_vector)) return self.nodes[nid] def update(self, agent, action, observation): @@ -389,4 +417,6 @@ # Find out the node number using agent current belief self._current_node = self._find_node(agent) # Transition the current node following the graph - self._current_node = self.nodes[self.edges[self._current_node.node_id][observation]] + self._current_node = self.nodes[ + self.edges[self._current_node.node_id][observation] + ] diff --git a/pomdp_py/utils/interfaces/simple_rl.py b/pomdp_py/utils/interfaces/simple_rl.py index ba627fcb..7df1d273 100644 --- a/pomdp_py/utils/interfaces/simple_rl.py +++ b/pomdp_py/utils/interfaces/simple_rl.py @@ -15,8 +15,10 @@ lacking.
Providing this interface is mostly to potentially leverage the MDP algorithms in simple_rl. """ + import simple_rl + def convert_to_MDPClass(pomdp, discount_factor=0.99, step_cost=0): """Converts the pomdp to the building block MDPClass of simple_rl. There are a lot of variants of MDPClass in simple_rl. It is up to the user to then convert this @@ -39,16 +41,18 @@ # that simple_rl is supposed to work with. state = simple_rl.State(data=env.state) - return simple_rl.MDP(all_actions, - agent.transition_model.sample, - agent.reward_model.sample, - gamma=discount_factor, - step_cost=step_cost) + return simple_rl.MDP( + all_actions, + agent.transition_model.sample, + agent.reward_model.sample, + gamma=discount_factor, + step_cost=step_cost, + ) -def convert_to_POMDPClass(pomdp, - discount_factor=0.99, step_cost=0, - belief_updater_type="discrete"): +def convert_to_POMDPClass( + pomdp, discount_factor=0.99, step_cost=0, belief_updater_type="discrete" +): agent = pomdp.agent env = pomdp.env try: @@ -63,15 +67,19 @@ try: belief_hist = agent.belief.get_histogram() except Exception: - raise ValueError("Agent belief cannot be converted into a histogram;\n" - "thus cannot create POMDPClass.") + raise ValueError( + "Agent belief cannot be converted into a histogram;\n" + "thus cannot create POMDPClass." + ) - return simple_rl.POMDP(all_actions, - all_observations, - agent.transition_model.sample, - agent.reward_model.sample, - agent.observation_model.sample, - belief_hist, - belief_updater_type=belief_updater_type, - gamma=discount_factor, - step_cost=step_cost) + return simple_rl.POMDP( + all_actions, + all_observations, + agent.transition_model.sample, + agent.reward_model.sample, + agent.observation_model.sample, + belief_hist, + belief_updater_type=belief_updater_type, + gamma=discount_factor, + step_cost=step_cost, + ) diff --git a/pomdp_py/utils/interfaces/solvers.py b/pomdp_py/utils/interfaces/solvers.py index f05632bf..87b9eddc 100644 --- a/pomdp_py/utils/interfaces/solvers.py +++ b/pomdp_py/utils/interfaces/solvers.py @@ -10,18 +10,27 @@ * `POMDP.jl `_ * more? Help us if you can! """ + import pomdp_py -from pomdp_py.utils.interfaces.conversion\ - import to_pomdp_file, PolicyGraph, AlphaVectorPolicy, parse_pomdp_solve_output +from pomdp_py.utils.interfaces.conversion import ( + to_pomdp_file, + PolicyGraph, + AlphaVectorPolicy, + parse_pomdp_solve_output, +) import subprocess import os, sys -def vi_pruning(agent, pomdp_solve_path, - discount_factor=0.95, - options=[], - pomdp_name="temp-pomdp", - remove_generated_files=False, - return_policy_graph=False): + +def vi_pruning( + agent, + pomdp_solve_path, + discount_factor=0.95, + options=[], + pomdp_name="temp-pomdp", + remove_generated_files=False, + return_policy_graph=False, +): """ Value Iteration with pruning, using the software pomdp-solve https://www.pomdp.org/code/ developed by Anthony R. Cassandra.
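A usage sketch of this wrapper, assuming a locally built pomdp-solve binary (the "./pomdp-solve" path is a placeholder, and the keyword arguments are those of the signature above):

import pomdp_py
from pomdp_py.problems.tiger import make_tiger

tiger = make_tiger(noise=0.15)
# "./pomdp-solve" is a placeholder path to the compiled pomdp-solve binary.
policy = pomdp_py.vi_pruning(
    tiger.agent,
    "./pomdp-solve",
    discount_factor=0.95,
    pomdp_name="temp-pomdp",
    remove_generated_files=True,
)
print(policy.plan(tiger.agent))  # an action, e.g. listen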
@@ -49,24 +58,29 @@ all_states = list(agent.all_states) all_actions = list(agent.all_actions) all_observations = list(agent.all_observations) except NotImplementedError: - raise("S, A, O must be enumerable for a given agent to convert to .pomdp format") + raise ValueError( + "S, A, O must be enumerable for a given agent to convert to .pomdp format" + ) pomdp_path = "./%s.pomdp" % pomdp_name to_pomdp_file(agent, pomdp_path, discount_factor=discount_factor) - proc = subprocess.Popen([pomdp_solve_path, - "-pomdp", pomdp_path, - "-o", pomdp_name] + list(map(str,options))) + proc = subprocess.Popen( + [pomdp_solve_path, "-pomdp", pomdp_path, "-o", pomdp_name] + + list(map(str, options)) + ) proc.wait() # Read the value and policy graph files alpha_path = "%s.alpha" % pomdp_name pg_path = "%s.pg" % pomdp_name if return_policy_graph: - policy = PolicyGraph.construct(alpha_path, pg_path, - all_states, all_actions, all_observations) + policy = PolicyGraph.construct( + alpha_path, pg_path, all_states, all_actions, all_observations + ) else: policy = AlphaVectorPolicy.construct( - alpha_path, all_states, all_actions, solver="pomdp-solve") + alpha_path, all_states, all_actions, solver="pomdp-solve" + ) # Remove temporary files if remove_generated_files: @@ -76,13 +90,17 @@ return policy -def sarsop(agent, pomdpsol_path, - discount_factor=0.95, - timeout=30, memory=100, - precision=0.5, - pomdp_name="temp-pomdp", - remove_generated_files=False, - logfile=None): +def sarsop( + agent, + pomdpsol_path, + discount_factor=0.95, + timeout=30, + memory=100, + precision=0.5, + pomdp_name="temp-pomdp", + remove_generated_files=False, + logfile=None, +): """ SARSOP, using the binary from https://github.com/AdaCompNUS/sarsop This is an anytime POMDP planning algorithm @@ -104,7 +122,9 @@ all_actions = list(agent.all_actions) all_observations = list(agent.all_observations) except NotImplementedError: - raise("S, A, O must be enumerable for a given agent to convert to .pomdpx format") + raise ValueError( + "S, A, O must be enumerable for a given agent to convert to .pomdpx format" + ) if logfile is None: stdout = None @@ -116,12 +136,22 @@ pomdp_path = "./%s.pomdp" % pomdp_name to_pomdp_file(agent, pomdp_path, discount_factor=discount_factor) - proc = subprocess.Popen([pomdpsol_path, - "--timeout", str(timeout), - "--memory", str(memory), - "--precision", str(precision), - "--output", "%s.policy" % pomdp_name, - pomdp_path], stdout=stdout, stderr=stderr) + proc = subprocess.Popen( + [ + pomdpsol_path, + "--timeout", + str(timeout), + "--memory", + str(memory), + "--precision", + str(precision), + "--output", + "%s.policy" % pomdp_name, + pomdp_path, + ], + stdout=stdout, + stderr=stderr, + ) if logfile is not None: for line in proc.stdout: line = line.decode("utf-8") @@ -130,8 +160,7 @@ proc.wait() policy_path = "%s.policy" % pomdp_name - policy = AlphaVectorPolicy.construct(policy_path, - all_states, all_actions) + policy = AlphaVectorPolicy.construct(policy_path, all_states, all_actions) # Remove temporary files if remove_generated_files: diff --git a/pomdp_py/utils/math.py b/pomdp_py/utils/math.py index 763941d9..2da296c5 100644 --- a/pomdp_py/utils/math.py +++ b/pomdp_py/utils/math.py @@ -1,16 +1,19 @@ """Assorted utilities for math""" + import math import numpy as np + # Math def vec(p1, p2): - """ vector from p1 to p2 """ + """vector from p1 to p2""" if type(p1) != np.ndarray: p1 =
np.array(p1) if type(p2) != np.ndarray: p2 = np.array(p2) return p2 - p1 + def proj(vec1, vec2, scalar=False): # Project vec1 onto vec2. Returns a vector in the direction of vec2. scale = np.dot(vec1, vec2) / np.linalg.norm(vec2) @@ -19,40 +22,83 @@ def proj(vec1, vec2, scalar=False): else: return vec2 * scale + def R_x(th): - return np.array([ - 1, 0, 0, 0, - 0, np.cos(th), -np.sin(th), 0, - 0, np.sin(th), np.cos(th), 0, - 0, 0, 0, 1 - ]).reshape(4,4) + return np.array( + [ + 1, + 0, + 0, + 0, + 0, + np.cos(th), + -np.sin(th), + 0, + 0, + np.sin(th), + np.cos(th), + 0, + 0, + 0, + 0, + 1, + ] + ).reshape(4, 4) + def R_y(th): - return np.array([ - np.cos(th), 0, np.sin(th), 0, - 0, 1, 0, 0, - -np.sin(th), 0, np.cos(th), 0, - 0, 0, 0, 1 - ]).reshape(4,4) + return np.array( + [ + np.cos(th), + 0, + np.sin(th), + 0, + 0, + 1, + 0, + 0, + -np.sin(th), + 0, + np.cos(th), + 0, + 0, + 0, + 0, + 1, + ] + ).reshape(4, 4) + def R_z(th): - return np.array([ - np.cos(th), -np.sin(th), 0, 0, - np.sin(th), np.cos(th), 0, 0, - 0, 0, 1, 0, - 0, 0, 0, 1 - ]).reshape(4,4) + return np.array( + [ + np.cos(th), + -np.sin(th), + 0, + 0, + np.sin(th), + np.cos(th), + 0, + 0, + 0, + 0, + 1, + 0, + 0, + 0, + 0, + 1, + ] + ).reshape(4, 4) + def T(dx, dy, dz): - return np.array([ - 1, 0, 0, dx, - 0, 1, 0, dy, - 0, 0, 1, dz, - 0, 0, 0, 1 - ]).reshape(4,4) + return np.array([1, 0, 0, dx, 0, 1, 0, dy, 0, 0, 1, dz, 0, 0, 0, 1]).reshape(4, 4) + def to_radians(th): - return th*np.pi / 180 + return th * np.pi / 180 + def R_between(v1, v2): if len(v1) != 3 or len(v2) != 3: @@ -62,14 +108,11 @@ def R_between(v1, v2): s = np.linalg.norm(v) I = np.identity(3) - vX = np.array([ - 0, -v[2], v[1], - v[2], 0, -v[0], - -v[1], v[0], 0 - ]).reshape(3,3) - R = I + vX + np.matmul(vX,vX) * ((1-c)/(s**2)) + vX = np.array([0, -v[2], v[1], v[2], 0, -v[0], -v[1], v[0], 0]).reshape(3, 3) + R = I + vX + np.matmul(vX, vX) * ((1 - c) / (s**2)) return R + def approx_equal(v1, v2, epsilon=1e-6): if len(v1) != len(v2): return False @@ -78,5 +121,6 @@ def approx_equal(v1, v2, epsilon=1e-6): return False return True + def euclidean_dist(p1, p2): - return math.sqrt(sum([(a - b)** 2 for a, b in zip(p1, p2)])) + return math.sqrt(sum([(a - b) ** 2 for a, b in zip(p1, p2)])) diff --git a/pomdp_py/utils/misc.py b/pomdp_py/utils/misc.py index 172267f6..c6a798c3 100644 --- a/pomdp_py/utils/misc.py +++ b/pomdp_py/utils/misc.py @@ -1,6 +1,8 @@ """Misc Python utilities""" + from difflib import SequenceMatcher + def remap(oldvalue, oldmin, oldmax, newmin, newmax): if oldmax - oldmin == 0: print("Warning in remap: the old range has size 0") @@ -14,29 +16,30 @@ def json_safe(obj): elif isinstance(obj, (list, tuple)): return [json_safe(item) for item in obj] elif isinstance(obj, dict): - return {json_safe(key):json_safe(value) for key, value in obj.items()} + return {json_safe(key): json_safe(value) for key, value in obj.items()} else: return str(obj) return obj def safe_slice(arr, start, end): - true_start = max(0, min(len(arr)-1, start)) - true_end = max(0, min(len(arr)-1, end)) + true_start = max(0, min(len(arr) - 1, start)) + true_end = max(0, min(len(arr) - 1, end)) return arr[true_start:true_end] + def similar(a, b): # Reference: https://stackoverflow.com/questions/17388213/find-the-similarity-metric-between-two-strings return SequenceMatcher(None, a, b).ratio() class special_char: - left = '\u2190' - up = '\u2191' - right = '\u2192' - down = '\u2193' - longleft = '\u27F5' - longright = '\u27F6' + left = "\u2190" + up = "\u2191" + right = "\u2192" + down = 
"\u2193" + longleft = "\u27f5" + longright = "\u27f6" hline = "─" vline = "│" diff --git a/pomdp_py/utils/templates.py b/pomdp_py/utils/templates.py index 9d3d146d..e7cb874a 100644 --- a/pomdp_py/utils/templates.py +++ b/pomdp_py/utils/templates.py @@ -1,69 +1,93 @@ """ Some particular implementations of the interface for convenience """ + import pomdp_py import random + class SimpleState(pomdp_py.State): """A SimpleState is a state that stores one piece of hashable data and the equality of two states of this kind depends just on this data""" + def __init__(self, data): self.data = data + def __hash__(self): return hash(self.data) + def __eq__(self, other): if isinstance(other, SimpleState): return self.data == other.data return False + def __ne__(self, other): return not self.__eq__(other) + def __str__(self): return str(self.data) + def __repr__(self): return "SimpleState({})".format(self.data) + class SimpleAction(pomdp_py.Action): """A SimpleAction is an action defined by a string name""" + def __init__(self, name): self.name = name + def __hash__(self): return hash(self.name) + def __eq__(self, other): if isinstance(other, SimpleAction): return self.name == other.name return False + def __ne__(self, other): return not self.__eq__(other) + def __str__(self): return self.name + def __repr__(self): return "SimpleAction({})".format(self.name) + class SimpleObservation(pomdp_py.Observation): """A SimpleObservation is an observation with a piece of hashable data that defines the equality.""" + def __init__(self, data): self.data = data + def __hash__(self): return hash(self.data) + def __eq__(self, other): if isinstance(other, SimpleObservation): return self.data == other.data return False + def __ne__(self, other): return not self.__eq__(other) + def __str__(self): return str(self.data) + def __repr__(self): return "SimpleObservation({})".format(self.data) + class DetTransitionModel(pomdp_py.TransitionModel): """A DetTransitionModel is a deterministic transition model. A probability of 1 - epsilon is given for correct transition, and epsilon is given for incorrect transition.""" + def __init__(self, epsilon=1e-12): self.epsilon = epsilon @@ -81,6 +105,7 @@ class DetObservationModel(pomdp_py.ObservationModel): """A DetTransitionModel is a deterministic transition model. 
A probability of 1 - epsilon is given for correct transition, and epsilon is given for incorrect transition.""" + def __init__(self, epsilon=1e-12): self.epsilon = epsilon @@ -96,6 +121,7 @@ def sample(self, next_state, action): class DetRewardModel(pomdp_py.RewardModel): """A DetRewardModel is a deterministic reward model (the most typical kind).""" + def reward_func(self, state, action, next_state): raise NotImplementedError @@ -106,6 +132,7 @@ def sample(self, state, action, next_state): def argmax(self, state, action, next_state): return self.sample(state, action, next_state) + class UniformPolicyModel(pomdp_py.RolloutPolicy): def __init__(self, actions): self.actions = actions @@ -127,6 +154,7 @@ class TabularTransitionModel(pomdp_py.TransitionModel): given `weights` is complete, that is, it specifies the probability of all state-action-nextstate combinations """ + def __init__(self, weights): self.weights = weights self._states = set() @@ -137,13 +165,16 @@ def __init__(self, weights): def probability(self, next_state, state, action): if (state, action, next_state) in self.weights: return self.weights[(state, action, next_state)] - raise ValueError("The transition probability for"\ - f"{(state, action, next_state)} is not defined") + raise ValueError( + "The transition probability for" + f"{(state, action, next_state)} is not defined" + ) def sample(self, state, action): next_states = list(self._states) - probs = [self.probability(next_state, state, action) - for next_state in next_states] + probs = [ + self.probability(next_state, state, action) for next_state in next_states + ] return random.choices(next_states, weights=probs, k=1)[0] def get_all_states(self): @@ -155,6 +186,7 @@ class TabularObservationModel(pomdp_py.ObservationModel): (next_state, action, observation) to a probability. This model assumes that the given `weights` is complete. """ + def __init__(self, weights): self.weights = weights self._observations = set() @@ -167,14 +199,18 @@ def probability(self, observation, next_state, action): return self.weights[(next_state, action, observation)] elif (next_state, observation) in self.weights: return self.weights[(next_state, observation)] - raise ValueError("The observation probability for" - f"{(next_state, action, observation)} or {(next_state, observation)}" - "is not defined") + raise ValueError( + "The observation probability for" + f"{(next_state, action, observation)} or {(next_state, observation)}" + "is not defined" + ) def sample(self, next_state, action): observations = list(self._observations) - probs = [self.probability(observation, next_state, action) - for observation in observations] + probs = [ + self.probability(observation, next_state, action) + for observation in observations + ] return random.choices(observations, weights=probs, k=1)[0] def get_all_observations(self): @@ -186,6 +222,7 @@ class TabularRewardModel(pomdp_py.RewardModel): tuple (state, action), or (state, action, next_state) to a probability. This model assumes that the given `rewards` is complete. 
""" + def __init__(self, rewards): self.rewards = rewards @@ -200,6 +237,8 @@ def sample(self, state, action, *args): if (state, action, next_state) in self.rewards: return self.rewards[(state, action, next_state)] - raise ValueError("The reward is undefined for" - f"state={state}, action={action}" - f"next_state={args}") + raise ValueError( + "The reward is undefined for" + f"state={state}, action={action}" + f"next_state={args}" + ) diff --git a/pomdp_py/utils/typ.py b/pomdp_py/utils/typ.py index ef7e8bb4..6d37f6a4 100644 --- a/pomdp_py/utils/typ.py +++ b/pomdp_py/utils/typ.py @@ -3,28 +3,29 @@ strings for the purpose of displaying them. """ + # Colors on terminal https://stackoverflow.com/a/287944/2893053 class bcolors: - WHITE = '\033[97m' - CYAN = '\033[96m' - MAGENTA = '\033[95m' - BLUE = '\033[94m' - GREEN = '\033[92m' - YELLOW = '\033[93m' - RED = '\033[91m' - BOLD = '\033[1m' - ENDC = '\033[0m' + WHITE = "\033[97m" + CYAN = "\033[96m" + MAGENTA = "\033[95m" + BLUE = "\033[94m" + GREEN = "\033[92m" + YELLOW = "\033[93m" + RED = "\033[91m" + BOLD = "\033[1m" + ENDC = "\033[0m" @staticmethod def disable(): - bcolors.WHITE = '' - bcolors.CYAN = '' - bcolors.MAGENTA = '' - bcolors.BLUE = '' - bcolors.GREEN = '' - bcolors.YELLOW = '' - bcolors.RED = '' - bcolors.ENDC = '' + bcolors.WHITE = "" + bcolors.CYAN = "" + bcolors.MAGENTA = "" + bcolors.BLUE = "" + bcolors.GREEN = "" + bcolors.YELLOW = "" + bcolors.RED = "" + bcolors.ENDC = "" @staticmethod def s(color, content): @@ -32,51 +33,67 @@ def s(color, content): `color` is a constant in `bcolors` class.""" return color + content + bcolors.ENDC + def info(content): return bcolors.s(bcolors.BLUE, content) + def note(content): return bcolors.s(bcolors.YELLOW, content) + def error(content): return bcolors.s(bcolors.GREEN, content) + def warning(content): return bcolors.s(bcolors.YELLOW, content) + def success(content): return bcolors.s(bcolors.GREEN, content) + def bold(content): return bcolors.s(bcolors.BOLD, content) + def white(content): return bcolors.s(bcolors.WHITE, content) + def green(content): return bcolors.s(bcolors.GREEN, content) + def cyan(content): return bcolors.s(bcolors.CYAN, content) + def magenta(content): return bcolors.s(bcolors.MAGENTA, content) + def blue(content): return bcolors.s(bcolors.BLUE, content) + def green(content): return bcolors.s(bcolors.GREEN, content) + def yellow(content): return bcolors.s(bcolors.YELLOW, content) + def red(content): return bcolors.s(bcolors.RED, content) + def white(content): return bcolors.s(bcolors.WHITE, content) + colors = { "white", "green", diff --git a/pyproject.toml b/pyproject.toml index 317b1555..bd28e85c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -37,3 +37,43 @@ problems = [ [project.urls] Home = "https://github.com/h2r/pomdp-py" + +[tool.ruff] +# Exclude a variety of commonly ignored directories. 
diff --git a/setup.py b/setup.py
index 5b01767d..6ed6be1d 100644
--- a/setup.py
+++ b/setup.py
@@ -5,9 +5,10 @@
 import os.path

-with open("README.rst", 'r') as f:
+with open("README.rst", "r") as f:
     long_description = f.read()

+
 # Build cython files as extensions
 def build_extensions(pkg_name, major_submodules):
     cwd = os.path.abspath(os.path.dirname(__file__))
@@ -22,18 +23,27 @@ def build_extensions(pkg_name, major_submodules):
     return extensions

-extensions = build_extensions("pomdp_py", ["framework",
-                                           "algorithms",
-                                           "utils",
-                                           "representations.distribution",
-                                           "representations.belief",
-                                           "problems.tiger.cythonize",
-                                           "problems.rocksample.cythonize"])
-
-setup(ext_modules=cythonize(extensions,
-                            build_dir="build",
-                            compiler_directives={'language_level' : "3"}),
-      package_data={"pomdp_py": ["*.pxd", "*.pyx", "*.so", "*.c"],
-                    "pomdp_problems": ["*.pxd", "*.pyx", "*.so", "*.c"]},
-      zip_safe=False
+
+extensions = build_extensions(
+    "pomdp_py",
+    [
+        "framework",
+        "algorithms",
+        "utils",
+        "representations.distribution",
+        "representations.belief",
+        "problems.tiger.cythonize",
+        "problems.rocksample.cythonize",
+    ],
+)
+
+setup(
+    ext_modules=cythonize(
+        extensions, build_dir="build", compiler_directives={"language_level": "3"}
+    ),
+    package_data={
+        "pomdp_py": ["*.pxd", "*.pyx", "*.so", "*.c"],
+        "pomdp_problems": ["*.pxd", "*.pyx", "*.so", "*.c"],
+    },
+    zip_safe=False,
 )
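The build entry point itself is unchanged by this reflow; compiling the Cython extensions in place still goes through the standard setuptools `build_ext` command, e.g.:

```python
import subprocess
import sys

# Equivalent to running `python setup.py build_ext --inplace` from the
# repository root: cythonize the listed submodules and place the built
# extension modules next to their sources.
subprocess.run([sys.executable, "setup.py", "build_ext", "--inplace"], check=True)
```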
diff --git a/tests/test_all.py b/tests/test_all.py
index 81f43e2e..e9b4796f 100644
--- a/tests/test_all.py
+++ b/tests/test_all.py
@@ -10,6 +10,7 @@
 ABS_DIR = os.path.dirname(os.path.abspath(__file__))
 sys.path.append(ABS_DIR)

+
 def main():
     parser = argparse.ArgumentParser(description="Running tests.")
     args = parser.parse_args()
@@ -17,12 +18,16 @@ def main():
     # load the test modules
     tests = []
     for fname in sorted(os.listdir(ABS_DIR)):
-        if fname != "test_all.py" and fname.startswith("test") and fname.endswith(".py"):
+        if (
+            fname != "test_all.py"
+            and fname.startswith("test")
+            and fname.endswith(".py")
+        ):
             test_module = importlib.import_module(fname.split(".py")[0])
             tests.append(test_module)

     for i, test_module in enumerate(tests):
-        print(typ.bold("[{}/{}] {}".format(i+1, len(tests), test_module.description)))
+        print(typ.bold("[{}/{}] {}".format(i + 1, len(tests), test_module.description)))

         old_stdout = sys.stdout
         try:
@@ -31,5 +36,6 @@ def main():
             sys.stdout = old_stdout
             print(" Error:", str(ex))

+
 if __name__ == "__main__":
     main()
diff --git a/tests/test_conversion_pomdp-solve.py b/tests/test_conversion_pomdp-solve.py
index 399432d7..4023b76b 100644
--- a/tests/test_conversion_pomdp-solve.py
+++ b/tests/test_conversion_pomdp-solve.py
@@ -10,13 +10,15 @@
 from pomdp_py.problems.tiger import make_tiger

-description="testing conversion to .pomdp file"
+description = "testing conversion to .pomdp file"
+

 def remove_files(pattern):
     file_list = glob.glob(pattern)
     for file_path in file_list:
         os.remove(file_path)

+
 def test_pomdp_file_conversion(pomdp_solve_path):
     print("[testing] test_pomdp_file_conversion")
     tiger = make_tiger()
@@ -27,8 +29,9 @@ def test_pomdp_file_conversion(pomdp_solve_path):
     assert os.path.exists(filename)

     print("[testing] Running pomdp-solve on generated %s" % filename)
-    proc = subprocess.Popen([pomdp_solve_path, "-pomdp", filename],
-                            stdout=subprocess.PIPE)
+    proc = subprocess.Popen(
+        [pomdp_solve_path, "-pomdp", filename], stdout=subprocess.PIPE
+    )
     solution_found = False
     for line in io.TextIOWrapper(proc.stdout, encoding="utf-8"):
         if "Solution found" in line:
@@ -43,21 +46,26 @@
     remove_files("./*.pg")
     remove_files("./*.alpha")

+
 def _check_pomdp_solve():
     pomdp_solve_path = os.getenv("POMDP_SOLVE_PATH")
     if pomdp_solve_path is None or not os.path.exists(pomdp_solve_path):
-        raise FileNotFoundError("To run this test, download pomdp-solve from " "https://www.pomdp.org/code/. Then, follow the " "instructions on this web page to compile this software. " "Finally, set the environment variable POMDP_SOLVE_PATH " "to be the path to the pomdp-solve binary file " "generated after compilation, likely located at " "/path/to/pomdp-solve-/src/pomdp-solve ")
+        raise FileNotFoundError(
+            "To run this test, download pomdp-solve from "
+            "https://www.pomdp.org/code/. Then, follow the "
+            "instructions on this web page to compile this software. "
+            "Finally, set the environment variable POMDP_SOLVE_PATH "
+            "to be the path to the pomdp-solve binary file "
+            "generated after compilation, likely located at "
+            "/path/to/pomdp-solve-/src/pomdp-solve "
+        )
     return pomdp_solve_path

+
 def run():
     pomdp_solve_path = _check_pomdp_solve()
     test_pomdp_file_conversion(pomdp_solve_path)

+
 if __name__ == "__main__":
     run()
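This test aborts with a `FileNotFoundError` unless the solver binary is discoverable. A sketch of wiring it up before running the suite (the install location is a placeholder; the same pattern applies to `POMDPCONVERT_PATH` and `POMDPSOL_PATH` in the tests below):

```python
import os

# Placeholder location; point this at wherever your compiled
# pomdp-solve binary actually lives.
os.environ["POMDP_SOLVE_PATH"] = "/opt/pomdp-solve/src/pomdp-solve"
assert os.path.exists(os.environ["POMDP_SOLVE_PATH"]), "binary not found"
```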
" + "Finally, set the environment variable POMDPCONVERT_PATH " + "to be the path to the pomdpconvert binary file " + "generated after compilation, likely located at " + "/path/to/sarsop/src/pomdpconvert" + ) return pomdpconvert_path + def run(): pomdpconvert_path = _check_pomdpconvert() test_pomdpx_file_conversion(pomdpconvert_path) diff --git a/tests/test_hashing_pickling.py b/tests/test_hashing_pickling.py index 25b4859c..8c20824d 100644 --- a/tests/test_hashing_pickling.py +++ b/tests/test_hashing_pickling.py @@ -5,7 +5,8 @@ import subprocess import textwrap -description="testing hashing & pickling some objects" +description = "testing hashing & pickling some objects" + def test_hashing_pickling(): objstate1 = pomdp_py.ObjectState("apple", {"color": "red", "weight": 1.5}) @@ -25,7 +26,7 @@ def test_hashing_pickling(): assert hash(objstate3) == hash(objstate1) assert objstate3 != objstate1 - oos1 = pomdp_py.OOState({1:objstate1, 2:objstate2}) + oos1 = pomdp_py.OOState({1: objstate1, 2: objstate2}) assert hash(oos1.copy()) == hash(oos1) # save oos1 as a pickle file @@ -52,22 +53,25 @@ def test_hashing_pickling(): with open(temp_prog_file, "w") as f: f.write(prog) - proc = subprocess.Popen(["python", temp_prog_file], - stdout=subprocess.PIPE) + proc = subprocess.Popen(["python", temp_prog_file], stdout=subprocess.PIPE) passed = False try: for line in io.TextIOWrapper(proc.stdout, encoding="utf-8"): if "Passed" in line: passed = True break - assert passed, "Something wrong - pickled object does not equal to original object" + assert ( + passed + ), "Something wrong - pickled object does not equal to original object" print("Pass.") finally: os.remove(temp_oos1_file) os.remove(temp_prog_file) + def run(): test_hashing_pickling() + if __name__ == "__main__": run() diff --git a/tests/test_particles.py b/tests/test_particles.py index 64e5ed45..5470176c 100644 --- a/tests/test_particles.py +++ b/tests/test_particles.py @@ -3,6 +3,7 @@ description = "testing particle representation" + def test_particles(): random_dist = {} total_prob = 0 @@ -12,7 +13,9 @@ def test_particles(): for v in random_dist: random_dist[v] /= total_prob - particles = pomdp_py.Particles.from_histogram(pomdp_py.Histogram(random_dist), num_particles=int(1e6)) + particles = pomdp_py.Particles.from_histogram( + pomdp_py.Histogram(random_dist), num_particles=int(1e6) + ) for v in random_dist: assert abs(particles[v] - random_dist[v]) <= 2e-3 @@ -41,7 +44,9 @@ def test_weighted_particles(): for v in random_dist: random_dist[v] /= total_prob - particles = pomdp_py.WeightedParticles.from_histogram(pomdp_py.Histogram(random_dist)) + particles = pomdp_py.WeightedParticles.from_histogram( + pomdp_py.Histogram(random_dist) + ) assert abs(sum(particles[v] for v, _ in particles) - 1.0) <= 1e-6 @@ -60,6 +65,7 @@ def test_weighted_particles(): assert particles.mpe() == pomdp_py.Histogram(random_dist).mpe() + def run(): test_particles() test_weighted_particles() diff --git a/tests/test_sarsop.py b/tests/test_sarsop.py index 82d490ca..cd48ed78 100644 --- a/tests/test_sarsop.py +++ b/tests/test_sarsop.py @@ -7,7 +7,8 @@ import os import io -description="testing sarsop" +description = "testing sarsop" + def test_sarsop(pomdpsol_path): print("[testing] test_sarsop") @@ -15,13 +16,18 @@ def test_sarsop(pomdpsol_path): # Building a policy graph print("[testing] solving the tiger problem...") - policy = sarsop(tiger.agent, pomdpsol_path, discount_factor=0.95, - timeout=10, memory=20, precision=0.000001, - remove_generated_files=True, - 
logfile="test_sarsop.log") + policy = sarsop( + tiger.agent, + pomdpsol_path, + discount_factor=0.95, + timeout=10, + memory=20, + precision=0.000001, + remove_generated_files=True, + logfile="test_sarsop.log", + ) - assert str(policy.plan(tiger.agent)) == "listen",\ - "Bad solution. Test failed." + assert str(policy.plan(tiger.agent)) == "listen", "Bad solution. Test failed." assert os.path.exists("test_sarsop.log") os.remove("test_sarsop.log") @@ -34,14 +40,20 @@ def test_sarsop(pomdpsol_path): action = policy.plan(tiger.agent) observation = tiger.agent.observation_model.sample(true_state, action) reward = tiger.env.reward_model.sample(true_state, action, None) - print("[testing] running computed policy graph"\ - "(step=%d, action=%s, observation=%s, reward=%d)" % (step, action, observation, reward)) + print( + "[testing] running computed policy graph" + "(step=%d, action=%s, observation=%s, reward=%d)" + % (step, action, observation, reward) + ) # belief update - new_belief = pomdp_py.update_histogram_belief(tiger.agent.cur_belief, - action, observation, - tiger.agent.observation_model, - tiger.agent.transition_model) + new_belief = pomdp_py.update_histogram_belief( + tiger.agent.cur_belief, + action, + observation, + tiger.agent.observation_model, + tiger.agent.transition_model, + ) tiger.agent.set_belief(new_belief) assert reward == -1 or reward == 10, "Reward is negative. Failed." @@ -50,18 +62,22 @@ def test_sarsop(pomdpsol_path): assert got_high_reward, "Should have gotten high reward. Failed." print("Pass.") + def _check_pomdpsol(): pomdpsol_path = os.getenv("POMDPSOL_PATH") if pomdpsol_path is None or not os.path.exists(pomdpsol_path): - raise FileNotFoundError("To run this test, download sarsop from" - "https://github.com/AdaCompNUS/sarsop. Then, follow the " - "instructions on this web page to compile this software. " - "Finally, set the environment variable POMDPSOL_PATH " - "to be the path to the pomdpsol binary file " - "generated after compilation, likely located at " - "/path/to/sarsop/src/pomdpsol") + raise FileNotFoundError( + "To run this test, download sarsop from" + "https://github.com/AdaCompNUS/sarsop. Then, follow the " + "instructions on this web page to compile this software. 
" + "Finally, set the environment variable POMDPSOL_PATH " + "to be the path to the pomdpsol binary file " + "generated after compilation, likely located at " + "/path/to/sarsop/src/pomdpsol" + ) return pomdpsol_path + def run(): pomdpsol_path = _check_pomdpsol() test_sarsop(pomdpsol_path) diff --git a/tests/test_tree_debugger.py b/tests/test_tree_debugger.py index 7f9ebab7..82277b17 100644 --- a/tests/test_tree_debugger.py +++ b/tests/test_tree_debugger.py @@ -3,13 +3,18 @@ import pomdp_py from pomdp_py.utils.debugging import TreeDebugger -description="testing pomdp_py.utils.TreeDebugger" +description = "testing pomdp_py.utils.TreeDebugger" + def test_tree_debugger_tiger(debug_tree=False): tiger_problem = TigerProblem.create("tiger-left", 0.5, 0.15) - pouct = pomdp_py.POUCT(max_depth=4, discount_factor=0.95, - num_sims=4096, exploration_const=200, - rollout_policy=tiger_problem.agent.policy_model) + pouct = pomdp_py.POUCT( + max_depth=4, + discount_factor=0.95, + num_sims=4096, + exploration_const=200, + rollout_policy=tiger_problem.agent.policy_model, + ) pouct.plan(tiger_problem.agent) dd = TreeDebugger(tiger_problem.agent.tree) @@ -33,6 +38,7 @@ def test_tree_debugger_tiger(debug_tree=False): test_planner(tiger_problem, pouct, nsteps=3, debug_tree=debug_tree) + def run(verbose=False, debug_tree=False): test_tree_debugger_tiger(debug_tree=debug_tree) diff --git a/tests/test_vi_pruning.py b/tests/test_vi_pruning.py index 02810a90..8e7e7a76 100644 --- a/tests/test_vi_pruning.py +++ b/tests/test_vi_pruning.py @@ -8,7 +8,8 @@ import os import io -description="testing vi_pruning (pomdp-solve)" +description = "testing vi_pruning (pomdp-solve)" + def test_vi_pruning(pomdp_solve_path, return_policy_graph=True): print("[testing] test_vi_pruning") @@ -16,13 +17,16 @@ def test_vi_pruning(pomdp_solve_path, return_policy_graph=True): # Building a policy graph print("[testing] solving the tiger problem...") - policy = vi_pruning(tiger.agent, pomdp_solve_path, discount_factor=0.95, - options=["-horizon", "100"], - remove_generated_files=True, - return_policy_graph=return_policy_graph) + policy = vi_pruning( + tiger.agent, + pomdp_solve_path, + discount_factor=0.95, + options=["-horizon", "100"], + remove_generated_files=True, + return_policy_graph=return_policy_graph, + ) - assert str(policy.plan(tiger.agent)) == "listen",\ - "Bad solution. Test failed." + assert str(policy.plan(tiger.agent)) == "listen", "Bad solution. Test failed." # Plan with the graph for several steps. So we should get high rewards # eventually in the tiger domain. @@ -32,18 +36,24 @@ def test_vi_pruning(pomdp_solve_path, return_policy_graph=True): action = policy.plan(tiger.agent) observation = tiger.agent.observation_model.sample(true_state, action) reward = tiger.env.reward_model.sample(true_state, action, None) - print("[testing] simulating computed policy graph"\ - "(step=%d, action=%s, observation=%s, reward=%d)" % (step, action, observation, reward)) + print( + "[testing] simulating computed policy graph" + "(step=%d, action=%s, observation=%s, reward=%d)" + % (step, action, observation, reward) + ) # No belief update needed. Just update the policy graph if return_policy_graph: # We use policy graph. No belief update is needed. Just update the policy. 
diff --git a/tests/test_vi_pruning.py b/tests/test_vi_pruning.py
index 02810a90..8e7e7a76 100644
--- a/tests/test_vi_pruning.py
+++ b/tests/test_vi_pruning.py
@@ -8,7 +8,8 @@
 import os
 import io

-description="testing vi_pruning (pomdp-solve)"
+description = "testing vi_pruning (pomdp-solve)"
+

 def test_vi_pruning(pomdp_solve_path, return_policy_graph=True):
     print("[testing] test_vi_pruning")
@@ -16,13 +17,16 @@ def test_vi_pruning(pomdp_solve_path, return_policy_graph=True):

     # Building a policy graph
     print("[testing] solving the tiger problem...")
-    policy = vi_pruning(tiger.agent, pomdp_solve_path, discount_factor=0.95,
-                        options=["-horizon", "100"],
-                        remove_generated_files=True,
-                        return_policy_graph=return_policy_graph)
+    policy = vi_pruning(
+        tiger.agent,
+        pomdp_solve_path,
+        discount_factor=0.95,
+        options=["-horizon", "100"],
+        remove_generated_files=True,
+        return_policy_graph=return_policy_graph,
+    )

-    assert str(policy.plan(tiger.agent)) == "listen",\
-        "Bad solution. Test failed."
+    assert str(policy.plan(tiger.agent)) == "listen", "Bad solution. Test failed."

     # Plan with the graph for several steps. So we should get high rewards
     # eventually in the tiger domain.
@@ -32,18 +36,24 @@ def test_vi_pruning(pomdp_solve_path, return_policy_graph=True):
         action = policy.plan(tiger.agent)
         observation = tiger.agent.observation_model.sample(true_state, action)
         reward = tiger.env.reward_model.sample(true_state, action, None)
-        print("[testing] simulating computed policy graph"\
-              "(step=%d, action=%s, observation=%s, reward=%d)" % (step, action, observation, reward))
+        print(
+            "[testing] simulating computed policy graph "
+            "(step=%d, action=%s, observation=%s, reward=%d)"
+            % (step, action, observation, reward)
+        )

         # No belief update needed. Just update the policy graph
         if return_policy_graph:
             # We use policy graph. No belief update is needed. Just update the policy.
             policy.update(tiger.agent, action, observation)
         else:
             # belief update is needed
-            new_belief = pomdp_py.update_histogram_belief(tiger.agent.cur_belief,
-                                                          action, observation,
-                                                          tiger.agent.observation_model,
-                                                          tiger.agent.transition_model)
+            new_belief = pomdp_py.update_histogram_belief(
+                tiger.agent.cur_belief,
+                action,
+                observation,
+                tiger.agent.observation_model,
+                tiger.agent.transition_model,
+            )
             tiger.agent.set_belief(new_belief)

         assert reward == -1 or reward == 10, "Reward is negative. Failed."
@@ -52,21 +62,26 @@ def test_vi_pruning(pomdp_solve_path, return_policy_graph=True):
     assert got_high_reward, "Should have gotten high reward. Failed."
     print("Pass.")

+
 def _check_pomdp_solve():
     pomdp_solve_path = os.getenv("POMDP_SOLVE_PATH")
     if pomdp_solve_path is None or not os.path.exists(pomdp_solve_path):
-        raise FileNotFoundError("To run this test, download pomdp-solve from " "https://www.pomdp.org/code/. Then, follow the " "instructions on this web page to compile this software. " "Finally, set the environment variable POMDP_SOLVE_PATH " "to be the path to the pomdp-solve binary file " "generated after compilation, likely located at " "/path/to/pomdp-solve-/src/pomdp-solve ")
+        raise FileNotFoundError(
+            "To run this test, download pomdp-solve from "
+            "https://www.pomdp.org/code/. Then, follow the "
+            "instructions on this web page to compile this software. "
+            "Finally, set the environment variable POMDP_SOLVE_PATH "
+            "to be the path to the pomdp-solve binary file "
+            "generated after compilation, likely located at "
+            "/path/to/pomdp-solve-/src/pomdp-solve "
+        )
     return pomdp_solve_path

+
 def run():
     pomdp_solve_path = _check_pomdp_solve()
     test_vi_pruning(pomdp_solve_path, return_policy_graph=False)

+
 if __name__ == "__main__":
     run()