Skip to content

Commit c19f69d

Browse files
committed
Add retry functionality to monitor status request
1 parent 9c48a1c commit c19f69d

File tree

3 files changed

+52
-1
lines changed

3 files changed

+52
-1
lines changed

src/isar/config/settings.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,9 @@ def __init__(self) -> None:
5656
# Number of attempts to initiate a step or mission before cancelling
5757
INITIATE_FAILURE_COUNTER_LIMIT: int = Field(default=10)
5858

59+
# Number of attempts to request a step status in monitor before cancelling
60+
REQUEST_STATUS_FAILURE_COUNTER_LIMIT: int = Field(default=3)
61+
5962
# Number of attempts to stop the robot before giving up
6063
STOP_ROBOT_ATTEMPTS_LIMIT: int = Field(default=10)
6164

src/isar/state_machine/states/monitor.py

Lines changed: 36 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
from transitions import State
88

99
from isar.mission_planner.task_selector_interface import TaskSelectorStop
10+
from isar.config.settings import settings
1011
from isar.services.utilities.threaded_request import (
1112
ThreadedRequest,
1213
ThreadedRequestNotFinishedError,
@@ -17,6 +18,7 @@
1718
RobotMissionStatusException,
1819
RobotRetrieveInspectionException,
1920
RobotStepStatusException,
21+
RobotCommunicationTimeoutException,
2022
)
2123
from robot_interface.models.inspection.inspection import Inspection
2224
from robot_interface.models.mission.mission import Mission
@@ -32,6 +34,10 @@ class Monitor(State):
3234
def __init__(self, state_machine: "StateMachine") -> None:
3335
super().__init__(name="monitor", on_enter=self.start, on_exit=self.stop)
3436
self.state_machine: "StateMachine" = state_machine
37+
self.request_status_failure_counter: int = 0
38+
self.request_status_failure_counter_limit: int = (
39+
settings.REQUEST_STATUS_FAILURE_COUNTER_LIMIT
40+
)
3541

3642
self.logger = logging.getLogger("state_machine")
3743
self.step_status_thread: Optional[ThreadedRequest] = None
@@ -61,7 +67,6 @@ def _run(self) -> None:
6167
status_function=self.state_machine.robot.step_status,
6268
thread_name="State Machine Monitor Get Step Status",
6369
)
64-
6570
try:
6671
status: Union[StepStatus, MissionStatus] = (
6772
self.step_status_thread.get_output()
@@ -70,6 +75,34 @@ def _run(self) -> None:
7075
time.sleep(self.state_machine.sleep_time)
7176
continue
7277

78+
except RobotCommunicationTimeoutException as e:
79+
self.state_machine.current_mission.error_message = ErrorMessage(
80+
error_reason=e.error_reason, error_description=e.error_description
81+
)
82+
self.step_status_thread = None
83+
self.request_status_failure_counter += 1
84+
self.logger.warning(
85+
f"Monitoring step {self.state_machine.current_step.id} failed #: "
86+
f"{self.request_status_failure_counter} failed because: {e.error_description}"
87+
)
88+
89+
if (
90+
self.request_status_failure_counter
91+
>= self.request_status_failure_counter_limit
92+
):
93+
self.state_machine.current_step.error_message = ErrorMessage(
94+
error_reason=e.error_reason,
95+
error_description=e.error_description,
96+
)
97+
self.logger.error(
98+
f"Step will be cancelled after failing to get step status "
99+
f"{self.request_status_failure_counter} times because: "
100+
f"{e.error_description}"
101+
)
102+
status = StepStatus.Failed
103+
else:
104+
continue
105+
73106
except RobotStepStatusException as e:
74107
self.state_machine.current_step.error_message = ErrorMessage(
75108
error_reason=e.error_reason, error_description=e.error_description
@@ -98,6 +131,8 @@ def _run(self) -> None:
98131
f"Retrieving the status failed because: {e.error_description}"
99132
)
100133

134+
self.request_status_failure_counter = 0
135+
101136
if isinstance(status, StepStatus):
102137
self.state_machine.current_step.status = status
103138
elif isinstance(status, MissionStatus):

src/robot_interface/models/exceptions/robot_exceptions.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55

66
class ErrorReason(str, Enum):
77
RobotCommunicationException: str = "robot_communication_exception"
8+
RobotCommunicationTimeoutException: str = "robot_communication_timeout_exception"
89
RobotInfeasibleStepException: str = "robot_infeasible_step_exception"
910
RobotInfeasibleMissionException: str = "robot_infeasible_mission_exception"
1011
RobotMissionStatusException: str = "robot_mission_status_exception"
@@ -49,6 +50,18 @@ def __init__(self, error_description: str) -> None:
4950
pass
5051

5152

53+
# An exception which should be thrown by the robot package if the communication has timed
54+
# out and ISAR should retry the request.
55+
class RobotCommunicationTimeoutException(RobotException):
56+
def __init__(self, error_description: str) -> None:
57+
super().__init__(
58+
error_reason=ErrorReason.RobotCommunicationTimeoutException,
59+
error_description=error_description,
60+
)
61+
62+
pass
63+
64+
5265
# An exception which should be thrown by the robot package if it is unable to start the
5366
# current step.
5467
class RobotInfeasibleStepException(RobotException):

0 commit comments

Comments
 (0)