Skip to content

Commit 9a28660

Browse files
committed
better control of the exit signal thread
Signed-off-by: Sylvain Hellegouarch <sh@defuze.org>
1 parent 505c33f commit 9a28660

File tree

3 files changed

+42
-12
lines changed

3 files changed

+42
-12
lines changed

CHANGELOG.md

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,16 @@
22

33
## [Unreleased][]
44

5-
[Unreleased]: https://github.com/chaostoolkit/chaostoolkit-addons/compare/0.7.0...HEAD
5+
[Unreleased]: https://github.com/chaostoolkit/chaostoolkit-addons/compare/0.8.0...HEAD
6+
7+
## [0.8.0][]
8+
9+
[0.8.0]: https://github.com/chaostoolkit/chaostoolkit-addons/compare/0.7.0...0.8.0
10+
11+
### Changed
12+
13+
- Reworked how we trigger the actual exit call so that we never block the
14+
threads running the safeguards. Now only one thread can trigger the exit.
615

716
## [0.7.0][]
817

chaosaddons/controls/safeguards.py

Lines changed: 31 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -111,6 +111,7 @@ def __init__(self) -> None:
111111
self._lock = threading.Lock()
112112
self._interrupted = False
113113
self._setup = False
114+
self.triggered_by = None
114115

115116
@property
116117
def interrupted(self) -> bool:
@@ -145,10 +146,12 @@ def prepare(self, probes: List[Probe]) -> None:
145146
now_count += 1
146147

147148
self.repeating_until = threading.Event()
149+
self.wait_for_interruption = threading.Event()
148150
self.now_all_done = threading.Barrier(parties=now_count + 1)
149151
self.now = ThreadPoolExecutor(max_workers=now_count or 1)
150152
self.once = ThreadPoolExecutor(max_workers=once_count or 1)
151153
self.repeating = ThreadPoolExecutor(max_workers=repeating_count or 1)
154+
self.interrupter = ThreadPoolExecutor(max_workers=1)
152155
self._setup = True
153156

154157
def run(self, experiment: Experiment, probes: List[Probe],
@@ -161,6 +164,7 @@ def run(self, experiment: Experiment, probes: List[Probe],
161164
or not), then this call blocks until all these pre-check safeguards
162165
are completed.
163166
"""
167+
self.interrupter.submit(self._wait_interruption)
164168
for p in probes:
165169
f = None
166170
if p.get("frequency"):
@@ -186,6 +190,26 @@ def run(self, experiment: Experiment, probes: List[Probe],
186190
# this allows the experiment to block until these are passed
187191
self.now_all_done.wait()
188192

193+
def interrupt_now(self, triggered_by: str) -> None:
    """
    Request the interruption of the experiment on behalf of a safeguard.

    The name of the safeguard is recorded in `self.triggered_by` and the
    `wait_for_interruption` event is set, which wakes up the thread
    blocked in `_wait_interruption`. This call never performs the exit
    itself, so the calling safeguard thread is not blocked by it.

    Only the first request made after the event was created is taken into
    account: later calls leave `triggered_by` untouched so that the
    safeguard reported is the one that actually triggered the interruption.

    :param triggered_by: name of the safeguard requesting the interruption
    """
    with self._lock:
        # The check and the update are done under the same lock so that two
        # safeguards failing at the same time cannot both record their name.
        if not self.wait_for_interruption.is_set():
            self.triggered_by = triggered_by
            self.wait_for_interruption.set()
198+
199+
def _wait_interruption(self) -> None:
200+
self.wait_for_interruption.wait()
201+
202+
if not self.triggered_by:
203+
return None
204+
205+
if not self.interrupted:
206+
self.interrupted = True
207+
if not experiment_finished.is_set():
208+
logger.critical(
209+
"Safeguard '{}' triggered the end of the experiment".format(
210+
self.triggered_by))
211+
exit_gracefully()
212+
189213
def _log_finished(self, f: Future, probe: Probe) -> None:
190214
"""
191215
Logs each safeguard when they terminated.
@@ -206,10 +230,12 @@ def terminate(self) -> None:
206230
if not self._setup:
207231
return None
208232

233+
self.wait_for_interruption.set()
209234
self.repeating_until.set()
210-
self.now.shutdown(wait=True)
211-
self.repeating.shutdown(wait=True)
212-
self.once.shutdown(wait=True)
235+
self.now.shutdown(wait=False, cancel_futures=False)
236+
self.repeating.shutdown(wait=False, cancel_futures=False)
237+
self.once.shutdown(wait=False, cancel_futures=False)
238+
logger.debug("Guardian is now terminated")
213239

214240

215241
guardian = Guardian()
@@ -292,13 +318,8 @@ def interrupt_experiment_on_unhealthy_probe(guard: Guardian, probe: Probe,
292318
checked = within_tolerance(
293319
tolerance, run["output"], configuration=configuration,
294320
secrets=secrets)
295-
if not checked and not guard.interrupted:
296-
guard.interrupted = True
297-
if not experiment_finished.is_set():
298-
logger.critical(
299-
"Safeguard '{}' triggered the end of the experiment".format(
300-
probe["name"]))
301-
exit_gracefully()
321+
if not checked:
322+
guard.interrupt_now(probe["name"])
302323

303324

304325
def execute_activity(experiment: Experiment, probe: Probe,

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,5 +5,5 @@
55
setup(
66
use_scm_version=True,
77
name="chaostoolkit-addons",
8-
version="0.7.0",
8+
version="0.8.0",
99
)

0 commit comments

Comments
 (0)