diff --git a/sonic-chassisd/scripts/chassisd b/sonic-chassisd/scripts/chassisd
index 28f8950..d392b3b 100755
--- a/sonic-chassisd/scripts/chassisd
+++ b/sonic-chassisd/scripts/chassisd
@@ -622,6 +622,10 @@ class ChassisdDaemon(daemon_base.DaemonBase):
         super(ChassisdDaemon, self).__init__(log_identifier)
 
         self.stop = threading.Event()
+        # Main loop polling period; kept on the instance so tests can shorten it
+        self.loop_interval = CHASSIS_INFO_UPDATE_PERIOD_SECS
+        # Set before run() so its cleanup path can always read the attribute
+        self.config_manager = None
 
     # Override signal handler from DaemonBase
     def signal_handler(self, sig, frame):
@@ -669,25 +673,31 @@ class ChassisdDaemon(daemon_base.DaemonBase):
             self.log_error("Chassisd not supported for this platform")
             sys.exit(CHASSIS_NOT_SUPPORTED)
 
-        # Start configuration manager task on supervisor module
-        if self.module_updater.supervisor_slot == self.module_updater.my_slot:
-            config_manager = ConfigManagerTask()
-            config_manager.task_run()
-        else:
-            config_manager = None
-
-        # Start main loop
-        self.log_info("Start daemon main loop")
-
-        while not self.stop.wait(CHASSIS_INFO_UPDATE_PERIOD_SECS):
-            self.module_updater.module_db_update()
-            self.module_updater.check_midplane_reachability()
-            self.module_updater.module_down_chassis_db_cleanup()
-
-        self.log_info("Stop daemon main loop")
-
-        if config_manager is not None:
-            config_manager.task_stop()
+        try:
+            # Start configuration manager task on supervisor module
+            if self.module_updater.supervisor_slot == self.module_updater.my_slot:
+                self.config_manager = ConfigManagerTask()
+                self.config_manager.task_run()
+            else:
+                self.config_manager = None
+
+            # Start main loop
+            self.log_info("Start daemon main loop")
+
+            while not self.stop.wait(self.loop_interval):
+                self.module_updater.module_db_update()
+                self.module_updater.check_midplane_reachability()
+                self.module_updater.module_down_chassis_db_cleanup()
+
+            self.log_info("Stop daemon main loop")
+        finally:
+            # If we don't cleanup the config_manager process the chassisd process
+            # won't die when an exception occurs.
+            # https://github.com/sonic-net/sonic-buildimage/issues/24775
+            # This is the only place the task is stopped, for both the normal and
+            # the error path, so it is never stopped twice.
+            if self.config_manager is not None:
+                self.config_manager.task_stop()
 
         # Delete all the information from DB and then exit
         self.module_updater.deinit()
diff --git a/sonic-chassisd/tests/test_chassisd.py b/sonic-chassisd/tests/test_chassisd.py
index 83418a9..b072a33 100644
--- a/sonic-chassisd/tests/test_chassisd.py
+++ b/sonic-chassisd/tests/test_chassisd.py
@@ -1,6 +1,8 @@
 import os
 import sys
 import mock
+import pytest
+import time
 from imp import load_source
 
 from mock import Mock, MagicMock, patch
@@ -968,3 +970,25 @@ def test_chassis_db_bootup_with_empty_slot():
     assert status == fvs[CHASSIS_MODULE_INFO_OPERSTATUS_FIELD]
     assert down_module_lc1_key in sup_module_updater.down_modules.keys()
 
+def test_chassis_daemon_assertion():
+    """Check that run() stops the config_manager task when the main loop raises."""
+    daemon_chassisd = ChassisdDaemon(SYSLOG_IDENTIFIER)
+
+    # Reduce wait time from 10s to 1s to speed up test
+    daemon_chassisd.loop_interval = 1
+
+    # Simulate an Assertion occurring in the forever loop
+    with patch('chassisd.ModuleUpdater.module_db_update', MagicMock(side_effect=AssertionError)):
+        with pytest.raises(AssertionError):
+            daemon_chassisd.run()
+
+    # Wait for the config_manager child process to exit
+    start = time.time()
+    timeout = 30
+    while time.time() - start < timeout:
+        if not daemon_chassisd.config_manager._task_process.is_alive():
+            break
+        time.sleep(1)
+    else:
+        # pytest.fail instead of "assert False": asserts are stripped under -O
+        pytest.fail("config_manager thread never died")