From eaedd2ebd45dd6e1820cf6c3070dbacc2bf133d9 Mon Sep 17 00:00:00 2001 From: Longxiang Lyu Date: Mon, 2 Feb 2026 05:33:57 +0000 Subject: [PATCH 1/3] Fix `parallel_run` fork deadlock Signed-off-by: Longxiang Lyu --- tests/common/helpers/parallel.py | 50 ++++++++++++++++++++++++++++++++ tests/conftest.py | 4 +++ 2 files changed, 54 insertions(+) diff --git a/tests/common/helpers/parallel.py b/tests/common/helpers/parallel.py index e9101646b9e..02ff9f8cd92 100644 --- a/tests/common/helpers/parallel.py +++ b/tests/common/helpers/parallel.py @@ -5,10 +5,12 @@ import shutil import signal import tempfile +import threading import time import traceback from multiprocessing import Process, Manager, Pipe, TimeoutError from multiprocessing.pool import ThreadPool +from ansible.executor.process.worker import WorkerProcess from psutil import wait_procs @@ -17,6 +19,50 @@ logger = logging.getLogger(__name__) +def patch_ansible_worker_process(): + """Patch AnsibleWorkerProcess to avoid logging deadlock after fork.""" + + def start(self): + self._save_stdin() + try: + return super(WorkerProcess, self).start() + finally: + self._new_stdin.close() + + WorkerProcess.start = start + + +# NOTE: https://github.com/google/python-atfork/blob/main/atfork/stdlib_fixer.py +# This is to avoid any deadlock issues with logging module after fork. 
+_forked_handlers = set() +_forked_handlers_lock = threading.Lock() +os.register_at_fork(before=logging._acquireLock, + after_in_parent=logging._releaseLock, + after_in_child=logging._releaseLock) + + +def _fix_logging_handler_fork_lock(): + """Prevent logging handlers from deadlocking after fork.""" + # Collect all loggers including root + loggers = [logging.getLogger()] + list(logging.Logger.manager.loggerDict.values()) + handlers = set() + for logger in loggers: + if hasattr(logger, 'handlers'): + handlers.update(logger.handlers) + for handler in handlers: + new_handlers = [] + with _forked_handlers_lock: + if handler not in _forked_handlers and handler.lock is not None: + os.register_at_fork(before=handler.lock.acquire, + after_in_parent=handler.lock.release, + after_in_child=handler.lock.release) + new_handlers.append(handler) + _forked_handlers.add(handler) + + if new_handlers: + logging.debug("Add handler %s to forked handlers list", new_handlers) + + class SonicProcess(Process): """ Wrapper class around multiprocessing.Process that would capture the exception thrown if the Process throws @@ -136,6 +182,10 @@ def force_terminate(workers, init_result): ) if timeout else None failed_processes = {} + # Before spawning the child process, ensure current thread is + # holding the logging handler locks to avoid deadlock in child process. + _fix_logging_handler_fork_lock() + while tasks_done < total_tasks: # If execution time of processes exceeds timeout, need to force # terminate them all. 
diff --git a/tests/conftest.py b/tests/conftest.py index ee4ff91414f..105ce4db265 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -70,6 +70,7 @@ from tests.common.utilities import InterruptableThread from tests.common.plugins.ptfadapter.dummy_testutils import DummyTestUtils from tests.common.helpers.multi_thread_utils import SafeThreadPoolExecutor +from tests.common.helpers.parallel import patch_ansible_worker_process import tests.common.gnmi_setup as gnmi_setup @@ -113,6 +114,9 @@ 'tests.common.fixtures.duthost_utils') +patch_ansible_worker_process() + + def pytest_addoption(parser): parser.addoption("--testbed", action="store", default=None, help="testbed name") parser.addoption("--testbed_file", action="store", default=None, help="testbed file name") From e2ac49b81e3d43f4d4c4fc499e95125a3d1963ad Mon Sep 17 00:00:00 2001 From: Longxiang Lyu Date: Mon, 2 Feb 2026 12:08:25 +0000 Subject: [PATCH 2/3] Make fix_logging_handler_fork_lock public and call it from conftest Signed-off-by: Longxiang Lyu --- tests/common/helpers/parallel.py | 4 ++-- tests/conftest.py | 3 ++- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/tests/common/helpers/parallel.py b/tests/common/helpers/parallel.py index 02ff9f8cd92..4ba26cb632f 100644 --- a/tests/common/helpers/parallel.py +++ b/tests/common/helpers/parallel.py @@ -41,7 +41,7 @@ def start(self): after_in_child=logging._releaseLock) -def _fix_logging_handler_fork_lock(): +def fix_logging_handler_fork_lock(): """Prevent logging handlers from deadlocking after fork.""" # Collect all loggers including root loggers = [logging.getLogger()] + list(logging.Logger.manager.loggerDict.values()) @@ -184,7 +184,7 @@ def force_terminate(workers, init_result): # Before spawning the child process, ensure current thread is # holding the logging handler locks to avoid deadlock in child process. 
- _fix_logging_handler_fork_lock() + fix_logging_handler_fork_lock() while tasks_done < total_tasks: # If execution time of processes exceeds timeout, need to force diff --git a/tests/conftest.py b/tests/conftest.py index 105ce4db265..c80a820af50 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -71,6 +71,7 @@ from tests.common.plugins.ptfadapter.dummy_testutils import DummyTestUtils from tests.common.helpers.multi_thread_utils import SafeThreadPoolExecutor from tests.common.helpers.parallel import patch_ansible_worker_process +from tests.common.helpers.parallel import fix_logging_handler_fork_lock import tests.common.gnmi_setup as gnmi_setup @@ -115,7 +116,7 @@ patch_ansible_worker_process() - +fix_logging_handler_fork_lock() def pytest_addoption(parser): parser.addoption("--testbed", action="store", default=None, help="testbed name") From 554e1c60ccc74f67966b2e4c58c3d0bbd728dd99 Mon Sep 17 00:00:00 2001 From: Longxiang Lyu Date: Mon, 2 Feb 2026 12:15:58 +0000 Subject: [PATCH 3/3] Restore two blank lines before pytest_addoption in conftest.py Signed-off-by: Longxiang Lyu --- tests/conftest.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/conftest.py b/tests/conftest.py index c80a820af50..ee5d08e7de3 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -118,6 +118,7 @@ patch_ansible_worker_process() fix_logging_handler_fork_lock() + def pytest_addoption(parser): parser.addoption("--testbed", action="store", default=None, help="testbed name") parser.addoption("--testbed_file", action="store", default=None, help="testbed file name")