From 2df963241466f32b45b6651ab10f37ea9da31817 Mon Sep 17 00:00:00 2001 From: Matt Davis Date: Mon, 4 Dec 2023 19:55:22 -0800 Subject: [PATCH] fix pexpect child shutdown race * ensure that any stdout data produced by the child after the last `expect()` call is consumed * add regression test --- src/ansible_runner/runner.py | 13 +++++++++++-- .../pexpect_timeout_data_loss/project/pb.yml | 9 +++++++++ test/integration/test_runner.py | 16 +++++++++++++++- 3 files changed, 35 insertions(+), 3 deletions(-) create mode 100644 test/fixtures/projects/pexpect_timeout_data_loss/project/pb.yml diff --git a/src/ansible_runner/runner.py b/src/ansible_runner/runner.py index 02abba80a..1b6a103a1 100644 --- a/src/ansible_runner/runner.py +++ b/src/ansible_runner/runner.py @@ -297,10 +297,11 @@ def run(self): child.logfile_read = stdout_handle except pexpect.exceptions.ExceptionPexpect as e: child = collections.namedtuple( - 'MissingProcess', 'exitstatus isalive close' + 'MissingProcess', 'exitstatus isalive expect close' )( exitstatus=127, isalive=lambda: False, + expect=lambda *args, **kwargs: None, close=lambda: None, ) @@ -341,9 +342,17 @@ def run(self): Runner.handle_termination(child.pid) self.timed_out = True + # fix for https://github.com/ansible/ansible-runner/issues/1330 + # Since we're (ab)using pexpect's logging callback as our source of stdout data, we need to pump the stream one last + # time, in case any new output was written by the child between the last return from expect and its termination. Ideally + # this would have an arbitrarily large timeout value as well, in case a ridiculous amount of data was written, but just + # invoking one last pump should cover the vast majority of real-world cases. + child.expect(pexpect.EOF, timeout=5) + + # close the child to ensure no more output will be written before we close the stream interposers + child.close() stdout_handle.close() stderr_handle.close() - child.close() self.rc = child.exitstatus if not (self.timed_out or self.canceled) else 254 if self.canceled: diff --git a/test/fixtures/projects/pexpect_timeout_data_loss/project/pb.yml b/test/fixtures/projects/pexpect_timeout_data_loss/project/pb.yml new file mode 100644 index 000000000..ee19ab0a2 --- /dev/null +++ b/test/fixtures/projects/pexpect_timeout_data_loss/project/pb.yml @@ -0,0 +1,9 @@ +# part of the regression test for https://github.com/ansible/ansible-runner/issues/1330 + +- hosts: localhost + gather_facts: no + tasks: + # sleep significantly longer than the configured pexpect timeout; the cancel callback will inject + # additional delay before the next process status sampling interval that can cause further output to be lost; + # if all is well, we'll do another loop over the child output until it's all been consumed... + - raw: sleep 2 diff --git a/test/integration/test_runner.py b/test/integration/test_runner.py index 553d94716..e00112b08 100644 --- a/test/integration/test_runner.py +++ b/test/integration/test_runner.py @@ -4,12 +4,13 @@ import os import re import sys +import time from test.utils.common import iterate_timeout import pytest -from ansible_runner import Runner +from ansible_runner import Runner, run from ansible_runner.exceptions import AnsibleRunnerException @@ -279,3 +280,16 @@ def test_set_extra_vars(rc): with open(os.path.join(rc.artifact_dir, 'stdout')) as f: if 'hello there' in f.read(): break + + +# regression test for https://github.com/ansible/ansible-runner/issues/1330 +def test_pexpect_timeout(project_fixtures): + r = run( + private_data_dir=str(project_fixtures / 'pexpect_timeout_data_loss'), + playbook='pb.yml', + settings={"pexpect_timeout": 0.1}, # set the pexpect timeout very low + cancel_callback=lambda: time.sleep(3) or False, # induce enough delay in the child polling loop that the child will exit before being polled again + ) + + # ensure we got playbook_on_stats; if pexpect ate it, we won't... + assert any(ev for ev in r.events if ev.get('event', None) == 'playbook_on_stats')