From 2df963241466f32b45b6651ab10f37ea9da31817 Mon Sep 17 00:00:00 2001
From: Matt Davis <nitzmahone@redhat.com>
Date: Mon, 4 Dec 2023 19:55:22 -0800
Subject: [PATCH] fix pexpect child shutdown race

* ensure that any stdout data produced by the child after the last `expect()` call is consumed
* add regression test
---
 src/ansible_runner/runner.py                     | 13 +++++++++++--
 .../pexpect_timeout_data_loss/project/pb.yml     |  9 +++++++++
 test/integration/test_runner.py                  | 16 +++++++++++++++-
 3 files changed, 35 insertions(+), 3 deletions(-)
 create mode 100644 test/fixtures/projects/pexpect_timeout_data_loss/project/pb.yml

diff --git a/src/ansible_runner/runner.py b/src/ansible_runner/runner.py
index 02abba80a..1b6a103a1 100644
--- a/src/ansible_runner/runner.py
+++ b/src/ansible_runner/runner.py
@@ -297,10 +297,11 @@ def run(self):
                 child.logfile_read = stdout_handle
             except pexpect.exceptions.ExceptionPexpect as e:
                 child = collections.namedtuple(
-                    'MissingProcess', 'exitstatus isalive close'
+                    'MissingProcess', 'exitstatus isalive expect close'
                 )(
                     exitstatus=127,
                     isalive=lambda: False,
+                    expect=lambda *args, **kwargs: None,
                     close=lambda: None,
                 )
 
@@ -341,9 +342,17 @@ def run(self):
                     Runner.handle_termination(child.pid)
                     self.timed_out = True
 
+            # fix for https://github.com/ansible/ansible-runner/issues/1330
+            # Since we're (ab)using pexpect's logging callback as our source of stdout data, we need to pump the stream one last
+            # time, in case any new output was written by the child between the last return from expect and its termination. Ideally
+            # this would have an arbitrarily large timeout value as well, in case a ridiculous amount of data was written, but just
+            # invoking one last pump should cover the vast majority of real-world cases.
+            child.expect(pexpect.EOF, timeout=5)
+
+            # close the child to ensure no more output will be written before we close the stream interposers
+            child.close()
             stdout_handle.close()
             stderr_handle.close()
-            child.close()
             self.rc = child.exitstatus if not (self.timed_out or self.canceled) else 254
 
         if self.canceled:
diff --git a/test/fixtures/projects/pexpect_timeout_data_loss/project/pb.yml b/test/fixtures/projects/pexpect_timeout_data_loss/project/pb.yml
new file mode 100644
index 000000000..ee19ab0a2
--- /dev/null
+++ b/test/fixtures/projects/pexpect_timeout_data_loss/project/pb.yml
@@ -0,0 +1,9 @@
+# part of the regression test for https://github.com/ansible/ansible-runner/issues/1330
+
+- hosts: localhost
+  gather_facts: no
+  tasks:
+    # sleep significantly longer than the configured pexpect timeout; the cancel callback will inject
+    # additional delay before the next process status sampling interval that can cause further output to be lost;
+    # if all is well, we'll do another loop over the child output until it's all been consumed...
+    - raw: sleep 2
diff --git a/test/integration/test_runner.py b/test/integration/test_runner.py
index 553d94716..e00112b08 100644
--- a/test/integration/test_runner.py
+++ b/test/integration/test_runner.py
@@ -4,12 +4,13 @@
 import os
 import re
 import sys
+import time
 
 from test.utils.common import iterate_timeout
 
 import pytest
 
-from ansible_runner import Runner
+from ansible_runner import Runner, run
 from ansible_runner.exceptions import AnsibleRunnerException
 
 
@@ -279,3 +280,16 @@ def test_set_extra_vars(rc):
         with open(os.path.join(rc.artifact_dir, 'stdout')) as f:
             if 'hello there' in f.read():
                 break
+
+
+# regression test for https://github.com/ansible/ansible-runner/issues/1330
+def test_pexpect_timeout(project_fixtures):
+    r = run(
+        private_data_dir=str(project_fixtures / 'pexpect_timeout_data_loss'),
+        playbook='pb.yml',
+        settings={"pexpect_timeout": 0.1},  # set the pexpect timeout very low
+        cancel_callback=lambda: time.sleep(3) or False,  # induce enough delay in the child polling loop that the child will exit before being polled again
+    )
+
+    # ensure we got playbook_on_stats; if pexpect ate it, we won't...
+    assert any(ev for ev in r.events if ev.get('event', None) == 'playbook_on_stats')