3 changes: 3 additions & 0 deletions src/aks-agent/HISTORY.rst
@@ -11,6 +11,9 @@ To release a new version, please select a new version number (usually plus 1 to

Pending
+++++++
* Fix stdin reading hang in CI/CD pipelines by using ``select`` with a timeout in non-interactive mode.
* Register pytest markers and fix the ``datetime.utcnow()`` deprecation warning in tests.
* Improve the eval test framework with real-time stderr visibility and a subprocess timeout.

1.0.0b6
+++++++
29 changes: 21 additions & 8 deletions src/aks-agent/azext_aks_agent/agent/agent.py
@@ -5,6 +5,7 @@

import logging
import os
import select
import sys

from azext_aks_agent._consts import (
@@ -123,7 +124,7 @@ def _should_refresh_toolsets(requested_mode: str, user_refresh_request: bool) ->
return False


# pylint: disable=too-many-locals
# pylint: disable=too-many-locals,too-many-branches
def aks_agent(
cmd,
resource_group_name,
@@ -177,13 +178,25 @@ def aks_agent(

# Detect and read piped input
piped_data = None
if not sys.stdin.isatty():
piped_data = sys.stdin.read().strip()
if interactive:
console.print(
"[bold yellow]Interactive mode disabled when reading piped input[/bold yellow]"
)
interactive = False
# In non-interactive mode with a prompt, we shouldn't try to read stdin
# as it may hang in CI/CD environments. Only read stdin if:
# 1. Not a TTY (indicating piped input)
# 2. Interactive mode is enabled (allows stdin reading)
should_check_stdin = not sys.stdin.isatty() and interactive

if should_check_stdin:
try:
# Use select with timeout to avoid hanging
# Check if data is available with 100ms timeout
if select.select([sys.stdin], [], [], 0.1)[0]:
piped_data = sys.stdin.read().strip()
console.print(
"[bold yellow]Interactive mode disabled when reading piped input[/bold yellow]"
)
interactive = False
except Exception: # pylint: disable=broad-exception-caught
# Continue without piped data if stdin reading fails
pass

# Determine MCP mode and smart refresh logic
use_aks_mcp = bool(use_aks_mcp)
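For reference, the stdin probe added above can be exercised in isolation. Below is a minimal standalone sketch of the same pattern (an illustration, not code from this diff; the helper name `read_piped_stdin` and the script name `probe.py` are hypothetical). Note that `select.select` on `sys.stdin` only works where stdin is a selectable file descriptor, which holds on POSIX but not for ordinary handles on Windows; that limitation is plausibly one reason the broad exception handler above falls back to continuing without piped data.

```python
import select
import sys
from typing import Optional


def read_piped_stdin(timeout: float = 0.1) -> Optional[str]:
    """Return piped stdin contents, or None if stdin is a TTY or no data arrives in time."""
    if sys.stdin.isatty():
        return None  # interactive terminal: nothing is piped
    try:
        # select() returns immediately if data is already buffered; otherwise it
        # waits at most `timeout` seconds instead of blocking forever (the CI hang).
        ready, _, _ = select.select([sys.stdin], [], [], timeout)
    except (OSError, ValueError):
        # stdin closed, or select() unsupported for this handle (e.g. Windows pipes)
        return None
    return sys.stdin.read().strip() if ready else None


if __name__ == "__main__":
    data = read_piped_stdin()
    print(f"piped: {data!r}" if data is not None else "no piped input detected")
```

Piping data in (`echo hello | python probe.py`) prints the piped text, while running the script from an interactive terminal reports no piped input.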
129 changes: 116 additions & 13 deletions src/aks-agent/azext_aks_agent/tests/evals/test_ask_agent.py
@@ -8,7 +8,12 @@
import os
import shlex
import subprocess
import sys
import textwrap
import threading
from datetime import datetime, timezone
from pathlib import Path
from time import perf_counter
from typing import Iterable


@@ -40,6 +45,26 @@
ITERATIONS = int(os.environ.get("ITERATIONS", "1"))
BRAINTRUST_UPLOADER = BraintrustUploader(os.environ)


def _log(message: str) -> None:
"""Emit a timestamped log line that pytest `-s` will surface immediately."""
timestamp = datetime.now(timezone.utc).isoformat(timespec="seconds")
print(f"[{timestamp}] {message}", flush=True)


def _summarise_command(parts: Iterable[str]) -> str:
"""Return a shell-style command string for debugging output."""
sequence = parts if isinstance(parts, list) else list(parts)
if hasattr(shlex, "join"):
return shlex.join(sequence)
# ``shlex.join`` was added in Python 3.8; keep a safe fallback just in case.
return " ".join(shlex.quote(part) for part in sequence)


def _preview_output(output: str, *, limit: int = 400) -> str:
"""Provide a trimmed preview of command output for quick debugging."""
return textwrap.shorten(output.strip(), width=limit, placeholder=" …")

pytestmark = [
pytest.mark.skipif(
not RUN_LIVE,
@@ -90,14 +115,59 @@ def _build_command(prompt: str, model: str, resource_group: str, cluster_name: s


def _run_cli(command: Iterable[str], env: dict[str, str]) -> str:
command_list = list(command)
command_display = _summarise_command(command_list)
_log(f"Invoking AKS Agent CLI: {command_display}")
start = perf_counter()

timeout_seconds = 600 # 10 minutes timeout

try:
result = subprocess.run( # noqa: S603
list(command),
check=True,
capture_output=True,
# Use Popen for real-time output visibility
process = subprocess.Popen( # noqa: S603
command_list,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
text=True,
env=env,
)

# Thread to print stderr in real-time
stderr_lines = []
def print_stderr():
for line in iter(process.stderr.readline, ''):
if line:
print(f"[STDERR] {line.rstrip()}", file=sys.stderr, flush=True)
stderr_lines.append(line)

stderr_thread = threading.Thread(target=print_stderr, daemon=True)
stderr_thread.start()

# Wait with timeout
try:
stdout, _ = process.communicate(timeout=timeout_seconds)
stderr_thread.join(timeout=1)
stderr = ''.join(stderr_lines)
except subprocess.TimeoutExpired:
process.kill()
stdout, stderr_remainder = process.communicate()
stderr = ''.join(stderr_lines) + (stderr_remainder or '')
_log(f"[ERROR] CLI command timed out after {timeout_seconds}s")
pytest.fail(
f"AKS Agent CLI call timed out after {timeout_seconds}s\n"
f"Command: {command_display}\n"
f"Stdout: {stdout}\n"
f"Stderr: {stderr}"
)

if process.returncode != 0:
raise subprocess.CalledProcessError(
process.returncode, command_list, stdout, stderr
)

result = subprocess.CompletedProcess(
command_list, process.returncode, stdout, stderr
)
except subprocess.CalledProcessError as exc: # pragma: no cover - live failure path
output = exc.stdout or ""
stderr = exc.stderr or ""
@@ -109,13 +179,28 @@ def _run_cli(command: Iterable[str], env: dict[str, str]) -> str:
f"Stdout: {output}\n"
f"Stderr: {stderr}"
)
duration = perf_counter() - start
stdout_preview = _preview_output(result.stdout)
stderr_preview = _preview_output(result.stderr) if result.stderr else None
_log(
f"AKS Agent CLI completed in {duration:.1f}s with stdout preview: {stdout_preview}"
)
if stderr_preview:
_log(
f"AKS Agent CLI stderr preview: {stderr_preview}"
)
return result.stdout


def _run_commands(
commands: list[str], env: dict[str, str], label: str, scenario: Scenario
) -> None:
if not commands:
_log(f"[{label}] {scenario.name}: no commands to run")
return
for cmd in commands:
_log(f"[{label}] {scenario.name}: running shell command: {cmd}")
start = perf_counter()
try:
completed = subprocess.run( # noqa: S603
cmd,
@@ -137,9 +222,25 @@ def _run_commands(
f"Stderr: {stderr}"
)
else:
duration = perf_counter() - start
# Provide quick visibility into command results when debugging failures.
if completed.stdout:
print(f"[{label}] {scenario.name}: {completed.stdout.strip()}")
stdout_preview = _preview_output(completed.stdout)
_log(
f"[{label}] {scenario.name}: succeeded in {duration:.1f}s; stdout preview: {stdout_preview}"
)
else:
_log(
f"[{label}] {scenario.name}: succeeded in {duration:.1f}s; no stdout produced"
)
if completed.stderr:
stderr_preview = _preview_output(completed.stderr)
_log(
f"[{label}] {scenario.name}: stderr preview: {stderr_preview}"
)
_log(
f"[{label}] {scenario.name}: completed {len(commands)} command(s)"
)


def _scenario_params() -> list:
Expand All @@ -165,6 +266,7 @@ def test_ask_agent_live(
request: pytest.FixtureRequest,
) -> None:
iteration_label = f"[iteration {iteration + 1}/{ITERATIONS}]"
_log(f"{iteration_label} starting scenario {scenario.name}")
if RUN_LIVE:
env = _load_env()

@@ -178,7 +280,7 @@ def test_ask_agent_live(
env.update(scenario.env_overrides)

if iteration == 0 and scenario.before_commands and not aks_skip_setup:
print(f"{iteration_label} running setup commands for {scenario.name}")
_log(f"{iteration_label} running setup commands for {scenario.name}")
_run_commands(scenario.before_commands, env, "setup", scenario)

command = _build_command(
@@ -188,7 +290,7 @@ def test_ask_agent_live(
cluster_name=cluster_name,
)

print(f"{iteration_label} invoking AKS Agent CLI for {scenario.name}")
_log(f"{iteration_label} invoking AKS Agent CLI for {scenario.name}")
try:
raw_output = _run_cli(command, env)
answer = ""
@@ -216,11 +318,11 @@ def test_ask_agent_live(
classifier_rationale = classifier_result.metadata.get(
"rationale", ""
)
print(
_log(
f"{iteration_label} classifier score for {scenario.name}: {classifier_score}"
)
if classifier_score is None:
print(
_log(
f"{iteration_label} classifier returned no score for {scenario.name}; falling back to substring checks"
)
else:
@@ -230,7 +332,7 @@ def test_ask_agent_live(
if not error_message:
error_message = "Classifier judged answer incorrect"
else:
print(
_log(
f"{iteration_label} classifier unavailable for {scenario.name}; falling back to substring checks"
)

@@ -280,21 +382,21 @@ def test_ask_agent_live(

if GENERATE_MOCKS:
mock_path = save_mock_answer(scenario.mock_path, answer)
print(f"{iteration_label} [mock] wrote response to {mock_path}")
_log(f"{iteration_label} [mock] wrote response to {mock_path}")
finally:
if (
iteration == ITERATIONS - 1
and scenario.after_commands
and not aks_skip_cleanup
):
print(f"{iteration_label} running cleanup commands for {scenario.name}")
_log(f"{iteration_label} running cleanup commands for {scenario.name}")
_run_commands(scenario.after_commands, env, "cleanup", scenario)
else:
if GENERATE_MOCKS:
pytest.fail("GENERATE_MOCKS requires RUN_LIVE=true")
try:
answer = load_mock_answer(scenario.mock_path)
print(f"{iteration_label} replayed mock response for {scenario.name}")
_log(f"{iteration_label} replayed mock response for {scenario.name}")
except FileNotFoundError:
pytest.skip(f"Mock response missing for scenario {scenario.name}; rerun with RUN_LIVE=true GENERATE_MOCKS=true")

@@ -328,5 +430,6 @@ def test_ask_agent_live(
_set_user_property(request, 'braintrust_root_span_id', str(root_span_id))
if url:
_set_user_property(request, 'braintrust_experiment_url', str(url))
_log(f"{iteration_label} completed scenario {scenario.name} (passed={passed})")
if not passed:
pytest.fail(f"Scenario {scenario.name}: {error}\nAI answer:\n{answer}")
@@ -6,12 +6,8 @@
import os
import sys
import unittest
from types import SimpleNamespace
from unittest.mock import MagicMock, Mock, call, patch
from unittest.mock import Mock, patch

from azext_aks_agent._consts import (CONST_AGENT_CONFIG_PATH_DIR_ENV_KEY,
CONST_AGENT_NAME,
CONST_AGENT_NAME_ENV_KEY)
from azext_aks_agent.agent.agent import aks_agent, init_log
from azure.cli.core.util import CLIError

8 changes: 8 additions & 0 deletions src/aks-agent/setup.cfg
@@ -1,2 +1,10 @@
[bdist_wheel]
universal=1

[tool:pytest]
markers =
easy: Regression AKS Agent evals that should always pass
medium: Stretch AKS Agent evals that may fail occasionally
hard: Challenging AKS Agent evals reserved for complex scenarios
kubernetes: AKS Agent evals that exercise Kubernetes-focused flows
aks_eval: AKS Agent evaluation tests
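With these markers registered, pytest stops warning about unknown marks, and runs can be filtered by marker, for example `pytest -m easy` or `pytest -m "kubernetes and not hard"` (illustrative invocations; the exact paths and options depend on how the eval suite is launched).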