From c3aac0f9e95e7c88cf885c870dc87a64c69751b1 Mon Sep 17 00:00:00 2001 From: mgunnala Date: Mon, 13 Jan 2025 12:33:51 -0500 Subject: [PATCH] Report status for single-config ext --- azurelinuxagent/ga/exthandlers.py | 47 +++---- tests/ga/test_extension.py | 10 +- tests_e2e/orchestrator/scripts/collect-logs | 3 +- .../ext_policy_with_dependencies.yml | 4 +- tests_e2e/tests/ext_policy/ext_policy.py | 10 +- .../ext_policy_with_dependencies.py | 68 ++++++---- .../ext_policy/policy_dependencies_cases.py | 13 +- ...ent_ext_policy-verify_operation_success.py | 117 ++++++++++++++++++ ...ent_ext_policy-verify_uninstall_success.py | 61 --------- 9 files changed, 210 insertions(+), 123 deletions(-) create mode 100755 tests_e2e/tests/scripts/agent_ext_policy-verify_operation_success.py delete mode 100755 tests_e2e/tests/scripts/agent_ext_policy-verify_uninstall_success.py diff --git a/azurelinuxagent/ga/exthandlers.py b/azurelinuxagent/ga/exthandlers.py index 2644df863..49eddcd85 100644 --- a/azurelinuxagent/ga/exthandlers.py +++ b/azurelinuxagent/ga/exthandlers.py @@ -297,6 +297,8 @@ class ExtHandlersHandler(object): def __init__(self, protocol): self.protocol = protocol self.ext_handlers = None + # Maintain a list of extensions that are disallowed, and always report extension status for disallowed extensions. + self.disallowed_ext_handlers = [] # The GoalState Aggregate status needs to report the last status of the GoalState. Since we only process # extensions on goal state change, we need to maintain its state. # Setting the status to None here. This would be overridden as soon as the first GoalState is processed @@ -519,6 +521,7 @@ def handle_ext_handlers(self, goal_state_id): # back for the skipped extensions. In order to propagate the status back to CRP, we will report status back # here with an error message. if not extensions_enabled: + self.disallowed_ext_handlers.append(ext_handler) agent_conf_file_path = get_osutil().agent_conf_file_path msg = "Extension will not be processed since extension processing is disabled. To enable extension " \ "processing, set Extensions.Enabled=y in '{0}'".format(agent_conf_file_path) @@ -745,28 +748,25 @@ def __handle_and_report_ext_handler_errors(ext_handler_i, error, report_op, mess add_event(name=name, version=handler_version, op=report_op, is_success=False, log_event=True, message=message) - @staticmethod - def __report_policy_error(ext_handler_i, error_code, report_op, message, extension): - # TODO: Consider merging this function with __handle_and_report_ext_handler_errors() above, after investigating - # the impact of this change. - # - # If extension status is present, CRP will ignore handler status and report extension status. In the case of policy errors, - # extensions are not processed, so collect_ext_status() reports transitioning status on behalf of the extension. - # However, extensions blocked by policy should fail fast, so agent should write a .status file for policy failures. - # Note that __handle_and_report_ext_handler_errors() does not create the file for single-config extensions, but changing - # it will require additional testing/investigation. As a temporary workaround, this separate function was created - # to write a status file for single-config extensions. - - # Set handler status for all extensions (with and without settings). We report the same error at both the - # handler and extension status level. + def __report_policy_error(self, ext_handler_i, error_code, report_op, message, extension): + # TODO: __handle_and_report_ext_handler_errors() does not create a status file for single-config extensions, this + # function was created as a temporary workaround. Consider merging the two functions function after assessing its impact. + + # If extension status exists, CRP ignores handler status and reports extension status. In the case of policy errors, + # we write a .status file to force CRP to fail fast - the agent will otherwise report a transitioning status. + # - For extensions without settings or uninstall errors: report at the handler level. + # - For extensions with settings (install/enable errors): report at both handler and extension levels. + + # Keep a list of disallowed extensions so that report_ext_handler_status() can report status for them. + self.disallowed_ext_handlers.append(ext_handler_i.ext_handler) + + # Set handler status for all extensions (with and without settings). ext_handler_i.set_handler_status(message=message, code=error_code) - # Create status file for extensions with settings (single and multi config). - # If status file already exists, overwrite it. If an extension was previously reporting status and is now - # blocked by a policy error, we should report the policy error. - if extension is not None: - # TODO: if extension is reporting a heartbeat, it overwrites status. Consider overwriting heartbeat, if - # it exists. + # For extensions with settings (install/enable errors), also update extension-level status. + # Overwrite any existing status file to reflect policy failures accurately. + if extension is not None and ext_handler_i.ext_handler.state == ExtensionRequestedState.Enabled: + # TODO: if extension is reporting heartbeat, it overwrites status. Consider overwriting heartbeat here ext_handler_i.create_status_file(extension, status=ExtensionStatusValue.error, code=error_code, operation=report_op, message=message, overwrite=True) @@ -1068,12 +1068,13 @@ def report_ext_handler_status(self, vm_status, ext_handler, goal_state_changed): handler_state = ext_handler_i.get_handler_state() ext_handler_statuses = [] + ext_disallowed = ext_handler in self.disallowed_ext_handlers # For MultiConfig, we need to report status per extension even for Handler level failures. # If we have HandlerStatus for a MultiConfig handler and GS is requesting for it, we would report status per # extension even if HandlerState == NotInstalled (Sample scenario: ExtensionsGoalStateError, DecideVersionError, etc) - # We also need to report extension status for an uninstalled handler if extensions are disabled, because CRP - # waits for extension runtime status before failing the extension operation. - if handler_state != ExtHandlerState.NotInstalled or ext_handler.supports_multi_config or not conf.get_extensions_enabled(): + # We also need to report extension status for an uninstalled handler if the extension is disallowed (due to + # policy failure, extensions disabled, etc.) because CRP waits for extension runtime status before failing the operation. + if handler_state != ExtHandlerState.NotInstalled or ext_handler.supports_multi_config or ext_disallowed: # Since we require reading the Manifest for reading the heartbeat, this would fail if HandlerManifest not found. # Only try to read heartbeat if HandlerState != NotInstalled. diff --git a/tests/ga/test_extension.py b/tests/ga/test_extension.py index 299a0c3a8..b7d830ccf 100644 --- a/tests/ga/test_extension.py +++ b/tests/ga/test_extension.py @@ -3572,7 +3572,7 @@ def test_should_fail_enable_if_extension_disallowed(self): } expected_msg = "failed to run extension 'OSTCExtensions.ExampleHandlerLinux' because it is not specified as an allowed extension." self._test_policy_case(policy=policy, op=ExtensionRequestedState.Enabled, expected_status_code=ExtensionErrorCodes.PluginEnableProcessingFailed, - expected_handler_status='NotReady', expected_ext_count=0, expected_status_msg=expected_msg) + expected_handler_status='NotReady', expected_ext_count=1, expected_status_msg=expected_msg) def test_should_fail_enable_for_invalid_policy(self): policy = \ @@ -3584,7 +3584,7 @@ def test_should_fail_enable_for_invalid_policy(self): } expected_msg = "attribute 'extensionPolicies.allowListedExtensionsOnly'; must be 'boolean'" self._test_policy_case(policy=policy, op=ExtensionRequestedState.Enabled, expected_status_code=ExtensionErrorCodes.PluginEnableProcessingFailed, - expected_handler_status='NotReady', expected_ext_count=0, expected_status_msg=expected_msg) + expected_handler_status='NotReady', expected_ext_count=1, expected_status_msg=expected_msg) def test_should_fail_extension_if_error_thrown_during_policy_engine_init(self): policy = \ @@ -3596,7 +3596,7 @@ def test_should_fail_extension_if_error_thrown_during_policy_engine_init(self): expected_msg = "Extension will not be processed: mock exception" self._test_policy_case(policy=policy, op=ExtensionRequestedState.Enabled, expected_status_code=ExtensionErrorCodes.PluginEnableProcessingFailed, - expected_handler_status='NotReady', expected_ext_count=0, expected_status_msg=expected_msg) + expected_handler_status='NotReady', expected_ext_count=1, expected_status_msg=expected_msg) def test_should_fail_uninstall_if_extension_disallowed(self): policy = \ @@ -3610,7 +3610,7 @@ def test_should_fail_uninstall_if_extension_disallowed(self): } expected_msg = "failed to uninstall extension 'OSTCExtensions.ExampleHandlerLinux' because it is not specified as an allowed extension." self._test_policy_case(policy=policy, op=ExtensionRequestedState.Uninstall, expected_status_code=ExtensionErrorCodes.PluginDisableProcessingFailed, - expected_handler_status='NotReady', expected_ext_count=0, expected_status_msg=expected_msg) + expected_handler_status='NotReady', expected_ext_count=1, expected_status_msg=expected_msg) def test_should_fail_enable_if_dependent_extension_disallowed(self): self._create_policy_file({ @@ -3634,7 +3634,7 @@ def test_should_fail_enable_if_dependent_extension_disallowed(self): # OtherExampleHandlerLinux should be disallowed by policy, ExampleHandlerLinux should be skipped because # dependent extension failed - self._assert_handler_status(protocol.report_vm_status, expected_status="NotReady", expected_ext_count=0, + self._assert_handler_status(protocol.report_vm_status, expected_status="NotReady", expected_ext_count=1, version="1.0.0", expected_handler_name="OSTCExtensions.OtherExampleHandlerLinux", expected_msg=("failed to run extension 'OSTCExtensions.OtherExampleHandlerLinux' " "because it is not specified as an allowed extension.")) diff --git a/tests_e2e/orchestrator/scripts/collect-logs b/tests_e2e/orchestrator/scripts/collect-logs index c221288a1..85ef29ab4 100755 --- a/tests_e2e/orchestrator/scripts/collect-logs +++ b/tests_e2e/orchestrator/scripts/collect-logs @@ -19,7 +19,8 @@ tar --exclude='journal/*' --exclude='omsbundle' --exclude='omsagent' --exclude=' -czf "$logs_file_name" \ /var/log \ /var/lib/waagent/ \ - $waagent_conf + $waagent_conf \ + /etc/waagent_policy.json set -euxo pipefail diff --git a/tests_e2e/test_suites/ext_policy_with_dependencies.yml b/tests_e2e/test_suites/ext_policy_with_dependencies.yml index 0c9fe8b18..a44453b52 100644 --- a/tests_e2e/test_suites/ext_policy_with_dependencies.yml +++ b/tests_e2e/test_suites/ext_policy_with_dependencies.yml @@ -6,4 +6,6 @@ tests: - "ext_policy/ext_policy_with_dependencies.py" images: "endorsed" executes_on_scale_set: true -owns_vm: false \ No newline at end of file +owns_vm: false +skip_on_images: + - "alma_9" # TODO: Currently AlmaLinux is not available for scale sets; enable this image when it is available. \ No newline at end of file diff --git a/tests_e2e/tests/ext_policy/ext_policy.py b/tests_e2e/tests/ext_policy/ext_policy.py index 555ec6853..35345d21c 100644 --- a/tests_e2e/tests/ext_policy/ext_policy.py +++ b/tests_e2e/tests/ext_policy/ext_policy.py @@ -18,6 +18,7 @@ # import json import uuid +import os from typing import List, Dict, Any from assertpy import assert_that, fail @@ -44,9 +45,12 @@ def _create_policy_file(self, policy): """ Create policy json file and copy to /etc/waagent_policy.json on test machine. """ - with open("waagent_policy.json", mode='w') as policy_file: + unique_id = uuid.uuid4() + file_path = "/tmp/waagent_policy_{0}.json".format(unique_id) + with open(file_path, mode='w') as policy_file: json.dump(policy, policy_file, indent=4) policy_file.flush() + log.info("Policy file contents: {0}".format(json.dumps(policy, indent=4))) remote_path = "/tmp/waagent_policy.json" local_path = policy_file.name @@ -54,6 +58,7 @@ def _create_policy_file(self, policy): policy_file_final_dest = "/etc/waagent_policy.json" log.info("Copying policy file to test VM [%s]", self._context.vm.name) self._ssh_client.run_command(f"mv {remote_path} {policy_file_final_dest}", use_sudo=True) + os.remove(file_path) def _operation_should_succeed(self, operation, extension_case): log.info("") @@ -234,6 +239,7 @@ def run(self): } } self._create_policy_file(policy) + self._operation_should_succeed("enable", custom_script) # Update settings to force an update to the seq no run_command.settings = {'source': f"echo '{str(uuid.uuid4())}'"} run_command_2.settings = {'source': f"echo '{str(uuid.uuid4())}'"} @@ -316,4 +322,4 @@ def get_ignore_error_rules(self) -> List[Dict[str, Any]]: if __name__ == "__main__": - ExtPolicy.run_from_command_line() + ExtPolicy.run_from_command_line() \ No newline at end of file diff --git a/tests_e2e/tests/ext_policy/ext_policy_with_dependencies.py b/tests_e2e/tests/ext_policy/ext_policy_with_dependencies.py index f00982745..368e85fe8 100644 --- a/tests_e2e/tests/ext_policy/ext_policy_with_dependencies.py +++ b/tests_e2e/tests/ext_policy/ext_policy_with_dependencies.py @@ -20,15 +20,14 @@ # # This test adds extensions with multiple dependencies to a VMSS and checks that extensions fail and report status # as expected when blocked by extension policy. -# Note that this test is currently disabled in daily automation due to status reporting errors that need further investigation. -# TODO: re-enable this test in daily automation after investigating test failures -# import copy import json import random import re +import os import uuid +import time from datetime import datetime from typing import List, Dict, Any @@ -57,20 +56,23 @@ def __init__(self, context: AgentVmTestContext): # Cases to test different dependency scenarios _test_cases = [ - _should_fail_single_config_depends_on_disallowed_no_config, _should_fail_single_config_depends_on_disallowed_single_config, + _should_fail_single_config_depends_on_disallowed_no_config, # TODO: RunCommandHandler is unable to be uninstalled properly, so these tests are currently disabled. Investigate the # issue and enable these 3 tests. # _should_fail_single_config_depends_on_disallowed_multi_config, # _should_fail_multi_config_depends_on_disallowed_single_config, # _should_fail_multi_config_depends_on_disallowed_no_config, - _should_succeed_single_config_depends_on_no_config, - _should_succeed_single_config_depends_on_single_config + _should_succeed_single_config_depends_on_single_config, + _should_succeed_single_config_depends_on_no_config ] @staticmethod def _create_policy_file(ssh_client, policy): - with open("waagent_policy.json", mode='w') as policy_file: + # Generate a unique file name to avoid conflicts with any other tests running in parallel. + unique_id = uuid.uuid4() + file_path = "/tmp/waagent_policy_{0}.json".format(unique_id) + with open(file_path, mode='w') as policy_file: json.dump(policy, policy_file, indent=4) policy_file.flush() @@ -79,6 +81,7 @@ def _create_policy_file(ssh_client, policy): ssh_client.copy_to_node(local_path=local_path, remote_path=remote_path) policy_file_final_dest = "/etc/waagent_policy.json" ssh_client.run_command(f"mv {remote_path} {policy_file_final_dest}", use_sudo=True) + os.remove(file_path) def run(self): @@ -124,7 +127,7 @@ def run(self): for case in self._test_cases: log.info("") - log.info("Test case: {0}".format(case.__name__.replace('_', ' '))) + log.info("*** Test case: {0}".format(case.__name__.replace('_', ' '))) test_case_start = random.choice(list(ssh_clients.values())).run_command("date '+%Y-%m-%d %T'").rstrip() if self._scenario_start == datetime.min: self._scenario_start = test_case_start @@ -189,11 +192,10 @@ def run(self): log.info("Extensions failed as expected. Expected errors: '{0}'. Actual errors: '{1}'.".format(expected_errors, e)) log.info("") - # After each test, clean up failed extensions to leave VMSS in a good state for the next test. - # If there are leftover failed extensions, CRP will attempt to uninstall them in the next test, but uninstall - # will be disallowed by policy. Since CRP waits for a 90 minute timeout for uninstall, the operation will - # timeout and fail without an appropriate error message, and the whole test case will fail. - # To clean up, we first update the policy to allow all, then remove the extensions. + # Clean up failed extensions to leave VMSS in a good state for the next test. CRP will attempt to uninstall + # leftover extensions in the next test, but uninstall will be disallowed and reach timeout unexpectedly. + # CRP also won't allow deletion of an extension that is dependent on another failed extension, so we first + # update policy to allow all, re-enable all extensions, and then delete them. log.info("Starting cleanup for test case...") allow_all_policy = \ { @@ -205,20 +207,41 @@ def run(self): for ssh_client in ssh_clients.values(): self._create_policy_file(ssh_client, allow_all_policy) + log.info("Trying to re-enable before deleting extensions...") + for ext in extensions: + ext["properties"].update({ + "forceUpdateTag": str(uuid.uuid4()) + }) + ext_template['resources'][0]['properties']['virtualMachineProfile']['extensionProfile'][ + 'extensions'] = extensions + enable_start_time = random.choice(list(ssh_clients.values())).run_command("date '+%Y-%m-%d %T'").rstrip() + try: + rg_client.deploy_template(template=ext_template) + except Exception as err: + # Known issue - CRP returns a stale status for no-config extensions, because it does not wait for a new + # sequence number. AzureMonitorLinuxAgent is the only no-config extension we test. For this extension + # only, swallow the CRP error and check agent log instead to confirm that extensions were enabled + # successfully. + if VmExtensionIds.AzureMonitorLinuxAgent in deletion_order: + log.info("CRP returned error when re-enabling extensions after allowing. Checking agent log to see if enable succeeded. " + "Error: {0}".format(err)) + time.sleep(60) # Give extensions some time to finish processing. + extension_list = ' '.join([str(e) for e in deletion_order]) + command = (f"agent_ext_policy-verify_operation_success.py --after-timestamp '{enable_start_time}' " + f"--operation 'enable' --extension-list {extension_list}") + for ssh_client in ssh_clients.values(): + ssh_client.run_command(command, use_sudo=True) + log.info("Agent reported successful status for all extensions, enable succeeded.") + else: + fail("Failed to re-enable extensions after allowing with policy.") + + # Delete all extensions in dependency order. for ext_to_delete in deletion_order: ext_name_to_delete = ext_to_delete.type try: self._context.vmss.delete_extension(ext_name_to_delete) except Exception as crp_err: - # Known issue - CRP returns stale status in cases of dependency failures. Even if the deletion succeeds, - # CRP may return a failure. We swallow the error and instead, verify that the agent does not report - # status for the uninstalled extension. - log.info("CRP returned an error for deletion operation, may be a false error. Checking agent status file to determine if operation succeeded. Exception: {0}".format(crp_err)) - try: - for ssh_client in ssh_clients.values(): - ssh_client.run_command(f"agent_ext_policy-verify_uninstall_success.py --extension-name '{ext_to_delete}'") - except Exception as agent_err: - fail("Unable to successfully uninstall extension {0}. Exception: {1}".format(ext_name_to_delete, agent_err)) + fail("Failed to uninstall extension {0}. Exception: {1}".format(ext_name_to_delete, crp_err)) log.info("Successfully uninstalled extension {0}".format(ext_name_to_delete)) log.info("Successfully removed all extensions from VMSS") @@ -228,7 +251,6 @@ def run(self): for ssh_client in ssh_clients.values(): ssh_client.run_command("update-waagent-conf Debug.EnableExtensionPolicy=n", use_sudo=True) - def get_ignore_errors_before_timestamp(self) -> datetime: # Ignore errors in the agent log before the first test case starts if self._scenario_start == datetime.min: diff --git a/tests_e2e/tests/ext_policy/policy_dependencies_cases.py b/tests_e2e/tests/ext_policy/policy_dependencies_cases.py index 5de3136d4..55c7daf95 100644 --- a/tests_e2e/tests/ext_policy/policy_dependencies_cases.py +++ b/tests_e2e/tests/ext_policy/policy_dependencies_cases.py @@ -58,7 +58,7 @@ def _should_fail_single_config_depends_on_disallowed_single_config(): } } expected_errors = [ - "Extension will not be processed: failed to run extension 'Microsoft.OSTCExtensions.VMAccessForLinux' because it is not specified in the allowlist", + "Extension will not be processed: failed to run extension 'Microsoft.OSTCExtensions.VMAccessForLinux' because it is not specified as an allowed extension", "'CustomScript' is marked as failed since it depends upon the VM Extension 'VMAccessForLinux' which has failed" ] deletion_order = [VmExtensionIds.CustomScript, VmExtensionIds.VmAccess] @@ -69,7 +69,6 @@ def _should_fail_single_config_depends_on_disallowed_no_config(): template = [ __get_extension_template(VmExtensionIds.AzureMonitorLinuxAgent), __get_extension_template(VmExtensionIds.CustomScript, depends_on=["AzureMonitorLinuxAgent"]) - ] policy = \ { @@ -84,7 +83,7 @@ def _should_fail_single_config_depends_on_disallowed_no_config(): } } expected_errors = [ - "Extension will not be processed: failed to run extension 'Microsoft.Azure.Monitor.AzureMonitorLinuxAgent' because it is not specified in the allowlist", + "Extension will not be processed: failed to run extension 'Microsoft.Azure.Monitor.AzureMonitorLinuxAgent' because it is not specified as an allowed extension", "'CustomScript' is marked as failed since it depends upon the VM Extension 'AzureMonitorLinuxAgent' which has failed" ] deletion_order = [VmExtensionIds.CustomScript, VmExtensionIds.AzureMonitorLinuxAgent] @@ -109,7 +108,7 @@ def _should_fail_single_config_depends_on_disallowed_multi_config(): } } expected_errors = [ - "Extension will not be processed: failed to run extension 'Microsoft.CPlat.Core.RunCommandHandlerLinux' because it is not specified in the allowlist", + "Extension will not be processed: failed to run extension 'Microsoft.CPlat.Core.RunCommandHandlerLinux' because it is not specified as an allowed extension", "'CustomScript' is marked as failed since it depends upon the VM Extension 'RunCommandHandlerLinux' which has failed" ] deletion_order = [VmExtensionIds.CustomScript, VmExtensionIds.RunCommandHandler] @@ -134,7 +133,7 @@ def _should_fail_multi_config_depends_on_disallowed_single_config(): } } expected_errors = [ - "Extension will not be processed: failed to run extension 'Microsoft.Azure.Extensions.CustomScript' because it is not specified in the allowlist", + "Extension will not be processed: failed to run extension 'Microsoft.Azure.Extensions.CustomScript' because it is not specified as an allowed extension", "VM has reported a failure when processing extension 'RunCommandHandlerLinux' (publisher 'Microsoft.CPlat.Core' and type 'RunCommandHandlerLinux'). Error message: 'Skipping processing of extensions since execution of dependent extension Microsoft.Azure.Extensions.CustomScript failed'." ] deletion_order = [VmExtensionIds.RunCommandHandler, VmExtensionIds.CustomScript] @@ -159,7 +158,7 @@ def _should_fail_multi_config_depends_on_disallowed_no_config(): } } expected_errors = [ - "Extension will not be processed: failed to run extension 'Microsoft.Azure.Monitor.AzureMonitorLinuxAgent' because it is not specified in the allowlist", + "Extension will not be processed: failed to run extension 'Microsoft.Azure.Monitor.AzureMonitorLinuxAgent' because it is not specified as an allowed extension", "VM has reported a failure when processing extension 'RunCommandHandlerLinux' (publisher 'Microsoft.CPlat.Core' and type 'RunCommandHandlerLinux'). Error message: 'Skipping processing of extensions since execution of dependent extension Microsoft.Azure.Monitor.AzureMonitorLinuxAgent failed'." ] deletion_order = [VmExtensionIds.RunCommandHandler, VmExtensionIds.AzureMonitorLinuxAgent] @@ -210,4 +209,4 @@ def _should_succeed_single_config_depends_on_single_config(): } expected_errors = [] deletion_order = [VmExtensionIds.VmAccess, VmExtensionIds.CustomScript] - return policy, template, expected_errors, deletion_order + return policy, template, expected_errors, deletion_order \ No newline at end of file diff --git a/tests_e2e/tests/scripts/agent_ext_policy-verify_operation_success.py b/tests_e2e/tests/scripts/agent_ext_policy-verify_operation_success.py new file mode 100755 index 000000000..2e382cebe --- /dev/null +++ b/tests_e2e/tests/scripts/agent_ext_policy-verify_operation_success.py @@ -0,0 +1,117 @@ +#!/usr/bin/env pypy3 + +# Microsoft Azure Linux Agent +# +# Copyright 2018 Microsoft Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# +import argparse + +from assertpy import fail +from pathlib import Path +from datetime import datetime +import time +import re + +from tests_e2e.tests.lib.logging import log +from tests_e2e.tests.lib.agent_log import AgentLog + +# This script verifies the success of an operation using the agent log. +# Enable: check that the agent has reported a successful status for the specified list of extensions +# Uninstall: check that the agent has not reported any status for the specified list of extensions +# +# Usage: +# agent_ext_policy-verify_operation_success.py --extension-list "A" "B" --operation "enable" --after-timestamp "2025-01-13 11:21:40" + + +def __get_last_reported_status(after_timestamp): + # Get last reported status from the agent log file. If after_timestamp is specified, return only the status reported + # after that timestamp, and raise error if not found after 2 tries. + agent_log = AgentLog(Path('/var/log/waagent.log')) + + retries = 2 + for attempt in range(retries): + phrase = "All extensions in the goal state have reached a terminal state" + latest_status = None + for record in agent_log.read(): + if record.timestamp < after_timestamp: + continue + + if phrase in record.message: + if latest_status is None: + latest_status = record + else: + if latest_status.timestamp < record.timestamp: + latest_status = record + + if latest_status is not None: + log.info("Latest status: {0}".format(latest_status.message)) + return latest_status + + log.info("Unable to find handler status in agent log on attempt {0}. Retrying...".format(attempt + 1)) + time.sleep(30) + + return None + + +def check_extension_reported_successful_status(status_message, ext_name: str): + # Extract extension statuses from the agent record + pattern = r"\(u?'(" + re.escape(ext_name) + r")', u?'([^']+)'\)" + match = re.search(pattern, status_message) + if match is None: + fail("Agent did not report any status for extension {0}, enable failed.".format(ext_name)) + else: + status_code = match.group(2).lower() + log.info("Status code: {0}".format(status_code)) + if status_code not in ["success", "ready"]: + fail("Agent did not report a successful status for extension {0}, enable failed. Status: {1}".format(ext_name, status_code)) + else: + log.info("Agent reported a successful status for extension {0}, enable succeeded.".format(ext_name)) + +def main(): + + parser = argparse.ArgumentParser() + parser.add_argument('--extension-list', dest='extension_list', required=True, nargs='+', + help='Extension name(s) to process. Provide a single name or a space-separated list of names.') + + parser.add_argument('--operation', dest='operation', required=True, choices=['enable', 'uninstall']) + parser.add_argument("--after-timestamp", dest='after_timestamp', required=False) + args = parser.parse_args() + + if args.after_timestamp is not None: + after_datetime = datetime.strptime(args.after_timestamp, '%Y-%m-%d %H:%M:%S') + else: + after_datetime = datetime.min + + status = __get_last_reported_status(after_datetime) + if status is None: + fail("Unable to find extension status in agent log.") + + if args.operation == "enable": + log.info("Checking agent status file to verify that extensions were enabled successfully.") + for extension in args.extension_list: + check_extension_reported_successful_status(status.message, extension) + + elif args.operation == "uninstall": + log.info("Checking agent log to verify that status is not reported for uninstalled extensions.") + for extension in args.extension_list: + if extension in status.message: + fail("Agent reported status for extension {0}, uninstall failed.".format(extension)) + else: + log.info("Agent did not report status for extension {0}, uninstall succeeded.".format(extension)) + + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/tests_e2e/tests/scripts/agent_ext_policy-verify_uninstall_success.py b/tests_e2e/tests/scripts/agent_ext_policy-verify_uninstall_success.py deleted file mode 100755 index 8bcf725ba..000000000 --- a/tests_e2e/tests/scripts/agent_ext_policy-verify_uninstall_success.py +++ /dev/null @@ -1,61 +0,0 @@ -#!/usr/bin/env pypy3 - -# Microsoft Azure Linux Agent -# -# Copyright 2018 Microsoft Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# Verify if the agent reported update status to CRP via status file -# -import argparse -import glob -import json - -from assertpy import fail - -from tests_e2e.tests.lib.logging import log - - -def extension_found_in_agent_status_file(ext_name: str) -> bool: - # Check if the provided extension name is present in the agent status file, under handlerAggregateStatus. - # If the name is not present, the uninstall operation was successful. - # TODO: a new status file is created for each goal state. To make this a reliable check, make sure to correlate - # the status file to the goal state being checked. Alternatively, consider checking waagent.log. - agent_status_file = "/var/lib/waagent/history/*/waagent_status.json" - file_paths = glob.glob(agent_status_file, recursive=True) - for file in file_paths: - with open(file, 'r') as f: - data = json.load(f) - log.info("Agent status file (%s): %s", file, data) - handler_status = data["aggregateStatus"]["handlerAggregateStatus"] - if any(handler["handlerName"].lower() == ext_name.lower() for handler in handler_status): - return True - return False - - -def main(): - - parser = argparse.ArgumentParser() - parser.add_argument('--extension-name', dest='name', required=True) - args = parser.parse_args() - - log.info("Checking agent status file to verify that the uninstalled extension is not present in reported status") - if extension_found_in_agent_status_file(args.name): - fail("Handler status was found in the status file for extension {0}, uninstall failed.".format(args.name)) - else: - log.info("Handler status was not found in the status file for extension {0}, uninstall succeeded.".format(args.name)) - - -if __name__ == "__main__": - main()