Skip to content

Commit

Permalink
Report status for single-config ext
Browse files Browse the repository at this point in the history
  • Loading branch information
mgunnala committed Jan 13, 2025
1 parent fc2de23 commit c3aac0f
Show file tree
Hide file tree
Showing 9 changed files with 210 additions and 123 deletions.
47 changes: 24 additions & 23 deletions azurelinuxagent/ga/exthandlers.py
Original file line number Diff line number Diff line change
Expand Up @@ -297,6 +297,8 @@ class ExtHandlersHandler(object):
def __init__(self, protocol):
self.protocol = protocol
self.ext_handlers = None
# Maintain a list of extensions that are disallowed, and always report extension status for disallowed extensions.
self.disallowed_ext_handlers = []
# The GoalState Aggregate status needs to report the last status of the GoalState. Since we only process
# extensions on goal state change, we need to maintain its state.
# Setting the status to None here. This would be overridden as soon as the first GoalState is processed
Expand Down Expand Up @@ -519,6 +521,7 @@ def handle_ext_handlers(self, goal_state_id):
# back for the skipped extensions. In order to propagate the status back to CRP, we will report status back
# here with an error message.
if not extensions_enabled:
self.disallowed_ext_handlers.append(ext_handler)
agent_conf_file_path = get_osutil().agent_conf_file_path
msg = "Extension will not be processed since extension processing is disabled. To enable extension " \
"processing, set Extensions.Enabled=y in '{0}'".format(agent_conf_file_path)
Expand Down Expand Up @@ -745,28 +748,25 @@ def __handle_and_report_ext_handler_errors(ext_handler_i, error, report_op, mess
add_event(name=name, version=handler_version, op=report_op, is_success=False, log_event=True,
message=message)

@staticmethod
def __report_policy_error(ext_handler_i, error_code, report_op, message, extension):
# TODO: Consider merging this function with __handle_and_report_ext_handler_errors() above, after investigating
# the impact of this change.
#
# If extension status is present, CRP will ignore handler status and report extension status. In the case of policy errors,
# extensions are not processed, so collect_ext_status() reports transitioning status on behalf of the extension.
# However, extensions blocked by policy should fail fast, so agent should write a .status file for policy failures.
# Note that __handle_and_report_ext_handler_errors() does not create the file for single-config extensions, but changing
# it will require additional testing/investigation. As a temporary workaround, this separate function was created
# to write a status file for single-config extensions.

# Set handler status for all extensions (with and without settings). We report the same error at both the
# handler and extension status level.
def __report_policy_error(self, ext_handler_i, error_code, report_op, message, extension):
# TODO: __handle_and_report_ext_handler_errors() does not create a status file for single-config extensions, this
# function was created as a temporary workaround. Consider merging the two functions function after assessing its impact.

# If extension status exists, CRP ignores handler status and reports extension status. In the case of policy errors,
# we write a .status file to force CRP to fail fast - the agent will otherwise report a transitioning status.
# - For extensions without settings or uninstall errors: report at the handler level.
# - For extensions with settings (install/enable errors): report at both handler and extension levels.

# Keep a list of disallowed extensions so that report_ext_handler_status() can report status for them.
self.disallowed_ext_handlers.append(ext_handler_i.ext_handler)

# Set handler status for all extensions (with and without settings).
ext_handler_i.set_handler_status(message=message, code=error_code)

# Create status file for extensions with settings (single and multi config).
# If status file already exists, overwrite it. If an extension was previously reporting status and is now
# blocked by a policy error, we should report the policy error.
if extension is not None:
# TODO: if extension is reporting a heartbeat, it overwrites status. Consider overwriting heartbeat, if
# it exists.
# For extensions with settings (install/enable errors), also update extension-level status.
# Overwrite any existing status file to reflect policy failures accurately.
if extension is not None and ext_handler_i.ext_handler.state == ExtensionRequestedState.Enabled:
# TODO: if extension is reporting heartbeat, it overwrites status. Consider overwriting heartbeat here
ext_handler_i.create_status_file(extension, status=ExtensionStatusValue.error, code=error_code,
operation=report_op, message=message, overwrite=True)

Expand Down Expand Up @@ -1068,12 +1068,13 @@ def report_ext_handler_status(self, vm_status, ext_handler, goal_state_changed):

handler_state = ext_handler_i.get_handler_state()
ext_handler_statuses = []
ext_disallowed = ext_handler in self.disallowed_ext_handlers
# For MultiConfig, we need to report status per extension even for Handler level failures.
# If we have HandlerStatus for a MultiConfig handler and GS is requesting for it, we would report status per
# extension even if HandlerState == NotInstalled (Sample scenario: ExtensionsGoalStateError, DecideVersionError, etc)
# We also need to report extension status for an uninstalled handler if extensions are disabled, because CRP
# waits for extension runtime status before failing the extension operation.
if handler_state != ExtHandlerState.NotInstalled or ext_handler.supports_multi_config or not conf.get_extensions_enabled():
# We also need to report extension status for an uninstalled handler if the extension is disallowed (due to
# policy failure, extensions disabled, etc.) because CRP waits for extension runtime status before failing the operation.
if handler_state != ExtHandlerState.NotInstalled or ext_handler.supports_multi_config or ext_disallowed:

# Since we require reading the Manifest for reading the heartbeat, this would fail if HandlerManifest not found.
# Only try to read heartbeat if HandlerState != NotInstalled.
Expand Down
10 changes: 5 additions & 5 deletions tests/ga/test_extension.py
Original file line number Diff line number Diff line change
Expand Up @@ -3572,7 +3572,7 @@ def test_should_fail_enable_if_extension_disallowed(self):
}
expected_msg = "failed to run extension 'OSTCExtensions.ExampleHandlerLinux' because it is not specified as an allowed extension."
self._test_policy_case(policy=policy, op=ExtensionRequestedState.Enabled, expected_status_code=ExtensionErrorCodes.PluginEnableProcessingFailed,
expected_handler_status='NotReady', expected_ext_count=0, expected_status_msg=expected_msg)
expected_handler_status='NotReady', expected_ext_count=1, expected_status_msg=expected_msg)

def test_should_fail_enable_for_invalid_policy(self):
policy = \
Expand All @@ -3584,7 +3584,7 @@ def test_should_fail_enable_for_invalid_policy(self):
}
expected_msg = "attribute 'extensionPolicies.allowListedExtensionsOnly'; must be 'boolean'"
self._test_policy_case(policy=policy, op=ExtensionRequestedState.Enabled, expected_status_code=ExtensionErrorCodes.PluginEnableProcessingFailed,
expected_handler_status='NotReady', expected_ext_count=0, expected_status_msg=expected_msg)
expected_handler_status='NotReady', expected_ext_count=1, expected_status_msg=expected_msg)

def test_should_fail_extension_if_error_thrown_during_policy_engine_init(self):
policy = \
Expand All @@ -3596,7 +3596,7 @@ def test_should_fail_extension_if_error_thrown_during_policy_engine_init(self):
expected_msg = "Extension will not be processed: mock exception"
self._test_policy_case(policy=policy, op=ExtensionRequestedState.Enabled,
expected_status_code=ExtensionErrorCodes.PluginEnableProcessingFailed,
expected_handler_status='NotReady', expected_ext_count=0, expected_status_msg=expected_msg)
expected_handler_status='NotReady', expected_ext_count=1, expected_status_msg=expected_msg)

def test_should_fail_uninstall_if_extension_disallowed(self):
policy = \
Expand All @@ -3610,7 +3610,7 @@ def test_should_fail_uninstall_if_extension_disallowed(self):
}
expected_msg = "failed to uninstall extension 'OSTCExtensions.ExampleHandlerLinux' because it is not specified as an allowed extension."
self._test_policy_case(policy=policy, op=ExtensionRequestedState.Uninstall, expected_status_code=ExtensionErrorCodes.PluginDisableProcessingFailed,
expected_handler_status='NotReady', expected_ext_count=0, expected_status_msg=expected_msg)
expected_handler_status='NotReady', expected_ext_count=1, expected_status_msg=expected_msg)

def test_should_fail_enable_if_dependent_extension_disallowed(self):
self._create_policy_file({
Expand All @@ -3634,7 +3634,7 @@ def test_should_fail_enable_if_dependent_extension_disallowed(self):

# OtherExampleHandlerLinux should be disallowed by policy, ExampleHandlerLinux should be skipped because
# dependent extension failed
self._assert_handler_status(protocol.report_vm_status, expected_status="NotReady", expected_ext_count=0,
self._assert_handler_status(protocol.report_vm_status, expected_status="NotReady", expected_ext_count=1,
version="1.0.0", expected_handler_name="OSTCExtensions.OtherExampleHandlerLinux",
expected_msg=("failed to run extension 'OSTCExtensions.OtherExampleHandlerLinux' "
"because it is not specified as an allowed extension."))
Expand Down
3 changes: 2 additions & 1 deletion tests_e2e/orchestrator/scripts/collect-logs
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,8 @@ tar --exclude='journal/*' --exclude='omsbundle' --exclude='omsagent' --exclude='
-czf "$logs_file_name" \
/var/log \
/var/lib/waagent/ \
$waagent_conf
$waagent_conf \
/etc/waagent_policy.json

set -euxo pipefail

Expand Down
4 changes: 3 additions & 1 deletion tests_e2e/test_suites/ext_policy_with_dependencies.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,6 @@ tests:
- "ext_policy/ext_policy_with_dependencies.py"
images: "endorsed"
executes_on_scale_set: true
owns_vm: false
owns_vm: false
skip_on_images:
- "alma_9" # TODO: Currently AlmaLinux is not available for scale sets; enable this image when it is available.
10 changes: 8 additions & 2 deletions tests_e2e/tests/ext_policy/ext_policy.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
#
import json
import uuid
import os
from typing import List, Dict, Any
from assertpy import assert_that, fail

Expand All @@ -44,16 +45,20 @@ def _create_policy_file(self, policy):
"""
Create policy json file and copy to /etc/waagent_policy.json on test machine.
"""
with open("waagent_policy.json", mode='w') as policy_file:
unique_id = uuid.uuid4()
file_path = "/tmp/waagent_policy_{0}.json".format(unique_id)
with open(file_path, mode='w') as policy_file:
json.dump(policy, policy_file, indent=4)
policy_file.flush()
log.info("Policy file contents: {0}".format(json.dumps(policy, indent=4)))

remote_path = "/tmp/waagent_policy.json"
local_path = policy_file.name
self._ssh_client.copy_to_node(local_path=local_path, remote_path=remote_path)
policy_file_final_dest = "/etc/waagent_policy.json"
log.info("Copying policy file to test VM [%s]", self._context.vm.name)
self._ssh_client.run_command(f"mv {remote_path} {policy_file_final_dest}", use_sudo=True)
os.remove(file_path)

def _operation_should_succeed(self, operation, extension_case):
log.info("")
Expand Down Expand Up @@ -234,6 +239,7 @@ def run(self):
}
}
self._create_policy_file(policy)
self._operation_should_succeed("enable", custom_script)
# Update settings to force an update to the seq no
run_command.settings = {'source': f"echo '{str(uuid.uuid4())}'"}
run_command_2.settings = {'source': f"echo '{str(uuid.uuid4())}'"}
Expand Down Expand Up @@ -316,4 +322,4 @@ def get_ignore_error_rules(self) -> List[Dict[str, Any]]:


if __name__ == "__main__":
ExtPolicy.run_from_command_line()
ExtPolicy.run_from_command_line()
Loading

0 comments on commit c3aac0f

Please sign in to comment.