From eb59fe74c895e86b32fec6953342551a6228b0a1 Mon Sep 17 00:00:00 2001 From: Nageswara Nandigam Date: Sun, 8 Dec 2024 21:40:33 -0800 Subject: [PATCH] random update time --- azurelinuxagent/ga/agent_update_handler.py | 5 +- azurelinuxagent/ga/ga_version_updater.py | 3 +- azurelinuxagent/ga/rsm_version_updater.py | 2 +- .../ga/self_update_version_updater.py | 66 +++++++++++-------- tests/ga/test_agent_update_handler.py | 55 ++++++++++++---- tests/ga/test_update.py | 25 +++++-- 6 files changed, 106 insertions(+), 50 deletions(-) diff --git a/azurelinuxagent/ga/agent_update_handler.py b/azurelinuxagent/ga/agent_update_handler.py index ee6a44f9f..c54c58cb5 100644 --- a/azurelinuxagent/ga/agent_update_handler.py +++ b/azurelinuxagent/ga/agent_update_handler.py @@ -214,7 +214,8 @@ def run(self, goal_state, ext_gs_updated): # Always agent uses self-update for initial update regardless vm enrolled into RSM or not # So ignoring the check for updater switch for the initial goal state/update - if not self._is_initial_update(): + initial_update = self._is_initial_update() + if not initial_update: # Updater will return True or False if we need to switch the updater # If self-updater receives RSM update enabled, it will switch to RSM updater @@ -243,7 +244,7 @@ def run(self, goal_state, ext_gs_updated): self._updater.retrieve_agent_version(agent_family, goal_state) - if not self._updater.is_retrieved_version_allowed_to_update(agent_family): + if not self._updater.is_retrieved_version_allowed_to_update(agent_family, initial_update): return self._updater.log_new_agent_update_message() agent = self._updater.download_and_get_new_agent(self._protocol, agent_family, goal_state) diff --git a/azurelinuxagent/ga/ga_version_updater.py b/azurelinuxagent/ga/ga_version_updater.py index 82a621eac..6b1358907 100644 --- a/azurelinuxagent/ga/ga_version_updater.py +++ b/azurelinuxagent/ga/ga_version_updater.py @@ -63,10 +63,11 @@ def retrieve_agent_version(self, agent_family, goal_state): """ raise NotImplementedError - def is_retrieved_version_allowed_to_update(self, agent_family): + def is_retrieved_version_allowed_to_update(self, agent_family, initial_update): """ Checks all base condition if new version allow to update. @param agent_family: agent family + @param initial_update: True if it's initial update else False @return: True if allowed to update else False """ raise NotImplementedError diff --git a/azurelinuxagent/ga/rsm_version_updater.py b/azurelinuxagent/ga/rsm_version_updater.py index 366f1d703..584862663 100644 --- a/azurelinuxagent/ga/rsm_version_updater.py +++ b/azurelinuxagent/ga/rsm_version_updater.py @@ -84,7 +84,7 @@ def retrieve_agent_version(self, agent_family, goal_state): """ self._version = FlexibleVersion(agent_family.version) - def is_retrieved_version_allowed_to_update(self, agent_family): + def is_retrieved_version_allowed_to_update(self, agent_family, initial_update): """ Once version retrieved from goal state, we check if we allowed to update for that version allow update If new version not same as current version, not below than daemon version and if version is from rsm request diff --git a/azurelinuxagent/ga/self_update_version_updater.py b/azurelinuxagent/ga/self_update_version_updater.py index 5a839851d..bc9e0a10c 100644 --- a/azurelinuxagent/ga/self_update_version_updater.py +++ b/azurelinuxagent/ga/self_update_version_updater.py @@ -17,6 +17,7 @@ # Requires Python 2.6+ and Openssl 1.0+ import datetime +import random from azurelinuxagent.common import conf, logger from azurelinuxagent.common.event import add_event, WALAEventOperation @@ -38,7 +39,8 @@ class SelfUpdateVersionUpdater(GAVersionUpdater): def __init__(self, gs_id): super(SelfUpdateVersionUpdater, self).__init__(gs_id) self._last_attempted_manifest_download_time = datetime.datetime.min - self._last_attempted_self_update_time = datetime.datetime.min + self._next_update_time = datetime.datetime.min + self._update_time_refreshed = False @staticmethod def _get_largest_version(agent_manifest): @@ -61,34 +63,36 @@ def _get_agent_upgrade_type(version): return SelfUpdateType.Regular @staticmethod - def _get_next_process_time(last_val, frequency, now): + def _get_next_process_time(upgrade_type, now): """ - Get the next upgrade time + Returns random time in between 0 to 24hrs(regular) or 6hrs(hotfix) from now """ - return now if last_val == datetime.datetime.min else last_val + datetime.timedelta(seconds=frequency) + if upgrade_type == SelfUpdateType.Hotfix: + frequency = conf.get_self_update_hotfix_frequency() + else: + frequency = conf.get_self_update_regular_frequency() + return now + datetime.timedelta(seconds=random.randint(0, frequency)) - def _is_new_agent_allowed_update(self): + def _new_agent_allowed_now_to_update(self): """ - This method ensure that update is allowed only once per (hotfix/Regular) upgrade frequency + This method called when new update detected and computes random time for next update. + If the current time on or after upgrade time, we allow the update. + + Note: After we allow the update, and it's not successful, the next update time will be recalculated. """ now = datetime.datetime.utcnow() upgrade_type = self._get_agent_upgrade_type(self._version) - if upgrade_type == SelfUpdateType.Hotfix: - next_update_time = self._get_next_process_time(self._last_attempted_self_update_time, - conf.get_self_update_hotfix_frequency(), now) - else: - next_update_time = self._get_next_process_time(self._last_attempted_self_update_time, - conf.get_self_update_regular_frequency(), now) - if self._version > CURRENT_VERSION: - message = "Self-update discovered new {0} upgrade WALinuxAgent-{1}; Will upgrade on or after {2}".format( - upgrade_type, str(self._version), next_update_time.strftime(logger.Logger.LogTimeFormatInUTC)) - logger.info(message) - add_event(op=WALAEventOperation.AgentUpgrade, message=message, log_event=False) - - if next_update_time <= now: - # Update the last upgrade check time even if no new agent is available for upgrade - self._last_attempted_self_update_time = now + if not self._update_time_refreshed: + self._next_update_time = self._get_next_process_time(upgrade_type, now) + self._update_time_refreshed = True + message = "Self-update discovered new {0} upgrade WALinuxAgent-{1}; Will upgrade on or after {2}".format( + upgrade_type, str(self._version), self._next_update_time.strftime(logger.Logger.LogTimeFormatInUTC)) + logger.info(message) + add_event(op=WALAEventOperation.AgentUpgrade, message=message, log_event=False) + + if self._next_update_time <= now: + self._update_time_refreshed = False return True return False @@ -148,16 +152,24 @@ def retrieve_agent_version(self, agent_family, goal_state): largest_version = self._get_largest_version(self._agent_manifest) self._version = largest_version - def is_retrieved_version_allowed_to_update(self, agent_family): + def is_retrieved_version_allowed_to_update(self, agent_family, initial_update): """ - checks update is spread per (as specified in the conf.get_self_update_hotfix_frequency() or conf.get_self_update_regular_frequency()) - or if version below than current version - return false when we don't allow updates. + we don't allow new version update, if + 1) The version is not greater than current version + 2) if current time is before next update time + + Allow the update, if + 1) Initial update + 2) If current time is on or after next update time """ - if not self._is_new_agent_allowed_update(): + if self._version <= CURRENT_VERSION: return False - if self._version <= CURRENT_VERSION: + # very first update need to proceed without any delay + if initial_update: + return True + + if not self._new_agent_allowed_now_to_update(): return False return True diff --git a/tests/ga/test_agent_update_handler.py b/tests/ga/test_agent_update_handler.py index c2d01a424..c19e975a1 100644 --- a/tests/ga/test_agent_update_handler.py +++ b/tests/ga/test_agent_update_handler.py @@ -1,6 +1,8 @@ import contextlib import json import os +import random +import time from azurelinuxagent.common import conf from azurelinuxagent.common.event import WALAEventOperation @@ -29,7 +31,7 @@ def setUp(self): clear_singleton_instances(ProtocolUtil) @contextlib.contextmanager - def _get_agent_update_handler(self, test_data=None, autoupdate_frequency=0.001, autoupdate_enabled=True, initial_update_attempted=True, protocol_get_error=False, mock_get_header=None, mock_put_header=None): + def _get_agent_update_handler(self, test_data=None, autoupdate_frequency=0.001, autoupdate_enabled=True, initial_update_attempted=True, protocol_get_error=False, mock_get_header=None, mock_put_header=None, mock_random_update_time=True): # Default to DATA_FILE of test_data parameter raises the pylint warning # W0102: Dangerous default value DATA_FILE (builtins.dict) as argument (dangerous-default-value) test_data = DATA_FILE if test_data is None else test_data @@ -61,14 +63,25 @@ def put_handler(url, *args, **_): if initial_update_attempted: open(os.path.join(conf.get_lib_dir(), INITIAL_UPDATE_STATE_FILE), "a").close() + original_randint = random.randint + + def _mock_random_update_time(a, b): + if mock_random_update_time: + return 0 + # if normal/hotfix frequency is 1 second, return 0.001 to simulate the update time + if b == 1: + return 0.001 + return original_randint(a, b) + with patch("azurelinuxagent.common.conf.get_autoupdate_enabled", return_value=autoupdate_enabled): with patch("azurelinuxagent.common.conf.get_autoupdate_frequency", return_value=autoupdate_frequency): - with patch("azurelinuxagent.common.conf.get_autoupdate_gafamily", return_value="Prod"): - with patch("azurelinuxagent.common.conf.get_enable_ga_versioning", return_value=True): - with patch("azurelinuxagent.common.event.EventLogger.add_event") as mock_telemetry: - agent_update_handler = get_agent_update_handler(protocol) - agent_update_handler._protocol = protocol - yield agent_update_handler, mock_telemetry + with patch("azurelinuxagent.ga.self_update_version_updater.random.randint", side_effect=_mock_random_update_time): + with patch("azurelinuxagent.common.conf.get_autoupdate_gafamily", return_value="Prod"): + with patch("azurelinuxagent.common.conf.get_enable_ga_versioning", return_value=True): + with patch("azurelinuxagent.common.event.EventLogger.add_event") as mock_telemetry: + agent_update_handler = get_agent_update_handler(protocol) + agent_update_handler._protocol = protocol + yield agent_update_handler, mock_telemetry def _assert_agent_directories_available(self, versions): for version in versions: @@ -130,7 +143,7 @@ def test_it_should_update_to_largest_version_if_ga_versioning_disabled(self): self._assert_agent_directories_exist_and_others_dont_exist(versions=[str(CURRENT_VERSION), "99999.0.0.0"]) self._assert_agent_exit_process_telemetry_emitted(ustr(context.exception.reason)) - def test_it_should_not_update_to_largest_version_if_time_window_not_elapsed(self): + def test_it_should_not_update_to_largest_version_if_manifest_download_time_not_elapsed(self): self.prepare_agents(count=1) data_file = DATA_FILE.copy() @@ -146,14 +159,30 @@ def test_it_should_not_update_to_largest_version_if_time_window_not_elapsed(self self.assertFalse(os.path.exists(self.agent_dir("99999.0.0.0")), "New agent directory should not be found") - def test_it_should_update_to_largest_version_if_time_window_elapsed(self): + def test_it_should_not_do_self_update_if_update_time_is_not_elapsed(self): + self.prepare_agents(count=1) + + data_file = DATA_FILE.copy() + data_file["ga_manifest"] = "wire/ga_manifest_no_uris.xml" + with self._get_agent_update_handler(test_data=data_file, mock_random_update_time=False) as (agent_update_handler, _): + agent_update_handler.run(agent_update_handler._protocol.get_goal_state(), True) + self.assertFalse(os.path.exists(self.agent_dir("99999.0.0.0")), + "New agent directory should not be found") + agent_update_handler._protocol.mock_wire_data.set_ga_manifest("wire/ga_manifest.xml") + agent_update_handler._protocol.mock_wire_data.set_incarnation(2) + agent_update_handler._protocol.client.update_goal_state() + agent_update_handler.run(agent_update_handler._protocol.get_goal_state(), True) + self.assertFalse(os.path.exists(self.agent_dir("99999.0.0.0")), + "New agent directory should not be found") + + def test_it_should_update_to_largest_version_after_time_window_elapsed(self): self.prepare_agents(count=1) data_file = DATA_FILE.copy() data_file["ga_manifest"] = "wire/ga_manifest_no_uris.xml" - with patch("azurelinuxagent.common.conf.get_self_update_hotfix_frequency", return_value=0.001): - with patch("azurelinuxagent.common.conf.get_self_update_regular_frequency", return_value=0.001): - with self._get_agent_update_handler(test_data=data_file) as (agent_update_handler, mock_telemetry): + with patch("azurelinuxagent.common.conf.get_self_update_hotfix_frequency", return_value=1): + with patch("azurelinuxagent.common.conf.get_self_update_regular_frequency", return_value=1): + with self._get_agent_update_handler(test_data=data_file, mock_random_update_time=False) as (agent_update_handler, mock_telemetry): with self.assertRaises(AgentUpgradeExitException) as context: agent_update_handler.run(agent_update_handler._protocol.get_goal_state(), True) self.assertFalse(os.path.exists(self.agent_dir("99999.0.0.0")), @@ -161,6 +190,8 @@ def test_it_should_update_to_largest_version_if_time_window_elapsed(self): agent_update_handler._protocol.mock_wire_data.set_ga_manifest("wire/ga_manifest.xml") agent_update_handler._protocol.mock_wire_data.set_incarnation(2) agent_update_handler._protocol.client.update_goal_state() + # sleeping for update window to elapse + time.sleep(0.1) agent_update_handler.run(agent_update_handler._protocol.get_goal_state(), True) self._assert_update_discovered_from_agent_manifest(mock_telemetry, inc=2, version="99999.0.0.0") self._assert_agent_directories_exist_and_others_dont_exist(versions=[str(CURRENT_VERSION), "99999.0.0.0"]) diff --git a/tests/ga/test_update.py b/tests/ga/test_update.py index 2f201d215..346906b3e 100644 --- a/tests/ga/test_update.py +++ b/tests/ga/test_update.py @@ -7,6 +7,7 @@ import glob import json import os +import random import re import shutil import stat @@ -1326,7 +1327,7 @@ def create_conf_mocks(self, autoupdate_frequency, hotfix_frequency, normal_frequ @contextlib.contextmanager def __get_update_handler(self, iterations=1, test_data=None, - reload_conf=None, autoupdate_frequency=0.001, hotfix_frequency=1.0, normal_frequency=2.0, initial_update_attempted=True): + reload_conf=None, autoupdate_frequency=0.001, hotfix_frequency=10, normal_frequency=10, initial_update_attempted=True, mock_random_update_time=True): if initial_update_attempted: open(os.path.join(conf.get_lib_dir(), INITIAL_UPDATE_STATE_FILE), "a").close() @@ -1354,11 +1355,22 @@ def put_handler(url, *args, **_): protocol.aggregate_status = json.loads(args[0]) return MockHttpResponse(status=201) + original_randint = random.randint + + def _mock_random_update_time(a, b): + if mock_random_update_time: + return 0 + if b == 1: # if normal/hotfix frequency is 1 second, return 0.001 to simulate the update time + return 0.001 + return original_randint(a, b) + protocol.set_http_handlers(http_get_handler=get_handler, http_put_handler=put_handler) with self.create_conf_mocks(autoupdate_frequency, hotfix_frequency, normal_frequency): - with patch("azurelinuxagent.common.event.EventLogger.add_event") as mock_telemetry: - update_handler._protocol = protocol - yield update_handler, mock_telemetry + with patch("azurelinuxagent.ga.self_update_version_updater.random.randint", + side_effect=_mock_random_update_time): + with patch("azurelinuxagent.common.event.EventLogger.add_event") as mock_telemetry: + update_handler._protocol = protocol + yield update_handler, mock_telemetry def __assert_exit_code_successful(self, update_handler): self.assertEqual(0, update_handler.get_exit_code(), "Exit code should be 0") @@ -1605,8 +1617,7 @@ def reload_conf(url, protocol): data_file = wire_protocol_data.DATA_FILE.copy() # This is to fail the agent update at first attempt so that agent doesn't go through update data_file["ga_manifest"] = "wire/ga_manifest_no_uris.xml" - with self.__get_update_handler(iterations=no_of_iterations, test_data=data_file, reload_conf=reload_conf, - hotfix_frequency=10, normal_frequency=10) as (update_handler, _): + with self.__get_update_handler(iterations=no_of_iterations, test_data=data_file, reload_conf=reload_conf, mock_random_update_time=False) as (update_handler, _): update_handler._protocol.mock_wire_data.set_incarnation(2) update_handler.run(debug=True) @@ -1645,7 +1656,7 @@ def reload_conf(url, protocol): data_file = wire_protocol_data.DATA_FILE.copy() data_file["ga_manifest"] = "wire/ga_manifest_no_uris.xml" with self.__get_update_handler(iterations=no_of_iterations, test_data=data_file, reload_conf=reload_conf, - hotfix_frequency=0.001, normal_frequency=0.001) as (update_handler, mock_telemetry): + hotfix_frequency=1, normal_frequency=1, mock_random_update_time=False) as (update_handler, mock_telemetry): update_handler._protocol.mock_wire_data.set_incarnation(2) update_handler.run(debug=True)