Skip to content

Commit

Permalink
random update time
Browse files Browse the repository at this point in the history
  • Loading branch information
nagworld9 committed Dec 10, 2024
1 parent dd2deb7 commit eb59fe7
Show file tree
Hide file tree
Showing 6 changed files with 106 additions and 50 deletions.
5 changes: 3 additions & 2 deletions azurelinuxagent/ga/agent_update_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -214,7 +214,8 @@ def run(self, goal_state, ext_gs_updated):

# Always agent uses self-update for initial update regardless vm enrolled into RSM or not
# So ignoring the check for updater switch for the initial goal state/update
if not self._is_initial_update():
initial_update = self._is_initial_update()
if not initial_update:

# Updater will return True or False if we need to switch the updater
# If self-updater receives RSM update enabled, it will switch to RSM updater
Expand Down Expand Up @@ -243,7 +244,7 @@ def run(self, goal_state, ext_gs_updated):

self._updater.retrieve_agent_version(agent_family, goal_state)

if not self._updater.is_retrieved_version_allowed_to_update(agent_family):
if not self._updater.is_retrieved_version_allowed_to_update(agent_family, initial_update):
return
self._updater.log_new_agent_update_message()
agent = self._updater.download_and_get_new_agent(self._protocol, agent_family, goal_state)
Expand Down
3 changes: 2 additions & 1 deletion azurelinuxagent/ga/ga_version_updater.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,10 +63,11 @@ def retrieve_agent_version(self, agent_family, goal_state):
"""
raise NotImplementedError

def is_retrieved_version_allowed_to_update(self, agent_family):
def is_retrieved_version_allowed_to_update(self, agent_family, initial_update):
"""
Checks all base condition if new version allow to update.
@param agent_family: agent family
@param initial_update: True if it's initial update else False
@return: True if allowed to update else False
"""
raise NotImplementedError
Expand Down
2 changes: 1 addition & 1 deletion azurelinuxagent/ga/rsm_version_updater.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ def retrieve_agent_version(self, agent_family, goal_state):
"""
self._version = FlexibleVersion(agent_family.version)

def is_retrieved_version_allowed_to_update(self, agent_family):
def is_retrieved_version_allowed_to_update(self, agent_family, initial_update):
"""
Once version retrieved from goal state, we check if we allowed to update for that version
allow update If new version not same as current version, not below than daemon version and if version is from rsm request
Expand Down
66 changes: 39 additions & 27 deletions azurelinuxagent/ga/self_update_version_updater.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
# Requires Python 2.6+ and Openssl 1.0+

import datetime
import random

from azurelinuxagent.common import conf, logger
from azurelinuxagent.common.event import add_event, WALAEventOperation
Expand All @@ -38,7 +39,8 @@ class SelfUpdateVersionUpdater(GAVersionUpdater):
def __init__(self, gs_id):
super(SelfUpdateVersionUpdater, self).__init__(gs_id)
self._last_attempted_manifest_download_time = datetime.datetime.min
self._last_attempted_self_update_time = datetime.datetime.min
self._next_update_time = datetime.datetime.min
self._update_time_refreshed = False

@staticmethod
def _get_largest_version(agent_manifest):
Expand All @@ -61,34 +63,36 @@ def _get_agent_upgrade_type(version):
return SelfUpdateType.Regular

@staticmethod
def _get_next_process_time(last_val, frequency, now):
def _get_next_process_time(upgrade_type, now):
"""
Get the next upgrade time
Returns random time in between 0 to 24hrs(regular) or 6hrs(hotfix) from now
"""
return now if last_val == datetime.datetime.min else last_val + datetime.timedelta(seconds=frequency)
if upgrade_type == SelfUpdateType.Hotfix:
frequency = conf.get_self_update_hotfix_frequency()
else:
frequency = conf.get_self_update_regular_frequency()
return now + datetime.timedelta(seconds=random.randint(0, frequency))

def _is_new_agent_allowed_update(self):
def _new_agent_allowed_now_to_update(self):
"""
This method ensure that update is allowed only once per (hotfix/Regular) upgrade frequency
This method called when new update detected and computes random time for next update.
If the current time on or after upgrade time, we allow the update.
Note: After we allow the update, and it's not successful, the next update time will be recalculated.
"""
now = datetime.datetime.utcnow()
upgrade_type = self._get_agent_upgrade_type(self._version)
if upgrade_type == SelfUpdateType.Hotfix:
next_update_time = self._get_next_process_time(self._last_attempted_self_update_time,
conf.get_self_update_hotfix_frequency(), now)
else:
next_update_time = self._get_next_process_time(self._last_attempted_self_update_time,
conf.get_self_update_regular_frequency(), now)

if self._version > CURRENT_VERSION:
message = "Self-update discovered new {0} upgrade WALinuxAgent-{1}; Will upgrade on or after {2}".format(
upgrade_type, str(self._version), next_update_time.strftime(logger.Logger.LogTimeFormatInUTC))
logger.info(message)
add_event(op=WALAEventOperation.AgentUpgrade, message=message, log_event=False)

if next_update_time <= now:
# Update the last upgrade check time even if no new agent is available for upgrade
self._last_attempted_self_update_time = now
if not self._update_time_refreshed:
self._next_update_time = self._get_next_process_time(upgrade_type, now)
self._update_time_refreshed = True
message = "Self-update discovered new {0} upgrade WALinuxAgent-{1}; Will upgrade on or after {2}".format(
upgrade_type, str(self._version), self._next_update_time.strftime(logger.Logger.LogTimeFormatInUTC))
logger.info(message)
add_event(op=WALAEventOperation.AgentUpgrade, message=message, log_event=False)

if self._next_update_time <= now:
self._update_time_refreshed = False
return True
return False

Expand Down Expand Up @@ -148,16 +152,24 @@ def retrieve_agent_version(self, agent_family, goal_state):
largest_version = self._get_largest_version(self._agent_manifest)
self._version = largest_version

def is_retrieved_version_allowed_to_update(self, agent_family):
def is_retrieved_version_allowed_to_update(self, agent_family, initial_update):
"""
checks update is spread per (as specified in the conf.get_self_update_hotfix_frequency() or conf.get_self_update_regular_frequency())
or if version below than current version
return false when we don't allow updates.
we don't allow new version update, if
1) The version is not greater than current version
2) if current time is before next update time
Allow the update, if
1) Initial update
2) If current time is on or after next update time
"""
if not self._is_new_agent_allowed_update():
if self._version <= CURRENT_VERSION:
return False

if self._version <= CURRENT_VERSION:
# very first update need to proceed without any delay
if initial_update:
return True

if not self._new_agent_allowed_now_to_update():
return False

return True
Expand Down
55 changes: 43 additions & 12 deletions tests/ga/test_agent_update_handler.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import contextlib
import json
import os
import random
import time

from azurelinuxagent.common import conf
from azurelinuxagent.common.event import WALAEventOperation
Expand Down Expand Up @@ -29,7 +31,7 @@ def setUp(self):
clear_singleton_instances(ProtocolUtil)

@contextlib.contextmanager
def _get_agent_update_handler(self, test_data=None, autoupdate_frequency=0.001, autoupdate_enabled=True, initial_update_attempted=True, protocol_get_error=False, mock_get_header=None, mock_put_header=None):
def _get_agent_update_handler(self, test_data=None, autoupdate_frequency=0.001, autoupdate_enabled=True, initial_update_attempted=True, protocol_get_error=False, mock_get_header=None, mock_put_header=None, mock_random_update_time=True):
# Default to DATA_FILE of test_data parameter raises the pylint warning
# W0102: Dangerous default value DATA_FILE (builtins.dict) as argument (dangerous-default-value)
test_data = DATA_FILE if test_data is None else test_data
Expand Down Expand Up @@ -61,14 +63,25 @@ def put_handler(url, *args, **_):
if initial_update_attempted:
open(os.path.join(conf.get_lib_dir(), INITIAL_UPDATE_STATE_FILE), "a").close()

original_randint = random.randint

def _mock_random_update_time(a, b):
if mock_random_update_time:
return 0
# if normal/hotfix frequency is 1 second, return 0.001 to simulate the update time
if b == 1:
return 0.001
return original_randint(a, b)

with patch("azurelinuxagent.common.conf.get_autoupdate_enabled", return_value=autoupdate_enabled):
with patch("azurelinuxagent.common.conf.get_autoupdate_frequency", return_value=autoupdate_frequency):
with patch("azurelinuxagent.common.conf.get_autoupdate_gafamily", return_value="Prod"):
with patch("azurelinuxagent.common.conf.get_enable_ga_versioning", return_value=True):
with patch("azurelinuxagent.common.event.EventLogger.add_event") as mock_telemetry:
agent_update_handler = get_agent_update_handler(protocol)
agent_update_handler._protocol = protocol
yield agent_update_handler, mock_telemetry
with patch("azurelinuxagent.ga.self_update_version_updater.random.randint", side_effect=_mock_random_update_time):
with patch("azurelinuxagent.common.conf.get_autoupdate_gafamily", return_value="Prod"):
with patch("azurelinuxagent.common.conf.get_enable_ga_versioning", return_value=True):
with patch("azurelinuxagent.common.event.EventLogger.add_event") as mock_telemetry:
agent_update_handler = get_agent_update_handler(protocol)
agent_update_handler._protocol = protocol
yield agent_update_handler, mock_telemetry

def _assert_agent_directories_available(self, versions):
for version in versions:
Expand Down Expand Up @@ -130,7 +143,7 @@ def test_it_should_update_to_largest_version_if_ga_versioning_disabled(self):
self._assert_agent_directories_exist_and_others_dont_exist(versions=[str(CURRENT_VERSION), "99999.0.0.0"])
self._assert_agent_exit_process_telemetry_emitted(ustr(context.exception.reason))

def test_it_should_not_update_to_largest_version_if_time_window_not_elapsed(self):
def test_it_should_not_update_to_largest_version_if_manifest_download_time_not_elapsed(self):
self.prepare_agents(count=1)

data_file = DATA_FILE.copy()
Expand All @@ -146,21 +159,39 @@ def test_it_should_not_update_to_largest_version_if_time_window_not_elapsed(self
self.assertFalse(os.path.exists(self.agent_dir("99999.0.0.0")),
"New agent directory should not be found")

def test_it_should_update_to_largest_version_if_time_window_elapsed(self):
def test_it_should_not_do_self_update_if_update_time_is_not_elapsed(self):
self.prepare_agents(count=1)

data_file = DATA_FILE.copy()
data_file["ga_manifest"] = "wire/ga_manifest_no_uris.xml"
with self._get_agent_update_handler(test_data=data_file, mock_random_update_time=False) as (agent_update_handler, _):
agent_update_handler.run(agent_update_handler._protocol.get_goal_state(), True)
self.assertFalse(os.path.exists(self.agent_dir("99999.0.0.0")),
"New agent directory should not be found")
agent_update_handler._protocol.mock_wire_data.set_ga_manifest("wire/ga_manifest.xml")
agent_update_handler._protocol.mock_wire_data.set_incarnation(2)
agent_update_handler._protocol.client.update_goal_state()
agent_update_handler.run(agent_update_handler._protocol.get_goal_state(), True)
self.assertFalse(os.path.exists(self.agent_dir("99999.0.0.0")),
"New agent directory should not be found")

def test_it_should_update_to_largest_version_after_time_window_elapsed(self):
self.prepare_agents(count=1)

data_file = DATA_FILE.copy()
data_file["ga_manifest"] = "wire/ga_manifest_no_uris.xml"
with patch("azurelinuxagent.common.conf.get_self_update_hotfix_frequency", return_value=0.001):
with patch("azurelinuxagent.common.conf.get_self_update_regular_frequency", return_value=0.001):
with self._get_agent_update_handler(test_data=data_file) as (agent_update_handler, mock_telemetry):
with patch("azurelinuxagent.common.conf.get_self_update_hotfix_frequency", return_value=1):
with patch("azurelinuxagent.common.conf.get_self_update_regular_frequency", return_value=1):
with self._get_agent_update_handler(test_data=data_file, mock_random_update_time=False) as (agent_update_handler, mock_telemetry):
with self.assertRaises(AgentUpgradeExitException) as context:
agent_update_handler.run(agent_update_handler._protocol.get_goal_state(), True)
self.assertFalse(os.path.exists(self.agent_dir("99999.0.0.0")),
"New agent directory should not be found")
agent_update_handler._protocol.mock_wire_data.set_ga_manifest("wire/ga_manifest.xml")
agent_update_handler._protocol.mock_wire_data.set_incarnation(2)
agent_update_handler._protocol.client.update_goal_state()
# sleeping for update window to elapse
time.sleep(0.1)
agent_update_handler.run(agent_update_handler._protocol.get_goal_state(), True)
self._assert_update_discovered_from_agent_manifest(mock_telemetry, inc=2, version="99999.0.0.0")
self._assert_agent_directories_exist_and_others_dont_exist(versions=[str(CURRENT_VERSION), "99999.0.0.0"])
Expand Down
25 changes: 18 additions & 7 deletions tests/ga/test_update.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import glob
import json
import os
import random
import re
import shutil
import stat
Expand Down Expand Up @@ -1326,7 +1327,7 @@ def create_conf_mocks(self, autoupdate_frequency, hotfix_frequency, normal_frequ

@contextlib.contextmanager
def __get_update_handler(self, iterations=1, test_data=None,
reload_conf=None, autoupdate_frequency=0.001, hotfix_frequency=1.0, normal_frequency=2.0, initial_update_attempted=True):
reload_conf=None, autoupdate_frequency=0.001, hotfix_frequency=10, normal_frequency=10, initial_update_attempted=True, mock_random_update_time=True):

if initial_update_attempted:
open(os.path.join(conf.get_lib_dir(), INITIAL_UPDATE_STATE_FILE), "a").close()
Expand Down Expand Up @@ -1354,11 +1355,22 @@ def put_handler(url, *args, **_):
protocol.aggregate_status = json.loads(args[0])
return MockHttpResponse(status=201)

original_randint = random.randint

def _mock_random_update_time(a, b):
if mock_random_update_time:
return 0
if b == 1: # if normal/hotfix frequency is 1 second, return 0.001 to simulate the update time
return 0.001
return original_randint(a, b)

protocol.set_http_handlers(http_get_handler=get_handler, http_put_handler=put_handler)
with self.create_conf_mocks(autoupdate_frequency, hotfix_frequency, normal_frequency):
with patch("azurelinuxagent.common.event.EventLogger.add_event") as mock_telemetry:
update_handler._protocol = protocol
yield update_handler, mock_telemetry
with patch("azurelinuxagent.ga.self_update_version_updater.random.randint",
side_effect=_mock_random_update_time):
with patch("azurelinuxagent.common.event.EventLogger.add_event") as mock_telemetry:
update_handler._protocol = protocol
yield update_handler, mock_telemetry

def __assert_exit_code_successful(self, update_handler):
self.assertEqual(0, update_handler.get_exit_code(), "Exit code should be 0")
Expand Down Expand Up @@ -1605,8 +1617,7 @@ def reload_conf(url, protocol):
data_file = wire_protocol_data.DATA_FILE.copy()
# This is to fail the agent update at first attempt so that agent doesn't go through update
data_file["ga_manifest"] = "wire/ga_manifest_no_uris.xml"
with self.__get_update_handler(iterations=no_of_iterations, test_data=data_file, reload_conf=reload_conf,
hotfix_frequency=10, normal_frequency=10) as (update_handler, _):
with self.__get_update_handler(iterations=no_of_iterations, test_data=data_file, reload_conf=reload_conf, mock_random_update_time=False) as (update_handler, _):
update_handler._protocol.mock_wire_data.set_incarnation(2)
update_handler.run(debug=True)

Expand Down Expand Up @@ -1645,7 +1656,7 @@ def reload_conf(url, protocol):
data_file = wire_protocol_data.DATA_FILE.copy()
data_file["ga_manifest"] = "wire/ga_manifest_no_uris.xml"
with self.__get_update_handler(iterations=no_of_iterations, test_data=data_file, reload_conf=reload_conf,
hotfix_frequency=0.001, normal_frequency=0.001) as (update_handler, mock_telemetry):
hotfix_frequency=1, normal_frequency=1, mock_random_update_time=False) as (update_handler, mock_telemetry):
update_handler._protocol.mock_wire_data.set_incarnation(2)
update_handler.run(debug=True)

Expand Down

0 comments on commit eb59fe7

Please sign in to comment.