NOTE(review): this patch was recovered from a whitespace-collapsed copy, so
indentation and blank context lines are reconstructed from the hunk counts.
The first hunk of test_support_bundle.py (@@ -1,18 +1,28 @@) recovers only 17
old-side lines, one short of its header; regenerate the patch before applying.
Functional change versus the collapsed copy: both new tests now build
download_path as f'{temp_dir}/{name}.zip'. The previous
f'{temp_dir}/{0}.zip'.format(name) rendered the literal "0" inside the
f-string, so .format(name) had nothing left to substitute.

diff --git a/manager/integration/tests/common.py b/manager/integration/tests/common.py
index bebd66f450..27fd9f5641 100644
--- a/manager/integration/tests/common.py
+++ b/manager/integration/tests/common.py
@@ -194,6 +194,7 @@
 SETTING_SNAPSHOT_FAST_REPLICA_REBUILD_ENABLED = "fast-replica-rebuild-enabled"
 SETTING_CONCURRENT_VOLUME_BACKUP_RESTORE = \
     "concurrent-volume-backup-restore-per-node-limit"
+SETTING_NODE_SELECTOR = "system-managed-components-node-selector"
 
 SNAPSHOT_DATA_INTEGRITY_IGNORED = "ignored"
 SNAPSHOT_DATA_INTEGRITY_DISABLED = "disabled"
@@ -206,6 +207,8 @@
 
 KUBERNETES_STATUS_LABEL = "KubernetesStatus"
 
+MASTER_NODE_TAINT = "node-role.kubernetes.io/master=true:NoExecute;node-role.kubernetes.io/master=true:NoSchedule"  # NOQA
+
 # https://github.com/kubernetes/kubernetes/blob/a9f0db16614ae62563ead2018f1692407bd93d8f/pkg/apis/scheduling/types.go#L29  # NOQA
 PRIORITY_CLASS_MAX = 1000000000
 PRIORITY_CLASS_MIN = 1
@@ -3009,19 +3012,23 @@ def wait_longhorn_node_zone_reset(client):
         assert lh_node.zone == ''
 
 
-def set_k8s_node_zone_label(core_api, node_name, zone_name):
-    k8s_zone_label = get_k8s_zone_label()
-
+def set_k8s_node_label(core_api, node_name, key, value):
     payload = {
         "metadata": {
             "labels": {
-                k8s_zone_label: zone_name}
+                key: value}
         }
     }
 
     core_api.patch_node(node_name, body=payload)
 
 
+def set_k8s_node_zone_label(core_api, node_name, zone_name):
+    k8s_zone_label = get_k8s_zone_label()
+
+    set_k8s_node_label(core_api, node_name, k8s_zone_label, zone_name)
+
+
 def get_k8s_zone_label():
     ver_api = get_version_api_client()
     k8s_ver_data = ver_api.get_code()
@@ -3161,6 +3168,9 @@ def reset_settings(client):
     for setting in client.list_setting():
         setting_name = setting.name
         setting_default_value = setting.definition.default
+        if setting_name == "taint-toleration":
+            setting_default_value = MASTER_NODE_TAINT
+
         setting_readonly = setting.definition.readOnly
 
         # We don't provide the setup for the storage network, hence there is no
@@ -4864,6 +4874,9 @@ def get_engine_image_status_value(client, ei_name):
 
 def update_setting(client, name, value):
     setting = client.by_id_setting(name)
+    if name == "taint-toleration":
+        value = value + ";" + MASTER_NODE_TAINT
+
     client.update(setting, value=value)
 
 
@@ -5189,13 +5202,17 @@ def delete_support_bundle(node_id, name, client):
     return requests.delete(support_bundle_url)
 
 
-def download_support_bundle(node_id, name, client):  # NOQA
+def download_support_bundle(node_id, name, client, target_path=""):  # NOQA
     url = get_support_bundle_url(client)
     support_bundle_url = '{}/{}/{}'.format(url, node_id, name)
     download_url = '{}/download'.format(support_bundle_url)
     r = requests.get(download_url, allow_redirects=True, timeout=300)
     r.raise_for_status()
 
+    if target_path != "":
+        with open(target_path, 'wb') as f:
+            f.write(r.content)
+
 
 def get_all_support_bundle_manager_deployments(apps_api):  # NOQA
     name_prefix = 'longhorn-support-bundle-manager'
diff --git a/manager/integration/tests/node.py b/manager/integration/tests/node.py
new file mode 100644
index 0000000000..ea8bac3b27
--- /dev/null
+++ b/manager/integration/tests/node.py
@@ -0,0 +1,56 @@
+import pytest
+
+from kubernetes import client as k8sclient
+
+from common import get_self_host_id
+
+
+@pytest.fixture(
+    params=[("foo/bar", "test", "NoSchedule"),
+            ("foo", "", "NoSchedule")]
+)
+def taint_nodes_exclude_self(request):
+    taint = k8sclient.V1Taint(
+        key=request.param[0],
+        value=request.param[1],
+        effect=request.param[2],
+    )
+
+    self_host_id = get_self_host_id()
+
+    api = k8sclient.CoreV1Api()
+    node_items = api.list_node().items
+    saved_nodes = []
+    for node in node_items:
+        if node.metadata.name == self_host_id:
+            continue
+
+        saved_nodes.append(node)
+
+        if node.spec.taints is None:
+            taints = [taint]
+        else:
+            taints = node.spec.taints + [taint]
+        payload = {
+            "spec": {
+                "taints": taints
+            }
+        }
+        api.patch_node(node.metadata.name, body=payload)
+
+    yield saved_nodes
+
+    for node in saved_nodes:
+        if node.metadata.name == self_host_id:
+            continue
+
+        if node.spec.taints is None:
+            taints = []
+        else:
+            taints = node.spec.taints
+        payload = {
+            "spec": {
+                "taints": taints
+            }
+        }
+        api.patch_node(node.metadata.name, body=payload)
diff --git a/manager/integration/tests/test_support_bundle.py b/manager/integration/tests/test_support_bundle.py
index b26215f893..5e72abae6f 100644
--- a/manager/integration/tests/test_support_bundle.py
+++ b/manager/integration/tests/test_support_bundle.py
@@ -1,18 +1,28 @@
 import pytest
 
+import os
+import zipfile
+
+from tempfile import TemporaryDirectory
+
+from node import taint_nodes_exclude_self  # NOQA
+
 from common import apps_api  # NOQA
 from common import client  # NOQA
+from common import core_api  # NOQA
 from common import check_all_support_bundle_managers_deleted
 from common import create_support_bundle
 from common import delete_and_wait_deployment
 from common import download_support_bundle
 from common import get_all_support_bundle_manager_deployments
+from common import set_k8s_node_label
 from common import update_setting
 from common import wait_for_support_bundle_cleanup
 from common import wait_for_support_bundle_state
 
+from common import SETTING_NODE_SELECTOR
 from common import SETTING_SUPPORT_BUNDLE_FAILED_LIMIT
+from common import SETTING_TAINT_TOLERATION
 
 
 @pytest.mark.support_bundle  # NOQA
@@ -141,3 +151,114 @@ def create_failed_support_bundles(client, apps_api, number=1):  # NOQA
             namespace=deployments[0].metadata.namespace
         )
         wait_for_support_bundle_state("Error", node_id, name, client)
+
+
+@pytest.mark.support_bundle  # NOQA
+def test_support_bundle_agent_with_node_selector(client, core_api, request):  # NOQA
+    """
+    Scenario: support bundle agent should respect node selector
+
+    Issue: https://github.com/longhorn/longhorn/issues/5614
+
+    Given there are some nodes labeled
+    And "system-managed-components-node-selector" is set with node label
+
+    When a support bundle is generated
+
+    Then should be able to download the support bundle successfully
+    And support bundle should include only the labeled nodes in node collection
+
+    """
+    nodes = client.list_node()
+    labeled_nodes = [nodes[1], nodes[2]]
+    for node in labeled_nodes:
+        set_k8s_node_label(core_api, node.name, "foo", "bar")
+
+    def finalizer():
+        for node in labeled_nodes:
+            set_k8s_node_label(core_api, node.name, "foo", None)
+        update_setting(client, SETTING_NODE_SELECTOR, None)
+    request.addfinalizer(finalizer)
+
+    update_setting(client, SETTING_NODE_SELECTOR, "foo:bar")
+
+    resp = create_support_bundle(client)
+    node_id = resp['id']
+    name = resp['name']
+
+    wait_for_support_bundle_state("ReadyForDownload", node_id, name, client)
+
+    # The temporary directory will be automatically deleted outside of the
+    # "with" context manager.
+    with TemporaryDirectory(prefix="supportbundle-") as temp_dir:
+        download_path = f'{temp_dir}/{name}.zip'
+        download_support_bundle(node_id, name, client,
+                                target_path=download_path)
+
+        with zipfile.ZipFile(download_path, 'r') as zip:
+            node_names = [f"{node.name}" for node in labeled_nodes]
+            check_bundled_nodes_matches(node_names, zip, temp_dir)
+
+    wait_for_support_bundle_cleanup(client)
+    check_all_support_bundle_managers_deleted()
+
+
+def check_bundled_nodes_matches(node_names, zip, temp_dir):
+    expect_node_zips = [f"{node}.zip" for node in node_names]
+    bundle_name = os.path.dirname(zip.namelist()[0])
+    bundle_node_dir = f'{bundle_name}/nodes'
+    bundle_nodes = [
+        f for f in zip.namelist() if f.startswith(bundle_node_dir)
+    ]
+
+    for node in bundle_nodes:
+        zip.extract(node, f'{temp_dir}')
+
+    node_zips = os.listdir(f'{temp_dir}/{bundle_name}/nodes')
+    assert set(node_zips) == set(expect_node_zips), \
+        f'Nodes zipped in bundle do not match. \n' \
+        f'Expect = {expect_node_zips}\n' \
+        f'Got = {node_zips}\n'
+
+
+@pytest.mark.support_bundle  # NOQA
+def test_support_bundle_agent_with_taint_toleration(client, core_api, taint_nodes_exclude_self):  # NOQA
+    """
+    Scenario: support bundle agent should respect taint toleration
+
+    Issue: https://github.com/longhorn/longhorn/issues/5614
+
+    Given there are some tainted nodes in the cluster
+    And Longhorn tolerates the tainted nodes with setting "taint-toleration"
+
+    When a support bundle is generated
+
+    Then should be able to download the support bundle successfully
+    And support bundle should include all tainted nodes in node collection
+
+    """
+    # The taint-toleration is set up to match the "taint_nodes_exclude_self"
+    # fixture.
+    update_setting(client, SETTING_TAINT_TOLERATION,
+                   "foo/bar=test:NoSchedule; foo:NoSchedule")
+
+    resp = create_support_bundle(client)
+    node_id = resp['id']
+    name = resp['name']
+
+    wait_for_support_bundle_state("ReadyForDownload", node_id, name, client)
+
+    # The temporary directory will be automatically deleted outside of the
+    # "with" context manager.
+    with TemporaryDirectory(prefix="supportbundle-") as temp_dir:
+        download_path = f'{temp_dir}/{name}.zip'
+        download_support_bundle(node_id, name, client,
+                                target_path=download_path)
+
+        with zipfile.ZipFile(download_path, 'r') as zip:
+            nodes = core_api.list_node()
+            node_names = [f"{node.metadata.name}" for node in nodes.items]
+            check_bundled_nodes_matches(node_names, zip, temp_dir)
+
+    wait_for_support_bundle_cleanup(client)
+    check_all_support_bundle_managers_deleted()