Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

test(support-bundle): agent with node-selector and taint-toleration #1286

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 22 additions & 5 deletions manager/integration/tests/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -194,6 +194,7 @@
SETTING_SNAPSHOT_FAST_REPLICA_REBUILD_ENABLED = "fast-replica-rebuild-enabled"
SETTING_CONCURRENT_VOLUME_BACKUP_RESTORE = \
"concurrent-volume-backup-restore-per-node-limit"
SETTING_NODE_SELECTOR = "system-managed-components-node-selector"

SNAPSHOT_DATA_INTEGRITY_IGNORED = "ignored"
SNAPSHOT_DATA_INTEGRITY_DISABLED = "disabled"
Expand All @@ -206,6 +207,8 @@

KUBERNETES_STATUS_LABEL = "KubernetesStatus"

MASTER_NODE_TAINT = "node-role.kubernetes.io/master=true:NoExecute;node-role.kubernetes.io/master=true:NoSchedule" # NOQA

# https://github.com/kubernetes/kubernetes/blob/a9f0db16614ae62563ead2018f1692407bd93d8f/pkg/apis/scheduling/types.go#L29 # NOQA
PRIORITY_CLASS_MAX = 1000000000
PRIORITY_CLASS_MIN = 1
Expand Down Expand Up @@ -3009,19 +3012,23 @@ def wait_longhorn_node_zone_reset(client):
assert lh_node.zone == ''


def set_k8s_node_zone_label(core_api, node_name, zone_name):
k8s_zone_label = get_k8s_zone_label()

def set_k8s_node_label(core_api, node_name, key, value):
    """Patch a single label onto a Kubernetes node.

    The patch body contains only ``metadata.labels[key] = value``; nothing
    else on the node object is included in the request.

    :param core_api: client object exposing ``patch_node(name, body=...)``.
    :param node_name: name of the node to patch.
    :param key: label key to set.
    :param value: label value. Callers in the test-suite also pass ``None``
        during cleanup -- presumably to remove the label; confirm against
        the Kubernetes patch semantics in use.
    """
    payload = {
        "metadata": {
            "labels": {
                key: value,
            },
        },
    }

    core_api.patch_node(node_name, body=payload)


def set_k8s_node_zone_label(core_api, node_name, zone_name):
    """Label ``node_name`` with ``zone_name`` under the cluster's zone key.

    The label key is whatever ``get_k8s_zone_label()`` returns; the actual
    patching is delegated to ``set_k8s_node_label``.
    """
    set_k8s_node_label(core_api, node_name, get_k8s_zone_label(), zone_name)


def get_k8s_zone_label():
ver_api = get_version_api_client()
k8s_ver_data = ver_api.get_code()
Expand Down Expand Up @@ -3161,6 +3168,9 @@ def reset_settings(client):
for setting in client.list_setting():
setting_name = setting.name
setting_default_value = setting.definition.default
if setting_name == "taint-toleration":
setting_default_value = MASTER_NODE_TAINT

setting_readonly = setting.definition.readOnly

# We don't provide the setup for the storage network, hence there is no
Expand Down Expand Up @@ -4864,6 +4874,9 @@ def get_engine_image_status_value(client, ei_name):

def update_setting(client, name, value):
    """Update the Longhorn setting ``name`` to ``value``.

    For the "taint-toleration" setting, ``MASTER_NODE_TAINT`` is always
    appended to the requested value (";"-separated). When the requested
    value is empty or ``None``, ``MASTER_NODE_TAINT`` alone is used instead
    of producing a leading ";" or failing on ``None + str``.

    :param client: Longhorn API client providing ``by_id_setting`` and
        ``update``.
    :param name: setting name (ID).
    :param value: new setting value; passed through unchanged for every
        setting other than "taint-toleration".
    """
    setting = client.by_id_setting(name)
    if name == "taint-toleration":
        # NOTE(review): presumably keeps Longhorn components schedulable on
        # tainted master nodes of the test cluster -- confirm.
        if value:
            value = value + ";" + MASTER_NODE_TAINT
        else:
            value = MASTER_NODE_TAINT

    client.update(setting, value=value)


Expand Down Expand Up @@ -5189,13 +5202,17 @@ def delete_support_bundle(node_id, name, client):
return requests.delete(support_bundle_url)


def download_support_bundle(node_id, name, client): # NOQA
def download_support_bundle(node_id, name, client, target_path=""):  # NOQA
    """Download the support bundle ``name`` of ``node_id``.

    Issues a GET against ``<bundle url>/<node_id>/<name>/download`` and
    raises (via ``raise_for_status``) on an HTTP error status. The response
    body is written to ``target_path`` only when a non-empty path is given.

    :param node_id: ID of the node owning the support bundle.
    :param name: support bundle name.
    :param client: Longhorn API client used to resolve the bundle URL.
    :param target_path: file path to save the bundle to; ``""`` (the
        default) discards the downloaded content.
    """
    base_url = get_support_bundle_url(client)
    bundle_url = '{}/{}/{}'.format(base_url, node_id, name)
    response = requests.get('{}/download'.format(bundle_url),
                            allow_redirects=True, timeout=300)
    response.raise_for_status()

    if target_path == "":
        return

    with open(target_path, 'wb') as bundle_file:
        bundle_file.write(response.content)


def get_all_support_bundle_manager_deployments(apps_api): # NOQA
name_prefix = 'longhorn-support-bundle-manager'
Expand Down
56 changes: 56 additions & 0 deletions manager/integration/tests/node.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
import pytest

from kubernetes import client as k8sclient

from common import get_self_host_id


@pytest.fixture(
    params=[("foo/bar", "test", "NoSchedule"),
            ("foo", "", "NoSchedule")]
)
def taint_nodes_exclude_self(request):
    """Taint every Kubernetes node except the one running the tests.

    Parametrized over ``(key, value, effect)`` taint tuples. For each node
    other than ``get_self_host_id()``, the taint is appended to the node's
    existing taints. On teardown every touched node is patched back to the
    taint list it had when the fixture started.

    Yields the list of node objects as they were listed *before* tainting.
    """
    key, value, effect = request.param
    taint = k8sclient.V1Taint(key=key, value=value, effect=effect)

    self_host_id = get_self_host_id()

    api = k8sclient.CoreV1Api()
    saved_nodes = []
    # try/finally so nodes that were already tainted get restored even when
    # tainting a later node raises part-way through the setup.
    try:
        for node in api.list_node().items:
            if node.metadata.name == self_host_id:
                continue

            saved_nodes.append(node)

            # Build a new list: the saved node object must keep its original
            # taints so teardown can restore them.
            taints = (node.spec.taints or []) + [taint]
            payload = {"spec": {"taints": taints}}
            api.patch_node(node.metadata.name, body=payload)

        yield saved_nodes
    finally:
        # saved_nodes never contains the self host, so no filtering needed.
        for node in saved_nodes:
            payload = {"spec": {"taints": node.spec.taints or []}}
            api.patch_node(node.metadata.name, body=payload)
121 changes: 121 additions & 0 deletions manager/integration/tests/test_support_bundle.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,28 @@
import pytest
import os
import zipfile

from tempfile import TemporaryDirectory

from node import taint_nodes_exclude_self # NOQA

from common import apps_api # NOQA
from common import client # NOQA
from common import core_api # NOQA

from common import check_all_support_bundle_managers_deleted
from common import create_support_bundle
from common import delete_and_wait_deployment
from common import download_support_bundle
from common import get_all_support_bundle_manager_deployments
from common import set_k8s_node_label
from common import update_setting
from common import wait_for_support_bundle_cleanup
from common import wait_for_support_bundle_state

from common import SETTING_NODE_SELECTOR
from common import SETTING_SUPPORT_BUNDLE_FAILED_LIMIT
from common import SETTING_TAINT_TOLERATION


@pytest.mark.support_bundle # NOQA
Expand Down Expand Up @@ -141,3 +151,114 @@ def create_failed_support_bundles(client, apps_api, number=1): # NOQA
namespace=deployments[0].metadata.namespace
)
wait_for_support_bundle_state("Error", node_id, name, client)


@pytest.mark.support_bundle  # NOQA
def test_support_bundle_agent_with_node_selector(client, core_api, request):  # NOQA
    """
    Scenario: support bundle agent should respect node selector

    Issue: https://github.com/longhorn/longhorn/issues/5614

    Given there are some nodes labeled
    And "system-managed-components-node-selector" is set with node label

    When a support bundle is generated

    Then should be able to download the support bundle successfully
    And support bundle should include only the labeled nodes in node collection

    """
    nodes = client.list_node()
    # NOTE(review): assumes the cluster has at least three Longhorn nodes.
    labeled_nodes = [nodes[1], nodes[2]]
    for node in labeled_nodes:
        set_k8s_node_label(core_api, node.name, "foo", "bar")

    def finalizer():
        # Undo the labels and the node selector set by this test.
        for node in labeled_nodes:
            set_k8s_node_label(core_api, node.name, "foo", None)
        update_setting(client, SETTING_NODE_SELECTOR, None)
    request.addfinalizer(finalizer)

    update_setting(client, SETTING_NODE_SELECTOR, "foo:bar")

    resp = create_support_bundle(client)
    node_id = resp['id']
    name = resp['name']

    wait_for_support_bundle_state("ReadyForDownload", node_id, name, client)

    # The temporary directory will be automatically deleted outside of the
    # "with" context manager.
    with TemporaryDirectory(prefix="supportbundle-") as temp_dir:
        # Name the download after the bundle. The previous
        # f'{temp_dir}/{0}.zip'.format(name) always produced "0.zip".
        download_path = os.path.join(temp_dir, f'{name}.zip')
        download_support_bundle(node_id, name, client,
                                target_path=download_path)

        with zipfile.ZipFile(download_path, 'r') as bundle_zip:
            node_names = [f"{node.name}" for node in labeled_nodes]
            check_bundled_nodes_matches(node_names, bundle_zip, temp_dir)

    wait_for_support_bundle_cleanup(client)
    check_all_support_bundle_managers_deleted()


def check_bundled_nodes_matches(node_names, zip, temp_dir):
    """Assert the bundle's ``nodes`` directory matches the expected nodes.

    Extracts every archive member under ``<bundle root>/nodes`` into
    ``temp_dir`` and compares the extracted directory listing with one
    ``<node>.zip`` entry per name in ``node_names`` (order-insensitive).

    :param node_names: iterable of expected node names.
    :param zip: an open ``zipfile.ZipFile`` of the support bundle. The name
        shadows the builtin but is kept for caller compatibility.
    :param temp_dir: directory the ``nodes`` members are extracted into.
    :raises AssertionError: when the bundled node archives differ from the
        expected ones, including when the bundle has no ``nodes`` directory.
    """
    expect_node_zips = [f"{node}.zip" for node in node_names]

    members = zip.namelist()
    # The bundle root is the first path component of the first member. Zip
    # member names always use "/" separators. Using dirname() here would
    # return a nested directory whenever the first member is not a direct
    # child of the root.
    bundle_name = members[0].split('/', 1)[0] if members else ''
    bundle_node_dir = f'{bundle_name}/nodes'

    for member in members:
        if member.startswith(bundle_node_dir):
            zip.extract(member, temp_dir)

    extracted_node_dir = os.path.join(temp_dir, bundle_name, 'nodes')
    # A bundle without any node entry never creates the directory; treat it
    # as "no nodes" so the assertion below reports the mismatch instead of
    # os.listdir() raising FileNotFoundError.
    if os.path.isdir(extracted_node_dir):
        node_zips = os.listdir(extracted_node_dir)
    else:
        node_zips = []

    assert set(node_zips) == set(expect_node_zips), \
        'Nodes zipped in bundle do not match. \n' \
        f'Expect = {expect_node_zips}\n' \
        f'Got = {node_zips}\n'


@pytest.mark.support_bundle  # NOQA
def test_support_bundle_agent_with_taint_toleration(client, core_api, taint_nodes_exclude_self):  # NOQA
    """
    Scenario: support bundle agent should respect taint toleration

    Issue: https://github.com/longhorn/longhorn/issues/5614

    Given there are some tainted nodes in the cluster
    And Longhorn tolerates the tainted nodes with setting "taint-toleration"

    When a support bundle is generated

    Then should be able to download the support bundle successfully
    And support bundle should include all tainted nodes in node collection

    """
    # The taint-toleration is set up to match the "taint_nodes_exclude_self"
    # fixture.
    update_setting(client, SETTING_TAINT_TOLERATION,
                   "foo/bar=test:NoSchedule; foo:NoSchedule")

    resp = create_support_bundle(client)
    node_id = resp['id']
    name = resp['name']

    wait_for_support_bundle_state("ReadyForDownload", node_id, name, client)

    # The temporary directory will be automatically deleted outside of the
    # "with" context manager.
    with TemporaryDirectory(prefix="supportbundle-") as temp_dir:
        # Name the download after the bundle. The previous
        # f'{temp_dir}/{0}.zip'.format(name) always produced "0.zip".
        download_path = os.path.join(temp_dir, f'{name}.zip')
        download_support_bundle(node_id, name, client,
                                target_path=download_path)

        with zipfile.ZipFile(download_path, 'r') as bundle_zip:
            nodes = core_api.list_node()
            node_names = [f"{node.metadata.name}" for node in nodes.items]
            check_bundled_nodes_matches(node_names, bundle_zip, temp_dir)

    wait_for_support_bundle_cleanup(client)
    check_all_support_bundle_managers_deleted()