Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

test(robot): migrate test_replica_auto_balance_disk_in_pressure #2167

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion e2e/keywords/common.resource
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ Set test environment
${host_provider}= Get Environment Variable HOST_PROVIDER
${disk_path}= Set Variable If "${host_provider}" == "harvester" /dev/vdc /dev/xvdh
FOR ${worker_node} IN @{worker_nodes}
add_disk ${worker_node} block ${disk_path}
add_disk block-disk ${worker_node} block ${disk_path}
END

Cleanup test resources
Expand Down
32 changes: 31 additions & 1 deletion e2e/keywords/node.resource
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,13 @@ Documentation Node Keywords

Library ../libs/keywords/common_keywords.py
Library ../libs/keywords/node_keywords.py
Library ../libs/keywords/volume_keywords.py

*** Keywords ***
Add ${disk_type} type disk ${disk_path} for all worker nodes
${worker_nodes}= get_worker_nodes
FOR ${worker_node} IN @{worker_nodes}
add_disk ${worker_node} ${disk_type} ${disk_path}
add_disk ${disk_type}-disk ${worker_node} ${disk_type} ${disk_path}
END

Set node ${node_id} with
Expand All @@ -31,3 +32,32 @@ Disable node ${node_id} default disk
Enable node ${node_id} default disk
${node_name} = get_node_by_index ${node_id}
enable_default_disk ${node_name}

Disable disk ${disk_id} scheduling on node ${node_id}
${node_name} = get_node_by_index ${node_id}
${disk_name} = generate_name_with_suffix disk ${disk_id}
disable_disk ${node_name} ${disk_name}

Enable disk ${disk_id} scheduling on node ${node_id}
${node_name} = get_node_by_index ${node_id}
${disk_name} = generate_name_with_suffix disk ${disk_id}
enable_disk ${node_name} ${disk_name}

Check node ${node_id} disk ${disk_id} is in pressure
${node_name} = get_node_by_index ${node_id}
${disk_name} = generate_name_with_suffix disk ${disk_id}
wait_for_disk_in_pressure ${node_name} ${disk_name}

Check node ${node_id} disk ${disk_id} is not in pressure
${node_name} = get_node_by_index ${node_id}
${disk_name} = generate_name_with_suffix disk ${disk_id}
wait_for_disk_not_in_pressure ${node_name} ${disk_name}

Create ${disk_size} Gi disk ${disk_id} on node ${node_id}
${node_name} = get_node_by_index ${node_id}
${disk_name} = generate_name_with_suffix disk ${disk_id}
create_volume ${disk_name} size=${disk_size}Gi numberOfReplicas=1
attach_volume ${disk_name} ${node_name}
wait_for_volume_healthy ${disk_name}
${mount_path} = mount_disk ${disk_name} ${node_name}
add_disk ${disk_name} ${node_name} filesystem ${mount_path}
8 changes: 8 additions & 0 deletions e2e/keywords/replica.resource
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,16 @@ Documentation Longhorn replica related keywords

Library ../libs/keywords/common_keywords.py
Library ../libs/keywords/replica_keywords.py
Library ../libs/keywords/node_keywords.py

*** Keywords ***
Volume ${volume_id} replica ${setting_name} should be ${setting_value}
${volume_name} = generate_name_with_suffix volume ${volume_id}
validate_replica_setting ${volume_name} ${setting_name} ${setting_value}

There should be replicas running on node ${node_id} disk ${disk_id}
${node_name} = get_node_by_index ${node_id}
${disk_name} = generate_name_with_suffix disk ${disk_id}
${disk_uuid} = get_disk_uuid ${node_name} ${disk_name}
${replicas} = get_replicas volume_name= node_name=${node_name} disk_uuid=${disk_uuid}
Should Be True len(${replicas}) > 0
8 changes: 8 additions & 0 deletions e2e/keywords/statefulset.resource
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,14 @@ Create statefulset ${statefulset_id} using ${volume_type} volume with ${sc_name}
${statefulset_name} = generate_name_with_suffix statefulset ${statefulset_id}
create_statefulset ${statefulset_name} ${volume_type} ${sc_name}

Create statefulset ${statefulset_id} using ${volume_type} volume with ${sc_name} storageclass and size ${size} Mi
${statefulset_name} = generate_name_with_suffix statefulset ${statefulset_id}
create_statefulset ${statefulset_name} ${volume_type} ${sc_name} ${size}Mi

Create statefulset ${statefulset_id} using ${volume_type} volume with ${sc_name} storageclass and size ${size} Gi
${statefulset_name} = generate_name_with_suffix statefulset ${statefulset_id}
create_statefulset ${statefulset_name} ${volume_type} ${sc_name} ${size}Gi

Scale statefulset ${statefulset_id} to ${replicaset_size}
${statefulset_name} = generate_name_with_suffix statefulset ${statefulset_id}
scale_statefulset ${statefulset_name} ${replicaset_size}
Expand Down
10 changes: 10 additions & 0 deletions e2e/keywords/workload.resource
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ Library ../libs/keywords/volume_keywords.py
Library ../libs/keywords/workload_keywords.py
Library ../libs/keywords/host_keywords.py
Library ../libs/keywords/k8s_keywords.py
Library ../libs/keywords/replica_keywords.py

*** Keywords ***
Create pod ${pod_id} using volume ${volume_id}
Expand Down Expand Up @@ -213,3 +214,12 @@ Delete Longhorn ${workload_kind} ${workload_name} pod
${pod_name} = get_workload_pod_name ${workload_name} longhorn-system
Log ${pod_name}
delete_pod ${pod_name} longhorn-system

Check volume of ${workload_kind} ${workload_id} replica on node ${node_id} disk ${disk_id}
${workload_name} = generate_name_with_suffix ${workload_kind} ${workload_id}
${volume_name} = get_workload_volume_name ${workload_name}
${node_name} = get_node_by_index ${node_id}
${disk_name} = generate_name_with_suffix disk ${disk_id}
${disk_uuid} = get_disk_uuid ${node_name} ${disk_name}
${replicas} = get_replicas volume_name=${volume_name} node_name=${node_name} disk_uuid=${disk_uuid}
Should Be True len(${replicas}) > 0
29 changes: 26 additions & 3 deletions e2e/libs/keywords/node_keywords.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,14 @@ def __init__(self):
def list_node_names_by_role(self, role):
return self.node.list_node_names_by_role(role)

def add_disk(self, node_name, type, path):
logging(f"Adding {type} type disk {path} to node {node_name}")
def mount_disk(self, disk_name, node_name):
logging(f"Mount device /dev/longhorn/{disk_name} on node {node_name}")
return self.node.mount_disk(disk_name, node_name)

def add_disk(self, disk_name, node_name, type, path):
logging(f"Adding {type} type disk {disk_name} {path} to node {node_name}")
disk = {
f"{type}-disk": {
f"{disk_name}": {
"diskType": type,
"path": path,
"allowScheduling": True
Expand All @@ -38,6 +42,13 @@ def set_node(self, node_name, allowScheduling=True, evictionRequested=False):
logging(f"Setting node {node_name}; scheduling={allowScheduling}; evictionRequested={evictionRequested}")
self.node.set_node(node_name, allowScheduling, evictionRequested)

def disable_disk(self, node_name, disk_name):
    """Disallow replica scheduling on the named disk of the given node."""
    self.node.set_disk_scheduling(node_name, disk_name, allowScheduling=False)

def enable_disk(self, node_name, disk_name):
    """Allow replica scheduling on the named disk of the given node."""
    self.node.set_disk_scheduling(node_name, disk_name, allowScheduling=True)


def disable_node_scheduling(self, node_name):
self.node.set_node_scheduling(node_name, allowScheduling=False)

Expand All @@ -52,3 +63,15 @@ def reset_node_schedule(self):

def check_node_is_not_schedulable(self, node_name):
self.node.check_node_schedulable(node_name, schedulable="False")

def is_disk_in_pressure(self, node_name, disk_name):
    """Return True if the disk's Schedulable condition reports DiskPressure."""
    return self.node.is_disk_in_pressure(node_name, disk_name)

def wait_for_disk_in_pressure(self, node_name, disk_name):
    """Block until the disk reports DiskPressure; fails on timeout."""
    self.node.wait_for_disk_in_pressure(node_name, disk_name)

def wait_for_disk_not_in_pressure(self, node_name, disk_name):
    """Block until the disk no longer reports DiskPressure; fails on timeout."""
    self.node.wait_for_disk_not_in_pressure(node_name, disk_name)

def get_disk_uuid(self, node_name, disk_name):
    """Return the diskUUID recorded on the Longhorn node for the named disk."""
    return self.node.get_disk_uuid(node_name, disk_name)
3 changes: 3 additions & 0 deletions e2e/libs/keywords/replica_keywords.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,6 @@ def __init__(self):

def validate_replica_setting(self, volume_name, setting_name, value):
return self.replica.validate_replica_setting(volume_name, setting_name, value)

def get_replicas(self, volume_name=None, node_name=None, disk_uuid=None):
    """List replicas filtered by any combination of volume, node, and disk UUID.

    All filters are optional; a None filter matches everything.
    """
    return self.replica.get(volume_name, node_name, disk_uuid)
6 changes: 3 additions & 3 deletions e2e/libs/keywords/statefulset_keywords.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,9 +28,9 @@ def cleanup_statefulsets(self):
for statefulset in statefulsets.items:
self.delete_statefulset(statefulset.metadata.name)

def create_statefulset(self, name, volume_type="RWO", sc_name="longhorn"):
logging(f'Creating {volume_type} statefulset {name} with {sc_name} storageclass')
create_statefulset(name, volume_type, sc_name)
def create_statefulset(self, name, volume_type="RWO", sc_name="longhorn", size=None):
    # size is an optional PVC storage request such as "300Mi" or "2Gi";
    # None keeps the size already present in the statefulset template.
    logging(f'Creating {volume_type} statefulset {name} with {sc_name} storageclass and size = {size}')
    create_statefulset(name, volume_type, sc_name, size)

def delete_statefulset(self, name):
logging(f'Deleting statefulset {name}')
Expand Down
62 changes: 57 additions & 5 deletions e2e/libs/node/node.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import time
import re
import os

from kubernetes import client
from robot.libraries.BuiltIn import BuiltIn
Expand All @@ -9,15 +10,27 @@
from utility.utility import get_longhorn_client
from utility.utility import get_retry_count_and_interval
from utility.utility import logging

from node_exec import NodeExec

class Node:

DEFAULT_DISK_PATH = "/var/lib/longhorn/"
DEFAULT_VOLUME_PATH = "/dev/longhorn/"

def __init__(self):
self.retry_count, self.retry_interval = get_retry_count_and_interval()

def mount_disk(self, disk_name, node_name):
    """Format the Longhorn block device for *disk_name* with ext4 and mount
    it under DEFAULT_DISK_PATH on *node_name*.

    Returns the mount path so the caller can register it as a filesystem
    disk on the node.

    NOTE(review): issue_cmd results are intentionally not inspected here;
    confirm whether NodeExec raises on command failure (e.g. mkfs on a busy
    device) or whether the results should be validated.
    """
    mount_path = os.path.join(self.DEFAULT_DISK_PATH, disk_name)
    device_path = os.path.join(self.DEFAULT_VOLUME_PATH, disk_name)
    # Reuse a single executor instead of constructing one per command.
    node_exec = NodeExec(node_name)
    node_exec.issue_cmd(f"mkdir -p {mount_path}")
    node_exec.issue_cmd(f"mkfs.ext4 {device_path}")
    node_exec.issue_cmd(f"mount {device_path} {mount_path}")
    return mount_path

Comment on lines +23 to +33
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue

Handle potential exceptions during disk mounting.

The mount_disk method executes system commands that may fail. It's advisable to handle exceptions or check the result of each command to ensure robustness.

Apply this diff to handle exceptions:

 def mount_disk(self, disk_name, node_name):
     mount_path = os.path.join(self.DEFAULT_DISK_PATH, disk_name)
     device_path = os.path.join(self.DEFAULT_VOLUME_PATH, disk_name)
     cmd = f"mkdir -p {mount_path}"
-    NodeExec(node_name).issue_cmd(cmd)
+    if not NodeExec(node_name).issue_cmd(cmd):
+        raise Exception(f"Failed to create mount path {mount_path} on node {node_name}")
     cmd = f"mkfs.ext4 {device_path}"
-    NodeExec(node_name).issue_cmd(cmd)
+    if not NodeExec(node_name).issue_cmd(cmd):
+        raise Exception(f"Failed to format device {device_path} on node {node_name}")
     cmd = f"mount {device_path} {mount_path}"
-    NodeExec(node_name).issue_cmd(cmd)
+    if not NodeExec(node_name).issue_cmd(cmd):
+        raise Exception(f"Failed to mount {device_path} to {mount_path} on node {node_name}")
     return mount_path
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
def mount_disk(self, disk_name, node_name):
mount_path = os.path.join(self.DEFAULT_DISK_PATH, disk_name)
device_path = os.path.join(self.DEFAULT_VOLUME_PATH, disk_name)
cmd = f"mkdir -p {mount_path}"
res = NodeExec(node_name).issue_cmd(cmd)
cmd = f"mkfs.ext4 {device_path}"
res = NodeExec(node_name).issue_cmd(cmd)
cmd = f"mount {device_path} {mount_path}"
res = NodeExec(node_name).issue_cmd(cmd)
return mount_path
def mount_disk(self, disk_name, node_name):
mount_path = os.path.join(self.DEFAULT_DISK_PATH, disk_name)
device_path = os.path.join(self.DEFAULT_VOLUME_PATH, disk_name)
cmd = f"mkdir -p {mount_path}"
if not NodeExec(node_name).issue_cmd(cmd):
raise Exception(f"Failed to create mount path {mount_path} on node {node_name}")
cmd = f"mkfs.ext4 {device_path}"
if not NodeExec(node_name).issue_cmd(cmd):
raise Exception(f"Failed to format device {device_path} on node {node_name}")
cmd = f"mount {device_path} {mount_path}"
if not NodeExec(node_name).issue_cmd(cmd):
raise Exception(f"Failed to mount {device_path} to {mount_path} on node {node_name}")
return mount_path
🧰 Tools
🪛 Ruff

31-31: Local variable res is assigned to but never used

Remove assignment to unused variable res

(F841)

def update_disks(self, node_name, disks):
node = get_longhorn_client().by_id_node(node_name)
for _ in range(self.retry_count):
Expand All @@ -37,9 +50,9 @@ def wait_for_disk_update(self, node_name, disk_num):
disks = node.disks
for d in disks:
if disks[d]["diskUUID"] == "" or \
not disks[d]["conditions"] or \
disks[d]["conditions"]["Ready"]["status"] != "True" or \
disks[d]["conditions"]["Schedulable"]["status"] != "True":
(disks[d]["allowScheduling"] and
(not disks[d]["conditions"] or
disks[d]["conditions"]["Ready"]["status"] != "True")):
all_updated = False
break
if all_updated:
Expand All @@ -59,15 +72,20 @@ def reset_disks(self, node_name):
for disk_name, disk in iter(node.disks.items()):
if disk.path != self.DEFAULT_DISK_PATH:
disk.allowScheduling = False
logging(f"Disabling scheduling disk {disk_name} on node {node_name}")
else:
disk.allowScheduling = True
logging(f"Enabling scheduling disk {disk_name} on node {node_name}")
self.update_disks(node_name, node.disks)

disks = {}
for disk_name, disk in iter(node.disks.items()):
if disk.path == self.DEFAULT_DISK_PATH:
disks[disk_name] = disk
disk.allowScheduling = True
logging(f"Keeping disk {disk_name} on node {node_name}")
else:
logging(f"Try to remove disk {disk_name} from node {node_name}")
logging(f"Removing disk {disk_name} from node {node_name}")
self.update_disks(node_name, disks)

def is_accessing_node_by_index(self, node):
Expand Down Expand Up @@ -183,6 +201,14 @@ def set_default_disk_scheduling(self, node_name, allowScheduling):
disk.allowScheduling = allowScheduling
self.update_disks(node_name, node.disks)

def set_disk_scheduling(self, node_name, disk_name, allowScheduling):
    """Set allowScheduling on one named disk of *node_name* and persist it.

    Raises:
        KeyError: if *disk_name* does not exist on the node. The previous
        behavior silently updated nothing, which hides typos in test data.
    """
    node = get_longhorn_client().by_id_node(node_name)

    if disk_name not in node.disks:
        raise KeyError(f"Disk {disk_name!r} not found on node {node_name!r}")

    for name, disk in node.disks.items():
        if name == disk_name:
            disk.allowScheduling = allowScheduling
    self.update_disks(node_name, node.disks)

Comment on lines +204 to +211
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue

Check for existence of the specified disk before updating scheduling.

In set_disk_scheduling, there is no check to confirm if the specified disk_name exists in node.disks. This may lead to a KeyError if the disk does not exist.

Apply this diff to add a check for disk existence:

 def set_disk_scheduling(self, node_name, disk_name, allowScheduling):
     node = get_longhorn_client().by_id_node(node_name)

+    if disk_name not in node.disks:
+        raise KeyError(f"Disk '{disk_name}' not found on node '{node_name}'")

     for name, disk in iter(node.disks.items()):
         if name == disk_name:
             disk.allowScheduling = allowScheduling
     self.update_disks(node_name, node.disks)
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
def set_disk_scheduling(self, node_name, disk_name, allowScheduling):
node = get_longhorn_client().by_id_node(node_name)
for name, disk in iter(node.disks.items()):
if name == disk_name:
disk.allowScheduling = allowScheduling
self.update_disks(node_name, node.disks)
def set_disk_scheduling(self, node_name, disk_name, allowScheduling):
node = get_longhorn_client().by_id_node(node_name)
if disk_name not in node.disks:
raise KeyError(f"Disk '{disk_name}' not found on node '{node_name}'")
for name, disk in iter(node.disks.items()):
if name == disk_name:
disk.allowScheduling = allowScheduling
self.update_disks(node_name, node.disks)

def check_node_schedulable(self, node_name, schedulable):
node = get_longhorn_client().by_id_node(node_name)
for _ in range(self.retry_count):
Expand All @@ -194,3 +220,29 @@ def check_node_schedulable(self, node_name, schedulable):
def is_node_schedulable(self, node_name):
node = get_longhorn_client().by_id_node(node_name)
return node["conditions"]["Schedulable"]["status"]

def is_disk_in_pressure(self, node_name, disk_name):
    """Report whether the disk's Schedulable condition reason is DiskPressure."""
    node = get_longhorn_client().by_id_node(node_name)
    schedulable_condition = node["disks"][disk_name]["conditions"]["Schedulable"]
    return schedulable_condition["reason"] == "DiskPressure"
Comment on lines +224 to +226
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue

Add error handling for missing disk conditions.

The method is_disk_in_pressure may raise a KeyError if the expected keys are missing in the disk's conditions. It's safer to handle such cases to prevent unexpected exceptions.

Apply this diff to safely access the dictionary keys:

 def is_disk_in_pressure(self, node_name, disk_name):
     node = get_longhorn_client().by_id_node(node_name)
-    return node["disks"][disk_name]["conditions"]["Schedulable"]["reason"] == "DiskPressure"
+    disk = node["disks"].get(disk_name, {})
+    conditions = disk.get("conditions", {})
+    schedulable = conditions.get("Schedulable", {})
+    reason = schedulable.get("reason", "")
+    return reason == "DiskPressure"
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
def is_disk_in_pressure(self, node_name, disk_name):
node = get_longhorn_client().by_id_node(node_name)
return node["disks"][disk_name]["conditions"]["Schedulable"]["reason"] == "DiskPressure"
def is_disk_in_pressure(self, node_name, disk_name):
node = get_longhorn_client().by_id_node(node_name)
disk = node["disks"].get(disk_name, {})
conditions = disk.get("conditions", {})
schedulable = conditions.get("Schedulable", {})
reason = schedulable.get("reason", "")
return reason == "DiskPressure"


def wait_for_disk_in_pressure(self, node_name, disk_name):
    """Poll until the disk reports DiskPressure.

    Retries up to self.retry_count times with self.retry_interval seconds
    between polls. Raises AssertionError (the same type the old `assert`
    produced, so callers are unaffected) on timeout. Unlike the previous
    version this does not issue a redundant extra status query after the
    loop, and it cannot be silently disabled by running Python with -O.
    """
    for i in range(self.retry_count):
        logging(f"Waiting for disk {disk_name} on node {node_name} in pressure ... ({i})")
        if self.is_disk_in_pressure(node_name, disk_name):
            return
        time.sleep(self.retry_interval)
    raise AssertionError(
        f"Waiting for node {node_name} disk {disk_name} in pressure failed: "
        f"{get_longhorn_client().by_id_node(node_name)}")

def wait_for_disk_not_in_pressure(self, node_name, disk_name):
    """Poll until the disk stops reporting DiskPressure.

    Retries up to self.retry_count times with self.retry_interval seconds
    between polls. Raises AssertionError (the same type the old `assert`
    produced, so callers are unaffected) on timeout, without the previous
    version's redundant extra status query, and works even under -O.
    """
    for i in range(self.retry_count):
        logging(f"Waiting for disk {disk_name} on node {node_name} not in pressure ... ({i})")
        if not self.is_disk_in_pressure(node_name, disk_name):
            return
        time.sleep(self.retry_interval)
    raise AssertionError(
        f"Waiting for node {node_name} disk {disk_name} not in pressure failed: "
        f"{get_longhorn_client().by_id_node(node_name)}")

Comment on lines +228 to +245
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🛠️ Refactor suggestion

Handle potential infinite loops in wait methods.

In wait_for_disk_in_pressure and wait_for_disk_not_in_pressure, if the condition is never met, the method might loop indefinitely. Consider adding a timeout or maximum retry limit.

Ensure that the loop respects self.retry_count and raises an exception when the condition isn't met within the retries.

Apply this diff to enforce maximum retries:

 def wait_for_disk_in_pressure(self, node_name, disk_name):
     for i in range(self.retry_count):
         is_in_pressure = self.is_disk_in_pressure(node_name, disk_name)
         logging(f"Waiting for disk {disk_name} on node {node_name} in pressure ... ({i})")
         if is_in_pressure:
             break
         time.sleep(self.retry_interval)
-    assert self.is_disk_in_pressure(node_name, disk_name), f"Waiting for node {node_name} disk {disk_name} in pressure failed: {get_longhorn_client().by_id_node(node_name)}"
+    else:
+        raise TimeoutError(f"Disk {disk_name} on node {node_name} did not enter pressure state within the expected time.")

 def wait_for_disk_not_in_pressure(self, node_name, disk_name):
     for i in range(self.retry_count):
         is_in_pressure = self.is_disk_in_pressure(node_name, disk_name)
         logging(f"Waiting for disk {disk_name} on node {node_name} not in pressure ... ({i})")
         if not is_in_pressure:
             break
         time.sleep(self.retry_interval)
-    assert not self.is_disk_in_pressure(node_name, disk_name), f"Waiting for node {node_name} disk {disk_name} not in pressure failed: {get_longhorn_client().by_id_node(node_name)}"
+    else:
+        raise TimeoutError(f"Disk {disk_name} on node {node_name} did not exit pressure state within the expected time.")
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
def wait_for_disk_in_pressure(self, node_name, disk_name):
for i in range(self.retry_count):
is_in_pressure = self.is_disk_in_pressure(node_name, disk_name)
logging(f"Waiting for disk {disk_name} on node {node_name} in pressure ... ({i})")
if is_in_pressure:
break
time.sleep(self.retry_interval)
assert self.is_disk_in_pressure(node_name, disk_name), f"Waiting for node {node_name} disk {disk_name} in pressure failed: {get_longhorn_client().by_id_node(node_name)}"
def wait_for_disk_not_in_pressure(self, node_name, disk_name):
for i in range(self.retry_count):
is_in_pressure = self.is_disk_in_pressure(node_name, disk_name)
logging(f"Waiting for disk {disk_name} on node {node_name} not in pressure ... ({i})")
if not is_in_pressure:
break
time.sleep(self.retry_interval)
assert not self.is_disk_in_pressure(node_name, disk_name), f"Waiting for node {node_name} disk {disk_name} not in pressure failed: {get_longhorn_client().by_id_node(node_name)}"
def wait_for_disk_in_pressure(self, node_name, disk_name):
for i in range(self.retry_count):
is_in_pressure = self.is_disk_in_pressure(node_name, disk_name)
logging(f"Waiting for disk {disk_name} on node {node_name} in pressure ... ({i})")
if is_in_pressure:
break
time.sleep(self.retry_interval)
else:
raise TimeoutError(f"Disk {disk_name} on node {node_name} did not enter pressure state within the expected time.")
def wait_for_disk_not_in_pressure(self, node_name, disk_name):
for i in range(self.retry_count):
is_in_pressure = self.is_disk_in_pressure(node_name, disk_name)
logging(f"Waiting for disk {disk_name} on node {node_name} not in pressure ... ({i})")
if not is_in_pressure:
break
time.sleep(self.retry_interval)
else:
raise TimeoutError(f"Disk {disk_name} on node {node_name} did not exit pressure state within the expected time.")

def get_disk_uuid(self, node_name, disk_name):
    """Fetch the diskUUID of *disk_name* as reported by the Longhorn node."""
    node_state = get_longhorn_client().by_id_node(node_name)
    disk = node_state["disks"][disk_name]
    return disk["diskUUID"]
Comment on lines +246 to +248
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue

Ensure disk existence before accessing disk UUID.

In get_disk_uuid, accessing node["disks"][disk_name]["diskUUID"] without checking may lead to a KeyError if the disk does not exist.

Apply this diff to add a check for disk existence:

 def get_disk_uuid(self, node_name, disk_name):
     node = get_longhorn_client().by_id_node(node_name)
-    return node["disks"][disk_name]["diskUUID"]
+    try:
+        return node["disks"][disk_name]["diskUUID"]
+    except KeyError:
+        raise KeyError(f"Disk '{disk_name}' not found on node '{node_name}'")
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
def get_disk_uuid(self, node_name, disk_name):
node = get_longhorn_client().by_id_node(node_name)
return node["disks"][disk_name]["diskUUID"]
def get_disk_uuid(self, node_name, disk_name):
node = get_longhorn_client().by_id_node(node_name)
try:
return node["disks"][disk_name]["diskUUID"]
except KeyError:
raise KeyError(f"Disk '{disk_name}' not found on node '{node_name}'")

2 changes: 1 addition & 1 deletion e2e/libs/replica/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
class Base(ABC):

@abstractmethod
def get(self, volume_name, node_name):
def get(self, volume_name, node_name, disk_uuid):
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

💡 Codebase verification

Implementation inconsistency found in derived classes

The verification reveals that not all implementing classes have been properly updated with the new disk_uuid parameter:

  • e2e/libs/replica/crd.py is missing the get() method implementation entirely
  • e2e/libs/replica/replica.py has inconsistent signature with optional disk_uuid=None
  • e2e/libs/replica/rest.py matches the base class signature correctly

The abstract base class requires a mandatory disk_uuid parameter, but implementations are either missing or inconsistent. This needs to be fixed to maintain the contract defined by the base class.

🔗 Analysis chain

LGTM! Verify implementations in derived classes.

The addition of disk_uuid parameter to the abstract get() method aligns well with the PR's objective of enhancing disk management capabilities.

Let's verify that all implementing classes have been updated with the new parameter:

🏁 Scripts executed

The following scripts were executed for the analysis:

Script:

#!/bin/bash
# Description: Check if all implementations of the get() method include the disk_uuid parameter
# Expected: All implementing classes should have the updated signature

# Search for class definitions that inherit from Base
ast-grep --pattern 'class $_ (Base):'

# Search for get method implementations
ast-grep --pattern 'def get(self, volume_name, node_name, $_):'

Length of output: 105


Script:

#!/bin/bash
# Let's try a different approach using ripgrep to find implementations

# First, find the base class definition
echo "=== Base class definition ==="
rg -l "class.*Base.*:" e2e/libs/replica/

# Then find all implementations of get method in replica-related files
echo -e "\n=== Get method implementations ==="
rg "def get\(self, volume_name, node_name" -A 1 e2e/libs/replica/

Length of output: 822

return NotImplemented

@abstractmethod
Expand Down
6 changes: 4 additions & 2 deletions e2e/libs/replica/crd.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,14 @@ class CRD(Base):
def __init__(self):
self.obj_api = client.CustomObjectsApi()

def get(self, volume_name, node_name=None):
def get(self, volume_name=None, node_name=None, disk_uuid=None):
label_selector = []
if volume_name != "":
if volume_name:
label_selector.append(f"longhornvolume={volume_name}")
if node_name:
label_selector.append(f"longhornnode={node_name}")
if disk_uuid:
label_selector.append(f"longhorndiskuuid={disk_uuid}")

replicas = self.obj_api.list_namespaced_custom_object(
group="longhorn.io",
Expand Down
4 changes: 2 additions & 2 deletions e2e/libs/replica/replica.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,8 @@ def __init__(self):
def delete(self, volume_name="", node_name=""):
return self.replica.delete(volume_name, node_name)

def get(self, volume_name, node_name):
return self.replica.get(volume_name, node_name)
def get(self, volume_name, node_name, disk_uuid=None):
    # Delegate to the active backend (CRD or REST). disk_uuid defaults to
    # None for backward compatibility with callers that pass only
    # volume_name and node_name; None means "any disk".
    return self.replica.get(volume_name, node_name, disk_uuid)

def wait_for_rebuilding_start(self, volume_name, node_name):
return self.replica.wait_for_rebuilding_start(volume_name,node_name)
Expand Down
2 changes: 1 addition & 1 deletion e2e/libs/replica/rest.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ class Rest(Base):
def __init__(self):
pass

def get(self, volume_name, node_name, disk_uuid):
    """REST-based replica lookup is not supported yet.

    Raise NotImplementedError instead of returning the ``NotImplemented``
    singleton: that singleton is meant for binary-operator dispatch and is
    truthy, so returning it here would be silently propagated to callers
    as if it were a real result.
    """
    raise NotImplementedError("Rest.get is not implemented; use the CRD backend")
return NotImplemented
Comment on lines +15 to 16
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

💡 Codebase verification

Breaking change detected in method signature

The verification reveals that while the base class Base and implementation class CRD have been properly updated to include the disk_uuid parameter, the Replica class implementation has a mismatched signature where disk_uuid is marked as optional with a default value (disk_uuid=None). This inconsistency with the abstract base class could lead to runtime issues.

  • e2e/libs/replica/replica.py: Update the get method signature to match the base class by removing the default value:
    def get(self, volume_name, node_name, disk_uuid):
🔗 Analysis chain

Verify implementations of the updated method signature

The addition of the disk_uuid parameter is aligned with the test migration objectives. However, since this is an abstract method, we should ensure all implementing classes have been updated accordingly.

🏁 Scripts executed

The following scripts were executed for the analysis:

Script:

#!/bin/bash
# Description: Find all implementations of the Rest class to verify they implement
# the updated get method signature

# Search for classes that inherit from Rest
ast-grep --pattern 'class $_ (Rest):'

# Search for get method implementations with the new signature
ast-grep --pattern 'def get(self, volume_name, node_name, disk_uuid):'

Length of output: 112


Script:

#!/bin/bash
# Let's try a different approach using ripgrep to find implementations

# Search for class definitions inheriting from Rest
rg "class.*\(Rest\)" -l

# Search for all get method implementations in the codebase
rg "def get\(.*volume_name.*node_name.*\):" -l

# Get content of the base class to understand the inheritance
cat e2e/libs/replica/rest.py

Length of output: 3259


Script:

#!/bin/bash
# Let's examine the implementation in other files that might be affected

# Check the content of files that might have the get method implementation
cat e2e/libs/replica/crd.py
cat e2e/libs/replica/base.py
cat e2e/libs/replica/replica.py

Length of output: 3895


def delete(self, volume_name, node_name):
Expand Down
6 changes: 5 additions & 1 deletion e2e/libs/workload/statefulset.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from utility.utility import logging


def create_statefulset(statefulset_name, volume_type, sc_name):
def create_statefulset(statefulset_name, volume_type, sc_name, size):
filepath = "./templates/workload/statefulset.yaml"
with open(filepath, 'r') as f:
namespace = 'default'
Expand All @@ -30,6 +30,10 @@ def create_statefulset(statefulset_name, volume_type, sc_name):
if volume_type == 'RWX':
manifest_dict['spec']['volumeClaimTemplates'][0]['spec']['accessModes'][0] = 'ReadWriteMany'

# correct request storage size
if size:
manifest_dict['spec']['volumeClaimTemplates'][0]['spec']['resources']['requests']['storage'] = size
Comment on lines +33 to +35
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🛠️ Refactor suggestion

Add validation and improve error handling for storage size.

The current implementation could be more robust:

  1. No validation of size format (e.g., '1Gi', '500Mi')
  2. No error handling for invalid sizes
  3. Nested dictionary access could fail if manifest structure changes

Consider this safer implementation:

-        # correct request storage size
-        if size:
-            manifest_dict['spec']['volumeClaimTemplates'][0]['spec']['resources']['requests']['storage'] = size
+        # Set PVC storage size if specified
+        if size:
+            # Validate size format
+            if not isinstance(size, str) or not any(size.endswith(unit) for unit in ['Ki', 'Mi', 'Gi', 'Ti']):
+                raise ValueError(f"Invalid storage size format: {size}. Expected format: '<number>Ki|Mi|Gi|Ti'")
+            
+            try:
+                template = manifest_dict['spec']['volumeClaimTemplates'][0]
+                template.setdefault('spec', {}).setdefault('resources', {}).setdefault('requests', {})['storage'] = size
+            except (KeyError, IndexError) as e:
+                raise ValueError(f"Invalid manifest structure: {str(e)}")
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
# correct request storage size
if size:
manifest_dict['spec']['volumeClaimTemplates'][0]['spec']['resources']['requests']['storage'] = size
# Set PVC storage size if specified
if size:
# Validate size format
if not isinstance(size, str) or not any(size.endswith(unit) for unit in ['Ki', 'Mi', 'Gi', 'Ti']):
raise ValueError(f"Invalid storage size format: {size}. Expected format: '<number>Ki|Mi|Gi|Ti'")
try:
template = manifest_dict['spec']['volumeClaimTemplates'][0]
template.setdefault('spec', {}).setdefault('resources', {}).setdefault('requests', {})['storage'] = size
except (KeyError, IndexError) as e:
raise ValueError(f"Invalid manifest structure: {str(e)}")


api = client.AppsV1Api()
statefulset = api.create_namespaced_stateful_set(
body=manifest_dict,
Expand Down
Loading