Skip to content

Commit 68cae96

Browse files
address PR comments
1 parent dd6ead8 commit 68cae96

File tree

2 files changed

+24
-87
lines changed

2 files changed

+24
-87
lines changed

lisa/microsoft/testsuites/cpu/cpusuite.py

Lines changed: 21 additions & 87 deletions
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,8 @@
33
from __future__ import annotations
44

55
import random
6-
import re
76
import time
8-
from typing import List, Optional, cast
7+
from typing import List, cast
98

109
from assertpy import assert_that
1110
from microsoft.testsuites.cpu.common import (
@@ -29,17 +28,8 @@
2928
)
3029
from lisa.environment import Environment
3130
from lisa.node import RemoteNode
32-
from lisa.tools import (
33-
Ethtool,
34-
Fio,
35-
Iperf3,
36-
KernelConfig,
37-
Kill,
38-
Lscpu,
39-
Lsvmbus,
40-
Modprobe,
41-
Reboot,
42-
)
31+
from lisa.tools import Ethtool, Fio, Iperf3, Kill, Lscpu, Lsvmbus, Reboot
32+
from lisa.tools.lsvmbus import HV_NETVSC_CLASS_ID
4333
from lisa.util import SkippedException
4434

4535

@@ -177,70 +167,12 @@ def verify_cpu_offline_network_workload(
177167
def _clamp_channels(self, val: int) -> int:
178168
return max(1, min(int(val), 64))
179169

180-
def _read_max_supported_structured(
181-
self, node: Node, log: Optional[Logger]
182-
) -> Optional[int]:
183-
try:
184-
info = node.tools[Ethtool].get_device_channels_info("eth0", True)
185-
candidates = [
186-
getattr(info, "max_combined", None),
187-
getattr(info, "max_channels", None),
188-
getattr(info, "max_current", None),
189-
]
190-
191-
for c in candidates:
192-
if isinstance(c, str) and c.isdigit():
193-
c = int(c)
194-
if isinstance(c, int) and c > 1:
195-
val = self._clamp_channels(c)
196-
if log:
197-
log.debug(
198-
"Structured max channels found: %d (candidates=%s)",
199-
val,
200-
candidates,
201-
)
202-
return val
203-
return None
204-
205-
except Exception as e:
206-
if log:
207-
log.debug("Structured channels info failed: %s", e)
208-
return None
209-
210-
def _read_max_supported_raw(
211-
self, node: Node, log: Optional[Logger]
212-
) -> Optional[int]:
213-
try:
214-
r = node.execute("ethtool -l eth0", sudo=True)
215-
raw = r.stdout.strip()
216-
if log:
217-
log.debug("Raw `ethtool -l` output:\n%s", raw)
218-
219-
block = re.search(r"(?is)pre-?set\s+maximums:?(.*?)(?:current|$)", raw)
220-
if block:
221-
m = re.search(r"(?im)^\s*Combined\s*:\s*(\d+)\s*$", block.group(1))
222-
if m:
223-
return self._clamp_channels(int(m.group(1)))
224-
225-
ms = re.findall(r"(?im)^\s*Combined\s*:\s*(\d+)\s*$", raw)
226-
if ms:
227-
return self._clamp_channels(max(int(x) for x in ms))
228-
229-
ms2 = re.findall(r"(?im)(?:Maximum|Combined maximum)\s*:\s*(\d+)", raw)
230-
if ms2:
231-
return self._clamp_channels(max(int(x) for x in ms2))
232-
233-
return None
234-
235-
except Exception as e:
236-
if log:
237-
log.debug("Raw ethtool -l parse failed: %s", e)
238-
return None
239-
240170
def _read_max_supported(self, node: Node) -> int:
241171
"""
242172
Return conservative device max 'combined' channels for eth0.
243-
Order: ethtool(max_combined/max_channels) -> lsvmbus(queue count) -> threads.
173+
Fallback strategy: Try to collect all possible candidates from ethtool fields
174+
(max_combined, max_channels, max_current, current_channels); if none are found,
175+
fall back to lsvmbus queue count; if that fails, fall back to thread count.
244176
Always clamp to [1, 64].
245177
"""
246178
try:
@@ -252,24 +184,31 @@ def _read_max_supported(self, node: Node) -> int:
252184
try:
253185
candidates.append(int(v))
254186
except Exception:
187+
# Ignore values that cannot be converted to int
188+
# (may be missing or malformed)
255189
pass
256190
cur = getattr(info, "current_channels", None)
257191
if cur is not None:
258192
try:
259193
candidates.append(int(cur))
260194
except Exception:
195+
# Ignore values that cannot be converted to int
196+
# (may be missing or malformed)
261197
pass
262198
if candidates:
263199
return max(1, min(max(candidates), 64))
264200
except Exception:
201+
# Ignore ethtool exceptions to allow fallback to lsvmbus method
265202
pass
266203

267204
try:
268205
chans = node.tools[Lsvmbus].get_device_channels(force_run=True)
269206
for ch in chans:
270-
if ch.class_id == "f8615163-df3e-46c5-913f-f2d2f965ed0e": # hv_netvsc
207+
if ch.class_id == HV_NETVSC_CLASS_ID:
271208
return max(1, min(len(ch.channel_vp_map), 64))
272209
except Exception:
210+
# Ignore lsvmbus exceptions to allow fallback to threads method
211+
# (lsvmbus may not be available)
273212
pass
274213

275214
threads = node.tools[Lscpu].get_thread_count()
@@ -336,14 +275,14 @@ def _pick_target_not_eq_current(
336275
return min(max(tgt, lower), upper)
337276

338277
def _verify_no_irq_on_offline(
339-
self, node: Node, offline: List[int], expect_len: int
278+
self, node: Node, offline: List[str], expect_len: int
340279
) -> None:
341280
"""
342281
Assert NIC channel count and that no IRQ is routed to offline CPUs.
343282
"""
344283
chans = node.tools[Lsvmbus].get_device_channels(force_run=True)
345284
for ch in chans:
346-
if ch.class_id == "f8615163-df3e-46c5-913f-f2d2f965ed0e":
285+
if ch.class_id == HV_NETVSC_CLASS_ID:
347286
assert_that(ch.channel_vp_map).is_length(expect_len)
348287
for vp in ch.channel_vp_map:
349288
assert_that(vp.target_cpu).is_not_in(offline)
@@ -424,10 +363,9 @@ def verify_cpu_offline_channel_add(self, log: Logger, node: Node) -> None:
424363

425364
# ---------- Phase 2: CPUs back online ----------
426365
set_cpu_state_serial(log, node, idle, CPUState.ONLINE)
427-
if node.tools[KernelConfig].is_built_as_module("CONFIG_HYPERV_NET"):
428-
node.tools[Modprobe].reload("hv_netvsc")
429-
else:
430-
node.tools[Reboot].reboot()
366+
# Always reboot to ensure network stack is properly reinitialized
367+
# after CPU hotplug operations to avoid SSH connection issues
368+
node.tools[Reboot].reboot()
431369

432370
threads2 = node.tools[Lscpu].get_thread_count()
433371
dev_max2 = self._read_max_supported(node)
@@ -449,14 +387,12 @@ def verify_cpu_offline_channel_add(self, log: Logger, node: Node) -> None:
449387
dev_max2,
450388
)
451389

452-
self._verify_no_irq_on_offline(node, idle, new2)
453-
454390
finally:
455391
# ---------- Cleanup: always restore ----------
456392
try:
457393
set_cpu_state_serial(log, node, idle, CPUState.ONLINE)
458394
except Exception as e:
459-
log.warning("Failed to bring CPUs online during cleanup: %s", e)
395+
log.error("Failed to bring CPUs online during cleanup: %s", e)
460396

461397
try:
462398
# Re-read device cap for a safe restore
@@ -467,6 +403,4 @@ def verify_cpu_offline_channel_add(self, log: Logger, node: Node) -> None:
467403
node.tools[Ethtool].change_device_channels_info("eth0", safe_origin)
468404
log.debug("Restored channels to origin value: %d", safe_origin)
469405
except Exception as e:
470-
log.warning(
471-
"Restore channels failed (target=%d): %s", origin_channels, e
472-
)
406+
log.error("Restore channels failed (target=%d): %s", origin_channels, e)

lisa/tools/lsvmbus.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,9 @@
3030
# \r\n\r\n
3131
PATTERN_VMBUS_DEVICE = re.compile(r"(VMBUS ID[\w\W]*?)(?=VMBUS ID|\Z)", re.MULTILINE)
3232

33+
# VMBus device class IDs
34+
HV_NETVSC_CLASS_ID = "f8615163-df3e-46c5-913f-f2d2f965ed0e"
35+
3336

3437
class ChannelVPMap:
3538
def __init__(self, vmbus_id: str, rel_id: str, cpu: str) -> None:

0 commit comments

Comments
 (0)