33from __future__ import annotations
44
55import random
6- import re
76import time
8- from typing import List , Optional , cast
7+ from typing import List , cast
98
109from assertpy import assert_that
1110from microsoft .testsuites .cpu .common import (
2928)
3029from lisa .environment import Environment
3130from lisa .node import RemoteNode
32- from lisa .tools import (
33- Ethtool ,
34- Fio ,
35- Iperf3 ,
36- KernelConfig ,
37- Kill ,
38- Lscpu ,
39- Lsvmbus ,
40- Modprobe ,
41- Reboot ,
42- )
31+ from lisa .tools import Ethtool , Fio , Iperf3 , Kill , Lscpu , Lsvmbus , Reboot
32+ from lisa .tools .lsvmbus import HV_NETVSC_CLASS_ID
4333from lisa .util import SkippedException
4434
4535
@@ -177,70 +167,12 @@ def verify_cpu_offline_network_workload(
177167 def _clamp_channels (self , val : int ) -> int :
178168 return max (1 , min (int (val ), 64 ))
179169
180- def _read_max_supported_structured (
181- self , node : Node , log : Optional [Logger ]
182- ) -> Optional [int ]:
183- try :
184- info = node .tools [Ethtool ].get_device_channels_info ("eth0" , True )
185- candidates = [
186- getattr (info , "max_combined" , None ),
187- getattr (info , "max_channels" , None ),
188- getattr (info , "max_current" , None ),
189- ]
190-
191- for c in candidates :
192- if isinstance (c , str ) and c .isdigit ():
193- c = int (c )
194- if isinstance (c , int ) and c > 1 :
195- val = self ._clamp_channels (c )
196- if log :
197- log .debug (
198- "Structured max channels found: %d (candidates=%s)" ,
199- val ,
200- candidates ,
201- )
202- return val
203- return None
204-
205- except Exception as e :
206- if log :
207- log .debug ("Structured channels info failed: %s" , e )
208- return None
209-
210- def _read_max_supported_raw (
211- self , node : Node , log : Optional [Logger ]
212- ) -> Optional [int ]:
213- try :
214- r = node .execute ("ethtool -l eth0" , sudo = True )
215- raw = r .stdout .strip ()
216- if log :
217- log .debug ("Raw `ethtool -l` output:\n %s" , raw )
218-
219- block = re .search (r"(?is)pre-?set\s+maximums:?(.*?)(?:current|$)" , raw )
220- if block :
221- m = re .search (r"(?im)^\s*Combined\s*:\s*(\d+)\s*$" , block .group (1 ))
222- if m :
223- return self ._clamp_channels (int (m .group (1 )))
224-
225- ms = re .findall (r"(?im)^\s*Combined\s*:\s*(\d+)\s*$" , raw )
226- if ms :
227- return self ._clamp_channels (max (int (x ) for x in ms ))
228-
229- ms2 = re .findall (r"(?im)(?:Maximum|Combined maximum)\s*:\s*(\d+)" , raw )
230- if ms2 :
231- return self ._clamp_channels (max (int (x ) for x in ms2 ))
232-
233- return None
234-
235- except Exception as e :
236- if log :
237- log .debug ("Raw ethtool -l parse failed: %s" , e )
238- return None
239-
240170 def _read_max_supported (self , node : Node ) -> int :
241171 """
242172 Return conservative device max 'combined' channels for eth0.
243- Order: ethtool(max_combined/max_channels) -> lsvmbus(queue count) -> threads.
173+ Fallback strategy: Try to collect all possible candidates from ethtool fields
174+ (max_combined, max_channels, max_current, current_channels); if none are found,
175+ fall back to lsvmbus queue count; if that fails, fall back to thread count.
244176 Always clamp to [1, 64].
245177 """
246178 try :
@@ -252,24 +184,31 @@ def _read_max_supported(self, node: Node) -> int:
252184 try :
253185 candidates .append (int (v ))
254186 except Exception :
187+ # Ignore values that cannot be converted to int
188+ # (may be missing or malformed)
255189 pass
256190 cur = getattr (info , "current_channels" , None )
257191 if cur is not None :
258192 try :
259193 candidates .append (int (cur ))
260194 except Exception :
195+ # Ignore values that cannot be converted to int
196+ # (may be missing or malformed)
261197 pass
262198 if candidates :
263199 return max (1 , min (max (candidates ), 64 ))
264200 except Exception :
201+ # Ignore ethtool exceptions to allow fallback to lsvmbus method
265202 pass
266203
267204 try :
268205 chans = node .tools [Lsvmbus ].get_device_channels (force_run = True )
269206 for ch in chans :
270- if ch .class_id == "f8615163-df3e-46c5-913f-f2d2f965ed0e" : # hv_netvsc
207+ if ch .class_id == HV_NETVSC_CLASS_ID :
271208 return max (1 , min (len (ch .channel_vp_map ), 64 ))
272209 except Exception :
210+ # Ignore lsvmbus exceptions to allow fallback to threads method
211+ # (lsvmbus may not be available)
273212 pass
274213
275214 threads = node .tools [Lscpu ].get_thread_count ()
@@ -336,14 +275,14 @@ def _pick_target_not_eq_current(
336275 return min (max (tgt , lower ), upper )
337276
338277 def _verify_no_irq_on_offline (
339- self , node : Node , offline : List [int ], expect_len : int
278+ self , node : Node , offline : List [str ], expect_len : int
340279 ) -> None :
341280 """
342281 Assert NIC channel count and that no IRQ is routed to offline CPUs.
343282 """
344283 chans = node .tools [Lsvmbus ].get_device_channels (force_run = True )
345284 for ch in chans :
346- if ch .class_id == "f8615163-df3e-46c5-913f-f2d2f965ed0e" :
285+ if ch .class_id == HV_NETVSC_CLASS_ID :
347286 assert_that (ch .channel_vp_map ).is_length (expect_len )
348287 for vp in ch .channel_vp_map :
349288 assert_that (vp .target_cpu ).is_not_in (offline )
@@ -424,10 +363,9 @@ def verify_cpu_offline_channel_add(self, log: Logger, node: Node) -> None:
424363
425364 # ---------- Phase 2: CPUs back online ----------
426365 set_cpu_state_serial (log , node , idle , CPUState .ONLINE )
427- if node .tools [KernelConfig ].is_built_as_module ("CONFIG_HYPERV_NET" ):
428- node .tools [Modprobe ].reload ("hv_netvsc" )
429- else :
430- node .tools [Reboot ].reboot ()
366+ # Always reboot to ensure network stack is properly reinitialized
367+ # after CPU hotplug operations to avoid SSH connection issues
368+ node .tools [Reboot ].reboot ()
431369
432370 threads2 = node .tools [Lscpu ].get_thread_count ()
433371 dev_max2 = self ._read_max_supported (node )
@@ -449,14 +387,12 @@ def verify_cpu_offline_channel_add(self, log: Logger, node: Node) -> None:
449387 dev_max2 ,
450388 )
451389
452- self ._verify_no_irq_on_offline (node , idle , new2 )
453-
454390 finally :
455391 # ---------- Cleanup: always restore ----------
456392 try :
457393 set_cpu_state_serial (log , node , idle , CPUState .ONLINE )
458394 except Exception as e :
459- log .warning ("Failed to bring CPUs online during cleanup: %s" , e )
395+ log .error ("Failed to bring CPUs online during cleanup: %s" , e )
460396
461397 try :
462398 # Re-read device cap for a safe restore
@@ -467,6 +403,4 @@ def verify_cpu_offline_channel_add(self, log: Logger, node: Node) -> None:
467403 node .tools [Ethtool ].change_device_channels_info ("eth0" , safe_origin )
468404 log .debug ("Restored channels to origin value: %d" , safe_origin )
469405 except Exception as e :
470- log .warning (
471- "Restore channels failed (target=%d): %s" , origin_channels , e
472- )
406+ log .error ("Restore channels failed (target=%d): %s" , origin_channels , e )
0 commit comments