From 3ba24234e1ad7252ff30b6f0d0228337e55ec32a Mon Sep 17 00:00:00 2001 From: mihirpat1 <112018033+mihirpat1@users.noreply.github.com> Date: Sun, 23 Nov 2025 22:54:35 -0800 Subject: [PATCH] [202505] [sff-mgr] Disable SFF manager support for all CMIS transceivers (#710) (#712) Description For certain CMIS transceivers which have transceiver type as "QSFP+ or later with CMIS", SFF manager also starts controlling Tx disable in addition to CMIS manager. Hence, SFF manager needs to skip handling such CMIS transceivers. Following code needs to be modified to addressed this sonic-platform-daemons/sonic-xcvrd/xcvrd/sff_mgr.py Lines 387 to 389 in 69ce387 # Procced only for QSFP28/QSFP+ transceiver if not (xcvr_type.startswith('QSFP28') or xcvr_type.startswith('QSFP+')): continue Motivation and Context For certain CMIS transceivers which have transceiver type as "QSFP+ or later with CMIS", the SFF manager was also controlling Tx disable in addition to the CMIS manager. This created a conflict where both managers would attempt to control the same transceiver, leading to potential race conditions and unexpected behavior. The previous type check only filtered based on the string prefix "QSFP28" or "QSFP+", which would incorrectly include CMIS modules that report as "QSFP+ or later with CMIS". Added explicit CMIS API check using common.is_cmis_api() to the transceiver filtering logic How Has This Been Tested? Verified CMIS transceivers with "QSFP+ or later with CMIS" type are now properly skipped by SFF manager Confirmed QSFP28 module continue to be handled correctly Unrelated change The pipeline pool has been updated to sonictest since sonic-common is not supported anymore. Additional Information (Optional) MSFT ADO - 35947863 --------- Signed-off-by: Mihir Patel --- azure-pipelines.yml | 2 +- sonic-xcvrd/tests/test_xcvrd.py | 41 +++++++++++++++++++++++++++++++++ sonic-xcvrd/xcvrd/sff_mgr.py | 33 +++++++++++++++----------- 3 files changed, 61 insertions(+), 15 deletions(-) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index aec8d8d..0917d53 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -18,7 +18,7 @@ pr: include: - '*' -pool: sonic-common +pool: sonictest resources: containers: diff --git a/sonic-xcvrd/tests/test_xcvrd.py b/sonic-xcvrd/tests/test_xcvrd.py index 05facdd..53e5f14 100644 --- a/sonic-xcvrd/tests/test_xcvrd.py +++ b/sonic-xcvrd/tests/test_xcvrd.py @@ -2170,6 +2170,47 @@ def test_SffManagerTask_get_admin_status(self, mock_get_cfg_port_tbl): mock_get_cfg_port_tbl.assert_called_once_with(0) mock_get_cfg_port_tbl.return_value.hget.assert_called_once_with(lport, 'admin_status') + @patch('xcvrd.xcvrd.helper_logger') + @patch('xcvrd.xcvrd.platform_chassis') + @patch('xcvrd.sff_mgr.PortChangeObserver') + def test_SffManagerTask_xcvr_api_none_in_task_worker(self, mock_observer, mock_chassis, mock_logger): + """Test the full task_worker flow when xcvr API is None""" + mock_observer_instance = MagicMock() + mock_observer_instance.handle_port_update_event = MagicMock(side_effect=[True, False]) + mock_observer.return_value = mock_observer_instance + + # Setup mock SFP that returns None for get_xcvr_api + mock_sfp = MagicMock() + mock_sfp.get_presence = MagicMock(return_value=True) + mock_sfp.get_xcvr_api = MagicMock(return_value=None) + mock_chassis.get_sfp = MagicMock(return_value=mock_sfp) + + sff_manager_task = SffManagerTask(DEFAULT_NAMESPACE, + threading.Event(), + mock_chassis, + mock_logger) + + # Setup port_dict with necessary data + sff_manager_task.port_dict['Ethernet0'] = { + 'index': 1, + 'type': 'QSFP28', + 'subport': '0', + 'lanes': ['1', '2', '3', '4'], + 'host_tx_ready': 'true', + 'admin_status': 'up', + 'asic_id': 0 + } + + # Mock task_stopping_event to stop after processing once + sff_manager_task.task_stopping_event.is_set = MagicMock(side_effect=[False, False, True]) + + # Run task_worker - it should handle the None API gracefully + sff_manager_task.task_worker() + + # Verify error was logged + assert any("skipping sff_mgr since no xcvr api!" in str(call) + for call in mock_logger.log_error.call_args_list) + @patch('xcvrd.xcvrd.helper_logger') @patch('xcvrd.xcvrd.platform_chassis') @patch('xcvrd.sff_mgr.PortChangeObserver', MagicMock(handle_port_update_event=MagicMock())) diff --git a/sonic-xcvrd/xcvrd/sff_mgr.py b/sonic-xcvrd/xcvrd/sff_mgr.py index f23776e..3cc5404 100644 --- a/sonic-xcvrd/xcvrd/sff_mgr.py +++ b/sonic-xcvrd/xcvrd/sff_mgr.py @@ -14,6 +14,7 @@ from .xcvrd_utilities.port_event_helper import PortChangeObserver from .xcvrd_utilities.xcvr_table_helper import XcvrTableHelper + from xcvrd import xcvrd from sonic_platform_base.sonic_xcvr.api.public.sff8472 import Sff8472Api except ImportError as e: raise ImportError(str(e) + " - required module not found") @@ -348,8 +349,25 @@ def task_worker(self): # TRANSCEIVER_INFO table's XCVR_TYPE is not ready, meaning xcvr is not present continue + # double-check the HW presence before moving forward + sfp = self.platform_chassis.get_sfp(pport) + if not sfp.get_presence(): + self.log_error("{}: module not present!".format(lport)) + del self.port_dict[lport][self.XCVR_TYPE] + continue + try: + # Skip if XcvrApi is not supported + api = sfp.get_xcvr_api() + if api is None: + self.log_error( + "{}: skipping sff_mgr since no xcvr api!".format(lport)) + continue + except (AttributeError, NotImplementedError): + # Skip if these essential routines are not available + continue + # Procced only for QSFP28/QSFP+ transceiver - if not (xcvr_type.startswith('QSFP28') or xcvr_type.startswith('QSFP+')): + if not (xcvr_type.startswith('QSFP28') or xcvr_type.startswith('QSFP+')) or xcvrd.is_cmis_api(api): continue # Handle the case that host_tx_ready value in the local cache hasn't @@ -403,20 +421,7 @@ def task_worker(self): data[self.HOST_TX_READY], host_tx_ready_changed, data[self.ADMIN_STATUS], admin_status_changed)) - # double-check the HW presence before moving forward - sfp = self.platform_chassis.get_sfp(pport) - if not sfp.get_presence(): - self.log_error("{}: module not present!".format(lport)) - del self.port_dict[lport][self.XCVR_TYPE] - continue try: - # Skip if XcvrApi is not supported - api = sfp.get_xcvr_api() - if api is None: - self.log_error( - "{}: skipping sff_mgr since no xcvr api!".format(lport)) - continue - # Skip if it's not a paged memory device if api.is_flat_memory(): self.log_notice(