diff --git a/sonic-thermalctld/scripts/thermalctld b/sonic-thermalctld/scripts/thermalctld index ce49723..06385de 100644 --- a/sonic-thermalctld/scripts/thermalctld +++ b/sonic-thermalctld/scripts/thermalctld @@ -13,10 +13,15 @@ import time from datetime import datetime import sonic_platform -from sonic_py_common import daemon_base, logger +from sonic_py_common import daemon_base, logger, device_info, multi_asic from sonic_py_common.task_base import ProcessTaskBase from swsscommon import swsscommon +try: + from sonic_platform_base.sonic_sfp.sfputilhelper import SfpUtilHelper +except ImportError: + SfpUtilHelper = None + # TODO: Once we no longer support Python 2, we can eliminate this and get the # name using the 'name' field (e.g., `signal.SIGINT.name`) starting with Python 3.5 @@ -27,6 +32,9 @@ SYSLOG_IDENTIFIER = 'thermalctld' NOT_AVAILABLE = 'N/A' CHASSIS_INFO_KEY = 'chassis 1' PHYSICAL_ENTITY_INFO_TABLE = 'PHYSICAL_ENTITY_INFO' +TRANSCEIVER_DOM_TEMPERATURE_TABLE = 'TRANSCEIVER_DOM_TEMPERATURE' +TRANSCEIVER_DOM_THRESHOLD_TABLE = 'TRANSCEIVER_DOM_THRESHOLD' +TRANSCEIVER_DOM_SENSOR_TABLE = 'TRANSCEIVER_DOM_SENSOR' INVALID_SLOT_OR_DPU = -1 ERR_UNKNOWN = 1 @@ -536,9 +544,15 @@ class TemperatureUpdater(logger.Logger): state_db = daemon_base.db_connect("STATE_DB") self.table = swsscommon.Table(state_db, TemperatureUpdater.TEMPER_INFO_TABLE_NAME) self.phy_entity_table = swsscommon.Table(state_db, PHYSICAL_ENTITY_INFO_TABLE) + self.xcvr_dom_temp_tbl = swsscommon.Table(state_db, TRANSCEIVER_DOM_TEMPERATURE_TABLE) + self.xcvr_dom_threshold_tbl = swsscommon.Table(state_db, TRANSCEIVER_DOM_THRESHOLD_TABLE) + self.xcvr_dom_sensor_tbl = swsscommon.Table(state_db, TRANSCEIVER_DOM_SENSOR_TABLE) self.chassis_table = None self.all_thermals = set() + # Initialize SfpUtilHelper for port index to logical port name mapping + self.sfp_util = self._init_sfp_util_helper() + self.is_chassis_system = chassis.is_modular_chassis() self.is_smartswitch_dpu = chassis.is_smartswitch() and 
chassis.is_dpu() self.is_chassis_upd_required = self.is_chassis_system or self.is_smartswitch_dpu @@ -566,6 +580,52 @@ class TemperatureUpdater(logger.Logger): for pek in phy_entity_keys: self.phy_entity_table._del(pek) + def _init_sfp_util_helper(self): + """ + Initialize SfpUtilHelper and read port table mappings. + This provides the physical_to_logical mapping for SFP temperature lookup. + + :return: SfpUtilHelper instance or None if initialization fails + """ + if SfpUtilHelper is None: + self.log_warning("SfpUtilHelper not available, SFP temperature from Redis disabled") + return None + + try: + sfp_util = SfpUtilHelper() + if multi_asic.is_multi_asic(): + (_, hwsku_path) = device_info.get_paths_to_platform_and_hwsku_dirs() + sfp_util.read_all_porttab_mappings(hwsku_path, multi_asic.get_num_asics()) + else: + port_config_file_path = device_info.get_path_to_port_config_file() + sfp_util.read_porttab_mappings(port_config_file_path, 0) + return sfp_util + except SystemExit: + self.log_warning("Failed to initialize SfpUtilHelper: port config not available") + return None + except Exception as e: + self.log_warning("Failed to initialize SfpUtilHelper: {}".format(e)) + return None + + def _get_port_name_by_index(self, sfp_index): + """ + Get logical port name for a given SFP index (0-based). + Uses SfpUtilHelper.get_physical_to_logical() API. 
+ + :param sfp_index: SFP index (0-based) + :return: Logical port name or None if not found + """ + if self.sfp_util is None: + return None + + # SFP index is 0-based, but physical port index is 1-based + physical_index = sfp_index + 1 + logical_ports = self.sfp_util.get_physical_to_logical(physical_index) + if logical_ports and len(logical_ports) > 0: + # Return the first logical port (for breakout, this is the primary port with DOM data) + return logical_ports[0] + return None + def _log_on_status_changed(self, normal_status, normal_log, abnormal_log): """ Log when any status changed @@ -610,7 +670,12 @@ class TemperatureUpdater(logger.Logger): return available_thermals.add((thermal, parent_name, thermal_index)) - self._refresh_temperature_status(parent_name, thermal, thermal_index) + # TODO: This Redis-based approach for reading SFP temperature is temporary. + # It will be removed once all platforms migrate to handling optics temperature + # outside of thermalctld (e.g., via xcvrd or platform-specific daemons). + port_name = self._get_port_name_by_index(sfp_index) + if port_name: + self._refresh_temperature_status(parent_name, thermal, thermal_index, is_sfp=True, port_name=port_name) # As there are no modules present in DPU, this IF condition is not updated to consider DPU chassis if self.is_chassis_system: @@ -631,7 +696,12 @@ class TemperatureUpdater(logger.Logger): return available_thermals.add((thermal, sfp_name, thermal_index)) - self._refresh_temperature_status(sfp_name, thermal, thermal_index) + # TODO: This Redis-based approach for reading SFP temperature is temporary. + # It will be removed once all platforms migrate to handling optics temperature + # outside of thermalctld (e.g., via xcvrd or platform-specific daemons). 
+ port_name = self._get_port_name_by_index(sfp_index) + if port_name: + self._refresh_temperature_status(sfp_name, thermal, thermal_index, is_sfp=True, port_name=port_name) for psu_index, psu in enumerate(module.get_all_psus()): if psu.get_presence(): @@ -649,12 +719,17 @@ class TemperatureUpdater(logger.Logger): self.log_debug("End temperature updating") - def _refresh_temperature_status(self, parent_name, thermal, thermal_index): + def _refresh_temperature_status(self, parent_name, thermal, thermal_index, is_sfp=False, port_name=None): """ - Get temperature status by platform API and write to database + Get temperature status and write to database. + For regular thermals, reads from platform API. + For SFP thermals (is_sfp=True), reads from Redis tables populated by xcvrd. + :param parent_name: Name of parent device of the thermal object :param thermal: Object representing a platform thermal zone :param thermal_index: Index of the thermal object in platform chassis + :param is_sfp: True if this is an SFP thermal reading from Redis + :param port_name: Port name for Redis lookup (required if is_sfp=True) :return: """ try: @@ -664,7 +739,7 @@ class TemperatureUpdater(logger.Logger): # for SFP thermal, they don't need save entity info because snmp can deduce the relation from TRANSCEIVER_DOM_SENSOR # and as we save logical port in TRANSCEIVER_INFO table, for split cable, a SFP thermal might have multiple parent # logical port - if 'SFP' not in parent_name: + if 'SFP' not in parent_name and not is_sfp: update_entity_info(self.phy_entity_table, parent_name, name, thermal, thermal_index + 1) if name not in self.temperature_status_dict: @@ -678,35 +753,45 @@ class TemperatureUpdater(logger.Logger): low_critical_threshold = NOT_AVAILABLE maximum_temperature = NOT_AVAILABLE minimum_temperature = NOT_AVAILABLE - temperature = try_get(thermal.get_temperature) - is_replaceable = try_get(thermal.is_replaceable, False) - if temperature != NOT_AVAILABLE: - 
temperature_status.set_temperature(name, temperature) - minimum_temperature = try_get(thermal.get_minimum_recorded) - maximum_temperature = try_get(thermal.get_maximum_recorded) - high_threshold = try_get(thermal.get_high_threshold) - low_threshold = try_get(thermal.get_low_threshold) - high_critical_threshold = try_get(thermal.get_high_critical_threshold) - low_critical_threshold = try_get(thermal.get_low_critical_threshold) + + if is_sfp: + # Read SFP temperature and thresholds from Redis + temperature = self._get_sfp_temperature_from_db(port_name) + is_replaceable = try_get(thermal.is_replaceable, True) + high_threshold, low_threshold, high_critical_threshold, low_critical_threshold = \ + self._get_sfp_thresholds_from_db(port_name) + else: + # Read from platform API + temperature = try_get(thermal.get_temperature) + is_replaceable = try_get(thermal.is_replaceable, False) + if temperature != NOT_AVAILABLE: + minimum_temperature = try_get(thermal.get_minimum_recorded) + maximum_temperature = try_get(thermal.get_maximum_recorded) + high_threshold = try_get(thermal.get_high_threshold) + low_threshold = try_get(thermal.get_low_threshold) + high_critical_threshold = try_get(thermal.get_high_critical_threshold) + low_critical_threshold = try_get(thermal.get_low_critical_threshold) warning = False - if temperature != NOT_AVAILABLE and temperature_status.set_over_temperature(temperature, high_threshold): - self._log_on_status_changed(not temperature_status.over_temperature, - 'High temperature warning cleared: {} temperature restored to {}C, high threshold {}C'. - format(name, temperature, high_threshold), - 'High temperature warning: {} current temperature {}C, high threshold {}C'. 
- format(name, temperature, high_threshold) - ) - warning = warning | temperature_status.over_temperature - - if temperature != NOT_AVAILABLE and temperature_status.set_under_temperature(temperature, low_threshold): - self._log_on_status_changed(not temperature_status.under_temperature, - 'Low temperature warning cleared: {} temperature restored to {}C, low threshold {}C'. - format(name, temperature, low_threshold), - 'Low temperature warning: {} current temperature {}C, low threshold {}C'. - format(name, temperature, low_threshold) - ) - warning = warning | temperature_status.under_temperature + if temperature != NOT_AVAILABLE: + temperature_status.set_temperature(name, temperature) + if temperature_status.set_over_temperature(temperature, high_threshold): + self._log_on_status_changed(not temperature_status.over_temperature, + 'High temperature warning cleared: {} temperature restored to {}C, high threshold {}C'. + format(name, temperature, high_threshold), + 'High temperature warning: {} current temperature {}C, high threshold {}C'. + format(name, temperature, high_threshold) + ) + warning = warning | temperature_status.over_temperature + + if temperature_status.set_under_temperature(temperature, low_threshold): + self._log_on_status_changed(not temperature_status.under_temperature, + 'Low temperature warning cleared: {} temperature restored to {}C, low threshold {}C'. + format(name, temperature, low_threshold), + 'Low temperature warning: {} current temperature {}C, low threshold {}C'. 
+ format(name, temperature, low_threshold) + ) + warning = warning | temperature_status.under_temperature fvs = swsscommon.FieldValuePairs( [('temperature', str(temperature)), @@ -729,10 +814,91 @@ class TemperatureUpdater(logger.Logger): def _remove_thermal_from_db(self, thermal, parent_name, thermal_index): name = try_get(thermal.get_name, '{} Thermal {}'.format(parent_name, thermal_index + 1)) - self.table._del(name) + try: + self.table._del(name) + except Exception: + pass if self.chassis_table is not None: - self.chassis_table._del(name) + try: + self.chassis_table._del(name) + except Exception: + pass + + def _get_sfp_temperature_from_db(self, port_name): + """ + Get SFP temperature from Redis. First tries TRANSCEIVER_DOM_TEMPERATURE table, + then falls back to TRANSCEIVER_DOM_SENSOR table. Both are populated by xcvrd daemon. + + :param port_name: Port name (e.g., 'Ethernet0') + :return: Temperature value as float, or NOT_AVAILABLE if not found + """ + # First try TRANSCEIVER_DOM_TEMPERATURE table + try: + status, fvs = self.xcvr_dom_temp_tbl.get(port_name) + if status: + for field, value in fvs: + if field == 'temperature': + if value and value != 'N/A' and value != 'N/A C': + temp_str = value.split()[0] if ' ' in value else value + return float(temp_str) + except Exception as e: + self.log_debug("Failed to get SFP temperature for {} from DOM_TEMPERATURE: {}".format(port_name, e)) + + # Fallback to TRANSCEIVER_DOM_SENSOR table + try: + status, fvs = self.xcvr_dom_sensor_tbl.get(port_name) + if status: + for field, value in fvs: + if field == 'temperature': + if value and value != 'N/A' and value != 'N/A C': + temp_str = value.split()[0] if ' ' in value else value + return float(temp_str) + return NOT_AVAILABLE + except Exception as e: + self.log_debug("Failed to get SFP temperature for {} from DOM_SENSOR: {}".format(port_name, e)) + return NOT_AVAILABLE + + def _get_sfp_thresholds_from_db(self, port_name): + """ + Get SFP temperature thresholds from Redis. 
First tries TRANSCEIVER_DOM_THRESHOLD table, + then falls back to TRANSCEIVER_DOM_SENSOR table. + + :param port_name: Port name (e.g., 'Ethernet0') + :return: Tuple of (high_threshold, low_threshold, high_critical_threshold, low_critical_threshold) + """ + high_threshold = NOT_AVAILABLE + low_threshold = NOT_AVAILABLE + high_critical_threshold = NOT_AVAILABLE + low_critical_threshold = NOT_AVAILABLE + + fvs_dict = {} + try: + # First try TRANSCEIVER_DOM_THRESHOLD table + status, fvs = self.xcvr_dom_threshold_tbl.get(port_name) + if status: + fvs_dict = dict(fvs) + # Fallback to TRANSCEIVER_DOM_SENSOR table if no thresholds found + if not fvs_dict or 'temphighwarning' not in fvs_dict: + status, fvs = self.xcvr_dom_sensor_tbl.get(port_name) + if status: + fvs_dict = dict(fvs) + except Exception as e: + self.log_debug("Failed to get SFP thresholds for {} from DB: {}".format(port_name, e)) + + try: + if 'temphighwarning' in fvs_dict and fvs_dict['temphighwarning'] not in ('N/A', ''): + high_threshold = float(fvs_dict['temphighwarning'].split()[0]) if ' ' in fvs_dict['temphighwarning'] else float(fvs_dict['temphighwarning']) + if 'templowwarning' in fvs_dict and fvs_dict['templowwarning'] not in ('N/A', ''): + low_threshold = float(fvs_dict['templowwarning'].split()[0]) if ' ' in fvs_dict['templowwarning'] else float(fvs_dict['templowwarning']) + if 'temphighalarm' in fvs_dict and fvs_dict['temphighalarm'] not in ('N/A', ''): + high_critical_threshold = float(fvs_dict['temphighalarm'].split()[0]) if ' ' in fvs_dict['temphighalarm'] else float(fvs_dict['temphighalarm']) + if 'templowalarm' in fvs_dict and fvs_dict['templowalarm'] not in ('N/A', ''): + low_critical_threshold = float(fvs_dict['templowalarm'].split()[0]) if ' ' in fvs_dict['templowalarm'] else float(fvs_dict['templowalarm']) + except Exception as e: + self.log_debug("Failed to parse SFP thresholds for {}: {}".format(port_name, e)) + + return high_threshold, low_threshold, high_critical_threshold, 
low_critical_threshold class ThermalMonitor(ProcessTaskBase): diff --git a/sonic-thermalctld/tests/mock_platform.py b/sonic-thermalctld/tests/mock_platform.py index 1200967..f67eb0e 100644 --- a/sonic-thermalctld/tests/mock_platform.py +++ b/sonic-thermalctld/tests/mock_platform.py @@ -175,7 +175,7 @@ def get_serial(self): def get_status(self): return self._status - + def get_powergood_status(self): return self._status @@ -472,5 +472,17 @@ def get_dpu_id(self): return self._dpu_id class MockModule(module_base.ModuleBase): - def __init__(self): + def __init__(self, index=1): super(MockModule, self).__init__() + self._name = 'Module {}'.format(index) + self._sfp_list = [] + self._psu_list = [] + + def get_name(self): + return self._name + + def get_all_sfps(self): + return self._sfp_list + + def get_all_psus(self): + return self._psu_list diff --git a/sonic-thermalctld/tests/mocked_libs/swsscommon/swsscommon.py b/sonic-thermalctld/tests/mocked_libs/swsscommon/swsscommon.py index 13c49de..8aad4d4 100644 --- a/sonic-thermalctld/tests/mocked_libs/swsscommon/swsscommon.py +++ b/sonic-thermalctld/tests/mocked_libs/swsscommon/swsscommon.py @@ -28,6 +28,9 @@ def get(self, key): def get_size(self): return (len(self.mock_dict)) + def getKeys(self): + return list(self.mock_dict.keys()) + class FieldValuePairs: fv_dict = {} diff --git a/sonic-thermalctld/tests/test_thermalctld.py b/sonic-thermalctld/tests/test_thermalctld.py index 94630d8..ee357e2 100644 --- a/sonic-thermalctld/tests/test_thermalctld.py +++ b/sonic-thermalctld/tests/test_thermalctld.py @@ -288,6 +288,7 @@ def test_update_module_fans(self): else: fan_updater.log_warning.assert_called_with("Failed to update module fan status - Exception('Test message',)") + class TestThermalMonitor(object): """ Test cases to cover functionality in ThermalMonitor class @@ -427,6 +428,7 @@ def test_over_temper(self): chassis = MockChassis() chassis.make_over_temper_thermal() temperature_updater = 
thermalctld.TemperatureUpdater(chassis, multiprocessing.Event()) + temperature_updater.log_warning.reset_mock() temperature_updater.update() thermal_list = chassis.get_all_thermals() assert temperature_updater.log_warning.call_count == 1 @@ -441,6 +443,7 @@ def test_under_temper(self): chassis = MockChassis() chassis.make_under_temper_thermal() temperature_updater = thermalctld.TemperatureUpdater(chassis, multiprocessing.Event()) + temperature_updater.log_warning.reset_mock() temperature_updater.update() thermal_list = chassis.get_all_thermals() assert temperature_updater.log_warning.call_count == 1 @@ -458,6 +461,7 @@ def test_update_psu_thermals(self): psu._thermal_list.append(mock_thermal) chassis._psu_list.append(psu) temperature_updater = thermalctld.TemperatureUpdater(chassis, multiprocessing.Event()) + temperature_updater.log_warning.reset_mock() temperature_updater.update() assert temperature_updater.log_warning.call_count == 0 @@ -472,24 +476,33 @@ def test_update_psu_thermals(self): temperature_updater.log_warning.assert_called_with("Failed to update thermal status for PSU 1 Thermal 1 - Exception('Test message',)") def test_update_sfp_thermals(self): + """Test SFP thermal processing with Redis-based temperature reading""" chassis = MockChassis() sfp = MockSfp() mock_thermal = MockThermal() sfp._thermal_list.append(mock_thermal) chassis._sfp_list.append(sfp) temperature_updater = thermalctld.TemperatureUpdater(chassis, multiprocessing.Event()) + + # Reset warning count after init (init may log SfpUtilHelper warning) + temperature_updater.log_warning.reset_mock() + + # With sfp_util as None (default), no Redis reading happens, no warnings temperature_updater.update() assert temperature_updater.log_warning.call_count == 0 - mock_thermal.get_temperature = mock.MagicMock(side_effect=Exception("Test message")) - temperature_updater.update() - assert temperature_updater.log_warning.call_count == 1 + # With sfp_util mocked and port_name available, Redis reading 
is attempted + temperature_updater.sfp_util = mock.MagicMock() + temperature_updater.sfp_util.get_physical_to_logical.return_value = ['Ethernet0'] + temperature_updater.xcvr_dom_temp_tbl = mock.MagicMock() + temperature_updater.xcvr_dom_temp_tbl.get.return_value = (True, [('temperature', '55.5')]) + temperature_updater.xcvr_dom_threshold_tbl = mock.MagicMock() + temperature_updater.xcvr_dom_threshold_tbl.get.return_value = (True, []) + temperature_updater.xcvr_dom_sensor_tbl = mock.MagicMock() - # TODO: Clean this up once we no longer need to support Python 2 - if sys.version_info.major == 3: - temperature_updater.log_warning.assert_called_with("Failed to update thermal status for SFP 1 Thermal 1 - Exception('Test message')") - else: - temperature_updater.log_warning.assert_called_with("Failed to update thermal status for SFP 1 Thermal 1 - Exception('Test message',)") + temperature_updater.update() + # Verify Redis table was queried + temperature_updater.xcvr_dom_temp_tbl.get.assert_called_with('Ethernet0') def test_update_thermal_with_exception(self): chassis = MockChassis() @@ -499,6 +512,7 @@ def test_update_thermal_with_exception(self): chassis.get_all_thermals().append(thermal) temperature_updater = thermalctld.TemperatureUpdater(chassis, multiprocessing.Event()) + temperature_updater.log_warning.reset_mock() temperature_updater.update() assert temperature_updater.log_warning.call_count == 2 @@ -527,6 +541,476 @@ def test_update_module_thermals(self): temperature_updater.update() assert len(temperature_updater.all_thermals) == 0 + def test_sfp_temperature_from_redis(self): + """Test reading SFP temperature from Redis tables and verify TEMPERATURE_INFO is populated correctly""" + chassis = MockChassis() + sfp = MockSfp() + sfp._name = 'Ethernet0' + thermal = MockThermal() + thermal._name = 'xSFP module 1 Temp' + sfp._thermal_list.append(thermal) + chassis._sfp_list.append(sfp) + + temperature_updater = thermalctld.TemperatureUpdater(chassis, threading.Event()) 
+ + # Mock the SfpUtilHelper to return correct port mapping + temperature_updater.sfp_util = mock.MagicMock() + temperature_updater.sfp_util.get_physical_to_logical.return_value = ['Ethernet0'] + + # Mock the Redis tables to return temperature data + temperature_updater.xcvr_dom_temp_tbl = mock.MagicMock() + temperature_updater.xcvr_dom_temp_tbl.get.return_value = (True, [('temperature', '55.5')]) + + temperature_updater.xcvr_dom_threshold_tbl = mock.MagicMock() + temperature_updater.xcvr_dom_threshold_tbl.get.return_value = (True, [ + ('temphighwarning', '70.0'), + ('templowwarning', '-5.0'), + ('temphighalarm', '75.0'), + ('templowalarm', '-10.0') + ]) + + temperature_updater.xcvr_dom_sensor_tbl = mock.MagicMock() + + # Use a real Table object to capture the set() calls + temperature_updater.table = Table("STATE_DB", "TEMPERATURE_INFO") + + temperature_updater.update() + + # Verify temperature was read from Redis + temperature_updater.xcvr_dom_temp_tbl.get.assert_called_with('Ethernet0') + temperature_updater.xcvr_dom_threshold_tbl.get.assert_called_with('Ethernet0') + + # Verify TEMPERATURE_INFO table was populated with correct values + assert 'xSFP module 1 Temp' in temperature_updater.table.mock_dict + stored_data = temperature_updater.table.mock_dict['xSFP module 1 Temp'] + + # Verify parsed temperature value + assert stored_data['temperature'] == '55.5' + # Verify parsed threshold values + assert stored_data['high_threshold'] == '70.0' + assert stored_data['low_threshold'] == '-5.0' + assert stored_data['critical_high_threshold'] == '75.0' + assert stored_data['critical_low_threshold'] == '-10.0' + # Verify warning status (should be False since 55.5 is within thresholds) + assert stored_data['warning_status'] == 'False' + # Verify other expected fields exist + assert 'minimum_temperature' in stored_data + assert 'maximum_temperature' in stored_data + assert 'is_replaceable' in stored_data + assert 'timestamp' in stored_data + + def 
test_sfp_temperature_warning_status(self): + """Test that warning_status is True when temperature exceeds high threshold""" + chassis = MockChassis() + sfp = MockSfp() + thermal = MockThermal() + thermal._name = 'xSFP module 1 Temp' + sfp._thermal_list.append(thermal) + chassis._sfp_list.append(sfp) + + temperature_updater = thermalctld.TemperatureUpdater(chassis, threading.Event()) + temperature_updater.sfp_util = mock.MagicMock() + temperature_updater.sfp_util.get_physical_to_logical.return_value = ['Ethernet0'] + + # Temperature exceeds high threshold (80 > 70) + temperature_updater.xcvr_dom_temp_tbl = mock.MagicMock() + temperature_updater.xcvr_dom_temp_tbl.get.return_value = (True, [('temperature', '80.0')]) + + temperature_updater.xcvr_dom_threshold_tbl = mock.MagicMock() + temperature_updater.xcvr_dom_threshold_tbl.get.return_value = (True, [ + ('temphighwarning', '70.0'), + ('templowwarning', '-5.0') + ]) + + temperature_updater.xcvr_dom_sensor_tbl = mock.MagicMock() + temperature_updater.table = Table("STATE_DB", "TEMPERATURE_INFO") + + temperature_updater.update() + + stored_data = temperature_updater.table.mock_dict['xSFP module 1 Temp'] + assert stored_data['temperature'] == '80.0' + assert stored_data['warning_status'] == 'True' + + def test_sfp_temperature_fallback_to_dom_sensor(self): + """Test fallback to TRANSCEIVER_DOM_SENSOR table when DOM_TEMPERATURE is not available""" + chassis = MockChassis() + sfp = MockSfp() + sfp._name = 'Ethernet0' + thermal = MockThermal() + thermal._name = 'xSFP module 1 Temp' + sfp._thermal_list.append(thermal) + chassis._sfp_list.append(sfp) + + temperature_updater = thermalctld.TemperatureUpdater(chassis, threading.Event()) + + # Mock the SfpUtilHelper + temperature_updater.sfp_util = mock.MagicMock() + temperature_updater.sfp_util.get_physical_to_logical.return_value = ['Ethernet0'] + + # Mock DOM_TEMPERATURE table to return no data + temperature_updater.xcvr_dom_temp_tbl = mock.MagicMock() + 
temperature_updater.xcvr_dom_temp_tbl.get.return_value = (False, []) + + # Mock DOM_THRESHOLD table to return no data + temperature_updater.xcvr_dom_threshold_tbl = mock.MagicMock() + temperature_updater.xcvr_dom_threshold_tbl.get.return_value = (False, []) + + # Mock DOM_SENSOR table to return temperature data (fallback) + temperature_updater.xcvr_dom_sensor_tbl = mock.MagicMock() + temperature_updater.xcvr_dom_sensor_tbl.get.return_value = (True, [ + ('temperature', '60.0'), + ('temphighwarning', '75.0'), + ('templowwarning', '-5.0'), + ('temphighalarm', '80.0'), + ('templowalarm', '-10.0') + ]) + + # Use a real Table object to capture the set() calls + temperature_updater.table = Table("STATE_DB", "TEMPERATURE_INFO") + + temperature_updater.update() + + # Verify fallback to DOM_SENSOR table was called + temperature_updater.xcvr_dom_sensor_tbl.get.assert_called() + + # Verify TEMPERATURE_INFO table was populated with fallback values + assert 'xSFP module 1 Temp' in temperature_updater.table.mock_dict + stored_data = temperature_updater.table.mock_dict['xSFP module 1 Temp'] + + # Verify temperature from DOM_SENSOR fallback + assert stored_data['temperature'] == '60.0' + # Verify thresholds from DOM_SENSOR fallback + assert stored_data['high_threshold'] == '75.0' + assert stored_data['low_threshold'] == '-5.0' + assert stored_data['critical_high_threshold'] == '80.0' + assert stored_data['critical_low_threshold'] == '-10.0' + + def test_sfp_temperature_no_sfp_util(self): + """Test that SFP temperature is skipped when SfpUtilHelper is not available""" + chassis = MockChassis() + sfp = MockSfp() + sfp._thermal_list.append(MockThermal()) + chassis._sfp_list.append(sfp) + + temperature_updater = thermalctld.TemperatureUpdater(chassis, threading.Event()) + + # Reset warning count after init (init may log SfpUtilHelper warning) + temperature_updater.log_warning.reset_mock() + + # Set sfp_util to None (simulating import failure) + temperature_updater.sfp_util = None + + # 
Should not raise exception + temperature_updater.update() + assert temperature_updater.log_warning.call_count == 0 + + def test_get_port_name_by_index(self): + """Test _get_port_name_by_index method""" + chassis = MockChassis() + temperature_updater = thermalctld.TemperatureUpdater(chassis, threading.Event()) + + # Mock the SfpUtilHelper + temperature_updater.sfp_util = mock.MagicMock() + temperature_updater.sfp_util.get_physical_to_logical.return_value = ['Ethernet0', 'Ethernet1'] + + # Test getting port name for index 0 (physical index 1) + port_name = temperature_updater._get_port_name_by_index(0) + assert port_name == 'Ethernet0' + temperature_updater.sfp_util.get_physical_to_logical.assert_called_with(1) + + # Test with no mapping found + temperature_updater.sfp_util.get_physical_to_logical.return_value = None + port_name = temperature_updater._get_port_name_by_index(5) + assert port_name is None + + # Test with sfp_util not available + temperature_updater.sfp_util = None + port_name = temperature_updater._get_port_name_by_index(0) + assert port_name is None + + def test_get_sfp_temperature_from_db(self): + """Test _get_sfp_temperature_from_db method""" + chassis = MockChassis() + temperature_updater = thermalctld.TemperatureUpdater(chassis, threading.Event()) + + # Mock the Redis tables + temperature_updater.xcvr_dom_temp_tbl = mock.MagicMock() + temperature_updater.xcvr_dom_sensor_tbl = mock.MagicMock() + + # Test reading from DOM_TEMPERATURE table + temperature_updater.xcvr_dom_temp_tbl.get.return_value = (True, [('temperature', '55.5')]) + temp = temperature_updater._get_sfp_temperature_from_db('Ethernet0') + assert temp == 55.5 + + # Test fallback to DOM_SENSOR table + temperature_updater.xcvr_dom_temp_tbl.get.return_value = (False, []) + temperature_updater.xcvr_dom_sensor_tbl.get.return_value = (True, [('temperature', '60.0')]) + temp = temperature_updater._get_sfp_temperature_from_db('Ethernet0') + assert temp == 60.0 + + # Test with N/A value + 
temperature_updater.xcvr_dom_temp_tbl.get.return_value = (True, [('temperature', 'N/A')])
+        temperature_updater.xcvr_dom_sensor_tbl.get.return_value = (False, [])
+        temp = temperature_updater._get_sfp_temperature_from_db('Ethernet0')
+        assert temp == thermalctld.NOT_AVAILABLE
+
+        # Test with temperature value containing unit suffix
+        temperature_updater.xcvr_dom_temp_tbl.get.return_value = (True, [('temperature', '55.5 C')])
+        temp = temperature_updater._get_sfp_temperature_from_db('Ethernet0')
+        assert temp == 55.5
+
+    def test_sfp_temperature_na_value(self):
+        """Test that N/A temperature is stored correctly in TEMPERATURE_INFO"""
+        chassis = MockChassis()
+        sfp = MockSfp()
+        thermal = MockThermal()
+        thermal._name = 'xSFP module 1 Temp'
+        sfp._thermal_list.append(thermal)
+        chassis._sfp_list.append(sfp)
+
+        temperature_updater = thermalctld.TemperatureUpdater(chassis, threading.Event())
+        temperature_updater.sfp_util = mock.MagicMock()
+        temperature_updater.sfp_util.get_physical_to_logical.return_value = ['Ethernet0']
+
+        # Return N/A temperature
+        temperature_updater.xcvr_dom_temp_tbl = mock.MagicMock()
+        temperature_updater.xcvr_dom_temp_tbl.get.return_value = (True, [('temperature', 'N/A')])
+
+        # No threshold entry and no DOM_SENSOR entry for this port
+        temperature_updater.xcvr_dom_threshold_tbl = mock.MagicMock()
+        temperature_updater.xcvr_dom_threshold_tbl.get.return_value = (False, [])
+
+        temperature_updater.xcvr_dom_sensor_tbl = mock.MagicMock()
+        temperature_updater.xcvr_dom_sensor_tbl.get.return_value = (False, [])
+
+        temperature_updater.table = Table("STATE_DB", "TEMPERATURE_INFO")
+
+        temperature_updater.update()
+
+        assert 'xSFP module 1 Temp' in temperature_updater.table.mock_dict
+        stored_data = temperature_updater.table.mock_dict['xSFP module 1 Temp']
+
+        # Verify N/A temperature is stored correctly
+        assert stored_data['temperature'] == 'N/A'
+        # Verify thresholds are also N/A when not available
+        assert stored_data['high_threshold'] == 'N/A'
+        assert stored_data['low_threshold'] == 'N/A'
+        # Warning status should be False when temperature is N/A
+        assert stored_data['warning_status'] == 'False'
+
+    def test_sfp_temperature_with_unit_suffix(self):
+        """Test parsing temperature values with unit suffix (e.g., '55.5 C')"""
+        chassis = MockChassis()
+        sfp = MockSfp()
+        thermal = MockThermal()
+        thermal._name = 'xSFP module 1 Temp'
+        sfp._thermal_list.append(thermal)
+        chassis._sfp_list.append(sfp)
+
+        temperature_updater = thermalctld.TemperatureUpdater(chassis, threading.Event())
+        temperature_updater.sfp_util = mock.MagicMock()
+        temperature_updater.sfp_util.get_physical_to_logical.return_value = ['Ethernet0']
+
+        # Temperature with unit suffix
+        temperature_updater.xcvr_dom_temp_tbl = mock.MagicMock()
+        temperature_updater.xcvr_dom_temp_tbl.get.return_value = (True, [('temperature', '55.5 C')])
+
+        # Thresholds with unit suffix
+        temperature_updater.xcvr_dom_threshold_tbl = mock.MagicMock()
+        temperature_updater.xcvr_dom_threshold_tbl.get.return_value = (True, [
+            ('temphighwarning', '70.0 C'),
+            ('templowwarning', '-5.0 C'),
+            ('temphighalarm', '75.0 C'),
+            ('templowalarm', '-10.0 C')
+        ])
+
+        # Give the DOM_SENSOR mock a well-formed (found, fvs) reply, as the
+        # sibling tests do, instead of leaving get() to return a bare MagicMock.
+        temperature_updater.xcvr_dom_sensor_tbl = mock.MagicMock()
+        temperature_updater.xcvr_dom_sensor_tbl.get.return_value = (False, [])
+        temperature_updater.table = Table("STATE_DB", "TEMPERATURE_INFO")
+
+        temperature_updater.update()
+
+        stored_data = temperature_updater.table.mock_dict['xSFP module 1 Temp']
+
+        # Verify unit suffix is stripped from values
+        assert stored_data['temperature'] == '55.5'
+        assert stored_data['high_threshold'] == '70.0'
+        assert stored_data['low_threshold'] == '-5.0'
+        assert stored_data['critical_high_threshold'] == '75.0'
+        assert stored_data['critical_low_threshold'] == '-10.0'
+
+    def test_init_sfp_util_helper_multi_asic(self):
+        """Test _init_sfp_util_helper with multi-asic configuration"""
+        chassis = MockChassis()
+        temperature_updater = thermalctld.TemperatureUpdater(chassis, threading.Event())
+
+        with mock.patch.object(thermalctld, 'SfpUtilHelper') as mock_sfp_util_class, \
+             mock.patch.object(thermalctld.multi_asic, 'is_multi_asic', return_value=True), \
+             mock.patch.object(thermalctld.multi_asic, 'get_num_asics', return_value=2), \
+             mock.patch.object(thermalctld.device_info, 'get_paths_to_platform_and_hwsku_dirs', return_value=('/platform', '/hwsku')):
+            mock_sfp_util_instance = mock.MagicMock()
+            mock_sfp_util_class.return_value = mock_sfp_util_instance
+
+            result = temperature_updater._init_sfp_util_helper()
+
+            assert result is mock_sfp_util_instance
+            # The hwsku dir (second tuple element) and the ASIC count are forwarded
+            mock_sfp_util_instance.read_all_porttab_mappings.assert_called_once_with('/hwsku', 2)
+            # The single-ASIC reader must not be used on the multi-asic path
+            mock_sfp_util_instance.read_porttab_mappings.assert_not_called()
+
+    def test_init_sfp_util_helper_system_exit(self):
+        """Test _init_sfp_util_helper handles SystemExit from read_porttab_mappings"""
+        chassis = MockChassis()
+        temperature_updater = thermalctld.TemperatureUpdater(chassis, threading.Event())
+
+        with mock.patch.object(thermalctld, 'SfpUtilHelper') as mock_sfp_util_class, \
+             mock.patch.object(thermalctld.multi_asic, 'is_multi_asic', return_value=False), \
+             mock.patch.object(thermalctld.device_info, 'get_path_to_port_config_file', return_value='/path/to/port_config.ini'):
+            mock_sfp_util_instance = mock.MagicMock()
+            mock_sfp_util_instance.read_porttab_mappings.side_effect = SystemExit(1)
+            mock_sfp_util_class.return_value = mock_sfp_util_instance
+
+            # Constructing the updater already ran _init_sfp_util_helper() once and
+            # may have logged a warning; clear that history so the assertion below
+            # only observes the call made here. (log_warning is expected to be a
+            # mock in this test module - the assertion relies on it.)
+            temperature_updater.log_warning.reset_mock()
+
+            result = temperature_updater._init_sfp_util_helper()
+
+            assert result is None
+            mock_sfp_util_instance.read_porttab_mappings.assert_called_once_with('/path/to/port_config.ini', 0)
+            temperature_updater.log_warning.assert_called_once()
+
+    def test_init_sfp_util_helper_exception(self):
+        """Test _init_sfp_util_helper handles generic Exception"""
+        chassis = MockChassis()
+        temperature_updater = thermalctld.TemperatureUpdater(chassis, threading.Event())
+
+        with mock.patch.object(thermalctld, 'SfpUtilHelper') as mock_sfp_util_class, \
+             mock.patch.object(thermalctld.multi_asic, 'is_multi_asic', return_value=False), \
+             mock.patch.object(thermalctld.device_info, 'get_path_to_port_config_file', return_value='/path/to/port_config.ini'):
+            mock_sfp_util_instance = mock.MagicMock()
+            mock_sfp_util_instance.read_porttab_mappings.side_effect = Exception("File not found")
+            mock_sfp_util_class.return_value = mock_sfp_util_instance
+
+            # Drop any warning logged while the updater was being constructed so
+            # the assertion below is not satisfied by an earlier call.
+            temperature_updater.log_warning.reset_mock()
+
+            result = temperature_updater._init_sfp_util_helper()
+
+            assert result is None
+            mock_sfp_util_instance.read_porttab_mappings.assert_called_once_with('/path/to/port_config.ini', 0)
+            temperature_updater.log_warning.assert_called_once()
+
+    def test_init_sfp_util_helper_not_available(self):
+        """Test _init_sfp_util_helper when SfpUtilHelper import failed"""
+        chassis = MockChassis()
+        temperature_updater = thermalctld.TemperatureUpdater(chassis, threading.Event())
+
+        # Simulate the failed import: the module-level name is None. patch.object
+        # restores the original value on exit, even if an assertion fails.
+        with mock.patch.object(thermalctld, 'SfpUtilHelper', None):
+            # Ignore warnings logged during construction of the updater
+            temperature_updater.log_warning.reset_mock()
+
+            result = temperature_updater._init_sfp_util_helper()
+
+            assert result is None
+            temperature_updater.log_warning.assert_called_once()
+
+    def test_modular_chassis_sfp_thermals(self):
+        """Test SFP thermal updates on modular chassis with modules"""
+        chassis = MockChassis()
+        chassis.set_modular_chassis(True)
+        chassis.set_my_slot(1)
+
+        # Create a module with SFP
+        module = MockModule(1)
+        module._name = 'Module 1'
+        sfp = MockSfp()
+        thermal = MockThermal()
+        thermal._name = 'Module 1 xSFP module 1 Temp'
+        sfp._thermal_list.append(thermal)
+        module._sfp_list.append(sfp)
+        chassis._module_list.append(module)
+
+        temperature_updater = thermalctld.TemperatureUpdater(chassis, threading.Event())
+        temperature_updater.sfp_util = mock.MagicMock()
+        temperature_updater.sfp_util.get_physical_to_logical.return_value = ['Ethernet0']
+
+        # Mock Redis tables
+        temperature_updater.xcvr_dom_temp_tbl = mock.MagicMock()
+        temperature_updater.xcvr_dom_temp_tbl.get.return_value = (True, [('temperature', '45.0')])
+        temperature_updater.xcvr_dom_threshold_tbl = mock.MagicMock()
+        temperature_updater.xcvr_dom_threshold_tbl.get.return_value = (True, [
+            ('temphighwarning', '70.0'),
+            ('templowwarning', '-5.0'),
+            ('temphighalarm', '75.0'),
+            ('templowalarm', '-10.0')
+        ])
+        # Well-formed "not found" reply rather than a bare MagicMock from get()
+        temperature_updater.xcvr_dom_sensor_tbl = mock.MagicMock()
+        temperature_updater.xcvr_dom_sensor_tbl.get.return_value = (False, [])
+        temperature_updater.table = Table("STATE_DB", "TEMPERATURE_INFO")
+
+        temperature_updater.update()
+
+        # Verify module SFP thermal was updated
+        assert 'Module 1 xSFP module 1 Temp' in temperature_updater.table.mock_dict
+
+    def test_remove_thermal_from_db_exceptions(self):
+        """Test _remove_thermal_from_db handles exceptions gracefully"""
+        chassis = MockChassis()
+        chassis.set_modular_chassis(True)
+        chassis.set_my_slot(1)
+
+        temperature_updater = thermalctld.TemperatureUpdater(chassis, threading.Event())
+
+        # Mock table to raise exception on _del
+        temperature_updater.table = mock.MagicMock()
+        temperature_updater.table._del.side_effect = Exception("Redis error")
+
+        temperature_updater.chassis_table = mock.MagicMock()
+        temperature_updater.chassis_table._del.side_effect = Exception("Chassis DB error")
+
+        # Create a mock thermal
+        thermal = MockThermal()
+        thermal._name = 'Test Thermal'
+
+        # Should not raise exception
+        # NOTE(review): the test passes as long as nothing propagates; it does not
+        # check that either _del was actually attempted - consider asserting that
+        # once the expected call arguments are confirmed against the daemon code.
+        temperature_updater._remove_thermal_from_db(thermal, 'Test Parent', 0)
+
+    def test_get_sfp_temperature_from_db_exception_dom_temp(self):
+        """Test _get_sfp_temperature_from_db handles exception from DOM_TEMPERATURE table"""
+        chassis = MockChassis()
+        temperature_updater = thermalctld.TemperatureUpdater(chassis, threading.Event())
+
+        temperature_updater.xcvr_dom_temp_tbl = mock.MagicMock()
+        temperature_updater.xcvr_dom_temp_tbl.get.side_effect = Exception("Redis error")
+        temperature_updater.xcvr_dom_sensor_tbl = mock.MagicMock()
+        temperature_updater.xcvr_dom_sensor_tbl.get.return_value = (True, [('temperature', '50.0')])
+
+        # Should fallback to DOM_SENSOR
+        temp = temperature_updater._get_sfp_temperature_from_db('Ethernet0')
+        assert temp == 50.0
+
+    def test_get_sfp_temperature_from_db_exception_dom_sensor(self):
+        """Test _get_sfp_temperature_from_db handles exception from DOM_SENSOR table"""
+        chassis = MockChassis()
+        temperature_updater = thermalctld.TemperatureUpdater(chassis, threading.Event())
+
+        # DOM_TEMPERATURE has no entry and DOM_SENSOR raises: no source is left
+        temperature_updater.xcvr_dom_temp_tbl = mock.MagicMock()
+        temperature_updater.xcvr_dom_temp_tbl.get.return_value = (False, [])
+        temperature_updater.xcvr_dom_sensor_tbl = mock.MagicMock()
+        temperature_updater.xcvr_dom_sensor_tbl.get.side_effect = Exception("Redis error")
+
+        temp = temperature_updater._get_sfp_temperature_from_db('Ethernet0')
+        assert temp == thermalctld.NOT_AVAILABLE
+
+    def test_get_sfp_thresholds_from_db_exception(self):
+        """Test _get_sfp_thresholds_from_db handles parsing exceptions"""
+        chassis = MockChassis()
+        temperature_updater = thermalctld.TemperatureUpdater(chassis, threading.Event())
+
+        temperature_updater.xcvr_dom_threshold_tbl = mock.MagicMock()
+        # Return invalid threshold value that will cause float() to fail
+        temperature_updater.xcvr_dom_threshold_tbl.get.return_value = (True, [
+            ('temphighwarning', 'invalid_float'),
+        ])
+        temperature_updater.xcvr_dom_sensor_tbl = mock.MagicMock()
+        temperature_updater.xcvr_dom_sensor_tbl.get.return_value = (False, [])
+
+        # Should return N/A values without raising exception
+        high, low, high_crit, low_crit = temperature_updater._get_sfp_thresholds_from_db('Ethernet0')
+        assert high == thermalctld.NOT_AVAILABLE
+        assert low == thermalctld.NOT_AVAILABLE
+        assert high_crit == thermalctld.NOT_AVAILABLE
+        assert low_crit == thermalctld.NOT_AVAILABLE
+
 # DPU chassis-related tests
 def test_dpu_chassis_thermals():