diff --git a/tests/platform_tests/test_intf_fec.py b/tests/platform_tests/test_intf_fec.py index 95572effdd..8cee26599f 100644 --- a/tests/platform_tests/test_intf_fec.py +++ b/tests/platform_tests/test_intf_fec.py @@ -189,10 +189,13 @@ def get_fec_histogram(duthost, intf_name): return fec_hist -def validate_fec_histogram(duthost, intf_name): +def validate_fec_histogram(duthost, intf_name, init, prev=None): """ @Summary: Validate FEC histogram critical bins for any errors. Fail the test if bin value > 0 + for a stable link over last two snapshots. """ + if not init and not prev: + pytest.fail("FEC histogram from previous snapshot is not provided") fec_hist = get_fec_histogram(duthost, intf_name) if not fec_hist: @@ -202,16 +205,25 @@ def validate_fec_histogram(duthost, intf_name): error_bins = [] for bin_index in critical_bins: bin_value = int(fec_hist[bin_index].get('codewords', 0)) - if bin_value > 0: - error_bins.append((bin_index, bin_value)) + if init: + if bin_value > 0: + error_bins.append((bin_index, bin_value)) + else: + prev_bin_value = int(prev[bin_index].get('codewords', 0)) + if bin_value - prev_bin_value > 0: + error_bins.append((bin_index, bin_value)) if error_bins: - error_messages = ["FEC histogram bin {} has errors for interface {}: {}".format(bin_index, intf_name, bin_value) + error_messages = ["FEC histogram bin {} has errors for interface {}: {} (init: {})".format( + bin_index, intf_name, bin_value, init) for bin_index, bin_value in error_bins] - logging.error("\n".join(error_messages)) - return False + if init: + logging.info("\n".join(error_messages)) + else: + logging.error("\n".join(error_messages)) + return False, fec_hist - return True + return True, fec_hist def test_verify_fec_histogram(duthosts, enum_rand_one_per_hwsku_frontend_hostname): @@ -227,8 +239,26 @@ def test_verify_fec_histogram(duthosts, enum_rand_one_per_hwsku_frontend_hostnam # Get operationally up and interfaces with supported speeds interfaces = get_fec_eligible_interfaces(duthost, SUPPORTED_SPEEDS) + # It's possible there are some transient FEC symbol errors on interface + # state transition. Hence, this test uses the first check to read the current + # FEC histogram counters to see whether there are stale errors. If so, + # it will increase the waiting time for the next read and compare any + # changes in the critical bins between 2 snapshots. For a stable link, no + # increments in these critical bins are expected. + snapshots = {} + sleep_time = 10 for intf_name in interfaces: - for _ in range(3): - if not validate_fec_histogram(duthost, intf_name): + valid, fec_hist = validate_fec_histogram(duthost, intf_name, True) + if not valid: + logging.info("Update test sleep time to 10 min due to bin errors in the initial snapshot") + sleep_time = 10 * 60 + snapshots[intf_name] = fec_hist + + for _ in range(2): + time.sleep(sleep_time) + for intf_name in interfaces: + prev_fec_hist = snapshots[intf_name] + valid, fec_hist = validate_fec_histogram(duthost, intf_name, False, prev_fec_hist) + if not valid: pytest.fail("FEC histogram validation failed for interface {}".format(intf_name)) - time.sleep(10) + snapshots[intf_name] = fec_hist