Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 40 additions & 10 deletions tests/platform_tests/test_intf_fec.py
Original file line number Diff line number Diff line change
Expand Up @@ -219,10 +219,13 @@ def get_fec_histogram(duthost, intf_name):
return fec_hist


def validate_fec_histogram(duthost, intf_name):
def validate_fec_histogram(duthost, intf_name, init, prev=None):
"""
@Summary: Validate FEC histogram critical bins for any errors. Fail the test if bin value > 0
for a stable link over last two snapshots.
"""
if not init and not prev:
pytest.fail("FEC histogram from previous snapshot is not provided")

fec_hist = get_fec_histogram(duthost, intf_name)
if not fec_hist:
Expand All @@ -232,16 +235,25 @@ def validate_fec_histogram(duthost, intf_name):
error_bins = []
for bin_index in critical_bins:
bin_value = int(fec_hist[bin_index].get('codewords', 0))
if bin_value > 0:
error_bins.append((bin_index, bin_value))
if init:
if bin_value > 0:
error_bins.append((bin_index, bin_value))
else:
prev_bin_value = int(prev[bin_index].get('codewords', 0))
if bin_value - prev_bin_value > 0:
error_bins.append((bin_index, bin_value))

if error_bins:
error_messages = ["FEC histogram bin {} has errors for interface {}: {}".format(bin_index, intf_name, bin_value)
error_messages = ["FEC histogram bin {} has errors for interface {}: {} (init: {})".format(
bin_index, intf_name, bin_value, init)
for bin_index, bin_value in error_bins]
logging.error("\n".join(error_messages))
return False
if init:
logging.info("\n".join(error_messages))
else:
logging.error("\n".join(error_messages))
return False, fec_hist

return True
return True, fec_hist


def test_verify_fec_histogram(duthosts, enum_rand_one_per_hwsku_frontend_hostname):
Expand All @@ -260,8 +272,26 @@ def test_verify_fec_histogram(duthosts, enum_rand_one_per_hwsku_frontend_hostnam
if not interfaces:
pytest.skip("Skipping this test as there is no fec eligible interface")

# Transient FEC symbol errors can occur during interface state transitions.
# The first pass therefore only records the current FEC histogram counters
# for each interface and checks whether the critical bins already hold stale
# errors. If they do, the wait before each later read is raised from 10
# seconds to 10 minutes. Every later read is then compared against the
# previous snapshot of the same interface: for a stable link, no increments
# in the critical bins are expected.
snapshots = {}
sleep_time = 10
for intf_name in interfaces:
for _ in range(3):
if not validate_fec_histogram(duthost, intf_name):
valid, fec_hist = validate_fec_histogram(duthost, intf_name, True)
if not valid:
logging.info("Update test sleep time to 10 min due to bin errors in the initial snapshot")
sleep_time = 10 * 60
snapshots[intf_name] = fec_hist

for _ in range(2):
time.sleep(sleep_time)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@kewei-arista I am not sure a 10 min/20 min wait time would be acceptable to all platform owners. Can we make this test attribute-driven?

for intf_name in interfaces:
prev_fec_hist = snapshots[intf_name]
valid, fec_hist = validate_fec_histogram(duthost, intf_name, False, prev_fec_hist)
if not valid:
pytest.fail("FEC histogram validation failed for interface {}".format(intf_name))
time.sleep(10)
snapshots[intf_name] = fec_hist
Loading