From 056e47f3895cd79d7f243c59206702da23671b9f Mon Sep 17 00:00:00 2001 From: markxiao Date: Fri, 30 Jan 2026 10:37:15 -0800 Subject: [PATCH] Fix telemetry/test_events.py for v6 topos Signed-off-by: markxiao --- tests/common/devices/sonic.py | 6 +- tests/telemetry/conftest.py | 4 +- tests/telemetry/events/bgp_events.py | 64 +++++++++++++++------ tests/telemetry/events/dhcp-relay_events.py | 23 ++++---- tests/telemetry/events/eventd_events.py | 4 +- tests/telemetry/events/host_events.py | 30 +++++----- tests/telemetry/events/run_events_test.py | 6 +- tests/telemetry/events/swss_events.py | 14 ++--- tests/telemetry/test_events.py | 7 ++- 9 files changed, 98 insertions(+), 60 deletions(-) diff --git a/tests/common/devices/sonic.py b/tests/common/devices/sonic.py index c58a7509b7..36577616c6 100644 --- a/tests/common/devices/sonic.py +++ b/tests/common/devices/sonic.py @@ -2090,7 +2090,9 @@ def is_bgp_state_idle(self): Returns: True or False """ - bgp_summary = self.command("show ip bgp summary")["stdout_lines"] + bgp_summary_v4 = self.command("show ip bgp summary")["stdout_lines"] + bgp_summary_v6 = self.command("show ipv6 bgp summary")["stdout_lines"] + bgp_summary = bgp_summary_v4 + bgp_summary_v6 idle_count = 0 expected_idle_count = 0 @@ -2101,7 +2103,7 @@ def is_bgp_state_idle(self): if "Total number of neighbors" in line: tokens = line.split() - expected_idle_count = int(tokens[-1]) + expected_idle_count += int(tokens[-1]) if "BGPMonitor" in line: bgp_monitor_count += 1 diff --git a/tests/telemetry/conftest.py b/tests/telemetry/conftest.py index dd29d5de3f..88e2880da8 100644 --- a/tests/telemetry/conftest.py +++ b/tests/telemetry/conftest.py @@ -49,7 +49,7 @@ def do_init(duthost): @pytest.fixture(scope="module") -def test_eventd_healthy(duthosts, enum_rand_one_per_hwsku_hostname, ptfhost, ptfadapter, +def test_eventd_healthy(duthosts, tbinfo, enum_rand_one_per_hwsku_hostname, ptfhost, ptfadapter, setup_streaming_telemetry, gnxi_path): """ @summary: Test eventd heartbeat before testing all testcases @@ -72,6 +72,6 @@ def test_eventd_healthy(duthosts, enum_rand_one_per_hwsku_hostname, ptfhost, ptf py_assert(wait_until(100, 10, 0, duthost.is_service_fully_started, "eventd"), "eventd not started.") - module.test_event(duthost, gnxi_path, ptfhost, ptfadapter, DATA_DIR, None) + module.test_event(duthost, tbinfo, gnxi_path, ptfhost, ptfadapter, DATA_DIR, None) logger.info("Completed test file: {}".format("eventd_events test completed.")) diff --git a/tests/telemetry/events/bgp_events.py b/tests/telemetry/events/bgp_events.py index e72c46a187..b2836b0986 100644 --- a/tests/telemetry/events/bgp_events.py +++ b/tests/telemetry/events/bgp_events.py @@ -2,46 +2,78 @@ import logging import time +import ipaddress from run_events_test import run_test +from tests.common.utilities import is_ipv6_only_topology logger = logging.getLogger(__name__) tag = "sonic-events-bgp" -def test_event(duthost, gnxi_path, ptfhost, ptfadapter, data_dir, validate_yang): - run_test(duthost, gnxi_path, ptfhost, data_dir, validate_yang, drop_tcp_packets, +def test_event(duthost, tbinfo, gnxi_path, ptfhost, ptfadapter, data_dir, validate_yang): + run_test(duthost, tbinfo, gnxi_path, ptfhost, data_dir, validate_yang, drop_tcp_packets, "bgp_notification.json", "sonic-events-bgp:notification", tag) - run_test(duthost, gnxi_path, ptfhost, data_dir, validate_yang, shutdown_bgp_neighbors, + run_test(duthost, tbinfo, gnxi_path, ptfhost, data_dir, validate_yang, shutdown_bgp_neighbors, "bgp_state.json", "sonic-events-bgp:bgp-state", tag) -def drop_tcp_packets(duthost): - bgp_neighbor = list(duthost.get_bgp_neighbors().keys())[0] +def drop_tcp_packets(duthost, tbinfo): + # Check if topo is IPv6-only and select appropriate BGP neighbor + is_v6_topo = is_ipv6_only_topology(tbinfo) + + # Get all BGP neighbors and filter by IP version based on v6/non-v6 topo + all_bgp_neighbors = duthost.get_bgp_neighbors() + bgp_neighbor = None + + if is_v6_topo: + # Find an IPv6 BGP neighbor + for neighbor_ip in all_bgp_neighbors.keys(): + if ipaddress.ip_address(neighbor_ip).version == 6: + bgp_neighbor = neighbor_ip + break + if bgp_neighbor is None: + raise Exception("No IPv6 BGP neighbors found for IPv6-only topo") + iptables_cmd = "ip6tables" + logger.info( + "Using IPv6 BGP neighbor %s and ip6tables for IPv6-only topo", + bgp_neighbor + ) + else: + # Find an IPv4 BGP neighbor (or just use the first one) + for neighbor_ip in all_bgp_neighbors.keys(): + if ipaddress.ip_address(neighbor_ip).version == 4: + bgp_neighbor = neighbor_ip + break + if bgp_neighbor is None: + # Fallback to first neighbor if no IPv4 found + bgp_neighbor = list(all_bgp_neighbors.keys())[0] + iptables_cmd = "iptables" + logger.info("Using IPv4 BGP neighbor {} and iptables".format(bgp_neighbor)) holdtime_timer_ms = duthost.get_bgp_neighbor_info(bgp_neighbor)["bgpTimerConfiguredHoldTimeMsecs"] logger.info("Adding rule to drop TCP packets to test bgp-notification") - ret = duthost.shell("iptables -I INPUT -p tcp --dport 179 -j DROP") - assert ret["rc"] == 0, "Unable to add DROP rule to iptables" + ret = duthost.shell("{} -I INPUT -p tcp --dport 179 -j DROP".format(iptables_cmd)) + assert ret["rc"] == 0, "Unable to add DROP rule to {}".format(iptables_cmd) - ret = duthost.shell("iptables -I INPUT -p tcp --sport 179 -j DROP") - assert ret["rc"] == 0, "Unable to add DROP rule to iptables" + ret = duthost.shell("{} -I INPUT -p tcp --sport 179 -j DROP".format(iptables_cmd)) + assert ret["rc"] == 0, "Unable to add DROP rule to {}".format(iptables_cmd) - ret = duthost.shell("iptables -L") - assert ret["rc"] == 0, "Unable to list iptables rules" + ret = duthost.shell("{} -L".format(iptables_cmd)) + assert ret["rc"] == 0, "Unable to list {} rules".format(iptables_cmd) time.sleep(holdtime_timer_ms / 1000) # Give time for hold timer expiry event, val from configured bgp neighbor info - ret = duthost.shell("iptables -D INPUT -p tcp --dport 179 -j DROP") - assert ret["rc"] == 0, "Unable to remove DROP rule from iptables" + ret = duthost.shell("{} -D INPUT -p tcp --dport 179 -j DROP".format(iptables_cmd)) + assert ret["rc"] == 0, "Unable to remove DROP rule from {}".format(iptables_cmd) - ret = duthost.shell("iptables -D INPUT -p tcp --sport 179 -j DROP") - assert ret["rc"] == 0, "Unable to remove DROP rule from iptables" + ret = duthost.shell("{} -D INPUT -p tcp --sport 179 -j DROP".format(iptables_cmd)) + assert ret["rc"] == 0, "Unable to remove DROP rule from {}".format(iptables_cmd) -def shutdown_bgp_neighbors(duthost): +def shutdown_bgp_neighbors(duthost, tbinfo): logger.info("Shutting down bgp neighbors to test bgp-state event") assert duthost.is_service_running("bgpcfgd", "bgp") is True and duthost.is_bgp_state_idle() is False logger.info("Start all bgp sessions") diff --git a/tests/telemetry/events/dhcp-relay_events.py b/tests/telemetry/events/dhcp-relay_events.py index fb9aa62de1..1836dc086d 100644 --- a/tests/telemetry/events/dhcp-relay_events.py +++ b/tests/telemetry/events/dhcp-relay_events.py @@ -7,6 +7,7 @@ from tests.common.helpers.assertions import pytest_assert as py_assert from tests.common.utilities import wait_until +from tests.common.utilities import is_ipv6_only_topology from run_events_test import run_test from event_utils import find_test_vlan, find_test_client_port_and_mac, create_dhcp_discover_packet @@ -14,7 +15,7 @@ tag = "sonic-events-dhcp-relay" -def test_event(duthost, gnxi_path, ptfhost, ptfadapter, data_dir, validate_yang): +def test_event(duthost, tbinfo, gnxi_path, ptfhost, ptfadapter, data_dir, validate_yang): features_states, succeeded = duthost.get_feature_status() if not succeeded or features_states["dhcp_relay"] != "enabled": pytest.skip("dhcp_relay is not enabled, skipping dhcp_relay events") @@ -22,30 +23,32 @@ def test_event(duthost, gnxi_path, ptfhost, ptfadapter, data_dir, validate_yang) switch_role = device_metadata['localhost'].get('type', '') if switch_role == 'BmcMgmtToRRouter': pytest.skip("Skipping dhcp_relay events for mx topologies") + if is_ipv6_only_topology(tbinfo): + pytest.skip("Skipping dhcp_relay events for IPv6-only topologies") logger.info("Beginning to test dhcp-relay events") - run_test(duthost, gnxi_path, ptfhost, data_dir, validate_yang, trigger_dhcp_relay_discard, + run_test(duthost, tbinfo, gnxi_path, ptfhost, data_dir, validate_yang, trigger_dhcp_relay_discard, "dhcp_relay_discard.json", "sonic-events-dhcp-relay:dhcp-relay-discard", tag, False, 30, ptfadapter) - run_test(duthost, gnxi_path, ptfhost, data_dir, validate_yang, trigger_dhcp_relay_disparity, + run_test(duthost, tbinfo, gnxi_path, ptfhost, data_dir, validate_yang, trigger_dhcp_relay_disparity, "dhcp_relay_disparity.json", "sonic-events-dhcp-relay:dhcp-relay-disparity", tag, False, 30, ptfadapter) - run_test(duthost, gnxi_path, ptfhost, data_dir, validate_yang, trigger_dhcp_relay_bind_failure, + run_test(duthost, tbinfo, gnxi_path, ptfhost, data_dir, validate_yang, trigger_dhcp_relay_bind_failure, "dhcp_relay_bind_failure.json", "sonic-events-dhcp-relay:dhcp-relay-bind-failure", tag, False, 30) -def trigger_dhcp_relay_discard(duthost, ptfadapter): - send_dhcp_discover_packets(duthost, ptfadapter) +def trigger_dhcp_relay_discard(duthost, tbinfo, ptfadapter): + send_dhcp_discover_packets(duthost, tbinfo, ptfadapter) -def trigger_dhcp_relay_disparity(duthost, ptfadapter): +def trigger_dhcp_relay_disparity(duthost, tbinfo, ptfadapter): """11 packets because dhcpmon process will store up to 10 unhealthy status events https://github.com/sonic-net/sonic-dhcpmon/blob/master/src/dhcp_mon.cpp#L94 static int dhcp_unhealthy_max_count = 10; Sending at interval of 18 seconds because dhcpmon process will check health at that interval static int window_interval_sec = 18; """ - send_dhcp_discover_packets(duthost, ptfadapter, 11, 18) + send_dhcp_discover_packets(duthost, tbinfo, ptfadapter, 11, 18) -def trigger_dhcp_relay_bind_failure(duthost): +def trigger_dhcp_relay_bind_failure(duthost, tbinfo): # Flush ipv6 vlan address and restart dhc6relay process py_assert(wait_until(100, 10, 0, duthost.is_service_fully_started, "dhcp_relay"), "dhcp_relay container not started") @@ -81,7 +84,7 @@ def trigger_dhcp_relay_bind_failure(duthost): "dhcp_relay not started.") -def send_dhcp_discover_packets(duthost, ptfadapter, packets_to_send=5, interval=1): +def send_dhcp_discover_packets(duthost, tbinfo, ptfadapter, packets_to_send=5, interval=1): py_assert(wait_until(100, 10, 0, duthost.is_service_fully_started, "dhcp_relay"), "dhcp_relay container not started") diff --git a/tests/telemetry/events/eventd_events.py b/tests/telemetry/events/eventd_events.py index 366230d14f..5fdd659142 100644 --- a/tests/telemetry/events/eventd_events.py +++ b/tests/telemetry/events/eventd_events.py @@ -8,7 +8,7 @@ tag = "sonic-events-eventd" -def test_event(duthost, gnxi_path, ptfhost, ptfadapter, data_dir, validate_yang): +def test_event(duthost, tbinfo, gnxi_path, ptfhost, ptfadapter, data_dir, validate_yang): logger.info("Beginning to test eventd heartbeat") - run_test(duthost, gnxi_path, ptfhost, data_dir, validate_yang, None, + run_test(duthost, tbinfo, gnxi_path, ptfhost, data_dir, validate_yang, None, "heartbeat.json", "sonic-events-eventd:heartbeat", tag, True) diff --git a/tests/telemetry/events/host_events.py b/tests/telemetry/events/host_events.py index 24cacc521d..c7204e44d4 100644 --- a/tests/telemetry/events/host_events.py +++ b/tests/telemetry/events/host_events.py @@ -13,11 +13,11 @@ tag = "sonic-events-host" -def test_event(duthost, gnxi_path, ptfhost, ptfadapter, data_dir, validate_yang): +def test_event(duthost, tbinfo, gnxi_path, ptfhost, ptfadapter, data_dir, validate_yang): logger.info("Beginning to test host events") - run_test(duthost, gnxi_path, ptfhost, data_dir, validate_yang, trigger_kernel_event, + run_test(duthost, tbinfo, gnxi_path, ptfhost, data_dir, validate_yang, trigger_kernel_event, "event_kernel.json", "sonic-events-host:event-kernel", tag, False) - run_test(duthost, gnxi_path, ptfhost, data_dir, validate_yang, kill_critical_process, + run_test(duthost, tbinfo, gnxi_path, ptfhost, data_dir, validate_yang, kill_critical_process, "process_exited_unexpectedly.json", "sonic-events-host:process-exited-unexpectedly", tag, False) backup_monit_config(duthost) @@ -29,17 +29,17 @@ def test_event(duthost, gnxi_path, ptfhost, ptfadapter, data_dir, validate_yang) ] ) try: - run_test(duthost, gnxi_path, ptfhost, data_dir, validate_yang, None, + run_test(duthost, tbinfo, gnxi_path, ptfhost, data_dir, validate_yang, None, "memory_usage.json", "sonic-events-host:memory-usage", tag, False) - run_test(duthost, gnxi_path, ptfhost, data_dir, validate_yang, None, + run_test(duthost, tbinfo, gnxi_path, ptfhost, data_dir, validate_yang, None, "disk_usage.json", "sonic-events-host:disk-usage", tag, False) - run_test(duthost, gnxi_path, ptfhost, data_dir, validate_yang, None, + run_test(duthost, tbinfo, gnxi_path, ptfhost, data_dir, validate_yang, None, "cpu_usage.json", "sonic-events-host:cpu-usage", tag, False) - run_test(duthost, gnxi_path, ptfhost, data_dir, validate_yang, trigger_mem_threshold_exceeded_alert, + run_test(duthost, tbinfo, gnxi_path, ptfhost, data_dir, validate_yang, trigger_mem_threshold_exceeded_alert, "mem_threshold_exceeded.json", "sonic-events-host:mem-threshold-exceeded", tag) - run_test(duthost, gnxi_path, ptfhost, data_dir, validate_yang, restart_container, + run_test(duthost, tbinfo, gnxi_path, ptfhost, data_dir, validate_yang, restart_container, "event_stopped_ctr.json", "sonic-events-host:event-stopped-ctr", tag, False) - run_test(duthost, gnxi_path, ptfhost, data_dir, validate_yang, stop_container, + run_test(duthost, tbinfo, gnxi_path, ptfhost, data_dir, validate_yang, stop_container, "event_down_ctr.json", "sonic-events-host:event-down-ctr", tag, False) finally: restore_monit_config(duthost) @@ -47,13 +47,13 @@ def test_event(duthost, gnxi_path, ptfhost, ptfadapter, data_dir, validate_yang) try: # We need to alot flat 60 seconds for watchdog timeout to fire since the timer is set to 60\ # With a base limit of 30 seconds, we will use 90 seconds - run_test(duthost, gnxi_path, ptfhost, data_dir, validate_yang, None, + run_test(duthost, tbinfo, gnxi_path, ptfhost, data_dir, validate_yang, None, "watchdog_timeout.json", "sonic-events-host:watchdog-timeout", tag, False, 90) finally: delete_test_watchdog_timeout_service(duthost) -def trigger_mem_threshold_exceeded_alert(duthost): +def trigger_mem_threshold_exceeded_alert(duthost, tbinfo): logger.info("Invoking memory checker with low threshold") cmd = "docker images | grep -w sonic-gnmi" if duthost.shell(cmd, module_ignore_errors=True)['rc'] == 0: @@ -62,7 +62,7 @@ def trigger_mem_threshold_exceeded_alert(duthost): duthost.shell("/usr/bin/memory_checker telemetry 100", module_ignore_errors=True) -def trigger_kernel_event(duthost): +def trigger_kernel_event(duthost, tbinfo): logger.info("Invoking logger for kernel events") # syslog at github.com/torvalds/linux/blob/master/fs/squashfs/decompressor_multi.c#L193 trigger_logger(duthost, "zlib decompression failed, data probably corrupt", "kernel") @@ -95,7 +95,7 @@ def get_critical_process(duthost): return "", "" -def restart_container(duthost): +def restart_container(duthost, tbinfo): logger.info("Stopping container for event stopped event") container = get_running_container(duthost) assert container != "", "No available container for testing" @@ -105,7 +105,7 @@ def restart_container(duthost): assert is_container_running, "{} not running after restart".format(container) -def stop_container(duthost): +def stop_container(duthost, tbinfo): logger.info("Stop container for event down event") container = get_running_container(duthost) assert container != "", "No available container for testing" @@ -125,7 +125,7 @@ def stop_container(duthost): duthost.shell("systemctl restart {}".format(container)) -def kill_critical_process(duthost): +def kill_critical_process(duthost, tbinfo): logger.info("Killing critical process for exited unexpectedly event") pid, container = get_critical_process(duthost) assert pid != "", "No available process for testing" diff --git a/tests/telemetry/events/run_events_test.py b/tests/telemetry/events/run_events_test.py index ba9c6b4fa0..edc8bf175d 100644 --- a/tests/telemetry/events/run_events_test.py +++ b/tests/telemetry/events/run_events_test.py @@ -10,14 +10,14 @@ logger = logging.getLogger(__name__) -def run_test(duthost, gnxi_path, ptfhost, data_dir, validate_yang, trigger, json_file, +def run_test(duthost, tbinfo, gnxi_path, ptfhost, data_dir, validate_yang, trigger, json_file, filter_event_regex, tag, heartbeat=False, timeout=30, ptfadapter=None): op_file = os.path.join(data_dir, json_file) if trigger is not None: # no trigger for heartbeat if ptfadapter is None: - trigger(duthost) # add events to cache + trigger(duthost, tbinfo) # add events to cache else: - trigger(duthost, ptfadapter) + trigger(duthost, tbinfo, ptfadapter) listen_for_events(duthost, gnxi_path, ptfhost, filter_event_regex, op_file, timeout) # listen from cache data = {} diff --git a/tests/telemetry/events/swss_events.py b/tests/telemetry/events/swss_events.py index dd2b6572a4..7aa5bbb13e 100644 --- a/tests/telemetry/events/swss_events.py +++ b/tests/telemetry/events/swss_events.py @@ -29,21 +29,21 @@ WAIT_TIME = 3 -def test_event(duthost, gnxi_path, ptfhost, ptfadapter, data_dir, validate_yang): +def test_event(duthost, tbinfo, gnxi_path, ptfhost, ptfadapter, data_dir, validate_yang): if duthost.topo_type.lower() in ["m0", "mx"]: logger.info("Skipping swss events test on MGFX topologies") return logger.info("Beginning to test swss events") - run_test(duthost, gnxi_path, ptfhost, data_dir, validate_yang, shutdown_interface, + run_test(duthost, tbinfo, gnxi_path, ptfhost, data_dir, validate_yang, shutdown_interface, "if_state.json", "sonic-events-swss:if-state", tag) if duthost.facts["hwsku"] not in LOSSY_ONLY_HWSKUS: - run_test(duthost, gnxi_path, ptfhost, data_dir, validate_yang, generate_pfc_storm, + run_test(duthost, tbinfo, gnxi_path, ptfhost, data_dir, validate_yang, generate_pfc_storm, "pfc_storm.json", "sonic-events-swss:pfc-storm", tag) - run_test(duthost, gnxi_path, ptfhost, data_dir, validate_yang, trigger_crm_threshold_exceeded, + run_test(duthost, tbinfo, gnxi_path, ptfhost, data_dir, validate_yang, trigger_crm_threshold_exceeded, "chk_crm_threshold.json", "sonic-events-swss:chk_crm_threshold", tag) -def shutdown_interface(duthost): +def shutdown_interface(duthost, tbinfo): logger.info("Shutting down interface") interfaces = duthost.get_interfaces_status() pattern = re.compile(r'^Ethernet[0-9]{1,2}$') @@ -67,7 +67,7 @@ def shutdown_interface(duthost): wait_until(15, 1, 0, verify_port_admin_oper_status, duthost, if_state_test_port, "up") -def generate_pfc_storm(duthost): +def generate_pfc_storm(duthost, tbinfo): logger.info("Generating pfc storm") interfaces = duthost.get_interfaces_status() pattern = re.compile(r'^Ethernet[0-9]{1,2}$') @@ -89,7 +89,7 @@ def generate_pfc_storm(duthost): format(queue_oid)) -def trigger_crm_threshold_exceeded(duthost): +def trigger_crm_threshold_exceeded(duthost, tbinfo): logger.info("Triggering crm threshold exceeded") duthost.shell("crm config polling interval {}".format(CRM_TEST_POLLING_INTERVAL)) diff --git a/tests/telemetry/test_events.py b/tests/telemetry/test_events.py index 198fcd52a0..c7dee07752 100644 --- a/tests/telemetry/test_events.py +++ b/tests/telemetry/test_events.py @@ -35,8 +35,9 @@ def validate_yang(duthost, op_file="", yang_file=""): @pytest.mark.parametrize('setup_streaming_telemetry', [False], indirect=True) @pytest.mark.disable_loganalyzer -def test_events(duthosts, enum_rand_one_per_hwsku_hostname, ptfhost, ptfadapter, setup_streaming_telemetry, gnxi_path, - test_eventd_healthy, toggle_all_simulator_ports_to_enum_rand_one_per_hwsku_host_m, # noqa F811 +def test_events(duthosts, tbinfo, enum_rand_one_per_hwsku_hostname, ptfhost, ptfadapter, + setup_streaming_telemetry, gnxi_path, test_eventd_healthy, + toggle_all_simulator_ports_to_enum_rand_one_per_hwsku_host_m, # noqa F811 setup_standby_ports_on_non_enum_rand_one_per_hwsku_host_m): # noqa F811 """ Run series of events inside duthost and validate that output is correct and conforms to YANG schema""" @@ -50,7 +51,7 @@ def test_events(duthosts, enum_rand_one_per_hwsku_hostname, ptfhost, ptfadapter, if file.endswith("_events.py") and not file.endswith("eventd_events.py"): module = __import__(file[:len(file)-3]) try: - module.test_event(duthost, gnxi_path, ptfhost, ptfadapter, DATA_DIR, validate_yang) + module.test_event(duthost, tbinfo, gnxi_path, ptfhost, ptfadapter, DATA_DIR, validate_yang) except pytest.skip.Exception as e: logger.info("Skipping test file: {} due to {}".format(file, e)) continue