Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
64 changes: 55 additions & 9 deletions cfgmgr/vrfmgr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@
#define TABLE_LOCAL_PREF 1001 // after l3mdev-table
#define MGMT_VRF_TABLE_ID 5000
#define MGMT_VRF "mgmt"
#define VRF_DELETE_BATCH_SIZE_INITIAL 1
#define VRF_DELETE_BATCH_SIZE_MAX 500

using namespace swss;

Expand Down Expand Up @@ -47,6 +49,7 @@ VrfMgr::VrfMgr(DBConnector *cfgDb, DBConnector *appDb, DBConnector *stateDb, con
string vrfName;
uint32_t table;
IpShowRowType rowType = LINK_ROW;
vector<string> vrfsToDelete;
const auto& rows = tokenize(res, '\n');
for (const auto& row : rows)
{
Expand Down Expand Up @@ -78,21 +81,64 @@ VrfMgr::VrfMgr(DBConnector *cfgDb, DBConnector *appDb, DBConnector *stateDb, con
break;
}

SWSS_LOG_NOTICE("Remove vrf device %s", vrfName.c_str());
cmd.str("");
cmd.clear();
cmd << IP_CMD << " link del " << vrfName;
int ret = swss::exec(cmd.str(), res);
if (ret)
{
SWSS_LOG_ERROR("Command '%s' failed with rc %d", cmd.str().c_str(), ret);
}
vrfsToDelete.push_back(vrfName);
}
rowType = LINK_ROW;
break;
}
}

if (!vrfsToDelete.empty())
{
const size_t total_vrfs = vrfsToDelete.size();
size_t processed = 0;
vector<string> batch_vrfs;
unsigned int current_batch_size = VRF_DELETE_BATCH_SIZE_INITIAL;
batch_vrfs.reserve(VRF_DELETE_BATCH_SIZE_MAX);
int batch_num = 0;

for (const auto& vrf : vrfsToDelete)
{
batch_vrfs.push_back(vrf);

if (batch_vrfs.size() >= current_batch_size || processed + batch_vrfs.size() >= total_vrfs)
{
batch_num++;

cmd.str("");
cmd.clear();
cmd << BASH_CMD << " -c '(";

for (size_t i = 0; i < batch_vrfs.size(); i++)
{
if (i > 0) cmd << " ";
cmd << IP_CMD << " link del " << shellquote(batch_vrfs[i])
<< " 2>/dev/null &";
}
cmd << " wait) 2>&1'";

SWSS_LOG_NOTICE("Batch %d: deleting %zu VRFs in parallel (batch_size=%u, progress: %zu/%zu)",
batch_num, batch_vrfs.size(), current_batch_size,
processed + batch_vrfs.size(), total_vrfs);

int ret = swss::exec(cmd.str(), res);
if (ret != 0 && !res.empty())
{
SWSS_LOG_WARN("Batch %d deletion errors: %s", batch_num, res.c_str());
}

processed += batch_vrfs.size();
batch_vrfs.clear();

unsigned int increment = std::max(1U, current_batch_size >> 2);
Copy link

@anish-n anish-n Feb 14, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just curious what is the value of doing it in increments, why don't we just set the batch size to some static number like 100 and do it in a batch like that? I see that we have stated there is startup netlink congestion if we batch too soon, have we seen performance issues if we batch too soon?

current_batch_size = std::min(current_batch_size + increment, static_cast<unsigned int>(VRF_DELETE_BATCH_SIZE_MAX));
}
}

SWSS_LOG_NOTICE("Completed deletion of %zu VRF devices in %d batches",
total_vrfs, batch_num);
}

cmd.str("");
cmd.clear();
cmd << IP_CMD << " rule | grep '^0:'";
Expand Down
76 changes: 62 additions & 14 deletions cfgmgr/vxlanmgr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,8 @@ extern MacAddress gMacAddress;
#define VLAN "vlan"
#define DST_IP "dst_ip"
#define SOURCE_VTEP "source_vtep"
#define VXLAN_DELETE_BATCH_SIZE_INITIAL 1
#define VXLAN_DELETE_BATCH_SIZE_MAX 500

static std::string getVxlanName(const swss::VxlanMgr::VxlanInfo & info)
{
Expand Down Expand Up @@ -1216,26 +1218,72 @@ void VxlanMgr::restoreVxlanNetDevices()

void VxlanMgr::clearAllVxlanDevices()
{
    // Delete every stale vxlan/vxlan-if netdev tracked in m_vxlanNetDevices.
    // Deletions are issued in parallel shell batches; the batch size ramps up
    // from VXLAN_DELETE_BATCH_SIZE_INITIAL to VXLAN_DELETE_BATCH_SIZE_MAX to
    // avoid netlink congestion at startup.
    if (m_vxlanNetDevices.empty())
    {
        SWSS_LOG_NOTICE("No vxlan devices to clear");
        return;
    }

    const size_t total_devices = m_vxlanNetDevices.size();
    size_t processed = 0;
    std::vector<std::string> batch_devices;
    unsigned int current_batch_size = VXLAN_DELETE_BATCH_SIZE_INITIAL;
    batch_devices.reserve(VXLAN_DELETE_BATCH_SIZE_MAX);
    int batch_num = 0;

    // Run the currently accumulated commands as one "bash -c 'cmd & cmd & wait'"
    // invocation, then grow the batch size for the next round. Safe to call
    // with an empty batch (no-op), so we can invoke it once more after the
    // loop to guarantee a trailing partial batch is never left unexecuted.
    auto flush_batch = [&]()
    {
        if (batch_devices.empty())
        {
            return;
        }
        batch_num++;

        std::ostringstream batch_cmd;
        batch_cmd << BASH_CMD << " -c '";
        for (size_t i = 0; i < batch_devices.size(); ++i)
        {
            if (i > 0) batch_cmd << " ";
            batch_cmd << batch_devices[i];
        }
        batch_cmd << " wait'";

        SWSS_LOG_NOTICE("Batch %d: deleting %zu devices in parallel (batch_size=%u, progress: %zu/%zu)",
                        batch_num, batch_devices.size(), current_batch_size,
                        processed + batch_devices.size(), total_devices);

        std::string res;
        int ret = swss::exec(batch_cmd.str(), res);
        if (ret != 0)
        {
            SWSS_LOG_WARN("Batch %d deletion errors: %s", batch_num, res.c_str());
        }

        processed += batch_devices.size();
        batch_devices.clear();

        // Additive-increase ramp: grow by 25% (at least 1) up to the cap.
        unsigned int increment = std::max(1U, current_batch_size >> 2);
        current_batch_size = std::min(current_batch_size + increment,
                                      static_cast<unsigned int>(VXLAN_DELETE_BATCH_SIZE_MAX));
    };

    for (const auto& device : m_vxlanNetDevices)
    {
        const std::string& netdev_name = device.first;
        const std::string& netdev_type = device.second;

        std::string dev_cmd;
        if (netdev_type == VXLAN)
        {
            // Bring the device down first, then delete it; "|| true" keeps the
            // batch running even if the device already disappeared.
            dev_cmd = "(" + std::string(IP_CMD) + " link set dev " + shellquote(netdev_name) +
                      " down 2>/dev/null || true; " + std::string(IP_CMD) + " link del dev " +
                      shellquote(netdev_name) + " 2>/dev/null || true) &";
        }
        else if (netdev_type == VXLAN_IF)
        {
            dev_cmd = "(" + std::string(IP_CMD) + " link del " + shellquote(netdev_name) +
                      " 2>/dev/null || true) &";
        }
        else
        {
            // Unknown type: don't push an empty command into the batch;
            // count it as processed so the progress totals stay accurate.
            SWSS_LOG_WARN("Skipping netdev %s with unknown type %s",
                          netdev_name.c_str(), netdev_type.c_str());
            processed++;
            continue;
        }

        batch_devices.push_back(dev_cmd);

        if (batch_devices.size() >= current_batch_size)
        {
            flush_batch();
        }
    }

    // Execute any remaining partial batch.
    flush_batch();

    m_vxlanNetDevices.clear();
    SWSS_LOG_NOTICE("Cleared %zu vxlan devices in %d batches", total_devices, batch_num);
}

void VxlanMgr::disableLearningForAllVxlanNetdevices()
Expand Down
43 changes: 43 additions & 0 deletions tests/test_vrf.py
Original file line number Diff line number Diff line change
Expand Up @@ -248,6 +248,49 @@ def test_VRFMgr_Update(self, dvs, testlog):

self.vrf_remove(dvs, "Vrf_a", state)

def test_VRFMgr_ConstructorCleanup(self, dvs, testlog):
    """Test that vrfmgrd constructor cleans up stale VRF devices on restart.

    Stale VRF netdevs are created directly in the kernel (bypassing
    CONFIG_DB) so they look like leftovers from a previous vrfmgrd run;
    after restarting vrfmgrd, the kernel VRF count must return to its
    initial value.
    """
    self.setup_db(dvs)

    num_stale_vrfs = 100

    def count_kernel_vrfs():
        # Count VRF netdevs in the kernel. `grep -c` exits non-zero when
        # there are no matches, so `|| true` keeps the command successful;
        # an empty output is treated as zero.
        _, output = dvs.runcmd(['sh', '-c',
            "ip link show type vrf | grep -c '^[0-9]' || true"])
        return int(output.strip()) if output.strip() else 0

    # Collect initial VRF count
    initial_vrf_count = count_kernel_vrfs()

    # Create stale VRF devices directly in Linux
    # These simulate leftover devices from a previous run
    for i in range(num_stale_vrfs):
        vrf_name = f"StaleVrf{i}"
        table_id = 2000 + i
        (exitcode, _) = dvs.runcmd(['sh', '-c',
            f"ip link add {vrf_name} type vrf table {table_id}"])
        assert exitcode == 0, f"Failed to create stale VRF {vrf_name}"

    # Verify stale VRFs were created in kernel
    current_vrf_count = count_kernel_vrfs()
    expected_vrf_count = initial_vrf_count + num_stale_vrfs
    assert current_vrf_count == expected_vrf_count, \
        f"Expected {expected_vrf_count} VRFs, found {current_vrf_count}"

    # Kill vrfmgrd process to trigger restart
    dvs.runcmd(['sh', '-c', "pkill -9 vrfmgrd"])
    time.sleep(1)

    # Restart vrfmgrd via supervisorctl; its constructor runs the
    # stale-device cleanup path
    dvs.runcmd(['sh', '-c', "supervisorctl restart vrfmgrd"])
    time.sleep(10)

    # Verify all stale VRFs were deleted
    final_vrf_count = count_kernel_vrfs()
    assert final_vrf_count == initial_vrf_count, \
        f"Expected {initial_vrf_count} VRFs after cleanup, found {final_vrf_count}"

@pytest.mark.xfail(reason="Test unstable, blocking PR builds")
def test_VRFMgr_Capacity(self, dvs, testlog):
self.setup_db(dvs)
Expand Down
74 changes: 74 additions & 0 deletions tests/test_vxlan_tunnel.py
Original file line number Diff line number Diff line change
Expand Up @@ -399,6 +399,80 @@ def test_vnet_cleanup_config_reload(dvs, env_setup):
assert "Vxlan1" in stdout
assert "Brvxlan1" in stdout

def test_vxlanmgr_constructor_cleanup(dvs, env_setup):
    """Test that vxlanmgrd constructor cleans up stale VXLAN and VXLAN_IF devices on restart.

    Stale vxlan and bridge netdevs are created directly in the kernel
    (bypassing CONFIG_DB) so they look like leftovers from a previous
    vxlanmgrd run; after restarting vxlanmgrd and reapplying the config,
    both device counts must return to their initial values.
    """

    num_stale_devices = 100

    def count_links(link_type):
        # Count kernel netdevs of the given type. `grep -c` exits non-zero
        # when there are no matches, so `|| true` keeps the command
        # successful; an empty output is treated as zero.
        _, output = dvs.runcmd(['sh', '-c',
            f"ip link show type {link_type} | grep -c '^[0-9]' || true"])
        return int(output.strip()) if output.strip() else 0

    # Collect initial VXLAN and VXLAN_IF (bridge) device counts
    initial_vxlan_count = count_links("vxlan")
    initial_bridge_count = count_links("bridge")

    # Create stale VXLAN devices directly in Linux
    # These simulate leftover devices from a previous run
    for i in range(num_stale_devices):
        vxlan_name = f"StaleVxlan{i}"
        vni = 10000 + i
        ret, _ = dvs.runcmd(['sh', '-c',
            f"ip link add {vxlan_name} type vxlan id {vni} dstport 4789 nolearning"])
        assert ret == 0, f"Failed to create stale VXLAN {vxlan_name}"

    # Create stale VXLAN_IF (bridge) devices
    for i in range(num_stale_devices):
        bridge_name = f"Brvxlan{1000 + i}"
        ret, _ = dvs.runcmd(['sh', '-c',
            f"ip link add {bridge_name} type bridge"])
        assert ret == 0, f"Failed to create stale bridge {bridge_name}"

    # Verify stale VXLANs were created in kernel
    current_vxlan_count = count_links("vxlan")
    expected_vxlan_count = initial_vxlan_count + num_stale_devices
    assert current_vxlan_count == expected_vxlan_count, \
        f"Expected {expected_vxlan_count} VXLANs, found {current_vxlan_count}"

    # Verify stale bridges were created in kernel
    current_bridge_count = count_links("bridge")
    expected_bridge_count = initial_bridge_count + num_stale_devices
    assert current_bridge_count == expected_bridge_count, \
        f"Expected {expected_bridge_count} bridges, found {current_bridge_count}"

    # Kill vxlanmgrd process to trigger restart
    dvs.runcmd(['sh', '-c', "pkill -9 vxlanmgrd"])
    time.sleep(1)

    # Restart vxlanmgrd via supervisorctl; its constructor runs the
    # stale-device cleanup path
    dvs.runcmd(['sh', '-c', "supervisorctl restart vxlanmgrd"])

    # Reapply cfg to restore legitimate devices
    cfg_db = swsscommon.DBConnector(swsscommon.CONFIG_DB, dvs.redis_sock, 0)
    apply_test_vnet_cfg(cfg_db)

    time.sleep(20)

    # Verify all stale VXLANs were deleted
    final_vxlan_count = count_links("vxlan")
    assert final_vxlan_count == initial_vxlan_count, \
        f"Expected {initial_vxlan_count} VXLANs after cleanup, found {final_vxlan_count}"

    # Verify all stale bridges were deleted
    final_bridge_count = count_links("bridge")
    assert final_bridge_count == initial_bridge_count, \
        f"Expected {initial_bridge_count} bridges after cleanup, found {final_bridge_count}"

# Add a dummy always-pass test at the end as a workaround
# for an issue where, if Flaky fails on the final test, it invokes module tear-down before retrying
def test_nonflaky_dummy():
Expand Down