From 0a4261f7d1a4c9296e0090e949f1c83c30cadded Mon Sep 17 00:00:00 2001 From: Jonas Gorski Date: Mon, 18 Sep 2023 15:50:34 +0200 Subject: [PATCH 1/3] nl_bridge::fdb_timeout(): check ifindex of hit before deletion nl_cache_search() uses nl_object_identical()[1] to lookup the target object. This limits the check to unique attributes, which does not include the ifindex [2]. Therefore nl_cache_search() may return an object for a different port, in case it moved, which we then may wrongly remove from the cache. So add a check for the ifindex of the hit before attempting to delete any neighbors or cache entries. We could have also alternatively used nl_cache_find() which uses all attributes, but this function iterates over the whole cache intead of using the hashtable if possible, so would be much less performant. [1] https://github.com/thom311/libnl/blob/libnl3_7_0/lib/cache.c#L1113-L1114 [2] https://github.com/thom311/libnl/blob/libnl3_7_0/lib/route/neigh.c#L323 Fixes: 291591d6f313 ("l2 aging added") Signed-off-by: Jonas Gorski (cherry picked from commit a566d4caa1b0f8a05e179f7fc4d1196740bd3eb5) Signed-off-by: Jonas Gorski --- src/netlink/nl_bridge.cc | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/netlink/nl_bridge.cc b/src/netlink/nl_bridge.cc index 57305966..c5f9f450 100644 --- a/src/netlink/nl_bridge.cc +++ b/src/netlink/nl_bridge.cc @@ -824,20 +824,19 @@ int nl_bridge::fdb_timeout(rtnl_link *br_link, uint16_t vid, std::unique_ptr h_src( nl_addr_build(AF_LLC, mac.somem(), mac.memlen()), nl_addr_put); - rtnl_neigh_set_ifindex(n.get(), rtnl_link_get_ifindex(br_link)); rtnl_neigh_set_master(n.get(), rtnl_link_get_master(br_link)); rtnl_neigh_set_family(n.get(), AF_BRIDGE); rtnl_neigh_set_vlan(n.get(), vid); rtnl_neigh_set_lladdr(n.get(), h_src.get()); rtnl_neigh_set_flags(n.get(), NTF_MASTER | NTF_EXT_LEARNED); - rtnl_neigh_set_state(n.get(), NUD_REACHABLE); // find entry in local l2_cache std::unique_ptr n_lookup( NEIGH_CAST(nl_cache_search(l2_cache.get(), OBJ_CAST(n.get()))), rtnl_neigh_put); - if (n_lookup) { + if (n_lookup && rtnl_neigh_get_ifindex(n_lookup.get()) == + rtnl_link_get_ifindex(br_link)) { // * remove l2 entry from kernel nl_msg *msg = nullptr; rtnl_neigh_build_delete_request(n.get(), NLM_F_REQUEST, &msg); From da2efd95d9366897eb4d9707a5e097346a0aa607 Mon Sep 17 00:00:00 2001 From: Jonas Gorski Date: Mon, 18 Sep 2023 16:22:00 +0200 Subject: [PATCH 2/3] nl_bridge::add_neigh_to_fdb(): remove old cache item on update Calling nl_cache_add() for an object cache that supports hashing will refuse to add an object if it already contains an "identical" object, where identical is considered having the same master, vid and mac for AF_BRIDGE neighbors. Therefore we need to remove the old entry first before we can add an updated one when we handle neighbors moving between ports. Without it, we may fail to update the flow if the port moves back again to its original port, as the outdated entry makes us think we already have a flow for it. Fixes: de9b16eea7e4 ("nl_bridge: take over fdb entries instead of creating them") Signed-off-by: Jonas Gorski (cherry picked from commit 82f59be0f832c03485d75a8a10a97076dec0ce20) Signed-off-by: Jonas Gorski --- src/netlink/nl_bridge.cc | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/netlink/nl_bridge.cc b/src/netlink/nl_bridge.cc index c5f9f450..0390997d 100644 --- a/src/netlink/nl_bridge.cc +++ b/src/netlink/nl_bridge.cc @@ -722,8 +722,11 @@ void nl_bridge::add_neigh_to_fdb(rtnl_neigh *neigh, bool update) { NEIGH_CAST(nl_cache_search(l2_cache.get(), OBJ_CAST(n.get()))), rtnl_neigh_put); - if (n_lookup && rtnl_neigh_get_ifindex(n_lookup.get()) == ifindex) { - return; + if (n_lookup) { + if (rtnl_neigh_get_ifindex(n_lookup.get()) == ifindex) + return; + + nl_cache_remove(OBJ_CAST(n_lookup.get())); } rtnl_neigh_set_ifindex(n.get(), ifindex); From 1ac2438f2611f55e68afc3ee6b0cfa1c877f7ba6 Mon Sep 17 00:00:00 2001 From: Jonas Gorski Date: Thu, 7 Mar 2024 12:20:13 +0100 Subject: [PATCH 3/3] nl_bridge: fix removing aged out neighbors from fdb When fixing moving neighbors between ports we removed the ifindex from the filter neigh to find a neighbor at any port with the mac address. But since we use the same neigh to construct the fdb delete message, this caused the message to miss the ifindex, and the kernel was rejecting the deletion. This resulting in us not updating out local cache of known layer 2 neighbors, and failing to relearn them. Fix this by just using the found entry from our local cache, n_lookup. Fixes: a566d4caa1b0 ("nl_bridge::fdb_timeout(): check ifindex of hit before deletion") Signed-off-by: Jonas Gorski (cherry picked from commit 09bf1a6beb829f2b103c2affc99dc4cfb3083e4f) Signed-off-by: Jonas Gorski --- src/netlink/nl_bridge.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/netlink/nl_bridge.cc b/src/netlink/nl_bridge.cc index 0390997d..c5e7b3ac 100644 --- a/src/netlink/nl_bridge.cc +++ b/src/netlink/nl_bridge.cc @@ -842,7 +842,7 @@ int nl_bridge::fdb_timeout(rtnl_link *br_link, uint16_t vid, rtnl_link_get_ifindex(br_link)) { // * remove l2 entry from kernel nl_msg *msg = nullptr; - rtnl_neigh_build_delete_request(n.get(), NLM_F_REQUEST, &msg); + rtnl_neigh_build_delete_request(n_lookup.get(), NLM_F_REQUEST, &msg); assert(msg); // send the message and create new fdb entry