diff --git a/cni/network/network.go b/cni/network/network.go index 1ec1666f45..1b895f1c96 100644 --- a/cni/network/network.go +++ b/cni/network/network.go @@ -18,7 +18,6 @@ import ( "github.com/Azure/azure-container-networking/cni/util" "github.com/Azure/azure-container-networking/cns" cnscli "github.com/Azure/azure-container-networking/cns/client" - "github.com/Azure/azure-container-networking/cns/fsnotify" "github.com/Azure/azure-container-networking/common" "github.com/Azure/azure-container-networking/dhcp" "github.com/Azure/azure-container-networking/iptables" @@ -716,7 +715,7 @@ func (plugin *NetPlugin) createEpInfo(opt *createEpInfoOpt) (*network.EndpointIn *opt.infraSeen = true } else { ifName = "eth" + strconv.Itoa(opt.endpointIndex) - endpointID = plugin.nm.GetEndpointID(opt.args.ContainerID, ifName) + endpointID = plugin.nm.GetEndpointIDByNicType(opt.args.ContainerID, ifName, opt.ifInfo.NICType) } endpointInfo := network.EndpointInfo{ @@ -1065,36 +1064,11 @@ func (plugin *NetPlugin) Delete(args *cniSkel.CmdArgs) error { } logger.Info("Retrieved network info, populating endpoint infos with container id", zap.String("containerID", args.ContainerID)) - var epInfos []*network.EndpointInfo - if plugin.nm.IsStatelessCNIMode() { - // network ID is passed in and used only for migration - // otherwise, in stateless, we don't need the network id for deletion - epInfos, err = plugin.nm.GetEndpointState(networkID, args.ContainerID) - // if stateless CNI fail to get the endpoint from CNS for any reason other than Endpoint Not found - if err != nil { - if errors.Is(err, network.ErrConnectionFailure) { - logger.Info("failed to connect to CNS", zap.String("containerID", args.ContainerID), zap.Error(err)) - addErr := fsnotify.AddFile(args.ContainerID, args.ContainerID, watcherPath) - logger.Info("add containerid file for Asynch delete", zap.String("containerID", args.ContainerID), zap.Error(addErr)) - if addErr != nil { - logger.Error("failed to add file to watcher", zap.String("containerID", args.ContainerID), zap.Error(addErr)) - return errors.Wrap(addErr, fmt.Sprintf("failed to add file to watcher with containerID %s", args.ContainerID)) - } - return nil - } - if errors.Is(err, network.ErrEndpointStateNotFound) { - logger.Info("Endpoint Not found", zap.String("containerID", args.ContainerID), zap.Error(err)) - return nil - } - logger.Error("Get Endpoint State API returned error", zap.String("containerID", args.ContainerID), zap.Error(err)) - return plugin.RetriableError(fmt.Errorf("failed to delete endpoint: %w", err)) - } - } else { - epInfos = plugin.nm.GetEndpointInfosFromContainerID(args.ContainerID) + epInfos, err := plugin.nm.GetEndpoint(networkID, args) + if err != nil { + return plugin.RetriableError(fmt.Errorf("failed to retrieve endpoint: %w", err)) } - - // for when the endpoint is not created, but the ips are already allocated (only works if single network, single infra) - // this block is not applied to stateless CNI + // when the endpoint is not created, but the ips are already allocated (only works if single network, single infra) if len(epInfos) == 0 { endpointID := plugin.nm.GetEndpointID(args.ContainerID, args.IfName) if !nwCfg.MultiTenancy { @@ -1120,7 +1094,7 @@ func (plugin *NetPlugin) Delete(args *cniSkel.CmdArgs) error { if err = plugin.nm.DeleteEndpoint(epInfo.NetworkID, epInfo.EndpointID, epInfo); err != nil { // An error will not be returned if the endpoint is not found // return a retriable error so the container runtime will retry this DEL later - // the implementation of this function returns nil if the endpoint doens't exist, so + // the implementation of this function returns nil if the endpoint doesn't exist, so // we don't have to check that here return plugin.RetriableError(fmt.Errorf("failed to delete endpoint: %w", err)) } @@ -1132,15 +1106,25 @@ func (plugin *NetPlugin) Delete(args *cniSkel.CmdArgs) error { zap.String("endpointID", epInfo.EndpointID)) telemetryClient.SendEvent("Deleting endpoint: " + epInfo.EndpointID) + // Delegated/secondary nic ips are statically allocated so we don't need to release + // Call into IPAM plugin to release the endpoint's addresses. if !nwCfg.MultiTenancy && (epInfo.NICType == cns.InfraNIC || epInfo.NICType == "") { - // Delegated/secondary nic ips are statically allocated so we don't need to release - // Call into IPAM plugin to release the endpoint's addresses. - for i := range epInfo.IPAddresses { - logger.Info("Release ip", zap.String("ip", epInfo.IPAddresses[i].IP.String())) - telemetryClient.SendEvent(fmt.Sprintf("Release ip: %s container id: %s endpoint id: %s", epInfo.IPAddresses[i].IP.String(), args.ContainerID, epInfo.EndpointID)) - err = plugin.ipamInvoker.Delete(&epInfo.IPAddresses[i], nwCfg, args, nwInfo.Options) - if err != nil { - return plugin.RetriableError(fmt.Errorf("failed to release address: %w", err)) + // This is an special case for stateless CNI when Asychronous DEL to CNS will take place + // At this point the endpoint is already deleted in previous block and CNS will release the IP whenever it is up + if epInfo.IPAddresses == nil && plugin.nm.IsStatelessCNIMode() { + logger.Warn("Release ip Asynchronously by CNS", + zap.String("containerID", args.ContainerID)) + if err = plugin.ipamInvoker.Delete(nil, nwCfg, args, nwInfo.Options); err != nil { + return plugin.RetriableError(fmt.Errorf("failed to release address(no endpoint): %w", err)) + } + } else { + for i := range epInfo.IPAddresses { + logger.Info("Release ip", zap.String("ip", epInfo.IPAddresses[i].IP.String())) + telemetryClient.SendEvent(fmt.Sprintf("Release ip: %s container id: %s endpoint id: %s", epInfo.IPAddresses[i].IP.String(), args.ContainerID, epInfo.EndpointID)) + err = plugin.ipamInvoker.Delete(&epInfo.IPAddresses[i], nwCfg, args, nwInfo.Options) + if err != nil { + return plugin.RetriableError(fmt.Errorf("failed to release address: %w", err)) + } } } } else if epInfo.EnableInfraVnet { // remove in future PR diff --git a/network/endpoint_linux.go b/network/endpoint_linux.go index 5f57a66d51..64be117240 100644 --- a/network/endpoint_linux.go +++ b/network/endpoint_linux.go @@ -547,3 +547,29 @@ func getDefaultGateway(routes []RouteInfo) net.IP { func (epInfo *EndpointInfo) GetEndpointInfoByIPImpl(_ []net.IPNet, _ string) (*EndpointInfo, error) { return epInfo, nil } + +// getEndpointInfoByIfNameImpl returns an array of EndpointInfo for the given endpoint based on the IfName(s) found in the network namespace. +func (nm *networkManager) getEndpointInfoByIfNameImpl(ep *endpoint) ([]*EndpointInfo, error) { + epInfo := &EndpointInfo{ + EndpointID: ep.Id, + NetNsPath: ep.NetworkNameSpace, + NICType: cns.InfraNIC, + IfName: ep.IfName, // TODO: For stateless cni linux populate IfName here to use in deletion in secondary endpoint client + } + ret := []*EndpointInfo{} + ret = append(ret, epInfo) + logger.Info("Fetching Secondary Endpoint from", zap.String("NetworkNameSpace: ", ep.NetworkNameSpace)) + secondaryepClient := NewSecondaryEndpointClient(nil, nil, nil, nm.nsClient, nil, ep) + ifnames, err := secondaryepClient.FetchInterfacesFromNetnsPath(ep.IfName, ep.NetworkNameSpace) + if err != nil { + return nil, fmt.Errorf("failed to fetch secondary interfaces: %w", err) + } + for _, ifName := range ifnames { + ret = append(ret, &EndpointInfo{ + NetNsPath: ep.NetworkNameSpace, + IfName: ifName, + NICType: cns.NodeNetworkInterfaceFrontendNIC, + }) + } + return ret, nil +} diff --git a/network/endpoint_windows.go b/network/endpoint_windows.go index edd52327f2..087eee9228 100644 --- a/network/endpoint_windows.go +++ b/network/endpoint_windows.go @@ -746,3 +746,7 @@ func getPnpDeviceState(instanceID string, plc platform.ExecClient) (string, stri logger.Info("Retrieved device problem code", zap.String("code", devpkeyDeviceProblemCode)) return devpkeyDeviceIsPresent, devpkeyDeviceProblemCode, nil } + +func (nm *networkManager) getEndpointInfoByIfNameImpl(_ *endpoint) ([]*EndpointInfo, error) { + return nil, nil +} diff --git a/network/errors.go b/network/errors.go index c4c808357f..d7852076db 100644 --- a/network/errors.go +++ b/network/errors.go @@ -3,9 +3,11 @@ package network import "errors" var ( - errSubnetV6NotFound = errors.New("Couldn't find ipv6 subnet in network info") // nolint - errV6SnatRuleNotSet = errors.New("ipv6 snat rule not set. Might be VM ipv6 address missing") // nolint - ErrEndpointStateNotFound = errors.New("endpoint state could not be found in the statefile") - ErrConnectionFailure = errors.New("couldn't connect to CNS") - ErrGetEndpointStateFailure = errors.New("failure to obtain the endpoint state") + errSubnetV6NotFound = errors.New("Couldn't find ipv6 subnet in network info") // nolint + errV6SnatRuleNotSet = errors.New("ipv6 snat rule not set. Might be VM ipv6 address missing") // nolint + ErrEndpointStateNotFound = errors.New("endpoint state could not be found in the statefile") + ErrConnectionFailure = errors.New("couldn't connect to CNS") + ErrEndpointRemovalFailure = errors.New("Failed to remove endpoint") + ErrEndpointRetrievalFailure = errors.New("Failed to obtain endpoint") + ErrGetEndpointStateFailure = errors.New("failure to obtain the endpoint state") ) diff --git a/network/manager.go b/network/manager.go index 7bc1441fea..32fadf593d 100644 --- a/network/manager.go +++ b/network/manager.go @@ -5,6 +5,7 @@ package network import ( "context" + "fmt" "net" "sync" "time" @@ -19,6 +20,7 @@ import ( "github.com/Azure/azure-container-networking/netlink" "github.com/Azure/azure-container-networking/platform" "github.com/Azure/azure-container-networking/store" + cniSkel "github.com/containernetworking/cni/pkg/skel" "github.com/pkg/errors" "go.uber.org/zap" ) @@ -116,11 +118,13 @@ type NetworkManager interface { UpdateEndpoint(networkID string, existingEpInfo *EndpointInfo, targetEpInfo *EndpointInfo) error GetNumberOfEndpoints(ifName string, networkID string) int GetEndpointID(containerID, ifName string) string + GetEndpointIDByNicType(containerID, ifName string, nicType cns.NICType) string IsStatelessCNIMode() bool SaveState(eps []*endpoint) error DeleteState(epInfos []*EndpointInfo) error + GetEndpoint(networkID string, args *cniSkel.CmdArgs) ([]*EndpointInfo, error) GetEndpointInfosFromContainerID(containerID string) []*EndpointInfo - GetEndpointState(networkID, containerID string) ([]*EndpointInfo, error) + GetEndpointState(networkID, containerID, netns string) ([]*EndpointInfo, error) } // Creates a new network manager. @@ -455,7 +459,7 @@ func validateUpdateEndpointState(endpointID string, ifNameToIPInfoMap map[string // GetEndpointState will make a call to CNS GetEndpointState API in the stateless CNI mode to fetch the endpointInfo // TODO unit tests need to be added, WorkItem: 26606939 // In stateless cni, container id is the endpoint id, so you can pass in either -func (nm *networkManager) GetEndpointState(networkID, containerID string) ([]*EndpointInfo, error) { +func (nm *networkManager) GetEndpointState(networkID, containerID, netns string) ([]*EndpointInfo, error) { endpointResponse, err := nm.CnsClient.GetEndpoint(context.TODO(), containerID) if err != nil { if endpointResponse.Response.ReturnCode == types.NotFound { @@ -466,7 +470,7 @@ func (nm *networkManager) GetEndpointState(networkID, containerID string) ([]*En } return nil, ErrGetEndpointStateFailure } - epInfos := cnsEndpointInfotoCNIEpInfos(endpointResponse.EndpointInfo, containerID) + epInfos := cnsEndpointInfotoCNIEpInfos(endpointResponse.EndpointInfo, containerID, netns) for i := 0; i < len(epInfos); i++ { if epInfos[i].NICType == cns.InfraNIC { @@ -514,7 +518,7 @@ func (nm *networkManager) DeleteEndpointState(networkID string, epInfo *Endpoint nw := &network{ Id: networkID, // currently unused in stateless cni HnsId: epInfo.HNSNetworkID, - Mode: opModeTransparentVlan, + Mode: opModeTransparent, SnatBridgeIP: "", NetNs: dummyGUID, // to trigger hns v2, windows extIf: &externalInterface{ @@ -529,6 +533,7 @@ func (nm *networkManager) DeleteEndpointState(networkID string, epInfo *Endpoint HNSNetworkID: epInfo.HNSNetworkID, // unused (we use nw.HnsId for deleting the network) HostIfName: epInfo.HostIfName, LocalIP: "", + IPAddresses: epInfo.IPAddresses, VlanID: 0, AllowInboundFromHostToNC: false, // stateless currently does not support apipa AllowInboundFromNCToHost: false, @@ -537,11 +542,12 @@ func (nm *networkManager) DeleteEndpointState(networkID string, epInfo *Endpoint NetworkContainerID: epInfo.NetworkContainerID, // we don't use this as long as AllowInboundFromHostToNC and AllowInboundFromNCToHost are false NetNs: dummyGUID, // to trigger hnsv2, windows NICType: epInfo.NICType, + NetworkNameSpace: epInfo.NetNsPath, IfName: epInfo.IfName, // TODO: For stateless cni linux populate IfName here to use in deletion in secondary endpoint client } logger.Info("Deleting endpoint with", zap.String("Endpoint Info: ", epInfo.PrettyString()), zap.String("HNISID : ", ep.HnsId)) - err := nw.deleteEndpointImpl(netlink.NewNetlink(), platform.NewExecClient(logger), nil, nil, nil, nil, nil, ep) + err := nw.deleteEndpointImpl(nm.netlink, nm.plClient, nil, nm.netio, nm.nsClient, nm.iptablesClient, nm.dhcpClient, ep) if err != nil { return err } @@ -562,7 +568,7 @@ func (nm *networkManager) GetEndpointInfo(networkID, endpointID string) (*Endpoi if nm.IsStatelessCNIMode() { logger.Info("calling cns getEndpoint API") - epInfos, err := nm.GetEndpointState(networkID, endpointID) + epInfos, err := nm.GetEndpointState(networkID, endpointID, "") if err != nil { return nil, err } @@ -745,6 +751,16 @@ func (nm *networkManager) GetEndpointID(containerID, ifName string) string { return containerID + "-" + ifName } +// GetEndpointIDByNicType returns a unique endpoint ID based on the CNI mode and NIC type. +func (nm *networkManager) GetEndpointIDByNicType(containerID, ifName string, nicType cns.NICType) string { + // For stateless CNI, secondary NICs use containerID-ifName as endpointID. + if nm.IsStatelessCNIMode() && nicType != cns.InfraNIC { + return containerID + "-" + ifName + } + // For InfraNIC, use GetEndpointID() logic. + return nm.GetEndpointID(containerID, ifName) +} + // saves the map of network ids to endpoints to the state file func (nm *networkManager) SaveState(eps []*endpoint) error { nm.Lock() @@ -779,7 +795,7 @@ func (nm *networkManager) DeleteState(_ []*EndpointInfo) error { } // called to convert a cns restserver EndpointInfo into a network EndpointInfo -func cnsEndpointInfotoCNIEpInfos(endpointInfo restserver.EndpointInfo, endpointID string) []*EndpointInfo { +func cnsEndpointInfotoCNIEpInfos(endpointInfo restserver.EndpointInfo, endpointID, netns string) []*EndpointInfo { ret := []*EndpointInfo{} for ifName, ipInfo := range endpointInfo.IfnameToIPMap { @@ -809,6 +825,10 @@ func cnsEndpointInfotoCNIEpInfos(endpointInfo restserver.EndpointInfo, endpointI epInfo.NICType = ipInfo.NICType epInfo.HNSNetworkID = ipInfo.HnsNetworkID epInfo.MacAddress = net.HardwareAddr(ipInfo.MacAddress) + // fill out the netns if it is empty via args passed by container runtime + if epInfo.NetNsPath == "" { + epInfo.NetNsPath = netns + } ret = append(ret, epInfo) } return ret @@ -847,3 +867,54 @@ func generateCNSIPInfoMap(eps []*endpoint) map[string]*restserver.IPInfo { return ifNametoIPInfoMap } + +func (nm *networkManager) GetEndpoint(networkID string, args *cniSkel.CmdArgs) ([]*EndpointInfo, error) { + if nm.IsStatelessCNIMode() { + logger.Info("calling cns getEndpoint API") + epInfos, err := nm.GetEndpointState(networkID, args.ContainerID, args.Netns) + if err != nil { + switch { + case errors.Is(err, ErrConnectionFailure): + logger.Error("Failed to connect to CNS", zap.Error(err)) + logger.Info("Endpoint will be deleted from state file asynchronously", zap.String("containerID", args.ContainerID)) + // In SwiftV2 Linux stateless CNI mode, if the plugin cannot connect to CNS, + // we still have to remove the secondary (delegated) interface from the pod’s network namespace in the absence of the endpoint state. + // This is necessary because leaving the delegated NIC in the pod netns can cause the kernel to block rtnetlink operations. + // When that happens, kubelet and containerd hang during sandbox creation or teardown. + // The delegated NIC (SR-IOV VF) used by SwiftV2 for multitenant pods remains tied to the pod namespace, + // triggering hot-unplug/re-register events and leaving the node in an unhealthy state. + // This workaround mitigates the issue by generating a minimal endpointInfo via containerd args and netlink APIs that can be then passed to DeleteEndpoint API. + epInfos, err = nm.generateEndpointLocally(args) + if err != nil { + logger.Error("Failed to fetch secondary endpoint from pod netns", zap.String("netns", args.Netns), zap.Error(err)) + return nil, fmt.Errorf("failed to fetch secondary interfaces: %w", err) + } + case errors.Is(err, ErrEndpointStateNotFound): + logger.Info("Endpoint Not found", zap.String("containerID", args.ContainerID), zap.Error(err)) + return nil, nil + default: + logger.Error("Get Endpoint State API returned error", zap.String("containerID", args.ContainerID), zap.Error(err)) + return nil, ErrEndpointRetrievalFailure + } + } + for _, epInfo := range epInfos { + logger.Info("Found endpoint to delete", zap.String("IfName", epInfo.IfName), zap.String("EndpointID", epInfo.EndpointID), zap.Any("NICType", epInfo.NICType)) + } + return epInfos, nil + } + return nm.GetEndpointInfosFromContainerID(args.ContainerID), nil +} + +// generateEndpointLocally fetches the endpoint information using containerd args and netlink APIs +func (nm *networkManager) generateEndpointLocally(args *cniSkel.CmdArgs) ([]*EndpointInfo, error) { + ep := &endpoint{ + Id: args.ContainerID, + NetworkNameSpace: args.Netns, + IfName: args.IfName, // TODO: For stateless cni linux populate IfName here to use in deletion in secondary endpoint client + } + epInfo, err := nm.getEndpointInfoByIfNameImpl(ep) + if err != nil { + return nil, fmt.Errorf("failed to fetch secondary interfaces: %w", err) + } + return epInfo, nil +} diff --git a/network/manager_mock.go b/network/manager_mock.go index 52ba4f3bc4..2a73a45093 100644 --- a/network/manager_mock.go +++ b/network/manager_mock.go @@ -1,7 +1,9 @@ package network import ( + "github.com/Azure/azure-container-networking/cns" "github.com/Azure/azure-container-networking/common" + cniSkel "github.com/containernetworking/cni/pkg/skel" ) // MockNetworkManager is a mock structure for Network Manager @@ -94,6 +96,16 @@ func (nm *MockNetworkManager) GetEndpointID(containerID, ifName string) string { return containerID + "-" + ifName } +// GetEndpointIDByNicType returns a unique endpoint ID based on the CNI mode and NIC type. +func (nm *MockNetworkManager) GetEndpointIDByNicType(containerID, ifName string, nicType cns.NICType) string { + // For stateless CNI, secondary NICs use containerID-ifName as endpointID. + if nm.IsStatelessCNIMode() && nicType != cns.InfraNIC { + return containerID + "-" + ifName + } + // For InfraNIC, use GetEndpointID() logic. + return nm.GetEndpointID(containerID, ifName) +} + func (nm *MockNetworkManager) GetAllEndpoints(networkID string) (map[string]*EndpointInfo, error) { return nm.TestEndpointInfoMap, nil } @@ -207,6 +219,10 @@ func (nm *MockNetworkManager) GetEndpointInfosFromContainerID(containerID string return ret } -func (nm *MockNetworkManager) GetEndpointState(_, _ string) ([]*EndpointInfo, error) { +func (nm *MockNetworkManager) GetEndpointState(_, _, _ string) ([]*EndpointInfo, error) { return []*EndpointInfo{}, nil } + +func (nm *MockNetworkManager) GetEndpoint(_ string, args *cniSkel.CmdArgs) ([]*EndpointInfo, error) { + return nm.GetEndpointInfosFromContainerID(args.ContainerID), nil +} diff --git a/network/manager_test.go b/network/manager_test.go index 8c8545b97a..4cc79a8d8b 100644 --- a/network/manager_test.go +++ b/network/manager_test.go @@ -350,7 +350,7 @@ var _ = Describe("Test Manager", func() { PodNamespace: "test-pod-ns", } - epInfos := cnsEndpointInfotoCNIEpInfos(cnsEndpointInfo, endpointID) + epInfos := cnsEndpointInfotoCNIEpInfos(cnsEndpointInfo, endpointID, "") Expect(len(epInfos)).To(Equal(1)) Expect(epInfos[0]).To(Equal( @@ -400,7 +400,7 @@ var _ = Describe("Test Manager", func() { PodNamespace: "test-pod-ns", } - epInfos := cnsEndpointInfotoCNIEpInfos(cnsEndpointInfo, endpointID) + epInfos := cnsEndpointInfotoCNIEpInfos(cnsEndpointInfo, endpointID, "") Expect(len(epInfos)).To(Equal(2)) Expect(epInfos).To(ContainElement( @@ -489,3 +489,109 @@ var _ = Describe("Test Manager", func() { }) }) }) + +func TestGetEndpointIDByNicType_Cases(t *testing.T) { + nm := &networkManager{} + + cases := []struct { + name string + stateless bool + containerID string + ifName string + nicType cns.NICType + expectedResult string + }{ + { + name: "Stateless InfraNIC", + stateless: true, + containerID: "container123", + ifName: "eth0", + nicType: cns.InfraNIC, + expectedResult: "container123", + }, + { + name: "Stateless SecondaryNIC", + stateless: true, + containerID: "container123", + ifName: "eth1", + nicType: cns.DelegatedVMNIC, + expectedResult: "container123-eth1", + }, + { + name: "Stateful InfraNIC", + stateless: false, + containerID: "container123456789", + ifName: "eth0", + nicType: cns.InfraNIC, + expectedResult: "containe-eth0", // truncated to 8 chars + }, + } + + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + nm.statelessCniMode = tc.stateless + id := nm.GetEndpointIDByNicType(tc.containerID, tc.ifName, tc.nicType) + if id != tc.expectedResult { + t.Errorf("expected %s, got %s", tc.expectedResult, id) + } + }) + } +} + +func TestCnsEndpointInfotoCNIEpInfos_Cases(t *testing.T) { + cases := []struct { + name string + ifName string + ipInfo restserver.IPInfo + netNs string + expectedNetNs string + expectedIfName string + expectedNICType cns.NICType + }{ + { + name: "DelegatedVMNIC", + ifName: "eth1", + netNs: "/var/run/netns/testns", + ipInfo: restserver.IPInfo{ + NICType: cns.DelegatedVMNIC, + }, + expectedNetNs: "/var/run/netns/testns", + expectedIfName: "eth1", + expectedNICType: cns.DelegatedVMNIC, + }, + { + name: "InfraNIC", + ifName: "eth0", + netNs: "", + ipInfo: restserver.IPInfo{ + NICType: cns.InfraNIC, + }, + expectedNetNs: "", + expectedIfName: "eth0", + expectedNICType: cns.InfraNIC, + }, + } + + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + endpointInfo := restserver.EndpointInfo{ + IfnameToIPMap: map[string]*restserver.IPInfo{ + tc.ifName: &tc.ipInfo, + }, + } + epInfos := cnsEndpointInfotoCNIEpInfos(endpointInfo, "container123", tc.netNs) + if len(epInfos) == 0 { + t.Fatalf("expected at least one epInfo") + } + if epInfos[0].NetNsPath != tc.expectedNetNs { + t.Errorf("expected NetNsPath %q, got %q", tc.expectedNetNs, epInfos[0].NetNsPath) + } + if epInfos[0].IfName != tc.expectedIfName { + t.Errorf("expected IfName %q, got %q", tc.expectedIfName, epInfos[0].IfName) + } + if epInfos[0].NICType != tc.expectedNICType { + t.Errorf("expected NICType %v, got %v", tc.expectedNICType, epInfos[0].NICType) + } + }) + } +} diff --git a/network/secondary_endpoint_client_linux.go b/network/secondary_endpoint_client_linux.go index 6d9d5c3230..9ba8adf657 100644 --- a/network/secondary_endpoint_client_linux.go +++ b/network/secondary_endpoint_client_linux.go @@ -6,12 +6,14 @@ import ( "strings" "time" + "github.com/Azure/azure-container-networking/cns" "github.com/Azure/azure-container-networking/netio" "github.com/Azure/azure-container-networking/netlink" "github.com/Azure/azure-container-networking/netns" "github.com/Azure/azure-container-networking/network/networkutils" "github.com/Azure/azure-container-networking/platform" "github.com/pkg/errors" + vishnetlink "github.com/vishvananda/netlink" "go.uber.org/zap" "k8s.io/kubernetes/pkg/kubelet" ) @@ -188,15 +190,90 @@ func (client *SecondaryEndpointClient) DeleteEndpoints(ep *endpoint) error { logger.Error("Failed to exit netns with", zap.Error(newErrorSecondaryEndpointClient(err))) } }() - // TODO: For stateless cni linux, check if delegated vmnic type, and if so, delete using this *endpoint* struct's ifname + // For stateless cni linux, check if delegated vmnic type, and if so, delete using this *endpoint* struct's ifname + if ep.NICType == cns.NodeNetworkInterfaceFrontendNIC { + if err := client.moveInterfaceToHostNetns(ep.IfName, vmns); err != nil { + logger.Error("Failed to move interface", zap.String("IfName", ep.IfName), zap.Error(newErrorSecondaryEndpointClient(err))) + } + } + // For Stateful cni linux, Use SecondaryInterfaces map to move all interfaces to host netns + // TODO: SecondaryInterfaces map should be retired and only IfName field and NICType should be used to determine the delegated NIC for iface := range ep.SecondaryInterfaces { - if err := client.netlink.SetLinkNetNs(iface, uintptr(vmns)); err != nil { + if err := client.moveInterfaceToHostNetns(iface, vmns); err != nil { logger.Error("Failed to move interface", zap.String("IfName", iface), zap.Error(newErrorSecondaryEndpointClient(err))) continue } - delete(ep.SecondaryInterfaces, iface) } return nil } + +// moveInterfaceToHostNetns moves the given interface to the host netns. +func (client *SecondaryEndpointClient) moveInterfaceToHostNetns(ifName string, vmns int) error { + logger.Info("Moving interface to host netns", zap.String("IfName", ifName)) + if err := client.netlink.SetLinkNetNs(ifName, uintptr(vmns)); err != nil { + return newErrorSecondaryEndpointClient(err) + } + return nil +} + +// FetchInterfacesFromNetnsPath finds all interfaces from the specified netns path except the infra and non-eth interfaces. +func (client *SecondaryEndpointClient) FetchInterfacesFromNetnsPath(infraInterfaceName, netnspath string) ([]string, error) { + // Get VM namespace + _, err := netns.New().Get() + if err != nil { + return nil, newErrorSecondaryEndpointClient(err) + } + + // Open the network namespace. + logger.Info("Opening netns", zap.Any("NetNsPath", netnspath)) + ns, err := client.nsClient.OpenNamespace(netnspath) + if err != nil { + if strings.Contains(err.Error(), errFileNotExist.Error()) { + return nil, nil + } + + return nil, newErrorSecondaryEndpointClient(err) + } + defer ns.Close() + + // Enter the container network namespace. + logger.Info("Entering netns", zap.Any("NetNsPath", netnspath)) + if err := ns.Enter(); err != nil { + if errors.Is(err, os.ErrNotExist) { + return nil, nil + } + + return nil, newErrorSecondaryEndpointClient(err) + } + + // Return to host network namespace. + defer func() { + logger.Info("Exiting netns", zap.Any("NetNsPath", netnspath)) + if err := ns.Exit(); err != nil { + logger.Error("Failed to exit netns with", zap.Error(newErrorSecondaryEndpointClient(err))) + } + }() + // Use the netlink API to list links + links, err := vishnetlink.LinkList() + if err != nil { + return nil, newErrorSecondaryEndpointClient(err) + } + + ifnames := make([]string, 0, len(links)) + for _, l := range links { + ifnames = append(ifnames, l.Attrs().Name) + } + ret := make([]string, 0, len(ifnames)) + // For stateless cni linux, iterate through all interfaces and check if delegated vmnic type, and if so, delete using this *endpoint* struct's ifname + for _, iface := range ifnames { + // skip the infra interface as well as non-eth interfaces + if iface == infraInterfaceName || !strings.HasPrefix(iface, "eth") { + continue + } + ret = append(ret, iface) + } + logger.Info("Found interfaces in netns that needs to be moved back to host", zap.Any("interfaces", ret)) + return ret, nil +}