Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
64 changes: 24 additions & 40 deletions cni/network/network.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@
"github.com/Azure/azure-container-networking/cni/util"
"github.com/Azure/azure-container-networking/cns"
cnscli "github.com/Azure/azure-container-networking/cns/client"
"github.com/Azure/azure-container-networking/cns/fsnotify"
"github.com/Azure/azure-container-networking/common"
"github.com/Azure/azure-container-networking/dhcp"
"github.com/Azure/azure-container-networking/iptables"
Expand Down Expand Up @@ -716,7 +715,7 @@
*opt.infraSeen = true
} else {
ifName = "eth" + strconv.Itoa(opt.endpointIndex)
endpointID = plugin.nm.GetEndpointID(opt.args.ContainerID, ifName)
endpointID = plugin.nm.GetEndpointIDByNicType(opt.args.ContainerID, ifName, opt.ifInfo.NICType)
}

endpointInfo := network.EndpointInfo{
Expand Down Expand Up @@ -1065,36 +1064,11 @@
}
logger.Info("Retrieved network info, populating endpoint infos with container id", zap.String("containerID", args.ContainerID))

var epInfos []*network.EndpointInfo
if plugin.nm.IsStatelessCNIMode() {
// network ID is passed in and used only for migration
// otherwise, in stateless, we don't need the network id for deletion
epInfos, err = plugin.nm.GetEndpointState(networkID, args.ContainerID)
// if stateless CNI fail to get the endpoint from CNS for any reason other than Endpoint Not found
if err != nil {
if errors.Is(err, network.ErrConnectionFailure) {
logger.Info("failed to connect to CNS", zap.String("containerID", args.ContainerID), zap.Error(err))
addErr := fsnotify.AddFile(args.ContainerID, args.ContainerID, watcherPath)
logger.Info("add containerid file for Asynch delete", zap.String("containerID", args.ContainerID), zap.Error(addErr))
if addErr != nil {
logger.Error("failed to add file to watcher", zap.String("containerID", args.ContainerID), zap.Error(addErr))
return errors.Wrap(addErr, fmt.Sprintf("failed to add file to watcher with containerID %s", args.ContainerID))
}
return nil
}
if errors.Is(err, network.ErrEndpointStateNotFound) {
logger.Info("Endpoint Not found", zap.String("containerID", args.ContainerID), zap.Error(err))
return nil
}
logger.Error("Get Endpoint State API returned error", zap.String("containerID", args.ContainerID), zap.Error(err))
return plugin.RetriableError(fmt.Errorf("failed to delete endpoint: %w", err))
}
} else {
epInfos = plugin.nm.GetEndpointInfosFromContainerID(args.ContainerID)
epInfos, err := plugin.nm.GetEndpoint(networkID, args)
if err != nil {
return plugin.RetriableError(fmt.Errorf("failed to retrieve endpoint: %w", err))
}

// for when the endpoint is not created, but the ips are already allocated (only works if single network, single infra)
// this block is not applied to stateless CNI
// when the endpoint is not created, but the ips are already allocated (only works if single network, single infra)
if len(epInfos) == 0 {
endpointID := plugin.nm.GetEndpointID(args.ContainerID, args.IfName)
if !nwCfg.MultiTenancy {
Expand All @@ -1120,7 +1094,7 @@
if err = plugin.nm.DeleteEndpoint(epInfo.NetworkID, epInfo.EndpointID, epInfo); err != nil {
// An error will not be returned if the endpoint is not found
// return a retriable error so the container runtime will retry this DEL later
// the implementation of this function returns nil if the endpoint doens't exist, so
// the implementation of this function returns nil if the endpoint doesn't exist, so
// we don't have to check that here
return plugin.RetriableError(fmt.Errorf("failed to delete endpoint: %w", err))
}
Expand All @@ -1132,15 +1106,25 @@
zap.String("endpointID", epInfo.EndpointID))
telemetryClient.SendEvent("Deleting endpoint: " + epInfo.EndpointID)

// Delegated/secondary nic ips are statically allocated so we don't need to release
// Call into IPAM plugin to release the endpoint's addresses.
if !nwCfg.MultiTenancy && (epInfo.NICType == cns.InfraNIC || epInfo.NICType == "") {
// Delegated/secondary nic ips are statically allocated so we don't need to release
// Call into IPAM plugin to release the endpoint's addresses.
for i := range epInfo.IPAddresses {
logger.Info("Release ip", zap.String("ip", epInfo.IPAddresses[i].IP.String()))
telemetryClient.SendEvent(fmt.Sprintf("Release ip: %s container id: %s endpoint id: %s", epInfo.IPAddresses[i].IP.String(), args.ContainerID, epInfo.EndpointID))
err = plugin.ipamInvoker.Delete(&epInfo.IPAddresses[i], nwCfg, args, nwInfo.Options)
if err != nil {
return plugin.RetriableError(fmt.Errorf("failed to release address: %w", err))
// This is a special case for stateless CNI when an asynchronous DEL to CNS will take place

Check failure on line 1112 in cni/network/network.go

View workflow job for this annotation

GitHub Actions / Lint (windows-latest)

`Asychronous` is a misspelling of `Asynchronous` (misspell)

Check failure on line 1112 in cni/network/network.go

View workflow job for this annotation

GitHub Actions / Lint (ubuntu-latest)

`Asychronous` is a misspelling of `Asynchronous` (misspell)
// At this point the endpoint is already deleted in previous block and CNS will release the IP whenever it is up
if epInfo.IPAddresses == nil && plugin.nm.IsStatelessCNIMode() {
logger.Warn("Release ip Asynchronously by CNS",
zap.String("containerID", args.ContainerID))
if err = plugin.ipamInvoker.Delete(nil, nwCfg, args, nwInfo.Options); err != nil {
return plugin.RetriableError(fmt.Errorf("failed to release address(no endpoint): %w", err))
}
} else {
for i := range epInfo.IPAddresses {
logger.Info("Release ip", zap.String("ip", epInfo.IPAddresses[i].IP.String()))
telemetryClient.SendEvent(fmt.Sprintf("Release ip: %s container id: %s endpoint id: %s", epInfo.IPAddresses[i].IP.String(), args.ContainerID, epInfo.EndpointID))
err = plugin.ipamInvoker.Delete(&epInfo.IPAddresses[i], nwCfg, args, nwInfo.Options)
if err != nil {
return plugin.RetriableError(fmt.Errorf("failed to release address: %w", err))
}
}
}
} else if epInfo.EnableInfraVnet { // remove in future PR
Expand Down
26 changes: 26 additions & 0 deletions network/endpoint_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -547,3 +547,29 @@ func getDefaultGateway(routes []RouteInfo) net.IP {
func (epInfo *EndpointInfo) GetEndpointInfoByIPImpl(_ []net.IPNet, _ string) (*EndpointInfo, error) {
return epInfo, nil
}

// getEndpointInfoByIfNameImpl builds the EndpointInfo list for ep.
// The first entry describes ep itself and is tagged as the infra NIC; one more entry,
// tagged as a frontend NIC, is appended for every interface name that
// FetchInterfacesFromNetnsPath reports for ep's network namespace.
// If that lookup fails, the error is wrapped and returned together with a nil slice.
func (nm *networkManager) getEndpointInfoByIfNameImpl(ep *endpoint) ([]*EndpointInfo, error) {
	infraInfo := &EndpointInfo{
		EndpointID: ep.Id,
		NetNsPath:  ep.NetworkNameSpace,
		NICType:    cns.InfraNIC,
		IfName:     ep.IfName, // TODO: For stateless cni linux populate IfName here to use in deletion in secondary endpoint client
	}
	epInfos := []*EndpointInfo{infraInfo}

	logger.Info("Fetching Secondary Endpoint from", zap.String("NetworkNameSpace: ", ep.NetworkNameSpace))
	client := NewSecondaryEndpointClient(nil, nil, nil, nm.nsClient, nil, ep)
	secondaryIfNames, err := client.FetchInterfacesFromNetnsPath(ep.IfName, ep.NetworkNameSpace)
	if err != nil {
		return nil, fmt.Errorf("failed to fetch secondary interfaces: %w", err)
	}

	// Each interface found in the namespace becomes its own entry; these carry no endpoint ID.
	for _, name := range secondaryIfNames {
		secondaryInfo := &EndpointInfo{
			NetNsPath: ep.NetworkNameSpace,
			IfName:    name,
			NICType:   cns.NodeNetworkInterfaceFrontendNIC,
		}
		epInfos = append(epInfos, secondaryInfo)
	}
	return epInfos, nil
}
4 changes: 4 additions & 0 deletions network/endpoint_windows.go
Original file line number Diff line number Diff line change
Expand Up @@ -746,3 +746,7 @@ func getPnpDeviceState(instanceID string, plc platform.ExecClient) (string, stri
logger.Info("Retrieved device problem code", zap.String("code", devpkeyDeviceProblemCode))
return devpkeyDeviceIsPresent, devpkeyDeviceProblemCode, nil
}

// getEndpointInfoByIfNameImpl is the Windows variant of this hook.
// It ignores its endpoint argument and always returns a nil slice and a nil error.
func (nm *networkManager) getEndpointInfoByIfNameImpl(_ *endpoint) ([]*EndpointInfo, error) {
return nil, nil
}
12 changes: 7 additions & 5 deletions network/errors.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,11 @@ package network
import "errors"

// Package-level sentinel errors. Callers match the exported ones with errors.Is.
// Each name is declared exactly once, and newly added messages start with a
// lower-case letter as Go error strings are expected to.
var (
	errSubnetV6NotFound         = errors.New("Couldn't find ipv6 subnet in network info")                // nolint
	errV6SnatRuleNotSet         = errors.New("ipv6 snat rule not set. Might be VM ipv6 address missing") // nolint
	ErrEndpointStateNotFound    = errors.New("endpoint state could not be found in the statefile")
	ErrConnectionFailure        = errors.New("couldn't connect to CNS")
	ErrEndpointRemovalFailure   = errors.New("failed to remove endpoint")
	ErrEndpointRetrievalFailure = errors.New("failed to obtain endpoint")
	ErrGetEndpointStateFailure  = errors.New("failure to obtain the endpoint state")
)
85 changes: 78 additions & 7 deletions network/manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ package network

import (
"context"
"fmt"
"net"
"sync"
"time"
Expand All @@ -19,6 +20,7 @@ import (
"github.com/Azure/azure-container-networking/netlink"
"github.com/Azure/azure-container-networking/platform"
"github.com/Azure/azure-container-networking/store"
cniSkel "github.com/containernetworking/cni/pkg/skel"
"github.com/pkg/errors"
"go.uber.org/zap"
)
Expand Down Expand Up @@ -116,11 +118,13 @@ type NetworkManager interface {
UpdateEndpoint(networkID string, existingEpInfo *EndpointInfo, targetEpInfo *EndpointInfo) error
GetNumberOfEndpoints(ifName string, networkID string) int
GetEndpointID(containerID, ifName string) string
GetEndpointIDByNicType(containerID, ifName string, nicType cns.NICType) string
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: GetEndpointIDByNICType as nic is an acronym

IsStatelessCNIMode() bool
SaveState(eps []*endpoint) error
DeleteState(epInfos []*EndpointInfo) error
GetEndpoint(networkID string, args *cniSkel.CmdArgs) ([]*EndpointInfo, error)
GetEndpointInfosFromContainerID(containerID string) []*EndpointInfo
GetEndpointState(networkID, containerID string) ([]*EndpointInfo, error)
GetEndpointState(networkID, containerID, netns string) ([]*EndpointInfo, error)
}

// Creates a new network manager.
Expand Down Expand Up @@ -455,7 +459,7 @@ func validateUpdateEndpointState(endpointID string, ifNameToIPInfoMap map[string
// GetEndpointState will make a call to CNS GetEndpointState API in the stateless CNI mode to fetch the endpointInfo
// TODO unit tests need to be added, WorkItem: 26606939
// In stateless cni, container id is the endpoint id, so you can pass in either
func (nm *networkManager) GetEndpointState(networkID, containerID string) ([]*EndpointInfo, error) {
func (nm *networkManager) GetEndpointState(networkID, containerID, netns string) ([]*EndpointInfo, error) {
endpointResponse, err := nm.CnsClient.GetEndpoint(context.TODO(), containerID)
if err != nil {
if endpointResponse.Response.ReturnCode == types.NotFound {
Expand All @@ -466,7 +470,7 @@ func (nm *networkManager) GetEndpointState(networkID, containerID string) ([]*En
}
return nil, ErrGetEndpointStateFailure
}
epInfos := cnsEndpointInfotoCNIEpInfos(endpointResponse.EndpointInfo, containerID)
epInfos := cnsEndpointInfotoCNIEpInfos(endpointResponse.EndpointInfo, containerID, netns)

for i := 0; i < len(epInfos); i++ {
if epInfos[i].NICType == cns.InfraNIC {
Expand Down Expand Up @@ -514,7 +518,7 @@ func (nm *networkManager) DeleteEndpointState(networkID string, epInfo *Endpoint
nw := &network{
Id: networkID, // currently unused in stateless cni
HnsId: epInfo.HNSNetworkID,
Mode: opModeTransparentVlan,
Mode: opModeTransparent,
SnatBridgeIP: "",
NetNs: dummyGUID, // to trigger hns v2, windows
extIf: &externalInterface{
Expand All @@ -529,6 +533,7 @@ func (nm *networkManager) DeleteEndpointState(networkID string, epInfo *Endpoint
HNSNetworkID: epInfo.HNSNetworkID, // unused (we use nw.HnsId for deleting the network)
HostIfName: epInfo.HostIfName,
LocalIP: "",
IPAddresses: epInfo.IPAddresses,
VlanID: 0,
AllowInboundFromHostToNC: false, // stateless currently does not support apipa
AllowInboundFromNCToHost: false,
Expand All @@ -537,11 +542,12 @@ func (nm *networkManager) DeleteEndpointState(networkID string, epInfo *Endpoint
NetworkContainerID: epInfo.NetworkContainerID, // we don't use this as long as AllowInboundFromHostToNC and AllowInboundFromNCToHost are false
NetNs: dummyGUID, // to trigger hnsv2, windows
NICType: epInfo.NICType,
NetworkNameSpace: epInfo.NetNsPath,
IfName: epInfo.IfName, // TODO: For stateless cni linux populate IfName here to use in deletion in secondary endpoint client
}
logger.Info("Deleting endpoint with", zap.String("Endpoint Info: ", epInfo.PrettyString()), zap.String("HNISID : ", ep.HnsId))

err := nw.deleteEndpointImpl(netlink.NewNetlink(), platform.NewExecClient(logger), nil, nil, nil, nil, nil, ep)
err := nw.deleteEndpointImpl(nm.netlink, nm.plClient, nil, nm.netio, nm.nsClient, nm.iptablesClient, nm.dhcpClient, ep)
if err != nil {
return err
}
Expand All @@ -562,7 +568,7 @@ func (nm *networkManager) GetEndpointInfo(networkID, endpointID string) (*Endpoi

if nm.IsStatelessCNIMode() {
logger.Info("calling cns getEndpoint API")
epInfos, err := nm.GetEndpointState(networkID, endpointID)
epInfos, err := nm.GetEndpointState(networkID, endpointID, "")
if err != nil {
return nil, err
}
Expand Down Expand Up @@ -745,6 +751,16 @@ func (nm *networkManager) GetEndpointID(containerID, ifName string) string {
return containerID + "-" + ifName
}

// GetEndpointIDByNicType returns the endpoint ID for a container interface.
// Outside stateless CNI mode, and for the infra NIC in any mode, the result is
// whatever GetEndpointID yields; a non-infra NIC in stateless CNI mode is always
// identified as "<containerID>-<ifName>".
func (nm *networkManager) GetEndpointIDByNicType(containerID, ifName string, nicType cns.NICType) string {
	if !nm.IsStatelessCNIMode() || nicType == cns.InfraNIC {
		return nm.GetEndpointID(containerID, ifName)
	}
	// Stateless CNI, secondary NIC.
	return containerID + "-" + ifName
}

// saves the map of network ids to endpoints to the state file
func (nm *networkManager) SaveState(eps []*endpoint) error {
nm.Lock()
Expand Down Expand Up @@ -779,7 +795,7 @@ func (nm *networkManager) DeleteState(_ []*EndpointInfo) error {
}

// called to convert a cns restserver EndpointInfo into a network EndpointInfo
func cnsEndpointInfotoCNIEpInfos(endpointInfo restserver.EndpointInfo, endpointID string) []*EndpointInfo {
func cnsEndpointInfotoCNIEpInfos(endpointInfo restserver.EndpointInfo, endpointID, netns string) []*EndpointInfo {
ret := []*EndpointInfo{}

for ifName, ipInfo := range endpointInfo.IfnameToIPMap {
Expand Down Expand Up @@ -809,6 +825,10 @@ func cnsEndpointInfotoCNIEpInfos(endpointInfo restserver.EndpointInfo, endpointI
epInfo.NICType = ipInfo.NICType
epInfo.HNSNetworkID = ipInfo.HnsNetworkID
epInfo.MacAddress = net.HardwareAddr(ipInfo.MacAddress)
// fill out the netns if it is empty via args passed by container runtime
if epInfo.NetNsPath == "" {
epInfo.NetNsPath = netns
}
ret = append(ret, epInfo)
}
return ret
Expand Down Expand Up @@ -847,3 +867,54 @@ func generateCNSIPInfoMap(eps []*endpoint) map[string]*restserver.IPInfo {

return ifNametoIPInfoMap
}

// GetEndpoint returns the endpoint infos that belong to the container described by args.
//
// Outside stateless CNI mode the result comes straight from GetEndpointInfosFromContainerID.
// In stateless CNI mode the state is requested through GetEndpointState and a failure is
// handled according to its kind:
//   - ErrConnectionFailure: endpoint infos are generated locally via generateEndpointLocally;
//   - ErrEndpointStateNotFound: (nil, nil) is returned;
//   - any other error: ErrEndpointRetrievalFailure is returned.
func (nm *networkManager) GetEndpoint(networkID string, args *cniSkel.CmdArgs) ([]*EndpointInfo, error) {
	if !nm.IsStatelessCNIMode() {
		return nm.GetEndpointInfosFromContainerID(args.ContainerID), nil
	}

	logger.Info("calling cns getEndpoint API")
	endpoints, lookupErr := nm.GetEndpointState(networkID, args.ContainerID, args.Netns)
	if lookupErr != nil {
		if !errors.Is(lookupErr, ErrConnectionFailure) {
			if errors.Is(lookupErr, ErrEndpointStateNotFound) {
				logger.Info("Endpoint Not found", zap.String("containerID", args.ContainerID), zap.Error(lookupErr))
				return nil, nil
			}
			logger.Error("Get Endpoint State API returned error", zap.String("containerID", args.ContainerID), zap.Error(lookupErr))
			return nil, ErrEndpointRetrievalFailure
		}

		logger.Error("Failed to connect to CNS", zap.Error(lookupErr))
		logger.Info("Endpoint will be deleted from state file asynchronously", zap.String("containerID", args.ContainerID))
		// CNS is unreachable, so no endpoint state is available. In SwiftV2 Linux stateless CNI
		// mode the secondary (delegated) interface must still be removed from the pod's network
		// namespace: a delegated NIC (SR-IOV VF) left behind stays tied to the pod namespace, can
		// make the kernel block rtnetlink operations, and then kubelet and containerd hang during
		// sandbox creation or teardown, leaving the node unhealthy.
		// As a workaround, build a minimal endpoint info from the runtime-supplied args so that it
		// can be handed to the DeleteEndpoint API.
		var genErr error
		endpoints, genErr = nm.generateEndpointLocally(args)
		if genErr != nil {
			logger.Error("Failed to fetch secondary endpoint from pod netns", zap.String("netns", args.Netns), zap.Error(genErr))
			return nil, fmt.Errorf("failed to fetch secondary interfaces: %w", genErr)
		}
	}

	for i := range endpoints {
		found := endpoints[i]
		logger.Info("Found endpoint to delete", zap.String("IfName", found.IfName), zap.String("EndpointID", found.EndpointID), zap.Any("NICType", found.NICType))
	}
	return endpoints, nil
}

// generateEndpointLocally builds a minimal endpoint from args (container ID as endpoint ID,
// netns path, interface name) and passes it to getEndpointInfoByIfNameImpl.
// A failure from that call is wrapped and returned with a nil slice.
func (nm *networkManager) generateEndpointLocally(args *cniSkel.CmdArgs) ([]*EndpointInfo, error) {
	infos, err := nm.getEndpointInfoByIfNameImpl(&endpoint{
		Id:               args.ContainerID,
		NetworkNameSpace: args.Netns,
		IfName:           args.IfName, // TODO: For stateless cni linux populate IfName here to use in deletion in secondary endpoint client
	})
	if err == nil {
		return infos, nil
	}
	return nil, fmt.Errorf("failed to fetch secondary interfaces: %w", err)
}
18 changes: 17 additions & 1 deletion network/manager_mock.go
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
package network

import (
"github.com/Azure/azure-container-networking/cns"
"github.com/Azure/azure-container-networking/common"
cniSkel "github.com/containernetworking/cni/pkg/skel"
)

// MockNetworkManager is a mock structure for Network Manager
Expand Down Expand Up @@ -94,6 +96,16 @@ func (nm *MockNetworkManager) GetEndpointID(containerID, ifName string) string {
return containerID + "-" + ifName
}

// GetEndpointIDByNicType mirrors the real network manager's rule for endpoint IDs.
// Outside stateless CNI mode, and for the infra NIC in any mode, it defers to
// GetEndpointID; a non-infra NIC in stateless CNI mode is "<containerID>-<ifName>".
func (nm *MockNetworkManager) GetEndpointIDByNicType(containerID, ifName string, nicType cns.NICType) string {
	if !nm.IsStatelessCNIMode() || nicType == cns.InfraNIC {
		return nm.GetEndpointID(containerID, ifName)
	}
	// Stateless CNI, secondary NIC.
	return containerID + "-" + ifName
}

func (nm *MockNetworkManager) GetAllEndpoints(networkID string) (map[string]*EndpointInfo, error) {
return nm.TestEndpointInfoMap, nil
}
Expand Down Expand Up @@ -207,6 +219,10 @@ func (nm *MockNetworkManager) GetEndpointInfosFromContainerID(containerID string
return ret
}

func (nm *MockNetworkManager) GetEndpointState(_, _ string) ([]*EndpointInfo, error) {
func (nm *MockNetworkManager) GetEndpointState(_, _, _ string) ([]*EndpointInfo, error) {
return []*EndpointInfo{}, nil
}

// GetEndpoint is the mock's endpoint lookup. The network ID is ignored; the result is
// whatever GetEndpointInfosFromContainerID returns for args.ContainerID, and the
// error is always nil.
func (nm *MockNetworkManager) GetEndpoint(_ string, args *cniSkel.CmdArgs) ([]*EndpointInfo, error) {
	epInfos := nm.GetEndpointInfosFromContainerID(args.ContainerID)
	return epInfos, nil
}
Loading
Loading