Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[cert-v2] tun_linux.go: retry setting default MTU #1268

Merged
merged 1 commit into from
Nov 12, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 22 additions & 19 deletions overlay/tun_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import (
"os"
"strings"
"sync/atomic"
"time"
"unsafe"

"github.com/gaissmai/bart"
Expand Down Expand Up @@ -367,12 +368,6 @@ func (t *tun) Activate() error {
return fmt.Errorf("failed to bring the tun device up: %s", err)
}

// Run the interface
ifrf.Flags = ifrf.Flags | unix.IFF_UP | unix.IFF_RUNNING
if err = ioctl(t.ioctlFd, unix.SIOCSIFFLAGS, uintptr(unsafe.Pointer(&ifrf))); err != nil {
return fmt.Errorf("failed to run tun device: %s", err)
}

//set route MTU
for i := range t.vpnNetworks {
if err = t.setDefaultRoute(t.vpnNetworks[i]); err != nil {
Expand All @@ -385,7 +380,11 @@ func (t *tun) Activate() error {
return err
}

//todo do we want to keep the link-local address?
// Run the interface
ifrf.Flags = ifrf.Flags | unix.IFF_UP | unix.IFF_RUNNING
if err = ioctl(t.ioctlFd, unix.SIOCSIFFLAGS, uintptr(unsafe.Pointer(&ifrf))); err != nil {
return fmt.Errorf("failed to run tun device: %s", err)
}

return nil
}
Expand All @@ -400,8 +399,6 @@ func (t *tun) setMTU() {
}

func (t *tun) setDefaultRoute(cidr netip.Prefix) error {
// Default route

dr := &net.IPNet{
IP: cidr.Masked().Addr().AsSlice(),
Mask: net.CIDRMask(cidr.Bits(), cidr.Addr().BitLen()),
Expand All @@ -420,7 +417,20 @@ func (t *tun) setDefaultRoute(cidr netip.Prefix) error {
}
err := netlink.RouteReplace(&nr)
if err != nil {
return fmt.Errorf("failed to set mtu %v on the default route %v; %v", t.DefaultMTU, dr, err)
t.l.WithError(err).WithField("cidr", cidr).Warn("Failed to set default route MTU, retrying")
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The ordering of Activate changed a good deal, have we tried moving it back to the order it was in to see if that helps this situation?

nebula/overlay/tun_linux.go

Lines 328 to 352 in e264a0f

ifrf.Flags = ifrf.Flags | unix.IFF_UP
if err = ioctl(t.ioctlFd, unix.SIOCSIFFLAGS, uintptr(unsafe.Pointer(&ifrf))); err != nil {
return fmt.Errorf("failed to bring the tun device up: %s", err)
}
link, err := netlink.LinkByName(t.Device)
if err != nil {
return fmt.Errorf("failed to get tun device link: %s", err)
}
t.deviceIndex = link.Attrs().Index
if err = t.setDefaultRoute(); err != nil {
return err
}
// Set the routes
if err = t.addRoutes(false); err != nil {
return err
}
// Run the interface
ifrf.Flags = ifrf.Flags | unix.IFF_UP | unix.IFF_RUNNING
if err = ioctl(t.ioctlFd, unix.SIOCSIFFLAGS, uintptr(unsafe.Pointer(&ifrf))); err != nil {
return fmt.Errorf("failed to run tun device: %s", err)
}

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I fiddled with the ordering and it didn't seem to make a difference.

I think the problem we're seeing here is somehow caused by having more than one IP -- especially an IPv6 IP. When I test with single-IP certs I'm completely unable to reproduce the problem.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh also, when this fails it always fails when adding the second route. The first route always works.

//retry twice more -- on some systems there appears to be a race condition where if we set routes too soon, netlink says `invalid argument`
for i := 0; i < 2; i++ {
time.Sleep(100 * time.Millisecond)
err = netlink.RouteReplace(&nr)
if err == nil {
break
} else {
t.l.WithError(err).WithField("cidr", cidr).WithField("mtu", t.DefaultMTU).Warn("Failed to set default route MTU, retrying")
}
}
if err != nil {
return fmt.Errorf("failed to set mtu %v on the default route %v; %v", t.DefaultMTU, dr, err)
}
}

return nil
Expand Down Expand Up @@ -547,7 +557,6 @@ func (t *tun) updateRoutes(r netlink.RouteUpdate) {
return
}

//TODO: IPV6-WORK what if not ok?
gwAddr, ok := netip.AddrFromSlice(r.Gw)
if !ok {
t.l.WithField("route", r).Debug("Ignoring route update, invalid gateway address")
Expand All @@ -568,12 +577,6 @@ func (t *tun) updateRoutes(r netlink.RouteUpdate) {
return
}

if x := r.Dst.IP.To4(); x == nil {
// Nebula only handles ipv4 on the overlay currently
t.l.WithField("route", r).Debug("Ignoring route update, destination is not ipv4")
return
}

dstAddr, ok := netip.AddrFromSlice(r.Dst.IP)
if !ok {
t.l.WithField("route", r).Debug("Ignoring route update, invalid destination address")
Expand Down Expand Up @@ -602,11 +605,11 @@ func (t *tun) Close() error {
}

if t.ReadWriteCloser != nil {
t.ReadWriteCloser.Close()
_ = t.ReadWriteCloser.Close()
}

if t.ioctlFd > 0 {
os.NewFile(t.ioctlFd, "ioctlFd").Close()
_ = os.NewFile(t.ioctlFd, "ioctlFd").Close()
}

return nil
Expand Down
Loading