From 02023bab52f20799df59cabbfeebb46aff6f4f03 Mon Sep 17 00:00:00 2001 From: Nate Sales Date: Tue, 9 Jul 2024 17:39:15 -0400 Subject: [PATCH] Revert "feat: remove optimizer from main repo" This reverts commit 57768db1 --- Makefile | 11 +- cmd/optimizer.go | 40 ++++ cmd/optimizer_test.go | 44 +++++ docs/docs/cli.md | 1 + docs/docs/configuration.md | 114 ++++++++++++ pkg/config/config.go | 37 ++++ pkg/optimizer/optimizer.go | 227 +++++++++++++++++++++++ pkg/optimizer/optimizer_test.go | 24 +++ pkg/process/process.go | 4 + tests/alert-test.sh | 4 + tests/bird-matrix/build-bird-versions.sh | 4 +- tests/probe-simple.yml | 28 +++ 12 files changed, 535 insertions(+), 3 deletions(-) create mode 100644 cmd/optimizer.go create mode 100644 cmd/optimizer_test.go create mode 100644 pkg/optimizer/optimizer.go create mode 100644 pkg/optimizer/optimizer_test.go create mode 100755 tests/alert-test.sh create mode 100644 tests/probe-simple.yml diff --git a/Makefile b/Makefile index 741f9d75..5b4dbb2d 100644 --- a/Makefile +++ b/Makefile @@ -1,16 +1,25 @@ dep: pip3 install flask +dummy-iface: + # Allow UDP ping. For more information, see https://github.com/go-ping/ping#linux + sudo sysctl -w net.ipv4.ping_group_range="0 2147483647" + sudo ip link add dev dummy0 type dummy + sudo ip addr add dev dummy0 192.0.2.1/24 + sudo ip addr add dev dummy0 2001:db8::1/64 + sudo ip link set dev dummy0 up + peeringdb-test-harness: nohup python3 tests/peeringdb/peeringdb-test-api.py & -test-setup: peeringdb-test-harness +test-setup: dummy-iface peeringdb-test-harness test: export PATHVECTOR_TEST=1 && go test -v -race -coverprofile=coverage.txt -covermode=atomic ./pkg/... ./cmd/... test-teardown: pkill -f tests/peeringdb/peeringdb-test-api.py + sudo ip link del dev dummy0 rm -f nohup.out test-sequence: test-setup test test-teardown diff --git a/cmd/optimizer.go b/cmd/optimizer.go new file mode 100644 index 00000000..0b0ce87c --- /dev/null +++ b/cmd/optimizer.go @@ -0,0 +1,40 @@ +package cmd + +import ( + "fmt" + + log "github.com/sirupsen/logrus" + "github.com/spf13/cobra" + + "github.com/natesales/pathvector/pkg/optimizer" +) + +func init() { + rootCmd.AddCommand(optimizerCmd) +} + +var optimizerCmd = &cobra.Command{ + Use: "optimizer", + Short: "Start optimization daemon", + Run: func(cmd *cobra.Command, args []string) { + c, err := loadConfig() + if err != nil { + log.Fatal(err) + } + + log.Infof("Starting optimizer") + sourceMap := map[string][]string{} // peer name to list of source addresses + for peerName, peerData := range c.Peers { + if peerData.OptimizerProbeSources != nil && len(*peerData.OptimizerProbeSources) > 0 { + sourceMap[fmt.Sprintf("%d%s%s", *peerData.ASN, optimizer.Delimiter, peerName)] = *peerData.OptimizerProbeSources + } + } + log.Debugf("Optimizer probe sources: %v", sourceMap) + if len(sourceMap) == 0 { + log.Fatal("No peers have optimization enabled, exiting now") + } + if err := optimizer.StartProbe(c.Optimizer, sourceMap, c, noConfigure, dryRun); err != nil { + log.Fatal(err) + } + }, +} diff --git a/cmd/optimizer_test.go b/cmd/optimizer_test.go new file mode 100644 index 00000000..8584d580 --- /dev/null +++ b/cmd/optimizer_test.go @@ -0,0 +1,44 @@ +package cmd + +import ( + "os" + "path/filepath" + "strings" + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestOptimizer(t *testing.T) { + args := []string{ + "--verbose", + } + files, err := filepath.Glob("../tests/probe-*.yml") + assert.Nil(t, err) + assert.GreaterOrEqual(t, 1, len(files)) + + for _, testFile := range files { + // Run pathvector to generate config first, so there is a config to modify + rootCmd.SetArgs(append(args, []string{ + "generate", + "--config", testFile, + }...)) + t.Logf("Running pre-optimizer generate: %v", args) + assert.Nil(t, rootCmd.Execute()) + + args = append(args, []string{ + "optimizer", + "--config", testFile, + }...) + t.Logf("running probe integration with args %v", args) + rootCmd.SetArgs(args) + assert.Nil(t, rootCmd.Execute()) + + // Check if local pref is lowered + checkFile, err := os.ReadFile("test-cache/AS65510_EXAMPLE.conf") + assert.Nil(t, err) + if !strings.Contains(string(checkFile), "bgp_local_pref = 80; # pathvector:localpref") { + t.Errorf("expected bgp_local_pref = 80 but not found in file") + } + } +} diff --git a/docs/docs/cli.md b/docs/docs/cli.md index 7a1e58a0..96116793 100644 --- a/docs/docs/cli.md +++ b/docs/docs/cli.md @@ -20,6 +20,7 @@ Available Commands: match Find common IXPs for a given ASN reload Reload a session restart Restart a session + optimizer Start optimization daemon status Show protocol status version Show version information diff --git a/docs/docs/configuration.md b/docs/docs/configuration.md index 392dcd01..bbe2c751 100644 --- a/docs/docs/configuration.md +++ b/docs/docs/configuration.md @@ -405,6 +405,14 @@ Kernel routing configuration options |------|---------|------------| | [Kernel](#kernel-1) | | | +### `optimizer` + +Route optimizer options + +| Type | Default | Validation | +|------|---------|------------| +| [Optimizer](#optimizer-1) | | | + ### `plugins` Plugin-specific configuration @@ -572,6 +580,96 @@ Routing table to read from | string | | | +## Optimizer +### `targets` + +List of probe targets + +| Type | Default | Validation | +|------|---------|------------| +| []string | | | + +### `latency-threshold` + +Maximum allowable latency in milliseconds + +| Type | Default | Validation | +|------|---------|------------| +| uint | 100 | | + +### `packet-loss-threshold` + +Maximum allowable packet loss (percent) + +| Type | Default | Validation | +|------|---------|------------| +| float64 | 0.5 | | + +### `modifier` + +Amount to lower local pref by for depreferred peers + +| Type | Default | Validation | +|------|---------|------------| +| uint | 20 | | + +### `probe-count` + +Number of pings to send in each run + +| Type | Default | Validation | +|------|---------|------------| +| int | 5 | | + +### `probe-timeout` + +Number of seconds to wait before considering the ICMP message unanswered + +| Type | Default | Validation | +|------|---------|------------| +| int | 1 | | + +### `probe-interval` + +Number of seconds wait between each optimizer run + +| Type | Default | Validation | +|------|---------|------------| +| int | 120 | | + +### `cache-size` + +Number of probe results to store per peer + +| Type | Default | Validation | +|------|---------|------------| +| int | 15 | | + +### `probe-udp` + +Use UDP probe (else ICMP) + +| Type | Default | Validation | +|------|---------|------------| +| bool | false | | + +### `alert-script` + +Script to call on optimizer event + +| Type | Default | Validation | +|------|---------|------------| +| string | | | + +### `exit-on-cache-full` + +Exit optimizer on cache full + +| Type | Default | Validation | +|------|---------|------------| +| bool | false | | + + ## Peer ### `template` @@ -1373,6 +1471,22 @@ Configuration to add after the export policy before the final accept/reject term |------|---------|------------| | string | | | +### `probe-sources` + +Optimizer probe source addresses + +| Type | Default | Validation | +|------|---------|------------| +| []string | | | + +### `optimize-inbound` + +Should the optimizer modify inbound policy? + +| Type | Default | Validation | +|------|---------|------------| +| bool | false | | + ## VRRPInstance ### `state` diff --git a/pkg/config/config.go b/pkg/config/config.go index bddfa721..bcbab16c 100644 --- a/pkg/config/config.go +++ b/pkg/config/config.go @@ -1,5 +1,9 @@ package config +import ( + "github.com/go-ping/ping" +) + var defaultTransitASNs = []uint32{ 174, // Cogent // 209, // Qwest (HE carries this on IXPs IPv6 (Jul 12 2018)) @@ -219,6 +223,10 @@ type Peer struct { PreExport *string `yaml:"pre-export" description:"Configuration to add before the export policy" default:"-"` PreExportFinal *string `yaml:"pre-export-final" description:"Configuration to add after the export policy before the final accept/reject term" default:"-"` + // Optimizer + OptimizerProbeSources *[]string `yaml:"probe-sources" description:"Optimizer probe source addresses" default:"-"` + OptimizeInbound *bool `yaml:"optimize-inbound" description:"Should the optimizer modify inbound policy?" default:"false"` + ProtocolName *string `yaml:"-" description:"-" default:"-"` UserSpecifiedName *string `yaml:"-" description:"-" default:"-"` Protocols *[]string `yaml:"-" description:"-" default:"-"` @@ -287,6 +295,33 @@ type Kernel struct { KStatics6 map[string]string `yaml:"-" description:"-"` } +// ProbeResult stores a single probe result +type ProbeResult struct { + Time int64 + Stats ping.Statistics +} + +// Optimizer stores route optimizer configuration +type Optimizer struct { + Targets []string `yaml:"targets" description:"List of probe targets"` + LatencyThreshold uint `yaml:"latency-threshold" description:"Maximum allowable latency in milliseconds" default:"100"` + PacketLossThreshold float64 `yaml:"packet-loss-threshold" description:"Maximum allowable packet loss (percent)" default:"0.5"` + LocalPrefModifier uint `yaml:"modifier" description:"Amount to lower local pref by for depreferred peers" default:"20"` + + PingCount int `yaml:"probe-count" description:"Number of pings to send in each run" default:"5"` + PingTimeout int `yaml:"probe-timeout" description:"Number of seconds to wait before considering the ICMP message unanswered" default:"1"` + Interval int `yaml:"probe-interval" description:"Number of seconds wait between each optimizer run" default:"120"` + CacheSize int `yaml:"cache-size" description:"Number of probe results to store per peer" default:"15"` + + ProbeUDPMode bool `yaml:"probe-udp" description:"Use UDP probe (else ICMP)" default:"false"` + + AlertScript string `yaml:"alert-script" description:"Script to call on optimizer event"` + + ExitOnCacheFull bool `yaml:"exit-on-cache-full" description:"Exit optimizer on cache full" default:"false"` + + Db map[string][]ProbeResult `yaml:"-" description:"-"` +} + // Config stores the global configuration type Config struct { PeeringDBQueryTimeout uint `yaml:"peeringdb-query-timeout" description:"PeeringDB query timeout in seconds" default:"10"` @@ -351,6 +386,7 @@ type Config struct { BFDInstances map[string]*BFDInstance `yaml:"bfd" description:"BFD instances"` MRTInstances map[string]*MRTInstance `yaml:"mrt" description:"MRT instances"` Kernel *Kernel `yaml:"kernel" description:"Kernel routing configuration options"` + Optimizer *Optimizer `yaml:"optimizer" description:"Route optimizer options"` Plugins map[string]string `yaml:"plugins" description:"Plugin-specific configuration"` RTRServerHost string `yaml:"-" description:"-"` @@ -377,6 +413,7 @@ func (c *Config) Init() { c.BFDInstances = map[string]*BFDInstance{} c.MRTInstances = map[string]*MRTInstance{} c.Kernel = &Kernel{} + c.Optimizer = &Optimizer{} c.Plugins = map[string]string{} if c.TransitASNs == nil { diff --git a/pkg/optimizer/optimizer.go b/pkg/optimizer/optimizer.go new file mode 100644 index 00000000..bc9c152a --- /dev/null +++ b/pkg/optimizer/optimizer.go @@ -0,0 +1,227 @@ +package optimizer + +import ( + "fmt" + "net" + "os" + "os/exec" + "path" + "regexp" + "strings" + "time" + + "github.com/go-ping/ping" + log "github.com/sirupsen/logrus" + + "github.com/natesales/pathvector/pkg/bird" + "github.com/natesales/pathvector/pkg/config" + "github.com/natesales/pathvector/pkg/util" +) + +// Delimiter is an arbitrary delimiter used to split ASN from peerName +const Delimiter = "####" + +type peerAvg struct { + Latency time.Duration + PacketLoss float64 +} + +// parsePeerDelimiter parses a ASN/name string and returns the ASN and name +func parsePeerDelimiter(i string) (string, string) { + parts := strings.Split(i, Delimiter) + return parts[0], parts[1] +} + +// sameAddressFamily returns if two strings (IP addresses) are of the same address family +func sameAddressFamily(a string, b string) bool { + a4 := net.ParseIP(a).To4() != nil // Is address A IPv4? + b4 := net.ParseIP(b).To4() != nil // Is address B IPv4? + // Are (both A and B IPv4) or (both A and B not IPv4) + return (a4 && b4) || (!a4 && !b4) +} + +// sendPing sends a probe ping to a specified target +func sendPing(source string, target string, count int, timeout int, udp bool) (*ping.Statistics, error) { + pinger, err := ping.NewPinger(target) + if err != nil { + return &ping.Statistics{}, err + } + + // Set pinger options + pinger.Count = count + pinger.Timeout = time.Duration(timeout) * time.Second + pinger.Source = source + pinger.SetPrivileged(!udp) + + // Run the ping + if err = pinger.Run(); err != nil { + return &ping.Statistics{}, fmt.Errorf("ping: %s", err) + } + + return pinger.Statistics(), nil // nil error +} + +// StartProbe starts the probe scheduler to send probes to all configured targets and logs the results +func StartProbe(o *config.Optimizer, sourceMap map[string][]string, global *config.Config, noConfigure bool, dryRun bool) error { + // Initialize Db map + if o.Db == nil { + o.Db = map[string][]config.ProbeResult{} // peerName to list of probe results + } + + for { + // Loop over every source/target pair + for peerName, sources := range sourceMap { + for _, source := range sources { + for _, target := range o.Targets { + if sameAddressFamily(source, target) { + log.Debugf("[Optimizer] Sending %d ICMP probes src %s dst %s", o.PingCount, source, target) + stats, err := sendPing(source, target, o.PingCount, o.PingTimeout, o.ProbeUDPMode) + if err != nil { + return err + } + + // Check for nil Db entries + if o.Db[peerName] == nil { + o.Db[peerName] = []config.ProbeResult{} + } + + result := config.ProbeResult{ + Time: time.Now().UnixNano(), + Stats: *stats, + } + + log.Debugf("[Optimizer] cache usage: %d/%d", len(o.Db[peerName]), o.CacheSize) + + if len(o.Db[peerName]) < o.CacheSize { + // If the array is not full to CacheSize, append the result + o.Db[peerName] = append(o.Db[peerName], result) + } else { + // If the array is full to probeCacheSize... + if o.ExitOnCacheFull { + return nil + } + // Chop off the first element and append the result + o.Db[peerName] = append(o.Db[peerName][1:], result) + } + } + } + } + } + + // Compute averages + computeMetrics(o, global, noConfigure, dryRun) + + // Sleep before sending the next probe + waitInterval := time.Duration(o.Interval) * time.Second + log.Debugf("[Optimizer] Waiting %s until next probe run", waitInterval) + time.Sleep(waitInterval) + } +} + +// computeMetrics calculates average latency and packet loss +func computeMetrics(o *config.Optimizer, global *config.Config, noConfigure bool, dryRun bool) { + p := map[string]*peerAvg{} + for peer := range o.Db { + if p[peer] == nil { + p[peer] = &peerAvg{Latency: 0, PacketLoss: 0} + } + for result := range o.Db[peer] { + p[peer].PacketLoss += o.Db[peer][result].Stats.PacketLoss + p[peer].Latency += o.Db[peer][result].Stats.AvgRtt + } + + // Calculate average latency and packet loss + totalProbes := float64(len(o.Db[peer])) + p[peer].PacketLoss = p[peer].PacketLoss / totalProbes + p[peer].Latency = p[peer].Latency / time.Duration(totalProbes) + + // Check thresholds to apply optimizations + var alerts []string + peerASN, peerName := parsePeerDelimiter(peer) + if p[peer].PacketLoss >= o.PacketLossThreshold { + alerts = append( + alerts, + fmt.Sprintf("Peer AS%s %s met or exceeded maximum allowable packet loss: %.1f >= %.1f", + peerASN, peerName, p[peer].PacketLoss, o.PacketLossThreshold, + ), + ) + } + if p[peer].Latency >= time.Duration(o.LatencyThreshold)*time.Millisecond { + alerts = append( + alerts, + fmt.Sprintf("Peer AS%s %s met or exceeded maximum allowable latency: %v >= %v", + peerASN, peerName, p[peer].Latency, o.LatencyThreshold, + ), + ) + } + + // If there is at least one alert, + if len(alerts) > 0 { + for _, alert := range alerts { + log.Debugf("[Optimizer] %s", alert) + if o.AlertScript != "" { + //nolint:golint,gosec + birdCmd := exec.Command(o.AlertScript, alert) + birdCmd.Stdout = os.Stdout + birdCmd.Stderr = os.Stderr + if err := birdCmd.Run(); err != nil { + log.Warnf("[Optimizer] alert script: %v", err) + } + } + } + modifyPref(peer, + global.Peers, + o.LocalPrefModifier, + global.CacheDirectory, + global.BIRDDirectory, + global.BIRDSocket, + global.BIRDBinary, + noConfigure, + dryRun, + ) + } + } +} + +func modifyPref( + peerPair string, + peers map[string]*config.Peer, + localPrefModifier uint, + cacheDirectory string, + birdDirectory string, + birdSocket string, + birdBinary string, + noConfigure bool, + dryRun bool, +) { + peerASN, peerName := parsePeerDelimiter(peerPair) + fileName := path.Join(birdDirectory, fmt.Sprintf("AS%s_%s.conf", peerASN, *util.Sanitize(peerName))) + peerFile, err := os.ReadFile(fileName) + if err != nil { + log.Fatalf("reading peer file: %s", err) + } + + peerData := peers[peerName] + if *peerData.OptimizeInbound { + // Calculate new local pref + currentLocalPref := *peerData.LocalPref + newLocalPref := uint(currentLocalPref) - localPrefModifier + + lpRegex := regexp.MustCompile(`bgp_local_pref = .*; # pathvector:localpref`) + modified := lpRegex.ReplaceAllString(string(peerFile), fmt.Sprintf("bgp_local_pref = %d; # pathvector:localpref", newLocalPref)) + + //nolint:golint,gosec + if err := os.WriteFile(fileName, []byte(modified), 0644); err != nil { + log.Fatal(err) + } else { + log.Printf("[Optimizer] Lowered AS%s %s local-pref from %d to %d", peerASN, peerName, currentLocalPref, newLocalPref) + } + } + + // Run BIRD config validation + bird.Validate(birdBinary, birdDirectory) + + if !dryRun { + bird.MoveCacheAndReconfigure(birdDirectory, cacheDirectory, birdSocket, noConfigure) + } +} diff --git a/pkg/optimizer/optimizer_test.go b/pkg/optimizer/optimizer_test.go new file mode 100644 index 00000000..dc3bbf60 --- /dev/null +++ b/pkg/optimizer/optimizer_test.go @@ -0,0 +1,24 @@ +package optimizer + +import ( + "testing" +) + +func TestOptimizerSameAddressFamily(t *testing.T) { + testCases := []struct { + a string + b string + same bool + }{ + {"192.0.2.1", "192.0.2.1", true}, + {"192.0.2.1", "2001:db8::1", false}, + {"2001:db8::1", "2001:db8::1", true}, + {"2001:db8::1", "192.0.2.1", false}, + } + for _, tc := range testCases { + out := sameAddressFamily(tc.a, tc.b) + if out != tc.same { + t.Errorf("a %s b %s expected same %v got %v", tc.a, tc.b, tc.same, out) + } + } +} diff --git a/pkg/process/process.go b/pkg/process/process.go index 2e3e20ab..747ce61a 100644 --- a/pkg/process/process.go +++ b/pkg/process/process.go @@ -284,6 +284,10 @@ func Load(configBlob []byte) (*config.Config, error) { peerData.PreExportFinal = util.Ptr(templateReplacements(*peerData.PreExportFinal, peerData)) } + if peerData.DefaultLocalPref != nil && util.Deref(peerData.OptimizeInbound) { + log.Fatalf("Both DefaultLocalPref and OptimizeInbound set, Pathvector cannot optimize this peer.") + } + if peerData.OnlyAnnounce != nil && util.Deref(peerData.AnnounceAll) { log.Fatalf("[%s] only-announce and announce-all cannot both be true", peerName) } diff --git a/tests/alert-test.sh b/tests/alert-test.sh new file mode 100755 index 00000000..b0bc1aff --- /dev/null +++ b/tests/alert-test.sh @@ -0,0 +1,4 @@ +#!/bin/bash +# Pathvector optimizer alert script + +echo "Optimization Alert: $1" diff --git a/tests/bird-matrix/build-bird-versions.sh b/tests/bird-matrix/build-bird-versions.sh index 62e52b0b..6acf45dc 100755 --- a/tests/bird-matrix/build-bird-versions.sh +++ b/tests/bird-matrix/build-bird-versions.sh @@ -1,5 +1,5 @@ #!/bin/bash -# Build the last 3 bird versions +# Build the last 5 bird versions if [ ! -d bird ]; then git clone https://gitlab.nic.cz/labs/bird.git @@ -7,7 +7,7 @@ fi cd bird || exit 1 -for tag in $(git tag | grep "^v2.0." | sort -V | tail -n 3); do +for tag in $(git tag | grep "^v2.0." | sort -V | tail -n 5); do echo "Building $tag" git reset --hard HEAD git checkout "$tag" diff --git a/tests/probe-simple.yml b/tests/probe-simple.yml new file mode 100644 index 00000000..beb148e5 --- /dev/null +++ b/tests/probe-simple.yml @@ -0,0 +1,28 @@ +asn: 65530 +router-id: 192.0.2.1 +source4: 192.0.2.1 +source6: 2001:db8::1 +prefixes: + - 192.0.2.0/24 + - 2001:db8::/48 +cache-directory: test-cache +peeringdb-url: http://localhost:5000/api + +optimizer: + probe-udp: true + exit-on-cache-full: true + probe-interval: 1 + cache-size: 3 + targets: + - 192.0.2.2 + - 2001:db8::2 + alert-script: ../tests/alert-test.sh + +peers: + Example: + asn: 65510 + neighbors: + - 203.0.113.12 + - 2001:db8::12 + optimize-inbound: true + probe-sources: [ "192.0.2.1", "2001:db8::1" ]