Skip to content

Commit

Permalink
fix: close telemetry handler before crashing
Browse files Browse the repository at this point in the history
Signed-off-by: Hunter Gregory <42728408+huntergregory@users.noreply.github.com>
  • Loading branch information
huntergregory committed Jan 8, 2025
1 parent 4dac095 commit 1249e1d
Show file tree
Hide file tree
Showing 3 changed files with 15 additions and 0 deletions.
3 changes: 3 additions & 0 deletions npm/cmd/start.go
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,7 @@ func start(config npmconfig.Config, flags npmconfig.Flags) error {
nodeIP, err = util.NodeIP()
if err != nil {
metrics.SendErrorLogAndMetric(util.NpmID, "error: failed to get node IP while booting up: %v", err)
metrics.Close()
return fmt.Errorf("failed to get node IP while booting up: %w", err)
}
klog.Infof("node IP is %s", nodeIP)
Expand All @@ -197,6 +198,7 @@ func start(config npmconfig.Config, flags npmconfig.Flags) error {
dp, err = dataplane.NewDataPlane(models.GetNodeName(), common.NewIOShim(), npmV2DataplaneCfg, stopChannel)
if err != nil {
metrics.SendErrorLogAndMetric(util.NpmID, "error: failed to create dataplane with error %v", err)
metrics.Close()
return fmt.Errorf("failed to create dataplane with error %w", err)
}
dp.RunPeriodicTasks()
Expand All @@ -210,6 +212,7 @@ func start(config npmconfig.Config, flags npmconfig.Flags) error {
metrics.SendLog(util.NpmID, "starting NPM", metrics.PrintLog)
if err = npMgr.Start(config, stopChannel); err != nil {
metrics.SendErrorLogAndMetric(util.NpmID, "Failed to start NPM due to %+v", err)
metrics.Close()
return fmt.Errorf("failed to start with err: %w", err)
}

Expand Down
11 changes: 11 additions & 0 deletions npm/metrics/ai-utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@ import (
"k8s.io/klog"
)

const telemetryCloseWaitTimeSeconds = 10

var (
th aitelemetry.TelemetryHandle
npmVersion int
Expand Down Expand Up @@ -54,6 +56,15 @@ func CreateTelemetryHandle(npmVersionNum int, imageVersion, aiMetadata string) e
return nil
}

// Close shuts down the package-level telemetry handle so that pending
// telemetry data gets a chance to be sent before the process exits.
// It is a no-op when no telemetry handle has been created.
func Close() {
	if th != nil {
		// NOTE(review): the argument is presumably the maximum number of
		// seconds the handle waits while flushing — confirm against aitelemetry.
		th.Close(telemetryCloseWaitTimeSeconds)
	}
}

// SendErrorLogAndMetric sends a metric through AI telemetry and sends a log to the Kusto Messages table
func SendErrorLogAndMetric(operationID int, format string, args ...interface{}) {
// Send error metrics
Expand Down
1 change: 1 addition & 0 deletions npm/pkg/dataplane/ipsets/ipsetmanager_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -422,6 +422,7 @@ func (iMgr *IPSetManager) applyIPSets() error {
msg := fmt.Sprintf("exceeded max consecutive failures (%d) when applying ipsets. final error: %s", maxConsecutiveFailures, restoreError.Error())
klog.Error(msg)
metrics.SendErrorLogAndMetric(util.IpsmID, msg)
metrics.Close()
panic(msg)
}

Expand Down

0 comments on commit 1249e1d

Please sign in to comment.