From 580924e63b5a93835fa1f513191268eac436d3e1 Mon Sep 17 00:00:00 2001
From: Stefan Prodan
Date: Mon, 29 Oct 2018 21:44:43 +0200
Subject: [PATCH] Record canary duration and total

- add Prometheus metrics canary_duration_seconds and canary_total
---
 pkg/controller/recorder.go  | 46 ++++++++++++++++++++++++++++++-------
 pkg/controller/scheduler.go | 34 +++++++++++++++++++--------
 2 files changed, 63 insertions(+), 17 deletions(-)

diff --git a/pkg/controller/recorder.go b/pkg/controller/recorder.go
index cf967e122..e293989f2 100644
--- a/pkg/controller/recorder.go
+++ b/pkg/controller/recorder.go
@@ -2,6 +2,7 @@ package controller
 
 import (
 	"fmt"
+	"time"
 
 	"github.com/prometheus/client_golang/prometheus"
 	flaggerv1 "github.com/stefanprodan/flagger/pkg/apis/flagger/v1alpha1"
@@ -9,12 +10,27 @@ import (
 
 // CanaryRecorder records the canary analysis as Prometheus metrics
 type CanaryRecorder struct {
-	status *prometheus.GaugeVec
-	weight *prometheus.GaugeVec
+	duration *prometheus.HistogramVec
+	total    *prometheus.GaugeVec
+	status   *prometheus.GaugeVec
+	weight   *prometheus.GaugeVec
 }
 
 // NewCanaryRecorder creates a new recorder and registers the Prometheus metrics
 func NewCanaryRecorder(register bool) CanaryRecorder {
+	duration := prometheus.NewHistogramVec(prometheus.HistogramOpts{
+		Subsystem: controllerAgentName,
+		Name:      "canary_duration_seconds",
+		Help:      "Seconds spent performing canary analysis.",
+		Buckets:   prometheus.DefBuckets,
+	}, []string{"name", "namespace"})
+
+	total := prometheus.NewGaugeVec(prometheus.GaugeOpts{
+		Subsystem: controllerAgentName,
+		Name:      "canary_total",
+		Help:      "Total number of canary object",
+	}, []string{"namespace"})
+
 	// 0 - running, 1 - successful, 2 - failed
 	status := prometheus.NewGaugeVec(prometheus.GaugeOpts{
 		Subsystem: controllerAgentName,
@@ -29,18 +45,32 @@ func NewCanaryRecorder(register bool) CanaryRecorder {
 	}, []string{"workload", "namespace"})
 
 	if register {
+		prometheus.MustRegister(duration)
+		prometheus.MustRegister(total)
 		prometheus.MustRegister(status)
 		prometheus.MustRegister(weight)
 	}
 
 	return CanaryRecorder{
-		status: status,
-		weight: weight,
+		duration: duration,
+		total:    total,
+		status:   status,
+		weight:   weight,
 	}
 }
 
-// RecordStatus sets the last known canary analysis status
-func (cr *CanaryRecorder) RecordStatus(cd *flaggerv1.Canary) {
+// SetDuration sets the time spent in seconds performing canary analysis
+func (cr *CanaryRecorder) SetDuration(cd *flaggerv1.Canary, duration time.Duration) {
+	cr.duration.WithLabelValues(cd.Spec.TargetRef.Name, cd.Namespace).Observe(duration.Seconds())
+}
+
+// SetTotal sets the total number of canaries per namespace
+func (cr *CanaryRecorder) SetTotal(namespace string, total int) {
+	cr.total.WithLabelValues(namespace).Set(float64(total))
+}
+
+// SetStatus sets the last known canary analysis status
+func (cr *CanaryRecorder) SetStatus(cd *flaggerv1.Canary) {
 	status := 1
 	switch cd.Status.State {
 	case "running":
@@ -53,8 +83,8 @@ func (cr *CanaryRecorder) RecordStatus(cd *flaggerv1.Canary) {
 	cr.status.WithLabelValues(cd.Spec.TargetRef.Name, cd.Namespace).Set(float64(status))
 }
 
-// RecordWeight sets the weight values for primary and canary destinations
-func (cr *CanaryRecorder) RecordWeight(cd *flaggerv1.Canary, primary int, canary int) {
+// SetWeight sets the weight values for primary and canary destinations
+func (cr *CanaryRecorder) SetWeight(cd *flaggerv1.Canary, primary int, canary int) {
 	cr.weight.WithLabelValues(fmt.Sprintf("%s-primary", cd.Spec.TargetRef.Name), cd.Namespace).Set(float64(primary))
 	cr.weight.WithLabelValues(cd.Spec.TargetRef.Name, cd.Namespace).Set(float64(canary))
 }
diff --git a/pkg/controller/scheduler.go b/pkg/controller/scheduler.go
index 07e8d7886..c0998257b 100644
--- a/pkg/controller/scheduler.go
+++ b/pkg/controller/scheduler.go
@@ -9,16 +9,28 @@ import (
 )
 
 func (c *Controller) scheduleCanaries() {
+	stats := make(map[string]int)
 	c.canaries.Range(func(key interface{}, value interface{}) bool {
 		r := value.(*flaggerv1.Canary)
 		if r.Spec.TargetRef.Kind == "Deployment" {
 			go c.advanceCanary(r.Name, r.Namespace)
 		}
+
+		t, ok := stats[r.Namespace]
+		if !ok {
+			stats[r.Namespace] = 1
+		} else {
+			stats[r.Namespace] = t + 1
+		}
 		return true
 	})
+	for k, v := range stats {
+		c.recorder.SetTotal(k, v)
+	}
 }
 
 func (c *Controller) advanceCanary(name string, namespace string) {
+	begin := time.Now()
 	// check if the canary exists
 	cd, err := c.flaggerClient.FlaggerV1alpha1().Canaries(namespace).Get(name, v1.GetOptions{})
 	if err != nil {
@@ -58,13 +70,17 @@ func (c *Controller) advanceCanary(name string, namespace string) {
 		return
 	}
 
-	c.recorder.RecordWeight(cd, primaryRoute.Weight, canaryRoute.Weight)
+	c.recorder.SetWeight(cd, primaryRoute.Weight, canaryRoute.Weight)
 
 	// check if canary analysis should start (canary revision has changes) or continue
 	if ok := c.checkCanaryStatus(cd, c.deployer); !ok {
 		return
 	}
 
+	defer func() {
+		c.recorder.SetDuration(cd, time.Since(begin))
+	}()
+
 	// check if the number of failed checks reached the threshold
 	if cd.Status.State == "running" && cd.Status.FailedChecks >= cd.Spec.CanaryAnalysis.Threshold {
 		c.recordEventWarningf(cd, "Rolling back %s.%s failed checks threshold reached %v",
@@ -78,7 +94,7 @@ func (c *Controller) advanceCanary(name string, namespace string) {
 			return
 		}
 
-		c.recorder.RecordWeight(cd, primaryRoute.Weight, canaryRoute.Weight)
+		c.recorder.SetWeight(cd, primaryRoute.Weight, canaryRoute.Weight)
 		c.recordEventWarningf(cd, "Canary failed! Scaling down %s.%s",
 			cd.Spec.TargetRef.Name, cd.Namespace)
 
@@ -93,7 +109,7 @@ func (c *Controller) advanceCanary(name string, namespace string) {
 			c.logger.Errorf("%v", err)
 			return
 		}
-		c.recorder.RecordStatus(cd)
+		c.recorder.SetStatus(cd)
 		return
 	}
 
@@ -127,7 +143,7 @@ func (c *Controller) advanceCanary(name string, namespace string) {
 			return
 		}
 
-		c.recorder.RecordWeight(cd, primaryRoute.Weight, canaryRoute.Weight)
+		c.recorder.SetWeight(cd, primaryRoute.Weight, canaryRoute.Weight)
 		c.recordEventInfof(cd, "Advance %s.%s canary weight %v", cd.Name, cd.Namespace, canaryRoute.Weight)
 
 		// promote canary
@@ -149,7 +165,7 @@ func (c *Controller) advanceCanary(name string, namespace string) {
 			return
 		}
 
-		c.recorder.RecordWeight(cd, primaryRoute.Weight, canaryRoute.Weight)
+		c.recorder.SetWeight(cd, primaryRoute.Weight, canaryRoute.Weight)
 		c.recordEventInfof(cd, "Promotion completed! Scaling down %s.%s", cd.Spec.TargetRef.Name, cd.Namespace)
 
 		// shutdown canary
@@ -163,13 +179,13 @@ func (c *Controller) advanceCanary(name string, namespace string) {
 			c.recordEventWarningf(cd, "%v", err)
 			return
 		}
-		c.recorder.RecordStatus(cd)
+		c.recorder.SetStatus(cd)
 	}
 }
 
 func (c *Controller) checkCanaryStatus(cd *flaggerv1.Canary, deployer CanaryDeployer) bool {
+	c.recorder.SetStatus(cd)
 	if cd.Status.State == "running" {
-		c.recorder.RecordStatus(cd)
 		return true
 	}
 
@@ -178,7 +194,7 @@ func (c *Controller) checkCanaryStatus(cd *flaggerv1.Canary, deployer CanaryDepl
 			c.logger.Errorf("%v", err)
 			return false
 		}
-		c.recorder.RecordStatus(cd)
+		c.recorder.SetStatus(cd)
 		c.recordEventInfof(cd, "Initialization done! %s.%s", cd.Name, cd.Namespace)
 		return false
 	}
@@ -193,7 +209,7 @@ func (c *Controller) checkCanaryStatus(cd *flaggerv1.Canary, deployer CanaryDepl
 			c.logger.Errorf("%v", err)
 			return false
 		}
-		c.recorder.RecordStatus(cd)
+		c.recorder.SetStatus(cd)
 		return false
 	}
 	return false