Change the metrics to atomic values and add additional metrics gathering to migrations.

Signed-off-by: Bella Khizgiyaev <bkhizgiy@redhat.com>
bkhizgiy authored and ahadas committed Jun 27, 2024
1 parent 3f107bd commit 0887c14
Showing 3 changed files with 184 additions and 106 deletions.
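The core of the change: the per-provider counter variables (succeededRHV, failedOCP, and so on) are replaced by a map keyed on the full label tuple, accumulated over one polling pass and then published in a single sweep, so the exported gauges always reflect one consistent snapshot. Below is a minimal, self-contained sketch of that pattern; the names and sample data are illustrative, not from the repository:

```go
package main

import (
	"fmt"
	"strings"
)

// publishGauge stands in for GaugeVec.With(labels).Set(value) from
// prometheus/client_golang, which the real controller uses.
func publishGauge(labels map[string]string, value float64) {
	fmt.Println(labels, value)
}

func main() {
	// Pass 1: accumulate counts keyed by the whole label tuple.
	counterMap := make(map[string]float64)
	for _, status := range []string{"Succeeded", "Succeeded", "Failed"} {
		key := fmt.Sprintf("%s|%s|%s|%s", status, "vsphere", "Cold", "Local")
		counterMap[key]++
	}

	// Pass 2: flush the finished map into the gauges in one sweep.
	for key, value := range counterMap {
		parts := strings.Split(key, "|")
		publishGauge(map[string]string{
			"status":   parts[0],
			"provider": parts[1],
			"mode":     parts[2],
			"target":   parts[3],
		}, value)
	}
}
```

One property of this design worth noting: a label combination that is absent from the map on a given pass is simply not re-Set, so its gauge keeps whatever value it last had rather than dropping to zero.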
pkg/monitoring/metrics/forklift-controller/metrics.go: 86 additions & 20 deletions
@@ -6,46 +6,112 @@ import (
 )
 
 const (
-    Succeeded = "succeeded"
-    Failed    = "failed"
-    Executing = "executing"
-    Running   = "running"
-    Pending   = "pending"
-    Canceled  = "canceled"
-    Blocked   = "blocked"
-    Ready     = "ready"
-    Warm      = "warm"
-    Cold      = "cold"
-    Local     = "local"
-    Remote    = "remote"
+    Succeeded = "Succeeded"
+    Failed    = "Failed"
+    Executing = "Executing"
+    Running   = "Running"
+    Pending   = "Pending"
+    Canceled  = "Canceled"
+    Blocked   = "Blocked"
+    Ready     = "Ready"
+    Deleted   = "Deleted"
+    Warm      = "Warm"
+    Cold      = "Cold"
+    Local     = "Local"
+    Remote    = "Remote"
 )
 
 var (
-    // 'status' - [ succeeded, failed ]
+    // 'status' - [ Succeeded, Failed, Executing, Canceled ]
     // 'provider' - [oVirt, vSphere, Openstack, OVA, openshift]
-    migrationGauge = promauto.NewGaugeVec(prometheus.GaugeOpts{
-        Name: "mtv_workload_migrations",
-        Help: "VM Migrations sorted by status and provider type",
+    // 'mode' - [Cold, Warm]
+    // 'target' - [Local, Remote]
+    migrationStatusCounter = promauto.NewGaugeVec(prometheus.GaugeOpts{
+        Name: "mtv_migrations_status",
+        Help: "VM Migrations sorted by status, provider, mode and destination",
     },
         []string{
             "status",
             "provider",
+            "mode",
+            "target",
         },
     )
+
+    // 'status' - [ Succeeded, Failed, Executing, Running, Pending, Canceled, Blocked, Deleted ]
+    // 'provider' - [oVirt, vSphere, Openstack, OVA, openshift]
+    // 'mode' - [Cold, Warm]
+    // 'target' - [Local, Remote]
+    planStatusCounter = promauto.NewGaugeVec(prometheus.GaugeOpts{
+        Name: "mtv_plans_status",
+        Help: "VM migration Plans sorted by status, provider, mode and destination",
+    },
+        []string{
+            "status",
+            "provider",
+            "mode",
+            "target",
+        },
+    )
+
+    // 'provider' - [oVirt, vSphere, Openstack, OVA, openshift]
+    // 'mode' - [Cold, Warm]
+    // 'target' - [Local, Remote]
+    // 'plan' - [Id]
+    migrationDurationGauge = promauto.NewGaugeVec(prometheus.GaugeOpts{
+        Name: "mtv_migration_duration_in_seconds",
+        Help: "Duration of VM migrations in seconds",
+    },
+        []string{"provider", "mode", "target", "plan"},
+    )
+
+    // 'provider' - [oVirt, vSphere, Openstack, OVA, openshift]
+    // 'mode' - [Cold, Warm]
+    // 'target' - [Local, Remote]
+    // 'plan' - [Id]
+    dataTransferredGauge = promauto.NewGaugeVec(prometheus.GaugeOpts{
+        Name: "mtv_data_transferred_in_bytes",
+        Help: "Total data transferred during VM migrations in bytes",
+    },
+        []string{
+            "provider",
+            "mode",
+            "target",
+            "plan",
+        },
+    )
 
-    // 'status' - [ succeeded, failed, Executing, Running, Pending, Canceled, Blocked]
+    // 'status' - [ Succeeded, Failed, Executing, Canceled ]
     // 'provider' - [oVirt, vSphere, Openstack, OVA, openshift]
     // 'mode' - [Cold, Warm]
     // 'target' - [Local, Remote]
-    planStatusCounter = promauto.NewCounterVec(prometheus.CounterOpts{
-        Name: "mtv_workload_plans_status_total",
-        Help: "VM migration Plans sorted by status and provider type",
+    // 'plan' - [Id]
+    migrationPlanCorrelationStatusCounter = promauto.NewGaugeVec(prometheus.GaugeOpts{
+        Name: "mtv_workload_migrations_status_plan_correlation",
+        Help: "VM Migrations by status, provider type and plan",
     },
         []string{
             "status",
             "provider",
             "mode",
             "target",
+            "plan",
         },
     )
+
+    // 'provider' - [oVirt, vSphere, Openstack, OVA, openshift]
+    // 'mode' - [Cold, Warm]
+    // 'target' - [Local, Remote]
+    migrationDurationHistogram = promauto.NewHistogramVec(prometheus.HistogramOpts{
+        Name:    "mtv_migration_duration_in_seconds_bucket",
+        Help:    "Histogram of VM migration duration in seconds",
+        Buckets: []float64{1 * 3600, 2 * 3600, 5 * 3600, 10 * 3600, 24 * 3600, 48 * 3600}, // 1, 2, 5, 10, 24, 48 hours in seconds
+    },
+        []string{
+            "provider",
+            "mode",
+            "target",
+        },
+    )
 )
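Because the new collectors are package-level variables registered through promauto, they can be exercised directly from a test in the same package. Here is a sketch using client_golang's testutil helper; the test name and sample label values are invented for illustration:

```go
package forklift_controller

import (
	"testing"

	"github.com/prometheus/client_golang/prometheus/testutil"
)

// Set one labeled series and read it back; label values follow the
// declared order: status, provider, mode, target.
func TestMigrationStatusGauge(t *testing.T) {
	migrationStatusCounter.WithLabelValues(Succeeded, "vsphere", Cold, Local).Set(3)

	got := testutil.ToFloat64(
		migrationStatusCounter.WithLabelValues(Succeeded, "vsphere", Cold, Local))
	if got != 3 {
		t.Fatalf("expected 3 succeeded migrations, got %v", got)
	}
}
```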
pkg/monitoring/metrics/forklift-controller/migration_metrics.go: 69 additions & 60 deletions
@@ -3,13 +3,16 @@ package forklift_controller
 import (
     "context"
     "fmt"
+    "strings"
     "time"
 
     api "github.com/konveyor/forklift-controller/pkg/apis/forklift/v1beta1"
+    "github.com/prometheus/client_golang/prometheus"
     "sigs.k8s.io/controller-runtime/pkg/client"
 )
 
+var processedSucceededMigrations = make(map[string]struct{})
+
 // Calculate Migrations metrics every 10 seconds
 func RecordMigrationMetrics(c client.Client) {
     go func() {
@@ -26,85 +29,91 @@ func RecordMigrationMetrics(c client.Client) {
                 continue
             }
 
-            // Holding counter vars used to make gauge update "atomic"
-            var succeededRHV, succeededOCP, succeededOVA, succeededVsphere, succeededOpenstack float64
-            var failedRHV, failedOCP, failedOVA, failedVsphere, failedOpenstack float64
+            // Initialize or reset the counter map at the beginning of each iteration
+            counterMap := make(map[string]float64)
 
             // for all migrations, count # in executing, succeeded, failed, canceled
             for _, m := range migrations.Items {
-
                 plan := api.Plan{}
                 err := c.Get(context.TODO(), client.ObjectKey{Namespace: m.Spec.Plan.Namespace, Name: m.Spec.Plan.Name}, &plan)
                 if err != nil {
                     continue
                 }
 
-                provider := api.Provider{}
-                err = c.Get(context.TODO(), client.ObjectKey{Namespace: plan.Spec.Provider.Source.Namespace, Name: plan.Spec.Provider.Source.Name}, &provider)
+                sourceProvider := api.Provider{}
+                err = c.Get(context.TODO(), client.ObjectKey{Namespace: plan.Spec.Provider.Source.Namespace, Name: plan.Spec.Provider.Source.Name}, &sourceProvider)
                 if err != nil {
                     continue
                 }
 
+                destProvider := api.Provider{}
+                err = c.Get(context.TODO(), client.ObjectKey{Namespace: plan.Spec.Provider.Destination.Namespace, Name: plan.Spec.Provider.Destination.Name}, &destProvider)
+                if err != nil {
+                    continue
+                }
+
+                isLocal := destProvider.Spec.URL == ""
+                isWarm := plan.Spec.Warm
+
+                var target, mode, key string
+                if isLocal {
+                    target = Local
+                } else {
+                    target = Remote
+                }
+                if isWarm {
+                    mode = Warm
+                } else {
+                    mode = Cold
+                }
+
+                provider := sourceProvider.Type().String()
+
                 if m.Status.HasCondition(Succeeded) {
-                    switch provider.Type() {
-                    case api.Ova:
-                        succeededOVA++
-                        continue
-                    case api.OVirt:
-                        succeededRHV++
-                        continue
-                    case api.VSphere:
-                        succeededVsphere++
-                        continue
-                    case api.OpenShift:
-                        succeededOCP++
-                        continue
-                    case api.OpenStack:
-                        succeededOpenstack++
-                        continue
-                    }
+                    key = fmt.Sprintf("%s|%s|%s|%s|%s", Succeeded, provider, mode, target, string(plan.UID))
+                    counterMap[key]++
+
+                    startTime := m.Status.Started.Time
+                    endTime := m.Status.Completed.Time
+                    duration := endTime.Sub(startTime).Seconds()
+
+                    var totalDataTransferred float64
+                    for _, vm := range m.Status.VMs {
+                        for _, step := range vm.Pipeline {
+                            if step.Name == "DiskTransferV2v" || step.Name == "DiskTransfer" {
+                                for _, task := range step.Tasks {
+                                    totalDataTransferred += float64(task.Progress.Completed) * 1024 * 1024 // convert MiB to bytes
+                                }
+                            }
+                        }
+                    }
+
+                    // Set the duration and data-transferred metrics, and mark this migration as processed
+                    if _, exists := processedSucceededMigrations[string(m.UID)]; !exists {
+                        migrationDurationGauge.With(prometheus.Labels{"provider": provider, "mode": mode, "target": target, "plan": string(plan.UID)}).Set(duration)
+                        migrationDurationHistogram.With(prometheus.Labels{"provider": provider, "mode": mode, "target": target}).Observe(duration)
+                        dataTransferredGauge.With(prometheus.Labels{"provider": provider, "mode": mode, "target": target, "plan": string(plan.UID)}).Set(totalDataTransferred)
+                        processedSucceededMigrations[string(m.UID)] = struct{}{}
+                    }
                 }
                 if m.Status.HasCondition(Failed) {
-                    switch provider.Type() {
-                    case api.Ova:
-                        failedOVA++
-                        continue
-                    case api.OVirt:
-                        failedRHV++
-                        continue
-                    case api.VSphere:
-                        failedVsphere++
-                        continue
-                    case api.OpenShift:
-                        failedOCP++
-                        continue
-                    case api.OpenStack:
-                        failedOpenstack++
-                        continue
-                    }
+                    key = fmt.Sprintf("%s|%s|%s|%s|%s", Failed, provider, mode, target, string(plan.UID))
+                    counterMap[key]++
+                }
+                if m.Status.HasCondition(Executing) {
+                    key = fmt.Sprintf("%s|%s|%s|%s|%s", Executing, provider, mode, target, string(plan.UID))
+                    counterMap[key]++
+                }
+                if m.Status.HasCondition(Canceled) {
+                    key = fmt.Sprintf("%s|%s|%s|%s|%s", Canceled, provider, mode, target, string(plan.UID))
+                    counterMap[key]++
                 }
             }
 
-            migrationGauge.With(
-                prometheus.Labels{"status": Succeeded, "provider": api.OVirt.String()}).Set(succeededRHV)
-            migrationGauge.With(
-                prometheus.Labels{"status": Succeeded, "provider": api.OpenShift.String()}).Set(succeededOCP)
-            migrationGauge.With(
-                prometheus.Labels{"status": Succeeded, "provider": api.OpenStack.String()}).Set(succeededOpenstack)
-            migrationGauge.With(
-                prometheus.Labels{"status": Succeeded, "provider": api.Ova.String()}).Set(succeededOVA)
-            migrationGauge.With(
-                prometheus.Labels{"status": Succeeded, "provider": api.VSphere.String()}).Set(succeededVsphere)
-            migrationGauge.With(
-                prometheus.Labels{"status": Failed, "provider": api.OVirt.String()}).Set(failedRHV)
-            migrationGauge.With(
-                prometheus.Labels{"status": Failed, "provider": api.OpenShift.String()}).Set(failedOCP)
-            migrationGauge.With(
-                prometheus.Labels{"status": Failed, "provider": api.OpenStack.String()}).Set(failedOpenstack)
-            migrationGauge.With(
-                prometheus.Labels{"status": Failed, "provider": api.Ova.String()}).Set(failedOVA)
-            migrationGauge.With(
-                prometheus.Labels{"status": Failed, "provider": api.VSphere.String()}).Set(failedVsphere)
+            for key, value := range counterMap {
+                parts := strings.Split(key, "|")
+                migrationStatusCounter.With(prometheus.Labels{"status": parts[0], "provider": parts[1], "mode": parts[2], "target": parts[3]}).Set(value)
+                migrationPlanCorrelationStatusCounter.With(prometheus.Labels{"status": parts[0], "provider": parts[1], "mode": parts[2], "target": parts[3], "plan": parts[4]}).Set(value)
+            }
         }
     }()
 }
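The counter map above encodes each label tuple as a single pipe-delimited string and decodes it again with strings.Split when flushing to the gauge vectors. This round-trips only as long as no label value contains the "|" separator, which holds for the provider types, modes, targets, and Kubernetes UIDs involved. A standalone illustration of the encode/decode round trip:

```go
package main

import (
	"fmt"
	"strings"
)

func main() {
	// Encode the label tuple into one map key, as the metrics loop does.
	key := fmt.Sprintf("%s|%s|%s|%s|%s",
		"Succeeded", "vsphere", "Warm", "Remote", "0f5c-example-uid")

	// Decode it when flushing the counter map into the gauge vectors.
	parts := strings.Split(key, "|")
	fmt.Println(parts[0], parts[4]) // Succeeded 0f5c-example-uid

	// A label value containing "|" would shift every later field,
	// so the scheme relies on the label alphabet staying pipe-free.
}
```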