Skip to content

Commit

Permalink
Merge pull request #270 from 3scale-ops/redis-status-metrics
Browse files Browse the repository at this point in the history
Improve redis server status metrics
  • Loading branch information
3scale-robot authored Oct 4, 2023
2 parents e1c07ad + 9e4509f commit 7592d08
Show file tree
Hide file tree
Showing 3 changed files with 87 additions and 33 deletions.
5 changes: 5 additions & 0 deletions controllers/sentinel_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,11 @@ func (r *SentinelReconciler) reconcileStatus(ctx context.Context, instance *saas
log.Error(merr, "DiscoveryError")
}

// publish metrics based on the discovered cluster status
if err := metrics.FromShardedCluster(ctx, cluster, false, instance.GetName()); err != nil {
log.Error(err, "unable to publish redis cluster status metrics")
}

shards := make(saasv1alpha1.MonitoredShards, len(cluster.Shards))
for idx, shard := range cluster.Shards {
shards[idx] = saasv1alpha1.MonitoredShard{
Expand Down
80 changes: 80 additions & 0 deletions pkg/redis/metrics/cluster_status_metrics.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
package metrics

import (
"context"

"github.com/3scale/saas-operator/pkg/redis/client"
"github.com/3scale/saas-operator/pkg/redis/sharded"
"github.com/prometheus/client_golang/prometheus"
"sigs.k8s.io/controller-runtime/pkg/metrics"
)

// Gauge vectors describing the status of a redis cluster. They all share the
// "saas_redis_cluster_status" namespace and carry a "resource" and a "shard"
// label; FromShardedCluster is the function that populates them.
var (
	// serverInfo holds one series per redis server. The server's host,
	// alias, role and read-only setting are exposed as labels.
	serverInfo = prometheus.NewGaugeVec(
		prometheus.GaugeOpts{
			Namespace: "saas_redis_cluster_status",
			Name:      "server_info",
			Help:      "redis cluster member info",
		},
		[]string{
			"resource",
			"shard",
			"redis_server_host",
			"redis_server_alias",
			"role",
			"read_only",
		},
	)

	// roSlaveCount holds, per shard, the number of slaves counted as read-only.
	roSlaveCount = prometheus.NewGaugeVec(
		prometheus.GaugeOpts{
			Namespace: "saas_redis_cluster_status",
			Name:      "ro_slave_count",
			Help:      "read-only slave count",
		},
		[]string{
			"resource",
			"shard",
		},
	)

	// rwSlaveCount holds, per shard, the number of slaves counted as read-write.
	rwSlaveCount = prometheus.NewGaugeVec(
		prometheus.GaugeOpts{
			Namespace: "saas_redis_cluster_status",
			Name:      "rw_slave_count",
			Help:      "read-write slave count",
		},
		[]string{
			"resource",
			"shard",
		},
	)
)

// init adds the cluster status gauges to the controller-runtime metrics
// registry so they are exposed together with the rest of the operator's
// metrics.
func init() {
	metrics.Registry.MustRegister(
		serverInfo,
		roSlaveCount,
		rwSlaveCount,
	)
}

// FromShardedCluster publishes the status of the given sharded redis cluster
// as prometheus gauges, all of them labelled with resource.
//
// When refresh is true, cluster.SentinelDiscover is called first with
// sharded.SlaveReadOnlyDiscoveryOpt; if it fails the error is returned and the
// series published by a previous call are left untouched. When refresh is
// false the cluster is used as received from the caller.
//
// For each server of each shard a "server_info" series is set to 1. For each
// shard the number of slaves whose "slave-read-only" config is "yes" and the
// number of remaining slaves are published as "ro_slave_count" and
// "rw_slave_count" respectively. A server without a "slave-read-only" entry in
// its config is reported as read_only="no".
func FromShardedCluster(ctx context.Context, cluster *sharded.Cluster, refresh bool, resource string) error {
	if refresh {
		if err := cluster.SentinelDiscover(ctx, sharded.SlaveReadOnlyDiscoveryOpt); err != nil {
			return err
		}
	}

	// Drop every series previously published for this resource before
	// publishing the current state. "role" and "read_only" are labels, so
	// without this a failover or a config change would leave the old label
	// combination stuck at 1 next to the new one, and servers or shards that
	// no longer exist would be reported forever.
	byResource := prometheus.Labels{"resource": resource}
	serverInfo.DeletePartialMatch(byResource)
	roSlaveCount.DeletePartialMatch(byResource)
	rwSlaveCount.DeletePartialMatch(byResource)

	for _, shard := range cluster.Shards {
		roslave := 0
		rwslave := 0

		for _, server := range shard.Servers {
			// a missing "slave-read-only" entry is reported as not read-only
			ro, ok := server.Config["slave-read-only"]
			if !ok {
				ro = "no"
			}

			serverInfo.With(prometheus.Labels{
				"resource":           resource,
				"shard":              shard.Name,
				"redis_server_host":  server.ID(),
				"redis_server_alias": server.GetAlias(),
				"role":               string(server.Role),
				"read_only":          ro,
			}).Set(1)

			// only slaves are counted; any value other than "yes" counts
			// the slave as read-write
			if server.Role == client.Slave {
				if ro == "yes" {
					roslave++
				} else {
					rwslave++
				}
			}
		}

		shardLabels := prometheus.Labels{"resource": resource, "shard": shard.Name}
		roSlaveCount.With(shardLabels).Set(float64(roslave))
		rwSlaveCount.With(shardLabels).Set(float64(rwslave))
	}

	return nil
}
35 changes: 2 additions & 33 deletions pkg/redis/metrics/sentinel_metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,13 +16,6 @@ import (
)

var (
serverInfo = prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Name: "server_info",
Namespace: "saas_redis_sentinel",
Help: `"redis server info"`,
},
[]string{"sentinel", "shard", "redis_server", "role"})
linkPendingCommands = prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Name: "link_pending_commands",
Expand All @@ -47,14 +40,6 @@ var (
},
[]string{"sentinel", "shard", "redis_server", "role"},
)
numSlaves = prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Name: "num_slaves",
Namespace: "saas_redis_sentinel",
Help: `"sentinel master <name> num-slaves"`,
},
[]string{"sentinel", "shard", "redis_server", "role"},
)
numOtherSentinels = prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Name: "num_other_sentinels",
Expand Down Expand Up @@ -86,8 +71,8 @@ var (
func init() {
// Register custom metrics with the global prometheus registry
metrics.Registry.MustRegister(
serverInfo, linkPendingCommands, lastOkPingReply, roleReportedTime,
numSlaves, numOtherSentinels, masterLinkDownTime, slaveReplOffset,
linkPendingCommands, lastOkPingReply, roleReportedTime,
numOtherSentinels, masterLinkDownTime, slaveReplOffset,
)
}

Expand Down Expand Up @@ -176,11 +161,9 @@ func (smg *SentinelMetricsGatherer) Stop() {
smg.cancel()
// Reset all gauge metrics so the values related to
// this exporter are deleted from the collection
serverInfo.Reset()
linkPendingCommands.Reset()
lastOkPingReply.Reset()
roleReportedTime.Reset()
numSlaves.Reset()
numOtherSentinels.Reset()
masterLinkDownTime.Reset()
slaveReplOffset.Reset()
Expand All @@ -195,10 +178,6 @@ func (smg *SentinelMetricsGatherer) gatherMetrics(ctx context.Context) error {

for _, master := range mresult {

serverInfo.With(prometheus.Labels{"sentinel": smg.sentinelURI, "shard": master.Name,
"redis_server": fmt.Sprintf("%s:%d", master.IP, master.Port), "role": master.RoleReported,
}).Set(float64(1))

linkPendingCommands.With(prometheus.Labels{"sentinel": smg.sentinelURI, "shard": master.Name,
"redis_server": fmt.Sprintf("%s:%d", master.IP, master.Port), "role": master.RoleReported,
}).Set(float64(master.LinkPendingCommands))
Expand All @@ -211,10 +190,6 @@ func (smg *SentinelMetricsGatherer) gatherMetrics(ctx context.Context) error {
"redis_server": fmt.Sprintf("%s:%d", master.IP, master.Port), "role": master.RoleReported,
}).Set(float64(master.RoleReportedTime))

numSlaves.With(prometheus.Labels{"sentinel": smg.sentinelURI, "shard": master.Name,
"redis_server": fmt.Sprintf("%s:%d", master.IP, master.Port), "role": master.RoleReported,
}).Set(float64(master.NumSlaves))

numOtherSentinels.With(prometheus.Labels{"sentinel": smg.sentinelURI, "shard": master.Name,
"redis_server": fmt.Sprintf("%s:%d", master.IP, master.Port), "role": master.RoleReported,
}).Set(float64(master.NumOtherSentinels))
Expand All @@ -235,10 +210,6 @@ func (smg *SentinelMetricsGatherer) gatherMetrics(ctx context.Context) error {

for _, slave := range sresult {

serverInfo.With(prometheus.Labels{"sentinel": smg.sentinelURI, "shard": master.Name,
"redis_server": fmt.Sprintf("%s:%d", slave.IP, slave.Port), "role": slave.RoleReported,
}).Set(float64(1))

linkPendingCommands.With(prometheus.Labels{"sentinel": smg.sentinelURI, "shard": master.Name,
"redis_server": fmt.Sprintf("%s:%d", slave.IP, slave.Port), "role": slave.RoleReported,
}).Set(float64(slave.LinkPendingCommands))
Expand Down Expand Up @@ -272,11 +243,9 @@ func (smg *SentinelMetricsGatherer) gatherMetrics(ctx context.Context) error {
}

func cleanupMetrics(labels prometheus.Labels) {
serverInfo.Delete(labels)
linkPendingCommands.Delete(labels)
lastOkPingReply.Delete(labels)
roleReportedTime.Delete(labels)
numSlaves.Delete(labels)
numOtherSentinels.Delete(labels)
masterLinkDownTime.Delete(labels)
slaveReplOffset.Delete(labels)
Expand Down

0 comments on commit 7592d08

Please sign in to comment.