From 17dfadcf4181de7735309be56236f4f71c4c65a8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=91=A8=E9=B8=BF=E6=96=8C?=
Date: Wed, 24 Apr 2024 15:32:20 +0800
Subject: [PATCH] chore(general): add healthz-related metrics

---
 cmd/base/context.go | 10 ++--------
 cmd/base/healthz.go | 24 ++++++++++++++++++++++--
 2 files changed, 24 insertions(+), 10 deletions(-)

diff --git a/cmd/base/context.go b/cmd/base/context.go
index 905e2ee46..e7dc00a01 100644
--- a/cmd/base/context.go
+++ b/cmd/base/context.go
@@ -189,7 +189,7 @@ func NewGenericContext(
 			Handler: httpHandler.WithHandleChain(mux),
 			Addr:    genericConf.GenericEndpoint,
 		},
-		healthChecker:       NewHealthzChecker(),
+		healthChecker:       NewHealthzChecker(customMetricsEmitterPool.GetDefaultMetricsEmitter()),
 		DisabledByDefault:   disabledByDefault,
 		MetaInformerFactory: metaInformerFactory,
 		KubeInformerFactory: kubeInformerFactory,
@@ -266,14 +266,8 @@ func (c *GenericContext) StartInformer(ctx context.Context) {
 // serveHealthZHTTP is used to provide health check for current running components.
 func (c *GenericContext) serveHealthZHTTP(mux *http.ServeMux, enableHealthzCheck bool) {
 	mux.HandleFunc(healthZPath, func(w http.ResponseWriter, r *http.Request) {
-		if !enableHealthzCheck {
-			w.WriteHeader(200)
-			_, _ = w.Write([]byte("healthz check is disabled"))
-			return
-		}
-
 		ok, content := c.healthChecker.CheckHealthy()
-		if ok {
+		if ok || !enableHealthzCheck {
 			w.WriteHeader(200)
 			_, _ = w.Write([]byte(content))
 		} else {
diff --git a/cmd/base/healthz.go b/cmd/base/healthz.go
index 51ec15aa6..9105562d6 100644
--- a/cmd/base/healthz.go
+++ b/cmd/base/healthz.go
@@ -19,25 +19,45 @@ package katalyst_base
 import (
 	"context"
 	"encoding/json"
+	"time"
 
 	"go.uber.org/atomic"
+	"k8s.io/apimachinery/pkg/util/wait"
 
+	"github.com/kubewharf/katalyst-core/pkg/metrics"
 	"github.com/kubewharf/katalyst-core/pkg/util/general"
 )
 
+const (
+	syncPeriod              = 30 * time.Second
+	MetricNameUnhealthyRule = "unhealthy_healthz_check_rule"
+)
+
 // HealthzChecker periodically checks the running states
 type HealthzChecker struct {
 	// if unhealthyReason is none-empty, it means some check failed
 	unhealthyReason *atomic.String
+	emitter         metrics.MetricEmitter
 }
 
-func NewHealthzChecker() *HealthzChecker {
+func NewHealthzChecker(emitter metrics.MetricEmitter) *HealthzChecker {
 	return &HealthzChecker{
 		unhealthyReason: atomic.NewString(""),
+		emitter:         emitter,
 	}
 }
 
-func (h *HealthzChecker) Run(_ context.Context) {}
+func (h *HealthzChecker) Run(ctx context.Context) {
+	go wait.Until(func() {
+		results := general.GetRegisterReadinessCheckResult()
+		for key, result := range results {
+			if !result.Ready {
+				_ = h.emitter.StoreInt64(MetricNameUnhealthyRule, 1, metrics.MetricTypeNameRaw,
+					metrics.MetricTag{Key: "rule", Val: string(key)})
+			}
+		}
+	}, syncPeriod, ctx.Done())
+}
 
 // CheckHealthy returns whether the component is healthy.
 func (h *HealthzChecker) CheckHealthy() (bool, string) {