From 7320f9e73d5389839512c44e05a90a567484cc5e Mon Sep 17 00:00:00 2001 From: Madhur Shrimal Date: Wed, 23 Oct 2024 10:50:48 -0400 Subject: [PATCH] feat: add metrics middleware (#5) * feat: add metrics middleware * fmt: imports * dash: update metrics dashboard * rename to metrics --- internal/metrics/metrics.go | 37 ++++++------------------ internal/middleware/metrics.go | 43 ++++++++++++++++++++++++++++ internal/server/server.go | 8 +++++- internal/services/kms/kms.go | 15 ---------- internal/services/signing/signing.go | 5 ---- monitoring/signer.json | 43 +++++++--------------------- 6 files changed, 69 insertions(+), 82 deletions(-) create mode 100644 internal/middleware/metrics.go diff --git a/internal/metrics/metrics.go b/internal/metrics/metrics.go index de88255..30036e0 100644 --- a/internal/metrics/metrics.go +++ b/internal/metrics/metrics.go @@ -3,28 +3,22 @@ package metrics import "github.com/prometheus/client_golang/prometheus" const ( - SuccessLabel = "success" - FailureLabel = "failure" - SubsystemRPCServer = "rpc_server" MetricRequestTotal = "request_total" MetricRequestDurationSeconds = "request_duration_seconds" - MetricResponseTotal = "response_total" MethodLabelName = "method" - StatusLabelName = "status" + CodeLabelName = "code" ) type Recorder interface { - RecordRPCServerRequest(method string) func() - RecordRPCServerResponse(method string, status string) + RecordRPCServerRequest(method string) func(code string) } type RPCServerMetrics struct { RPCServerRequestTotal *prometheus.CounterVec RPCServerRequestDurationSeconds *prometheus.SummaryVec - RPCServerResponseTotal *prometheus.CounterVec } func NewRPCServerMetrics(ns string, registry *prometheus.Registry) *RPCServerMetrics { @@ -33,8 +27,8 @@ func NewRPCServerMetrics(ns string, registry *prometheus.Registry) *RPCServerMet Namespace: ns, Subsystem: SubsystemRPCServer, Name: MetricRequestTotal, - Help: "Total number of RPC server requests.", - }, []string{MethodLabelName}), + Help: "Total number of RPC server requests with status codes", + }, []string{MethodLabelName, CodeLabelName}), RPCServerRequestDurationSeconds: prometheus.NewSummaryVec(prometheus.SummaryOpts{ Namespace: ns, Subsystem: SubsystemRPCServer, @@ -42,41 +36,28 @@ func NewRPCServerMetrics(ns string, registry *prometheus.Registry) *RPCServerMet Help: "Duration of RPC server requests in seconds.", Objectives: map[float64]float64{0.5: 0.05, 0.9: 0.01, 0.95: 0.01, 0.99: 0.001}, }, []string{MethodLabelName}), - RPCServerResponseTotal: prometheus.NewCounterVec(prometheus.CounterOpts{ - Namespace: ns, - Subsystem: SubsystemRPCServer, - Name: MetricResponseTotal, - Help: "Total number of RPC server responses.", - }, []string{MethodLabelName, StatusLabelName}), } registry.MustRegister(m.RPCServerRequestTotal) registry.MustRegister(m.RPCServerRequestDurationSeconds) - registry.MustRegister(m.RPCServerResponseTotal) return m } -func (m *RPCServerMetrics) RecordRPCServerRequest(method string) func() { - m.RPCServerRequestTotal.WithLabelValues(method).Inc() +func (m *RPCServerMetrics) RecordRPCServerRequest(method string) func(code string) { timer := prometheus.NewTimer(m.RPCServerRequestDurationSeconds.WithLabelValues(method)) - return func() { + return func(code string) { + m.RPCServerRequestTotal.WithLabelValues(method, code).Inc() timer.ObserveDuration() } } -func (m *RPCServerMetrics) RecordRPCServerResponse(method string, status string) { - m.RPCServerResponseTotal.WithLabelValues(method, status).Inc() -} - type NoopRPCMetrics struct{} func NewNoopRPCMetrics() *NoopRPCMetrics { return &NoopRPCMetrics{} } -func (NoopRPCMetrics) RecordRPCServerRequest(method string) func() { - return func() {} +func (NoopRPCMetrics) RecordRPCServerRequest(method string) func(code string) { + return func(code string) {} } -func (NoopRPCMetrics) RecordRPCServerResponse(method string, status string) {} - var _ Recorder = (*NoopRPCMetrics)(nil) diff --git a/internal/middleware/metrics.go b/internal/middleware/metrics.go new file mode 100644 index 0000000..eb64ad0 --- /dev/null +++ b/internal/middleware/metrics.go @@ -0,0 +1,43 @@ +package middleware + +import ( + "context" + + "github.com/Layr-Labs/cerberus/internal/metrics" + "github.com/prometheus/client_golang/prometheus" + "google.golang.org/grpc" + "google.golang.org/grpc/status" +) + +type MetricsMiddleware struct { + registry *prometheus.Registry + recorder metrics.Recorder +} + +func NewMetricsMiddleware( + registry *prometheus.Registry, + recorder metrics.Recorder, +) *MetricsMiddleware { + return &MetricsMiddleware{ + registry: registry, + recorder: recorder, + } +} + +// UnaryServerInterceptor returns a new unary server interceptor for metrics +func (m *MetricsMiddleware) UnaryServerInterceptor() grpc.UnaryServerInterceptor { + return func(ctx context.Context, req interface{}, info *grpc.UnaryServerInfo, handler grpc.UnaryHandler) (interface{}, error) { + recordDuration := m.recorder.RecordRPCServerRequest(info.FullMethod) + + // Handle request + resp, err := handler(ctx, req) + + // Get status code + code := status.Code(err) + + // Record response + recordDuration(code.String()) + + return resp, err + } +} diff --git a/internal/server/server.go b/internal/server/server.go index cc0058b..26cf7f7 100644 --- a/internal/server/server.go +++ b/internal/server/server.go @@ -16,6 +16,7 @@ import ( "github.com/Layr-Labs/cerberus/internal/configuration" "github.com/Layr-Labs/cerberus/internal/metrics" + "github.com/Layr-Labs/cerberus/internal/middleware" "github.com/Layr-Labs/cerberus/internal/services/kms" "github.com/Layr-Labs/cerberus/internal/services/signing" "github.com/Layr-Labs/cerberus/internal/store/filesystem" @@ -35,7 +36,8 @@ func Start(config *configuration.Configuration, logger *slog.Logger) { registry := prometheus.NewRegistry() registry.MustRegister(collectors.NewProcessCollector(collectors.ProcessCollectorOpts{})) registry.MustRegister(collectors.NewGoCollector()) - rpcMetrics := metrics.NewRPCServerMetrics("remote_bls", registry) + rpcMetrics := metrics.NewRPCServerMetrics("cerberus", registry) + go startMetricsServer(registry, config.MetricsPort, logger) keystore := filesystem.NewStore(config.KeystoreDir, logger) @@ -51,6 +53,10 @@ func Start(config *configuration.Configuration, logger *slog.Logger) { opts = append(opts, grpc.Creds(creds)) } + // Register metrics middleware + metricsMiddleware := middleware.NewMetricsMiddleware(registry, rpcMetrics) + opts = append(opts, grpc.UnaryInterceptor(metricsMiddleware.UnaryServerInterceptor())) + s := grpc.NewServer(opts...) kmsService := kms.NewService(config, keystore, logger, rpcMetrics) signingService := signing.NewService(config, keystore, logger, rpcMetrics) diff --git a/internal/services/kms/kms.go b/internal/services/kms/kms.go index 0ef35e7..936ef0e 100644 --- a/internal/services/kms/kms.go +++ b/internal/services/kms/kms.go @@ -48,22 +48,18 @@ func (k *Service) GenerateKeyPair( ctx context.Context, req *v1.GenerateKeyPairRequest, ) (*v1.GenerateKeyPairResponse, error) { - observe := k.metrics.RecordRPCServerRequest("kms/GenerateKeyPair") - defer observe() password := req.GetPassword() // Generate a new BLS key pair keyPair, err := keystore.NewKeyPair(password, mnemonic.English) if err != nil { k.logger.Error(fmt.Sprintf("Failed to generate BLS key pair: %v", err)) - k.metrics.RecordRPCServerResponse("kms/GenerateKeyPair", metrics.FailureLabel) return nil, status.Error(codes.Internal, err.Error()) } pubKeyHex, err := k.store.StoreKey(ctx, keyPair) if err != nil { k.logger.Error(fmt.Sprintf("Failed to save BLS key pair to file: %v", err)) - k.metrics.RecordRPCServerResponse("kms/GenerateKeyPair", metrics.FailureLabel) return nil, status.Error(codes.Internal, err.Error()) } @@ -72,7 +68,6 @@ func (k *Service) GenerateKeyPair( copy(pkBytesSlice, keyPair.PrivateKey[:]) privKeyHex := common.Trim0x(hex.EncodeToString(pkBytesSlice)) - k.metrics.RecordRPCServerResponse("kms/GenerateKeyPair", metrics.SuccessLabel) return &v1.GenerateKeyPairResponse{ PublicKey: pubKeyHex, PrivateKey: privKeyHex, @@ -84,8 +79,6 @@ func (k *Service) ImportKey( ctx context.Context, req *v1.ImportKeyRequest, ) (*v1.ImportKeyResponse, error) { - observe := k.metrics.RecordRPCServerRequest("kms/ImportKey") - defer observe() pkString := req.GetPrivateKey() password := req.GetPassword() pkMnemonic := req.GetMnemonic() @@ -96,7 +89,6 @@ func (k *Service) ImportKey( ks, err := keystore.NewKeyPairFromMnemonic(pkMnemonic, password) if err != nil { k.logger.Error(fmt.Sprintf("Failed to import key pair from mnemonic: %v", err)) - k.metrics.RecordRPCServerResponse("kms/ImportKey", metrics.FailureLabel) return nil, status.Error(codes.InvalidArgument, err.Error()) } pkBytes = ks.PrivateKey @@ -111,7 +103,6 @@ func (k *Service) ImportKey( pkBytes, err = hex.DecodeString(pkHex) if err != nil { k.logger.Error(fmt.Sprintf("Failed to import key pair from string: %v", err)) - k.metrics.RecordRPCServerResponse("kms/ImportKey", metrics.FailureLabel) return nil, status.Error(codes.InvalidArgument, err.Error()) } } @@ -123,11 +114,9 @@ func (k *Service) ImportKey( ) if err != nil { k.logger.Error(fmt.Sprintf("Failed to save BLS key pair to file: %v", err)) - k.metrics.RecordRPCServerResponse("kms/ImportKey", metrics.FailureLabel) return nil, status.Error(codes.Internal, err.Error()) } - k.metrics.RecordRPCServerResponse("kms/ImportKey", metrics.SuccessLabel) return &v1.ImportKeyResponse{PublicKey: pubKeyHex}, nil } @@ -135,15 +124,11 @@ func (k *Service) ListKeys( ctx context.Context, req *v1.ListKeysRequest, ) (*v1.ListKeysResponse, error) { - observe := k.metrics.RecordRPCServerRequest("kms/ListKeys") - defer observe() pubKeys, err := k.store.ListKeys(ctx) if err != nil { k.logger.Error(fmt.Sprintf("Failed to list keys: %v", err)) - k.metrics.RecordRPCServerResponse("kms/ListKeys", metrics.FailureLabel) return nil, status.Error(codes.Internal, err.Error()) } - k.metrics.RecordRPCServerResponse("kms/ListKeys", metrics.SuccessLabel) return &v1.ListKeysResponse{PublicKeys: pubKeys}, nil } diff --git a/internal/services/signing/signing.go b/internal/services/signing/signing.go index 7d24c61..3609e6c 100644 --- a/internal/services/signing/signing.go +++ b/internal/services/signing/signing.go @@ -45,8 +45,6 @@ func (s *Service) SignGeneric( ctx context.Context, req *v1.SignGenericRequest, ) (*v1.SignGenericResponse, error) { - observe := s.metrics.RecordRPCServerRequest("signing/SignGeneric") - defer observe() // Take the public key and data from the request pubKeyHex := common.Trim0x(req.GetPublicKey()) password := req.GetPassword() @@ -56,7 +54,6 @@ func (s *Service) SignGeneric( blsKey, err := s.store.RetrieveKey(ctx, pubKeyHex, password) if err != nil { s.logger.Error(fmt.Sprintf("Failed to retrieve key: %v", err)) - s.metrics.RecordRPCServerResponse("signing/SignGeneric", metrics.FailureLabel) return nil, status.Error(codes.Internal, err.Error()) } s.keyCache[pubKeyHex] = blsKey @@ -66,7 +63,6 @@ func (s *Service) SignGeneric( data := req.GetData() if len(data) > 32 { s.logger.Error("Data is too long, must be 32 bytes") - s.metrics.RecordRPCServerResponse("signing/SignGeneric", metrics.FailureLabel) return nil, status.Error(codes.InvalidArgument, "data is too long, must be 32 bytes") } @@ -75,6 +71,5 @@ func (s *Service) SignGeneric( // Sign the data with the private key sig := blsKey.SignMessage(byteArray) s.logger.Info(fmt.Sprintf("Signed a message successfully using %s", req.PublicKey)) - s.metrics.RecordRPCServerResponse("signing/SignGeneric", metrics.SuccessLabel) return &v1.SignGenericResponse{Signature: sig.Serialize()}, nil } diff --git a/monitoring/signer.json b/monitoring/signer.json index 3ec32a6..e639d9c 100644 --- a/monitoring/signer.json +++ b/monitoring/signer.json @@ -110,7 +110,7 @@ }, "disableTextWrap": false, "editorMode": "builder", - "expr": "remote_bls_rpc_server_request_duration_seconds{method=\"signing/SignGeneric\", quantile=\"0.95\"}", + "expr": "cerberus_rpc_server_request_duration_seconds{quantile=\"0.95\", method=\"/signer.v1.Signer/SignGeneric\"}", "fullMetaSearch": false, "includeNullMetadata": false, "instant": false, @@ -212,7 +212,7 @@ }, "disableTextWrap": false, "editorMode": "builder", - "expr": "rate(remote_bls_rpc_server_request_total{method=\"signing/SignGeneric\"}[$__range])", + "expr": "rate(cerberus_rpc_server_request_total{method=\"/signer.v1.Signer/SignGeneric\"}[$__range])", "fullMetaSearch": false, "includeNullMetadata": true, "instant": false, @@ -240,6 +240,7 @@ "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", + "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "barWidthFactor": 0.6, @@ -285,34 +286,10 @@ } }, "overrides": [ - { - "__systemRef": "hideSeriesFrom", - "matcher": { - "id": "byNames", - "options": { - "mode": "exclude", - "names": [ - "C {__name__=\"remote_bls_rpc_server_response_total\", instance=\"localhost:9091\", job=\"prometheus\", method=\"signing/SignGeneric\", status=\"success\"}" - ], - "prefix": "All except:", - "readOnly": true - } - }, - "properties": [ - { - "id": "custom.hideFrom", - "value": { - "legend": false, - "tooltip": false, - "viz": true - } - } - ] - }, { "matcher": { "id": "byName", - "options": "C {__name__=\"remote_bls_rpc_server_response_total\", instance=\"localhost:9091\", job=\"prometheus\", method=\"signing/SignGeneric\", status=\"success\"}" + "options": "C {__name__=\"cerberus_rpc_server_request_total\", code=\"OK\", instance=\"localhost:9091\", job=\"prometheus\", method=\"/signer.v1.Signer/SignGeneric\"}" }, "properties": [ { @@ -351,10 +328,10 @@ "disableTextWrap": false, "editorMode": "builder", "exemplar": false, - "expr": "remote_bls_rpc_server_response_total{status=\"success\", method=\"signing/SignGeneric\"}", + "expr": "cerberus_rpc_server_request_total{method=\"/signer.v1.Signer/SignGeneric\", code=\"OK\"}", "fullMetaSearch": false, "hide": true, - "includeNullMetadata": true, + "includeNullMetadata": false, "instant": false, "legendFormat": "{{method}}", "range": true, @@ -368,7 +345,7 @@ }, "disableTextWrap": false, "editorMode": "builder", - "expr": "remote_bls_rpc_server_response_total{method=\"signing/SignGeneric\"}", + "expr": "cerberus_rpc_server_request_total{method=\"/signer.v1.Signer/SignGeneric\"}", "fullMetaSearch": false, "hide": true, "includeNullMetadata": true, @@ -401,13 +378,13 @@ "list": [] }, "time": { - "from": "now-15m", + "from": "now-30m", "to": "now" }, "timepicker": {}, "timezone": "browser", - "title": "BLS Signer", + "title": "Cerberus", "uid": "bdzhsw9k29am8d", - "version": 17, + "version": 23, "weekStart": "" } \ No newline at end of file