diff --git a/metrics/metrics.go b/metrics/metrics.go index 5ab99c32..d46dec78 100644 --- a/metrics/metrics.go +++ b/metrics/metrics.go @@ -13,7 +13,8 @@ import ( ) const ( - Namespace = "eigenda_proxy" + namespace = "eigenda_proxy" + httpServerSubsystem = "http_server" ) // Config ... Metrics server configuration @@ -28,8 +29,7 @@ type Config struct { type Metricer interface { RecordInfo(version string) RecordUp() - RecordRPCServerRequest(method string) func() - RecordRPCClientResponse(method string, err error) + RecordRPCServerRequest(method string) func(status string) Document() []metrics.DocumentedMetric } @@ -39,7 +39,8 @@ type Metrics struct { Info *prometheus.GaugeVec Up prometheus.Gauge - metrics.RPCMetrics + HTTPServerRequestsTotal *prometheus.CounterVec + HTTPServerRequestDurationSeconds *prometheus.HistogramVec registry *prometheus.Registry factory metrics.Factory @@ -47,11 +48,10 @@ type Metrics struct { var _ Metricer = (*Metrics)(nil) -func NewMetrics(procName string) *Metrics { - if procName == "" { - procName = "default" +func NewMetrics(subsystem string) *Metrics { + if subsystem == "" { + subsystem = "default" } - ns := Namespace + "_" + procName registry := prometheus.NewRegistry() registry.MustRegister(collectors.NewProcessCollector(collectors.ProcessCollectorOpts{})) @@ -60,20 +60,40 @@ func NewMetrics(procName string) *Metrics { return &Metrics{ Up: factory.NewGauge(prometheus.GaugeOpts{ - Namespace: ns, + Namespace: namespace, + Subsystem: subsystem, Name: "up", Help: "1 if the proxy server has finished starting up", }), Info: factory.NewGaugeVec(prometheus.GaugeOpts{ - Namespace: ns, + Namespace: namespace, + Subsystem: subsystem, Name: "info", Help: "Pseudo-metric tracking version and config info", }, []string{ "version", }), - RPCMetrics: metrics.MakeRPCMetrics(ns, factory), - registry: registry, - factory: factory, + HTTPServerRequestsTotal: factory.NewCounterVec(prometheus.CounterOpts{ + Namespace: namespace, + Subsystem: httpServerSubsystem, + Name: "requests_total", + Help: "Total requests to the HTTP server", + }, []string{ + "method", "status", + }), + HTTPServerRequestDurationSeconds: factory.NewHistogramVec(prometheus.HistogramOpts{ + Namespace: namespace, + Subsystem: httpServerSubsystem, + Name: "request_duration_seconds", + // TODO: we might want different buckets for different routes? + // also probably different buckets depending on the backend (memstore, s3, and eigenda have different latencies) + Buckets: prometheus.ExponentialBucketsRange(0.05, 1200, 20), + Help: "Histogram of HTTP server request durations", + }, []string{ + "method", // no status on histograms because those are very expensive + }), + registry: registry, + factory: factory, } } @@ -89,6 +109,19 @@ func (m *Metrics) RecordUp() { m.Up.Set(1) } +// RecordRPCServerRequest is a helper method to record an incoming HTTP request. +// It bumps the requests metric, and tracks how long it takes to serve a response, +// including the HTTP status code. +func (m *Metrics) RecordRPCServerRequest(method string) func(status string) { + // we don't want to track the status code on the histogram because that would + // create a huge number of labels, and cost a lot on cloud hosted services + timer := prometheus.NewTimer(m.HTTPServerRequestDurationSeconds.WithLabelValues(method)) + return func(status string) { + m.HTTPServerRequestsTotal.WithLabelValues(method, status).Inc() + timer.ObserveDuration() + } +} + // StartServer starts the metrics server on the given hostname and port. func (m *Metrics) StartServer(hostname string, port int) (*ophttp.HTTPServer, error) { addr := net.JoinHostPort(hostname, strconv.Itoa(port)) @@ -103,7 +136,6 @@ func (m *Metrics) Document() []metrics.DocumentedMetric { } type noopMetricer struct { - metrics.NoopRPCMetrics } var NoopMetrics Metricer = new(noopMetricer) @@ -117,3 +149,7 @@ func (n *noopMetricer) RecordInfo(_ string) { func (n *noopMetricer) RecordUp() { } + +func (n *noopMetricer) RecordRPCServerRequest(string) func(status string) { + return func(string) {} +} diff --git a/server/server.go b/server/server.go index 0e1924cb..4baa7535 100644 --- a/server/server.go +++ b/server/server.go @@ -70,11 +70,15 @@ func WithMetrics(handleFn func(http.ResponseWriter, *http.Request) error, // where the first 3 bytes of the path are the commitment header // commit type | da layer type | version byte // we want to group all requests by commitment header, otherwise the prometheus metric labels will explode + // TODO: commitment header is different for non-op commitments. We will need to change this to accommodate other commitments. + // probably want (commitment mode, cert version) as the labels, since commit-type/da-layer are not relevant anyways. commitmentHeader := r.URL.Path[:3] recordDur := m.RecordRPCServerRequest(commitmentHeader) - defer recordDur() - return handleFn(w, r) + err := handleFn(w, r) + // we assume that every route will set the status header + recordDur(w.Header().Get("status")) + return err } }