Skip to content

Commit

Permalink
refactor(metrics): remove dependency on optimism metrics (#94)
Browse files Browse the repository at this point in the history
* refactor(metrics): remove dependency on optimism metrics
Also add status code label to http_requests_total metric

* doc: added TODO comment for updating commitmentHeader in metric middleware

* fix: lint accomodate -> accommodate

* fix: lint
  • Loading branch information
samlaf authored Aug 22, 2024
1 parent 4cdda99 commit 1838c6e
Show file tree
Hide file tree
Showing 2 changed files with 56 additions and 16 deletions.
64 changes: 50 additions & 14 deletions metrics/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,8 @@ import (
)

const (
Namespace = "eigenda_proxy"
namespace = "eigenda_proxy"
httpServerSubsystem = "http_server"
)

// Config ... Metrics server configuration
Expand All @@ -28,8 +29,7 @@ type Config struct {
type Metricer interface {
RecordInfo(version string)
RecordUp()
RecordRPCServerRequest(method string) func()
RecordRPCClientResponse(method string, err error)
RecordRPCServerRequest(method string) func(status string)

Document() []metrics.DocumentedMetric
}
Expand All @@ -39,19 +39,19 @@ type Metrics struct {
Info *prometheus.GaugeVec
Up prometheus.Gauge

metrics.RPCMetrics
HTTPServerRequestsTotal *prometheus.CounterVec
HTTPServerRequestDurationSeconds *prometheus.HistogramVec

registry *prometheus.Registry
factory metrics.Factory
}

var _ Metricer = (*Metrics)(nil)

func NewMetrics(procName string) *Metrics {
if procName == "" {
procName = "default"
func NewMetrics(subsystem string) *Metrics {
if subsystem == "" {
subsystem = "default"
}
ns := Namespace + "_" + procName

registry := prometheus.NewRegistry()
registry.MustRegister(collectors.NewProcessCollector(collectors.ProcessCollectorOpts{}))
Expand All @@ -60,20 +60,40 @@ func NewMetrics(procName string) *Metrics {

return &Metrics{
Up: factory.NewGauge(prometheus.GaugeOpts{
Namespace: ns,
Namespace: namespace,
Subsystem: subsystem,
Name: "up",
Help: "1 if the proxy server has finished starting up",
}),
Info: factory.NewGaugeVec(prometheus.GaugeOpts{
Namespace: ns,
Namespace: namespace,
Subsystem: subsystem,
Name: "info",
Help: "Pseudo-metric tracking version and config info",
}, []string{
"version",
}),
RPCMetrics: metrics.MakeRPCMetrics(ns, factory),
registry: registry,
factory: factory,
HTTPServerRequestsTotal: factory.NewCounterVec(prometheus.CounterOpts{
Namespace: namespace,
Subsystem: httpServerSubsystem,
Name: "requests_total",
Help: "Total requests to the HTTP server",
}, []string{
"method", "status",
}),
HTTPServerRequestDurationSeconds: factory.NewHistogramVec(prometheus.HistogramOpts{
Namespace: namespace,
Subsystem: httpServerSubsystem,
Name: "request_duration_seconds",
// TODO: we might want different buckets for different routes?
// also probably different buckets depending on the backend (memstore, s3, and eigenda have different latencies)
Buckets: prometheus.ExponentialBucketsRange(0.05, 1200, 20),
Help: "Histogram of HTTP server request durations",
}, []string{
"method", // no status on histograms because those are very expensive
}),
registry: registry,
factory: factory,
}
}

Expand All @@ -89,6 +109,19 @@ func (m *Metrics) RecordUp() {
m.Up.Set(1)
}

// RecordRPCServerRequest is a helper method to record an incoming HTTP request.
// It bumps the requests metric, and tracks how long it takes to serve a response,
// including the HTTP status code.
func (m *Metrics) RecordRPCServerRequest(method string) func(status string) {
// we don't want to track the status code on the histogram because that would
// create a huge number of labels, and cost a lot on cloud hosted services
timer := prometheus.NewTimer(m.HTTPServerRequestDurationSeconds.WithLabelValues(method))
return func(status string) {
m.HTTPServerRequestsTotal.WithLabelValues(method, status).Inc()
timer.ObserveDuration()
}
}

// StartServer starts the metrics server on the given hostname and port.
func (m *Metrics) StartServer(hostname string, port int) (*ophttp.HTTPServer, error) {
addr := net.JoinHostPort(hostname, strconv.Itoa(port))
Expand All @@ -103,7 +136,6 @@ func (m *Metrics) Document() []metrics.DocumentedMetric {
}

type noopMetricer struct {
metrics.NoopRPCMetrics
}

var NoopMetrics Metricer = new(noopMetricer)
Expand All @@ -117,3 +149,7 @@ func (n *noopMetricer) RecordInfo(_ string) {

func (n *noopMetricer) RecordUp() {
}

func (n *noopMetricer) RecordRPCServerRequest(string) func(status string) {
return func(string) {}
}
8 changes: 6 additions & 2 deletions server/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -70,11 +70,15 @@ func WithMetrics(handleFn func(http.ResponseWriter, *http.Request) error,
// where the first 3 bytes of the path are the commitment header
// commit type | da layer type | version byte
// we want to group all requests by commitment header, otherwise the prometheus metric labels will explode
// TODO: commitment header is different for non-op commitments. We will need to change this to accommodate other commitments.
// probably want (commitment mode, cert version) as the labels, since commit-type/da-layer are not relevant anyways.
commitmentHeader := r.URL.Path[:3]
recordDur := m.RecordRPCServerRequest(commitmentHeader)
defer recordDur()

return handleFn(w, r)
err := handleFn(w, r)
// we assume that every route will set the status header
recordDur(w.Header().Get("status"))
return err
}
}

Expand Down

0 comments on commit 1838c6e

Please sign in to comment.