Skip to content

Commit

Permalink
Add metrics
Browse files Browse the repository at this point in the history
  • Loading branch information
jsha committed Sep 5, 2023
1 parent c139bcc commit 04859aa
Show file tree
Hide file tree
Showing 3 changed files with 125 additions and 10 deletions.
10 changes: 10 additions & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ require (
github.com/aws/aws-sdk-go-v2/credentials v1.13.35
github.com/aws/aws-sdk-go-v2/service/s3 v1.38.5
github.com/fxamacker/cbor/v2 v2.5.0
github.com/prometheus/client_golang v1.16.0
golang.org/x/sync v0.3.0
)

Expand All @@ -26,5 +27,14 @@ require (
github.com/aws/aws-sdk-go-v2/service/ssooidc v1.15.5 // indirect
github.com/aws/aws-sdk-go-v2/service/sts v1.21.5 // indirect
github.com/aws/smithy-go v1.14.2 // indirect
github.com/beorn7/perks v1.0.1 // indirect
github.com/cespare/xxhash/v2 v2.2.0 // indirect
github.com/golang/protobuf v1.5.3 // indirect
github.com/matttproud/golang_protobuf_extensions v1.0.4 // indirect
github.com/prometheus/client_model v0.3.0 // indirect
github.com/prometheus/common v0.42.0 // indirect
github.com/prometheus/procfs v0.10.1 // indirect
github.com/x448/float16 v0.8.4 // indirect
golang.org/x/sys v0.8.0 // indirect
google.golang.org/protobuf v1.30.0 // indirect
)
31 changes: 30 additions & 1 deletion go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -34,18 +34,47 @@ github.com/aws/aws-sdk-go-v2/service/sts v1.21.5 h1:CQBFElb0LS8RojMJlxRSo/HXipvT
github.com/aws/aws-sdk-go-v2/service/sts v1.21.5/go.mod h1:VC7JDqsqiwXukYEDjoHh9U0fOJtNWh04FPQz4ct4GGU=
github.com/aws/smithy-go v1.14.2 h1:MJU9hqBGbvWZdApzpvoF2WAIJDbtjK2NDJSiJP7HblQ=
github.com/aws/smithy-go v1.14.2/go.mod h1:Tg+OJXh4MB2R/uN61Ko2f6hTZwB/ZYGOtib8J3gBHzA=
github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM=
github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw=
github.com/cespare/xxhash/v2 v2.2.0 h1:DC2CZ1Ep5Y4k3ZQ899DldepgrayRUGE6BBZ/cd9Cj44=
github.com/cespare/xxhash/v2 v2.2.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/fxamacker/cbor/v2 v2.5.0 h1:oHsG0V/Q6E/wqTS2O1Cozzsy69nqCiguo5Q1a1ADivE=
github.com/fxamacker/cbor/v2 v2.5.0/go.mod h1:TA1xS00nchWmaBnEIxPSE5oHLuJBAVvqrtAnWBwBCVo=
github.com/google/go-cmp v0.5.8 h1:e6P7q2lk1O+qJJb4BtCQXlK8vWEO8V1ZeuEdJNOqZyg=
github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
github.com/golang/protobuf v1.3.5/go.mod h1:6O5/vntMXwX2lRkT1hjjk0nAC1IDOTvTlVgjlRvqsdk=
github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk=
github.com/golang/protobuf v1.5.3 h1:KhyjKVUg7Usr/dYsdSqoFveMYd5ko72D+zANwlG1mmg=
github.com/golang/protobuf v1.5.3/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY=
github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
github.com/google/go-cmp v0.5.8/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
github.com/google/go-cmp v0.5.9 h1:O2Tfq5qg4qc4AmwVlvv0oLiVAGB7enBSJ2x2DqQFi38=
github.com/jmespath/go-jmespath v0.4.0/go.mod h1:T8mJZnbsbmF+m6zOOFylbeCJqk5+pHWvzYPziyZiYoo=
github.com/jmespath/go-jmespath/internal/testify v1.5.1/go.mod h1:L3OGu8Wl2/fWfCI6z80xFu9LTZmf1ZRjMHUOPmWr69U=
github.com/matttproud/golang_protobuf_extensions v1.0.4 h1:mmDVorXM7PCGKw94cs5zkfA9PSy5pEvNWRP0ET0TIVo=
github.com/matttproud/golang_protobuf_extensions v1.0.4/go.mod h1:BSXmuO+STAnVfrANrmjBb36TMTDstsz7MSK+HVaYKv4=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/prometheus/client_golang v1.16.0 h1:yk/hx9hDbrGHovbci4BY+pRMfSuuat626eFsHb7tmT8=
github.com/prometheus/client_golang v1.16.0/go.mod h1:Zsulrv/L9oM40tJ7T815tM89lFEugiJ9HzIqaAx4LKc=
github.com/prometheus/client_model v0.3.0 h1:UBgGFHqYdG/TPFD1B1ogZywDqEkwp3fBMvqdiQ7Xew4=
github.com/prometheus/client_model v0.3.0/go.mod h1:LDGWKZIo7rky3hgvBe+caln+Dr3dPggB5dvjtD7w9+w=
github.com/prometheus/common v0.42.0 h1:EKsfXEYo4JpWMHH5cg+KOUWeuJSov1Id8zGR8eeI1YM=
github.com/prometheus/common v0.42.0/go.mod h1:xBwqVerjNdUDjgODMpudtOMwlOwf2SaTr1yjz4b7Zbc=
github.com/prometheus/procfs v0.10.1 h1:kYK1Va/YMlutzCGazswoHKo//tZVlFpKYh+PymziUAg=
github.com/prometheus/procfs v0.10.1/go.mod h1:nwNm2aOCAYw8uTR/9bWRREkZFxAUcWzPHWJq+XBB/FM=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/x448/float16 v0.8.4 h1:qLwI1I70+NjRFUR3zs1JPUCgaCXSh3SW62uAKT1mSBM=
github.com/x448/float16 v0.8.4/go.mod h1:14CWIYCyZA/cWjXOioeEpHeN/83MdbZDRQHoFcYsOfg=
golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.3.0 h1:ftCYgMx6zT/asHUrPw8BLLscYtGznsLAnjq5RH9P66E=
golang.org/x/sync v0.3.0/go.mod h1:FU7BRWz2tNW+3quACPkgCx/L+uEAv1htQ0V83Z9Rj+Y=
golang.org/x/sys v0.8.0 h1:EBmGv8NaZBZTWvrbjNoL6HVt+IVy3QDQpJs7VRIw3tU=
golang.org/x/sys v0.8.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw=
google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc=
google.golang.org/protobuf v1.30.0 h1:kPPoIgf3TsEvrm0PFe15JQ+570QVxYzEvvHqChK+cng=
google.golang.org/protobuf v1.30.0/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/yaml.v2 v2.2.8/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
94 changes: 85 additions & 9 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ import (
"log"
"net/http"
"net/url"
"os"
"strconv"
"strings"
"time"
Expand All @@ -22,6 +23,9 @@ import (
"github.com/aws/aws-sdk-go-v2/service/s3"
"github.com/aws/aws-sdk-go-v2/service/s3/types"
"github.com/fxamacker/cbor/v2"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/collectors"
"github.com/prometheus/client_golang/prometheus/promhttp"
"golang.org/x/sync/singleflight"
)

Expand Down Expand Up @@ -237,6 +241,10 @@ type tileCachingHandler struct {
s3Bucket string // The S3 bucket to use for caching tiles. Must not be empty.

cacheGroup *singleflight.Group // The singleflight.Group to use for deduplicating simultaneous requests (a.k.a. "request collapsing") for tiles. Must not be nil.

requestsMetric *prometheus.CounterVec
partialTiles prometheus.Counter
singleFlightShared prometheus.Counter
}

func (tch *tileCachingHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
Expand Down Expand Up @@ -284,8 +292,9 @@ func (tch *tileCachingHandler) ServeHTTP(w http.ResponseWriter, r *http.Request)
//
// When Trillian gets a request that is past the end of the log, it returns
// 400 (for better or worse), so we emulate that here.
tch.requestsMetric.WithLabelValues("error", "ct_log_past_end").Inc()
w.WriteHeader(http.StatusBadRequest)
w.Write([]byte("requested range is past the end of the log"))
fmt.Fprintln(w, "requested range is past the end of the log")
return
} else {
contents.Entries = contents.Entries[prefixToRemove:]
Expand All @@ -296,6 +305,12 @@ func (tch *tileCachingHandler) ServeHTTP(w http.ResponseWriter, r *http.Request)
contents.Entries = contents.Entries[:requestedLen]
}

if w.Header().Get("X-Source") == "S3" {
tch.requestsMetric.WithLabelValues("success", "s3").Inc()
} else {
tch.requestsMetric.WithLabelValues("success", "ct_log").Inc()
}

w.Header().Set("X-Response-Len", fmt.Sprintf("%d", len(contents.Entries)))
w.WriteHeader(http.StatusOK)

Expand Down Expand Up @@ -325,11 +340,15 @@ func (tch *tileCachingHandler) getAndCacheTile(ctx context.Context, tile tile) (
source tileSource
}

innerContents, err, _ := singleflightDo(tch.cacheGroup, dedupKey, func() (entriesAndSource, error) {
innerContents, err, shared := singleflightDo(tch.cacheGroup, dedupKey, func() (entriesAndSource, error) {
contents, source, err := tch.getAndCacheTileUncollapsed(ctx, tile)
return entriesAndSource{contents, source}, err
})

if shared {
tch.singleFlightShared.Inc()
}

// The value from our singleflightDo closure is always non-nil, so we don't
// need an err != nil check here.
return innerContents.entries, innerContents.source, err
Expand All @@ -344,23 +363,27 @@ func (tch *tileCachingHandler) getAndCacheTileUncollapsed(ctx context.Context, t
}

if !errors.Is(err, noSuchKey{}) {
tch.requestsMetric.WithLabelValues("error", "s3_get").Inc()
return nil, sourceS3, fmt.Errorf("error reading tile from s3: %w", err)
}

contents, err = getTileFromBackend(ctx, tile)
if err != nil {
tch.requestsMetric.WithLabelValues("error", "ct_log_get").Inc()
return nil, sourceCTLog, fmt.Errorf("error reading tile from backend: %w", err)
}

// If we got a partial tile, assume we are at the end of the log and the last
// tile isn't filled up yet. In that case, don't write to S3, but still return
// results to the user.
if tch.isPartialTile(contents) {
tch.partialTiles.Inc()
return contents, sourceCTLog, nil
}

err = tch.writeToS3(ctx, tile, contents)
if err != nil {
tch.requestsMetric.WithLabelValues("error", "s3_put").Inc()
return nil, sourceCTLog, fmt.Errorf("error writing tile to S3: %w", err)
}
return contents, sourceCTLog, nil
Expand All @@ -387,7 +410,8 @@ func main() {
tileSize := flag.Int("tile-size", 0, "tile size. Must match the value used by the backend")
s3bucket := flag.String("s3-bucket", "", "s3 bucket to use for caching")
s3prefix := flag.String("s3-prefix", "", "prefix for s3 keys. defaults to value of -backend")
listenAddress := flag.String("listen-address", ":8080", "address to listen on")
listenAddress := flag.String("listen-address", ":7962", "address to listen on")
metricsAddress := flag.String("metrics-address", ":7963", "address to listen on")

// fullRequestTimeout is the max allowed time the handler can read from S3 and return or read from S3, read from backend, write to S3, and return.
fullRequestTimeout := flag.Duration("full-request-timeout", 4*time.Second, "max time to spend in the HTTP handler")
Expand Down Expand Up @@ -420,13 +444,41 @@ func main() {
}
svc := s3.NewFromConfig(cfg)

promRegistry := newStatsRegistry(*metricsAddress)

requestsMetric := prometheus.NewCounterVec(
prometheus.CounterOpts{
Name: "requests",
Help: "total number of requests, by result and source",
},
[]string{"result", "source"},
)
promRegistry.MustRegister(requestsMetric)

partialTiles := prometheus.NewCounter(
prometheus.CounterOpts{
Name: "partial_tiles",
Help: "number of requests not cached due to partial tile returned from CT log",
})
promRegistry.MustRegister(partialTiles)

singleFlightShared := prometheus.NewCounter(
prometheus.CounterOpts{
Name: "single_flight_shared",
Help: "how many inbound requests were coalesced into a single set of backend requests",
})
promRegistry.MustRegister(singleFlightShared)

handler := &tileCachingHandler{
logURL: *logURL,
tileSize: *tileSize,
s3Service: svc,
s3Prefix: *s3prefix,
s3Bucket: *s3bucket,
cacheGroup: &singleflight.Group{},
logURL: *logURL,
tileSize: *tileSize,
s3Service: svc,
s3Prefix: *s3prefix,
s3Bucket: *s3bucket,
cacheGroup: &singleflight.Group{},
requestsMetric: requestsMetric,
partialTiles: partialTiles,
singleFlightShared: singleFlightShared,
}

srv := http.Server{
Expand All @@ -440,3 +492,27 @@ func main() {

log.Fatal(srv.ListenAndServe())
}

func newStatsRegistry(listenAddress string) prometheus.Registerer {
registry := prometheus.NewRegistry()
registry.MustRegister(collectors.NewGoCollector())
registry.MustRegister(collectors.NewProcessCollector(
collectors.ProcessCollectorOpts{}))

server := http.Server{
Addr: listenAddress,
ReadTimeout: 5 * time.Second,
WriteTimeout: 5 * time.Second,
IdleTimeout: 5 * time.Minute,
ReadHeaderTimeout: 2 * time.Second,
Handler: promhttp.HandlerFor(registry, promhttp.HandlerOpts{}),
}
go func() {
err := server.ListenAndServe()
if err != nil {
log.Printf("unable to start metrics server on %s: %s\n", listenAddress, err)
os.Exit(1)
}
}()
return registry
}

0 comments on commit 04859aa

Please sign in to comment.