Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add metrics #13

Merged
merged 5 commits into from
Sep 6, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ require (
github.com/aws/aws-sdk-go-v2/credentials v1.13.35
github.com/aws/aws-sdk-go-v2/service/s3 v1.38.5
github.com/fxamacker/cbor/v2 v2.5.0
github.com/prometheus/client_golang v1.16.0
golang.org/x/sync v0.3.0
)

Expand All @@ -26,5 +27,14 @@ require (
github.com/aws/aws-sdk-go-v2/service/ssooidc v1.15.5 // indirect
github.com/aws/aws-sdk-go-v2/service/sts v1.21.5 // indirect
github.com/aws/smithy-go v1.14.2 // indirect
github.com/beorn7/perks v1.0.1 // indirect
github.com/cespare/xxhash/v2 v2.2.0 // indirect
github.com/golang/protobuf v1.5.3 // indirect
github.com/matttproud/golang_protobuf_extensions v1.0.4 // indirect
github.com/prometheus/client_model v0.3.0 // indirect
github.com/prometheus/common v0.42.0 // indirect
github.com/prometheus/procfs v0.10.1 // indirect
github.com/x448/float16 v0.8.4 // indirect
golang.org/x/sys v0.8.0 // indirect
google.golang.org/protobuf v1.30.0 // indirect
)
31 changes: 30 additions & 1 deletion go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -34,18 +34,47 @@ github.com/aws/aws-sdk-go-v2/service/sts v1.21.5 h1:CQBFElb0LS8RojMJlxRSo/HXipvT
github.com/aws/aws-sdk-go-v2/service/sts v1.21.5/go.mod h1:VC7JDqsqiwXukYEDjoHh9U0fOJtNWh04FPQz4ct4GGU=
github.com/aws/smithy-go v1.14.2 h1:MJU9hqBGbvWZdApzpvoF2WAIJDbtjK2NDJSiJP7HblQ=
github.com/aws/smithy-go v1.14.2/go.mod h1:Tg+OJXh4MB2R/uN61Ko2f6hTZwB/ZYGOtib8J3gBHzA=
github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM=
github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw=
github.com/cespare/xxhash/v2 v2.2.0 h1:DC2CZ1Ep5Y4k3ZQ899DldepgrayRUGE6BBZ/cd9Cj44=
github.com/cespare/xxhash/v2 v2.2.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/fxamacker/cbor/v2 v2.5.0 h1:oHsG0V/Q6E/wqTS2O1Cozzsy69nqCiguo5Q1a1ADivE=
github.com/fxamacker/cbor/v2 v2.5.0/go.mod h1:TA1xS00nchWmaBnEIxPSE5oHLuJBAVvqrtAnWBwBCVo=
github.com/google/go-cmp v0.5.8 h1:e6P7q2lk1O+qJJb4BtCQXlK8vWEO8V1ZeuEdJNOqZyg=
github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
github.com/golang/protobuf v1.3.5/go.mod h1:6O5/vntMXwX2lRkT1hjjk0nAC1IDOTvTlVgjlRvqsdk=
github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk=
github.com/golang/protobuf v1.5.3 h1:KhyjKVUg7Usr/dYsdSqoFveMYd5ko72D+zANwlG1mmg=
github.com/golang/protobuf v1.5.3/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY=
github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
github.com/google/go-cmp v0.5.8/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
github.com/google/go-cmp v0.5.9 h1:O2Tfq5qg4qc4AmwVlvv0oLiVAGB7enBSJ2x2DqQFi38=
github.com/jmespath/go-jmespath v0.4.0/go.mod h1:T8mJZnbsbmF+m6zOOFylbeCJqk5+pHWvzYPziyZiYoo=
github.com/jmespath/go-jmespath/internal/testify v1.5.1/go.mod h1:L3OGu8Wl2/fWfCI6z80xFu9LTZmf1ZRjMHUOPmWr69U=
github.com/matttproud/golang_protobuf_extensions v1.0.4 h1:mmDVorXM7PCGKw94cs5zkfA9PSy5pEvNWRP0ET0TIVo=
github.com/matttproud/golang_protobuf_extensions v1.0.4/go.mod h1:BSXmuO+STAnVfrANrmjBb36TMTDstsz7MSK+HVaYKv4=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/prometheus/client_golang v1.16.0 h1:yk/hx9hDbrGHovbci4BY+pRMfSuuat626eFsHb7tmT8=
github.com/prometheus/client_golang v1.16.0/go.mod h1:Zsulrv/L9oM40tJ7T815tM89lFEugiJ9HzIqaAx4LKc=
github.com/prometheus/client_model v0.3.0 h1:UBgGFHqYdG/TPFD1B1ogZywDqEkwp3fBMvqdiQ7Xew4=
github.com/prometheus/client_model v0.3.0/go.mod h1:LDGWKZIo7rky3hgvBe+caln+Dr3dPggB5dvjtD7w9+w=
github.com/prometheus/common v0.42.0 h1:EKsfXEYo4JpWMHH5cg+KOUWeuJSov1Id8zGR8eeI1YM=
github.com/prometheus/common v0.42.0/go.mod h1:xBwqVerjNdUDjgODMpudtOMwlOwf2SaTr1yjz4b7Zbc=
github.com/prometheus/procfs v0.10.1 h1:kYK1Va/YMlutzCGazswoHKo//tZVlFpKYh+PymziUAg=
github.com/prometheus/procfs v0.10.1/go.mod h1:nwNm2aOCAYw8uTR/9bWRREkZFxAUcWzPHWJq+XBB/FM=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/x448/float16 v0.8.4 h1:qLwI1I70+NjRFUR3zs1JPUCgaCXSh3SW62uAKT1mSBM=
github.com/x448/float16 v0.8.4/go.mod h1:14CWIYCyZA/cWjXOioeEpHeN/83MdbZDRQHoFcYsOfg=
golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.3.0 h1:ftCYgMx6zT/asHUrPw8BLLscYtGznsLAnjq5RH9P66E=
golang.org/x/sync v0.3.0/go.mod h1:FU7BRWz2tNW+3quACPkgCx/L+uEAv1htQ0V83Z9Rj+Y=
golang.org/x/sys v0.8.0 h1:EBmGv8NaZBZTWvrbjNoL6HVt+IVy3QDQpJs7VRIw3tU=
golang.org/x/sys v0.8.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw=
google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc=
google.golang.org/protobuf v1.30.0 h1:kPPoIgf3TsEvrm0PFe15JQ+570QVxYzEvvHqChK+cng=
google.golang.org/protobuf v1.30.0/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/yaml.v2 v2.2.8/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
5 changes: 5 additions & 0 deletions integration_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ import (
"github.com/aws/aws-sdk-go-v2/config"
"github.com/aws/aws-sdk-go-v2/credentials"
"github.com/aws/aws-sdk-go-v2/service/s3"
"github.com/prometheus/client_golang/prometheus"
"golang.org/x/sync/singleflight"
)

Expand Down Expand Up @@ -149,6 +150,10 @@ func TestIntegration(t *testing.T) {
s3Bucket: "bucket",

cacheGroup: &singleflight.Group{},

requestsMetric: prometheus.NewCounterVec(prometheus.CounterOpts{}, []string{"result", "source"}),
partialTiles: prometheus.NewCounter(prometheus.CounterOpts{}),
singleFlightShared: prometheus.NewCounter(prometheus.CounterOpts{}),
}

// Invalid URL; should 404
Expand Down
94 changes: 85 additions & 9 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ import (
"log"
"net/http"
"net/url"
"os"
"strconv"
"strings"
"time"
Expand All @@ -22,6 +23,9 @@ import (
"github.com/aws/aws-sdk-go-v2/service/s3"
"github.com/aws/aws-sdk-go-v2/service/s3/types"
"github.com/fxamacker/cbor/v2"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/collectors"
"github.com/prometheus/client_golang/prometheus/promhttp"
"golang.org/x/sync/singleflight"
)

Expand Down Expand Up @@ -237,6 +241,10 @@ type tileCachingHandler struct {
s3Bucket string // The S3 bucket to use for caching tiles. Must not be empty.

cacheGroup *singleflight.Group // The singleflight.Group to use for deduplicating simultaneous requests (a.k.a. "request collapsing") for tiles. Must not be nil.

requestsMetric *prometheus.CounterVec
partialTiles prometheus.Counter
singleFlightShared prometheus.Counter
}

func (tch *tileCachingHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
Expand Down Expand Up @@ -284,8 +292,9 @@ func (tch *tileCachingHandler) ServeHTTP(w http.ResponseWriter, r *http.Request)
//
// When Trillian gets a request that is past the end of the log, it returns
// 400 (for better or worse), so we emulate that here.
tch.requestsMetric.WithLabelValues("error", "ct_log_past_end").Inc()
w.WriteHeader(http.StatusBadRequest)
w.Write([]byte("requested range is past the end of the log"))
fmt.Fprintln(w, "requested range is past the end of the log")
jsha marked this conversation as resolved.
Show resolved Hide resolved
return
} else {
contents.Entries = contents.Entries[prefixToRemove:]
Expand All @@ -296,6 +305,12 @@ func (tch *tileCachingHandler) ServeHTTP(w http.ResponseWriter, r *http.Request)
contents.Entries = contents.Entries[:requestedLen]
}

if w.Header().Get("X-Source") == "S3" {
tch.requestsMetric.WithLabelValues("success", "s3_get").Inc()
} else {
tch.requestsMetric.WithLabelValues("success", "ct_log_get").Inc()
}

w.Header().Set("X-Response-Len", fmt.Sprintf("%d", len(contents.Entries)))
w.WriteHeader(http.StatusOK)

Expand Down Expand Up @@ -325,11 +340,15 @@ func (tch *tileCachingHandler) getAndCacheTile(ctx context.Context, tile tile) (
source tileSource
}

innerContents, err, _ := singleflightDo(tch.cacheGroup, dedupKey, func() (entriesAndSource, error) {
innerContents, err, shared := singleflightDo(tch.cacheGroup, dedupKey, func() (entriesAndSource, error) {
contents, source, err := tch.getAndCacheTileUncollapsed(ctx, tile)
return entriesAndSource{contents, source}, err
})

if shared {
tch.singleFlightShared.Inc()
}

// The value from our singleflightDo closure is always non-nil, so we don't
// need an err != nil check here.
return innerContents.entries, innerContents.source, err
Expand All @@ -344,23 +363,27 @@ func (tch *tileCachingHandler) getAndCacheTileUncollapsed(ctx context.Context, t
}

if !errors.Is(err, noSuchKey{}) {
tch.requestsMetric.WithLabelValues("error", "s3_get").Inc()
return nil, sourceS3, fmt.Errorf("error reading tile from s3: %w", err)
}

contents, err = getTileFromBackend(ctx, tile)
if err != nil {
tch.requestsMetric.WithLabelValues("error", "ct_log_get").Inc()
return nil, sourceCTLog, fmt.Errorf("error reading tile from backend: %w", err)
}

// If we got a partial tile, assume we are at the end of the log and the last
// tile isn't filled up yet. In that case, don't write to S3, but still return
// results to the user.
if tch.isPartialTile(contents) {
tch.partialTiles.Inc()
return contents, sourceCTLog, nil
}

err = tch.writeToS3(ctx, tile, contents)
if err != nil {
tch.requestsMetric.WithLabelValues("error", "s3_put").Inc()
return nil, sourceCTLog, fmt.Errorf("error writing tile to S3: %w", err)
}
return contents, sourceCTLog, nil
Expand All @@ -387,7 +410,8 @@ func main() {
tileSize := flag.Int("tile-size", 0, "tile size. Must match the value used by the backend")
s3bucket := flag.String("s3-bucket", "", "s3 bucket to use for caching")
s3prefix := flag.String("s3-prefix", "", "prefix for s3 keys. defaults to value of -backend")
listenAddress := flag.String("listen-address", ":8080", "address to listen on")
listenAddress := flag.String("listen-address", ":7962", "address to listen on")
metricsAddress := flag.String("metrics-address", ":7963", "address to listen on for metrics")

// fullRequestTimeout is the max allowed time the handler can read from S3 and return or read from S3, read from backend, write to S3, and return.
fullRequestTimeout := flag.Duration("full-request-timeout", 4*time.Second, "max time to spend in the HTTP handler")
Expand Down Expand Up @@ -420,13 +444,41 @@ func main() {
}
svc := s3.NewFromConfig(cfg)

promRegistry := newStatsRegistry(*metricsAddress)

requestsMetric := prometheus.NewCounterVec(
prometheus.CounterOpts{
Name: "ctile_requests",
Help: "total number of requests, by result and source",
},
[]string{"result", "source"},
jsha marked this conversation as resolved.
Show resolved Hide resolved
)
promRegistry.MustRegister(requestsMetric)

partialTiles := prometheus.NewCounter(
jsha marked this conversation as resolved.
Show resolved Hide resolved
prometheus.CounterOpts{
Name: "ctile_partial_tiles",
Help: "number of requests not cached due to partial tile returned from CT log",
})
promRegistry.MustRegister(partialTiles)

singleFlightShared := prometheus.NewCounter(
jsha marked this conversation as resolved.
Show resolved Hide resolved
prometheus.CounterOpts{
Name: "ctile_single_flight_shared",
Help: "number of inbound requests coalesced into a single set of backend requests",
})
promRegistry.MustRegister(singleFlightShared)

handler := &tileCachingHandler{
logURL: *logURL,
tileSize: *tileSize,
s3Service: svc,
s3Prefix: *s3prefix,
s3Bucket: *s3bucket,
cacheGroup: &singleflight.Group{},
logURL: *logURL,
tileSize: *tileSize,
s3Service: svc,
s3Prefix: *s3prefix,
s3Bucket: *s3bucket,
cacheGroup: &singleflight.Group{},
requestsMetric: requestsMetric,
partialTiles: partialTiles,
singleFlightShared: singleFlightShared,
}

srv := http.Server{
Expand All @@ -440,3 +492,27 @@ func main() {

log.Fatal(srv.ListenAndServe())
}

func newStatsRegistry(listenAddress string) prometheus.Registerer {
registry := prometheus.NewRegistry()
registry.MustRegister(collectors.NewGoCollector())
registry.MustRegister(collectors.NewProcessCollector(
collectors.ProcessCollectorOpts{}))

server := http.Server{
Addr: listenAddress,
ReadTimeout: 5 * time.Second,
WriteTimeout: 5 * time.Second,
IdleTimeout: 5 * time.Minute,
ReadHeaderTimeout: 2 * time.Second,
Handler: promhttp.HandlerFor(registry, promhttp.HandlerOpts{}),
}
go func() {
err := server.ListenAndServe()
if err != nil {
log.Printf("unable to start metrics server on %s: %s\n", listenAddress, err)
os.Exit(1)
}
}()
return registry
}