Skip to content

Commit

Permalink
Add AC hit rate metrics with prometheus labels
Browse files Browse the repository at this point in the history
This is a draft, not ready to be merged. I wait with test cases until
getting feedback about the main functionality.

Same functionality as in #350

 - Prometheus counter for cache hit ratio of only AC requests.

 - Support for prometheus labels based on custom HTTP and gRPC headers.

but implemented in an alternative way:

 - disk.io implements two interfaces: cache.CasAcCache, cache.Stats

 - cache.metricsdecorator is decorator for cache.CasAcCache and
   provides prometheus metrics.

Pros with this alternative implementation:

 - Should allow non-prometheus metrics as requested in
   #355

 - Avoid the question about if the counters should be placed in
   disk.go or http.go/grpc*.go. If placing in disk.go, there are
   issues about how to avoid incrementing counter twice for the
   same request (both in Get and in GetValidatedActionResult)
   and at the same time count found AC but missing CAS, as cache miss.

 - Makes headers available also for logic in disk.go, which could be
   useful for other functionality in the future.

 - Metrics can be separated from logging, and still not require
   injecting counter increment invocations in tons of places.
   Incrementing from a few well defined places minimize the risk
   for bugs in the metrics.
  • Loading branch information
ulrfa committed Sep 26, 2020
1 parent 0a6dd55 commit 4b2b8b2
Show file tree
Hide file tree
Showing 19 changed files with 478 additions and 131 deletions.
1 change: 1 addition & 0 deletions BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ go_library(
"//cache/gcsproxy:go_default_library",
"//cache/httpproxy:go_default_library",
"//cache/s3proxy:go_default_library",
"//cache/metricsdecorator:go_default_library",
"//config:go_default_library",
"//server:go_default_library",
"//utils/idle:go_default_library",
Expand Down
17 changes: 17 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -223,6 +223,23 @@ host: localhost

# If true, enable experimental remote asset API support:
#experimental_remote_asset_api: true

# Allows mapping HTTP and gRPC headers to prometheus
# labels. Headers can be set by bazel client as:
# --remote_header=os=ubuntu18-04. Not all counters are
# affected.
#metrics:
# categories:
# os:
# - rhel7
# - rhel8
# - ubuntu16-04
# - ubuntu18-04
# branch:
# - master
# user:
# - ci

```

## Docker
Expand Down
3 changes: 3 additions & 0 deletions cache/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,7 @@ go_library(
srcs = ["cache.go"],
importpath = "github.com/buchgr/bazel-remote/cache",
visibility = ["//visibility:public"],
deps = [
"@com_github_bazelbuild_remote_apis//build/bazel/remote/execution/v2:go_default_library",
],
)
43 changes: 43 additions & 0 deletions cache/cache.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ import (
"encoding/hex"
"io"
"path/filepath"

pb "github.com/bazelbuild/remote-apis/build/bazel/remote/execution/v2"
)

// EntryKind describes the kind of cache entry
Expand Down Expand Up @@ -50,6 +52,47 @@ func (e *Error) Error() string {
return e.Text
}

// Represent the context of an incoming request. For now it acts as an
// adapter providing a common interface to headers from HTTP and gRPC
// requests. In the future it could be extend if additional
// information needs to be associated with a request, or be propagated
// from HTTP/gRPC servers towards disk cache, or perhaps further
// to proxies.
type RequestContext interface {
// Return values for HTTP/gRPC header in the associated
// request. Returns a slice since there could be several
// headers with same name. Returns empty slice if no
// headers exist with the requested name.
// The headerName is expected in lowercase.
GetHeader(headerName string) (headerValues []string)
}

// TODO Document interface
type CasAcCache interface {
// TODO change to io.ReadCloser?
Put(kind EntryKind, hash string, size int64, rdr io.Reader, reqCtx RequestContext) error

Get(kind EntryKind, hash string, size int64, reqCtx RequestContext) (io.ReadCloser, int64, error)

Contains(kind EntryKind, hash string, size int64, reqCtx RequestContext) (bool, int64)

GetValidatedActionResult(hash string, reqCtx RequestContext) (*pb.ActionResult, []byte, error)
}

// TODO Document interface
type Stats interface {
Stats() (totalSize int64, reservedSize int64, numItems int)
MaxSize() int64
}

// TODO Should the proxy interface also be extended with RequestContext parameter? To allow
// for example forwarding of custom headers from client to proxy, or support for HTTP
// headers like Max-Forwards.

// TODO Could the disk and proxies implement same interface? But proxies are not supporting
// GetValidatedActionResult and that method is important to have in the interface
// for cache.metricdecorator.

// Proxy is the interface that (optional) proxy backends must implement.
// Implementations are expected to be safe for concurrent use.
type Proxy interface {
Expand Down
23 changes: 12 additions & 11 deletions cache/disk/disk.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ import (
"github.com/golang/protobuf/proto"
)

// TODO remove these counters?
var (
cacheHits = promauto.NewCounter(prometheus.CounterOpts{
Name: "bazel_remote_disk_cache_hits",
Expand Down Expand Up @@ -230,7 +231,7 @@ func (c *Cache) loadExistingFiles() error {
// Put stores a stream of `size` bytes from `r` into the cache.
// If `hash` is not the empty string, and the contents don't match it,
// a non-nil error is returned.
func (c *Cache) Put(kind cache.EntryKind, hash string, size int64, r io.Reader) (rErr error) {
func (c *Cache) Put(kind cache.EntryKind, hash string, size int64, r io.Reader, reqCtx cache.RequestContext) (rErr error) {
if size < 0 {
return fmt.Errorf("Invalid (negative) size: %d", size)
}
Expand Down Expand Up @@ -471,7 +472,7 @@ func (c *Cache) availableOrTryProxy(key string, size int64, blobPath string) (rc
// item is not found, the io.ReadCloser will be nil. If some error occurred
// when processing the request, then it is returned. Callers should provide
// the `size` of the item to be retrieved, or -1 if unknown.
func (c *Cache) Get(kind cache.EntryKind, hash string, size int64) (rc io.ReadCloser, s int64, rErr error) {
func (c *Cache) Get(kind cache.EntryKind, hash string, size int64, reqCtx cache.RequestContext) (rc io.ReadCloser, s int64, rErr error) {

// The hash format is checked properly in the http/grpc code.
// Just perform a simple/fast check here, to catch bad tests.
Expand Down Expand Up @@ -575,7 +576,7 @@ func (c *Cache) Get(kind cache.EntryKind, hash string, size int64) (rc io.ReadCl
// one) will be checked.
//
// Callers should provide the `size` of the item, or -1 if unknown.
func (c *Cache) Contains(kind cache.EntryKind, hash string, size int64) (bool, int64) {
func (c *Cache) Contains(kind cache.EntryKind, hash string, size int64, reqCtx cache.RequestContext) (bool, int64) {

// The hash format is checked properly in the http/grpc code.
// Just perform a simple/fast check here, to catch bad tests.
Expand Down Expand Up @@ -648,9 +649,9 @@ func cacheFilePath(kind cache.EntryKind, cacheDir string, hash string) string {
// value from the CAS if it and all its dependencies are also available. If
// not, nil values are returned. If something unexpected went wrong, return
// an error.
func (c *Cache) GetValidatedActionResult(hash string) (*pb.ActionResult, []byte, error) {
func (c *Cache) GetValidatedActionResult(hash string, reqCtx cache.RequestContext) (*pb.ActionResult, []byte, error) {

rc, sizeBytes, err := c.Get(cache.AC, hash, -1)
rc, sizeBytes, err := c.Get(cache.AC, hash, -1, reqCtx)
if rc != nil {
defer rc.Close()
}
Expand All @@ -675,15 +676,15 @@ func (c *Cache) GetValidatedActionResult(hash string) (*pb.ActionResult, []byte,

for _, f := range result.OutputFiles {
if len(f.Contents) == 0 {
found, _ := c.Contains(cache.CAS, f.Digest.Hash, f.Digest.SizeBytes)
found, _ := c.Contains(cache.CAS, f.Digest.Hash, f.Digest.SizeBytes, reqCtx)
if !found {
return nil, nil, nil // aka "not found"
}
}
}

for _, d := range result.OutputDirectories {
r, size, err := c.Get(cache.CAS, d.TreeDigest.Hash, d.TreeDigest.SizeBytes)
r, size, err := c.Get(cache.CAS, d.TreeDigest.Hash, d.TreeDigest.SizeBytes, reqCtx)
if r == nil {
return nil, nil, err // aka "not found", or an err if non-nil
}
Expand Down Expand Up @@ -714,7 +715,7 @@ func (c *Cache) GetValidatedActionResult(hash string) (*pb.ActionResult, []byte,
if f.Digest == nil {
continue
}
found, _ := c.Contains(cache.CAS, f.Digest.Hash, f.Digest.SizeBytes)
found, _ := c.Contains(cache.CAS, f.Digest.Hash, f.Digest.SizeBytes, reqCtx)
if !found {
return nil, nil, nil // aka "not found"
}
Expand All @@ -725,7 +726,7 @@ func (c *Cache) GetValidatedActionResult(hash string) (*pb.ActionResult, []byte,
if f.Digest == nil {
continue
}
found, _ := c.Contains(cache.CAS, f.Digest.Hash, f.Digest.SizeBytes)
found, _ := c.Contains(cache.CAS, f.Digest.Hash, f.Digest.SizeBytes, reqCtx)
if !found {
return nil, nil, nil // aka "not found"
}
Expand All @@ -734,14 +735,14 @@ func (c *Cache) GetValidatedActionResult(hash string) (*pb.ActionResult, []byte,
}

if result.StdoutDigest != nil {
found, _ := c.Contains(cache.CAS, result.StdoutDigest.Hash, result.StdoutDigest.SizeBytes)
found, _ := c.Contains(cache.CAS, result.StdoutDigest.Hash, result.StdoutDigest.SizeBytes, reqCtx)
if !found {
return nil, nil, nil // aka "not found"
}
}

if result.StderrDigest != nil {
found, _ := c.Contains(cache.CAS, result.StderrDigest.Hash, result.StderrDigest.SizeBytes)
found, _ := c.Contains(cache.CAS, result.StderrDigest.Hash, result.StderrDigest.SizeBytes, reqCtx)
if !found {
return nil, nil, nil // aka "not found"
}
Expand Down
Loading

0 comments on commit 4b2b8b2

Please sign in to comment.