Skip to content

Commit

Permalink
[metrics] add lru evict atime metric
Browse files Browse the repository at this point in the history
  • Loading branch information
joeljeske committed Apr 4, 2022
1 parent 8328d83 commit 37b3143
Show file tree
Hide file tree
Showing 2 changed files with 75 additions and 13 deletions.
78 changes: 65 additions & 13 deletions cache/disk/disk.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ import (
"strconv"
"strings"
"sync"
"time"

"github.com/buchgr/bazel-remote/cache"
"github.com/buchgr/bazel-remote/cache/disk/casblob"
Expand All @@ -31,6 +32,8 @@ import (
pb "github.com/buchgr/bazel-remote/genproto/build/bazel/remote/execution/v2"
"google.golang.org/protobuf/proto"

"github.com/prometheus/client_golang/prometheus"

"golang.org/x/sync/semaphore"
)

Expand Down Expand Up @@ -83,6 +86,8 @@ type diskCache struct {

mu sync.Mutex
lru SizedLRU

gaugeCacheAge prometheus.Gauge
}

type nameAndInfo struct {
Expand Down Expand Up @@ -135,6 +140,11 @@ func New(dir string, maxSizeBytes int64, opts ...Option) (Cache, error) {
// I suppose it's better to slow down processing than to crash
// when hitting the 10k limit or to run out of disk space.
fileRemovalSem: semaphore.NewWeighted(5000),

gaugeCacheAge: prometheus.NewGauge(prometheus.GaugeOpts{
Name: "bazel_remote_disk_cache_age_seconds",
Help: "The file `atime` of oldest item in the LRU cache. Depending on filemount opts (e.g. relatime), the resolution may be meausured in 'days' and not accurate to the second",
}),
}

cc := CacheConfig{diskCache: &c}
Expand All @@ -143,19 +153,7 @@ func New(dir string, maxSizeBytes int64, opts ...Option) (Cache, error) {
// This function is only called while the lock is held
// by the current goroutine.
onEvict := func(key Key, value lruItem) {
ks := key.(string)
hash := ks[len(ks)-sha256.Size*2:]
var kind cache.EntryKind = cache.AC
if strings.HasPrefix(ks, "cas") {
kind = cache.CAS
} else if strings.HasPrefix(ks, "ac") {
kind = cache.AC
} else if strings.HasPrefix(ks, "raw") {
kind = cache.RAW
}

f := filepath.Join(dir, c.FileLocation(kind, value.legacy, hash, value.size, value.random))

f := c.getElementPath(key, value)
// Run in a goroutine so we can release the lock sooner.
go c.removeFile(f)
}
Expand Down Expand Up @@ -211,6 +209,60 @@ func New(dir string, maxSizeBytes int64, opts ...Option) (Cache, error) {
// Non-test users must call this to expose metrics.
func (c *diskCache) RegisterMetrics() {
// Let the LRU register its own metrics first.
c.lru.RegisterMetrics()

// Register the cache age gauge. MustRegister panics if registration fails.
prometheus.MustRegister(c.gaugeCacheAge)

// Update the cache age metric on a static interval from a background
// goroutine (see pollCacheAge; the goroutine is never stopped).
// Note: this could be modeled as a GaugeFunc that updates as needed,
// but since the updater func must lock the cache mu, it was deemed
// necessary to have greater control of when to get the cache age.
go c.pollCacheAge()
}

// pollCacheAge refreshes the cache age metric once immediately and then
// again after every tick of a 60-second ticker. It never returns, so it is
// intended to be run in its own goroutine.
func (c *diskCache) pollCacheAge() {
	const refreshInterval = 60 * time.Second

	tick := time.NewTicker(refreshInterval)
	for {
		c.updateCacheAgeMetric()
		// Block until the next tick before refreshing again.
		<-tick.C
	}
}

// updateCacheAgeMetric looks up the item at the back of the LRU, stats its
// file to read the access time, and stores the time elapsed since then (in
// seconds) in the cache age gauge. An empty LRU is reported as an age of 0.
//
// If the file cannot be stat'ed, the error is logged and the gauge keeps its
// previous value instead of being overwritten with a misleading 0.
func (c *diskCache) updateCacheAgeMetric() {
	c.mu.Lock()

	key, value := c.lru.PeakOldestCacheItem()
	if key == nil {
		// Nothing in the cache: release the lock before touching the gauge.
		c.mu.Unlock()
		c.gaugeCacheAge.Set(0)
		return
	}

	f := c.getElementPath(key, value)

	// The stat is performed while c.mu is still held, as in the original code.
	// NOTE(review): presumably this keeps the item from being evicted between
	// the lookup and the stat; confirm before moving it outside the lock.
	ts, err := atime.Stat(f)

	c.mu.Unlock()

	if err != nil {
		log.Printf("ERROR: failed to determine cache age: %v, unable to stat %s", err, f)
		return
	}

	c.gaugeCacheAge.Set(time.Since(ts).Seconds())
}

// getElementPath returns the path, under c.dir, of the file backing the
// given LRU entry. The entry kind is chosen from the key's prefix ("cas" or
// "raw"; anything else, including "ac", maps to AC) and the hash is the
// final sha256.Size*2 characters of the key.
//
// The key must hold a string at least sha256.Size*2 characters long;
// otherwise the type assertion or the slice expression panics.
func (c *diskCache) getElementPath(key Key, value lruItem) string {
	name := key.(string)

	// The hash occupies the tail of the key.
	digest := name[len(name)-sha256.Size*2:]

	var kind cache.EntryKind
	switch {
	case strings.HasPrefix(name, "cas"):
		kind = cache.CAS
	case strings.HasPrefix(name, "raw"):
		kind = cache.RAW
	default:
		kind = cache.AC
	}

	rel := c.FileLocation(kind, value.legacy, digest, value.size, value.random)
	return filepath.Join(c.dir, rel)
}

func (c *diskCache) removeFile(f string) {
Expand Down
10 changes: 10 additions & 0 deletions cache/disk/lru.go
Original file line number Diff line number Diff line change
Expand Up @@ -280,3 +280,13 @@ func (c *SizedLRU) removeElement(e *list.Element) {
// roundUp4k adds BlockSize-1 to n and then masks the result with -BlockSize.
// NOTE(review): this is a true "round up to a multiple of BlockSize" only
// when BlockSize is a power of two (the name suggests 4096); confirm
// against the declaration of BlockSize.
func roundUp4k(n int64) int64 {
	bumped := n + BlockSize - 1
	return bumped & -BlockSize
}

// PeakOldestCacheItem returns the key and value held by the element at the
// back of the LRU list. If the list is empty it returns a nil key and a
// zero lruItem.
func (c *SizedLRU) PeakOldestCacheItem() (Key, lruItem) {
	back := c.ll.Back()
	if back == nil {
		return nil, lruItem{}
	}

	ent := back.Value.(*entry)
	return ent.key, ent.value
}

0 comments on commit 37b3143

Please sign in to comment.