From cb79ed18909d00b274cdd92fceeb6a54e35396ac Mon Sep 17 00:00:00 2001
From: Joel Jeske
Date: Wed, 30 Mar 2022 14:59:10 -0500
Subject: [PATCH] [metrics] add lru evict atime metric

---
 cache/disk/disk.go     | 39 +++++++++++++++++++++++++--------------
 cache/disk/lru.go      | 57 ++++++++++++++++++++++++++++++++++++++++++++++++++-------
 cache/disk/lru_test.go | 18 +++++++++---------
 3 files changed, 84 insertions(+), 30 deletions(-)

diff --git a/cache/disk/disk.go b/cache/disk/disk.go
index 395d109cc..5c64907f9 100644
--- a/cache/disk/disk.go
+++ b/cache/disk/disk.go
@@ -20,6 +20,7 @@ import (
 	"strconv"
 	"strings"
 	"sync"
+	"time"
 
 	"github.com/buchgr/bazel-remote/cache"
 	"github.com/buchgr/bazel-remote/cache/disk/casblob"
@@ -143,24 +144,17 @@ func New(dir string, maxSizeBytes int64, opts ...Option) (Cache, error) {
 	// This function is only called while the lock is held
 	// by the current goroutine.
 	onEvict := func(key Key, value lruItem) {
-		ks := key.(string)
-		hash := ks[len(ks)-sha256.Size*2:]
-		var kind cache.EntryKind = cache.AC
-		if strings.HasPrefix(ks, "cas") {
-			kind = cache.CAS
-		} else if strings.HasPrefix(ks, "ac") {
-			kind = cache.AC
-		} else if strings.HasPrefix(ks, "raw") {
-			kind = cache.RAW
-		}
-
-		f := filepath.Join(dir, c.FileLocation(kind, value.legacy, hash, value.size, value.random))
-
+		f := c.getElementPath(key, value)
 		// Run in a goroutine so we can release the lock sooner.
 		go c.removeFile(f)
 	}
 
-	c.lru = NewSizedLRU(maxSizeBytes, onEvict)
+	getElementAtime := func(key Key, value lruItem) (time.Time, error) {
+		f := c.getElementPath(key, value)
+		return atime.Stat(f)
+	}
+
+	c.lru = NewSizedLRU(maxSizeBytes, onEvict, getElementAtime)
 
 	// Apply options.
 	for _, o := range opts {
@@ -213,6 +207,23 @@ func (c *diskCache) RegisterMetrics() {
 	c.lru.RegisterMetrics()
 }
 
+// getElementPath returns the path under c.dir of the file backing the given
+// LRU item, derived from the key's kind prefix and its trailing sha256 hash.
+func (c *diskCache) getElementPath(key Key, value lruItem) string {
+	ks := key.(string)
+	hash := ks[len(ks)-sha256.Size*2:]
+	var kind cache.EntryKind = cache.AC
+	if strings.HasPrefix(ks, "cas") {
+		kind = cache.CAS
+	} else if strings.HasPrefix(ks, "ac") {
+		kind = cache.AC
+	} else if strings.HasPrefix(ks, "raw") {
+		kind = cache.RAW
+	}
+
+	return filepath.Join(c.dir, c.FileLocation(kind, value.legacy, hash, value.size, value.random))
+}
+
 func (c *diskCache) removeFile(f string) {
 	if err := c.fileRemovalSem.Acquire(context.Background(), 1); err != nil {
 		log.Printf("ERROR: failed to aquire semaphore: %v, unable to remove %s", err, f)
diff --git a/cache/disk/lru.go b/cache/disk/lru.go
index f3de12aa9..8e48342e1 100644
--- a/cache/disk/lru.go
+++ b/cache/disk/lru.go
@@ -4,6 +4,7 @@ import (
 	"container/list"
 	"errors"
 	"fmt"
+	"time"
 
 	"github.com/prometheus/client_golang/prometheus"
 )
@@ -19,6 +20,10 @@ type Key interface{}
 // EvictCallback is the type of callbacks that are invoked when items are evicted.
 type EvictCallback func(key Key, value lruItem)
 
+// ElementAtimeFunc is the type of callbacks that return the last access time
+// (atime) of the file backing the given LRU item.
+type ElementAtimeFunc func(key Key, value lruItem) (time.Time, error)
+
 // SizedLRU is an LRU cache that will keep its total size below maxSize by evicting
 // items.
 // SizedLRU is not thread-safe.
@@ -44,6 +49,9 @@ type SizedLRU struct {
 
 	onEvict EvictCallback
 
+	getElementAtime ElementAtimeFunc
+
+	gaugeCacheAge           prometheus.GaugeFunc
 	gaugeCacheSizeBytes     prometheus.Gauge
 	gaugeCacheLogicalBytes  prometheus.Gauge
 	counterEvictedBytes     prometheus.Counter
@@ -60,13 +68,19 @@ type entry struct {
 const BlockSize = 4096
 
 // NewSizedLRU returns a new SizedLRU cache
-func NewSizedLRU(maxSize int64, onEvict EvictCallback) SizedLRU {
-	return SizedLRU{
-		maxSize: maxSize,
-		ll:      list.New(),
-		cache:   make(map[interface{}]*list.Element),
-		onEvict: onEvict,
-
+func NewSizedLRU(maxSize int64, onEvict EvictCallback, getElementAtime ElementAtimeFunc) SizedLRU {
+	var c SizedLRU
+	c = SizedLRU{
+		maxSize:         maxSize,
+		ll:              list.New(),
+		cache:           make(map[interface{}]*list.Element),
+		onEvict:         onEvict,
+		getElementAtime: getElementAtime,
+
+		gaugeCacheAge: prometheus.NewGaugeFunc(prometheus.GaugeOpts{
+			Name: "bazel_remote_disk_cache_age_seconds",
+			Help: "The file `atime` of oldest item in the LRU cache. Depending on filemount opts (e.g. relatime), the resolution may be measured in 'days' and not accurate to the second",
+		}, func() float64 { return c.getCacheAge() }),
 		gaugeCacheSizeBytes: prometheus.NewGauge(prometheus.GaugeOpts{
 			Name: "bazel_remote_disk_cache_size_bytes",
 			Help: "The current number of bytes in the disk backend",
@@ -84,9 +98,11 @@ func NewSizedLRU(maxSize int64, onEvict EvictCallback) SizedLRU {
 			Help: "The total number of bytes removed from disk backend, due to put of already existing key",
 		}),
 	}
+	return c
 }
 
 func (c *SizedLRU) RegisterMetrics() {
+	prometheus.MustRegister(c.gaugeCacheAge)
 	prometheus.MustRegister(c.gaugeCacheSizeBytes)
 	prometheus.MustRegister(c.gaugeCacheLogicalBytes)
 	prometheus.MustRegister(c.counterEvictedBytes)
@@ -280,3 +296,30 @@ func (c *SizedLRU) removeElement(e *list.Element) {
 func roundUp4k(n int64) int64 {
 	return (n + BlockSize - 1) & -BlockSize
 }
+
+// getCacheAge returns the age in seconds (time elapsed since the file atime)
+// of the back item in the LRU list.
+// It returns 0 if the cache is empty, if no ElementAtimeFunc was provided,
+// or if the atime lookup fails.
+//
+// NOTE(review): this is invoked from the prometheus GaugeFunc callback,
+// while SizedLRU is documented as not thread-safe; confirm that reading
+// c.ll here cannot race with cache mutations.
+func (c *SizedLRU) getCacheAge() float64 {
+	if c.getElementAtime == nil {
+		return 0
+	}
+
+	e := c.ll.Back()
+	if e == nil {
+		return 0
+	}
+
+	kv := e.Value.(*entry)
+	ts, err := c.getElementAtime(kv.key, kv.value)
+	if err != nil {
+		return 0
+	}
+
+	return time.Since(ts).Seconds()
+}
diff --git a/cache/disk/lru_test.go b/cache/disk/lru_test.go
index 819379e7f..66a0de01d 100644
--- a/cache/disk/lru_test.go
+++ b/cache/disk/lru_test.go
@@ -20,7 +20,7 @@ func checkSizeAndNumItems(t *testing.T, lru SizedLRU, expSize int64, expNum int)
 
 func TestBasics(t *testing.T) {
 	maxSize := int64(BlockSize)
-	lru := NewSizedLRU(maxSize, nil)
+	lru := NewSizedLRU(maxSize, nil, nil)
 
 	// Empty cache
 	if maxSize != lru.MaxSize() {
@@ -64,7 +64,7 @@ func TestEviction(t *testing.T) {
 		evictions = append(evictions, key.(int))
 	}
 
-	lru := NewSizedLRU(10*BlockSize, onEvict)
+	lru := NewSizedLRU(10*BlockSize, onEvict, nil)
 
 	expectedSizesNumItems := []struct {
 		expBlocks int64
@@ -101,7 +101,7 @@ func TestEviction(t *testing.T) {
 
 func TestRejectBigItem(t *testing.T) {
 	// Bounded caches should reject big items
-	lru := NewSizedLRU(10, nil)
+	lru := NewSizedLRU(10, nil, nil)
 
 	ok := lru.Add("hello", lruItem{size: 11, sizeOnDisk: 11})
 	if ok {
@@ -114,7 +114,7 @@ func TestRejectBigItem(t *testing.T) {
 func TestReserveZeroAlwaysPossible(t *testing.T) {
 	largeItem := lruItem{size: math.MaxInt64, sizeOnDisk: math.MaxInt64}
 
-	lru := NewSizedLRU(math.MaxInt64, nil)
+	lru := NewSizedLRU(math.MaxInt64, nil, nil)
 	lru.Add("foo", largeItem)
 	ok, err := lru.Reserve(0)
 	if err != nil {
@@ -129,7 +129,7 @@ func TestReserveAtCapacity(t *testing.T) {
 	var ok bool
 	var err error
 
-	lru := NewSizedLRU(math.MaxInt64, nil)
+	lru := NewSizedLRU(math.MaxInt64, nil, nil)
 
 	ok, err = lru.Reserve(math.MaxInt64)
 	if err != nil {
@@ -173,7 +173,7 @@ func TestReserveOverflow(t *testing.T) {
 	var ok bool
 	var err error
 
-	lru = NewSizedLRU(1, nil)
+	lru = NewSizedLRU(1, nil, nil)
 
 	ok, err = lru.Reserve(1)
 	if err != nil {
@@ -191,7 +191,7 @@ func TestReserveOverflow(t *testing.T) {
 		t.Fatal("Expected overflow")
 	}
 
-	lru = NewSizedLRU(10, nil)
+	lru = NewSizedLRU(10, nil, nil)
 	ok, err = lru.Reserve(math.MaxInt64)
 	if err != nil {
 		t.Fatal(err)
@@ -206,7 +206,7 @@ func TestUnreserve(t *testing.T) {
 	var err error
 
 	cap := int64(10)
-	lru := NewSizedLRU(cap, nil)
+	lru := NewSizedLRU(cap, nil, nil)
 
 	for i := int64(1); i <= cap; i++ {
 		ok, err = lru.Reserve(1)
@@ -245,7 +245,7 @@ func TestUnreserve(t *testing.T) {
 }
 
 func TestAddWithSpaceReserved(t *testing.T) {
-	lru := NewSizedLRU(roundUp4k(2), nil)
+	lru := NewSizedLRU(roundUp4k(2), nil, nil)
 
 	ok, err := lru.Reserve(1)
 	if err != nil {