grafana
diff --git a/‎CHANGELOG.md
Lines changed: 1 addition & 0 deletions b/‎CHANGELOG.md
Lines changed: 1 addition & 0 deletions
diff --git a/‎cache/cache.go
Lines changed: 13 additions & 0 deletions b/‎cache/cache.go
Lines changed: 13 additions & 0 deletions
diff --git a/‎cache/client.go
Lines changed: 46 additions & 17 deletions b/‎cache/client.go
Lines changed: 46 additions & 17 deletions
diff --git a/‎cache/compression.go
Lines changed: 8 additions & 0 deletions b/‎cache/compression.go
Lines changed: 8 additions & 0 deletions
diff --git a/‎cache/lru.go
Lines changed: 35 additions & 0 deletions b/‎cache/lru.go
Lines changed: 35 additions & 0 deletions
diff --git a/‎cache/lru_test.go
Lines changed: 33 additions & 0 deletions b/‎cache/lru_test.go
Lines changed: 33 additions & 0 deletions
diff --git a/‎cache/memcached_client.go
Lines changed: 60 additions & 7 deletions b/‎cache/memcached_client.go
Lines changed: 60 additions & 7 deletions
@@ -230,6 +230,7 @@
 * [ENHANCEMENT] Runtimeconfig: support gzip-compressed files with `.gz` extension. #571
 * [ENHANCEMENT] grpcclient: Support custom gRPC compressors. #583
 * [ENHANCEMENT] Adapt `metrics.SendSumOfGaugesPerTenant` to use `metrics.MetricOption`. #584
+* [ENHANCEMENT] Cache: Add `.Add()` and `.Set()` methods to cache clients. #591
 * [CHANGE] Backoff: added `Backoff.ErrCause()` which is like `Backoff.Err()` but returns the context cause if backoff is terminated because the context has been canceled. #538
 * [BUGFIX] spanlogger: Support multiple tenant IDs. #59
 * [BUGFIX] Memberlist: fixed corrupted packets when sending compound messages with more than 255 messages or messages bigger than 64KB. #85
 
@@ -13,6 +13,11 @@ import (
 	"github.com/prometheus/client_golang/prometheus"
 )
 
+var (
+	// ErrNotStored is returned by Add when the item was not stored because an
+	// entry for the key already exists in the cache.
+	ErrNotStored  = errors.New("item not stored")
+	// ErrInvalidTTL is returned when the requested TTL cannot be used for a
+	// store operation.
+	ErrInvalidTTL = errors.New("invalid TTL")
+)
+
 // Cache is a high level interface to interact with a cache.
 type Cache interface {
 	// GetMulti fetches multiple keys at once from a cache. In case of error,
@@ -28,6 +33,14 @@ type Cache interface {
 	// any underlying async operations fail, the errors will be tracked/logged.
 	SetMultiAsync(data map[string][]byte, ttl time.Duration)
 
+	// Set stores a key and value into a cache.
+	Set(ctx context.Context, key string, value []byte, ttl time.Duration) error
+
+	// Add stores a key and value into a cache only if it does not already exist. If the
+	// item was not stored because an entry already exists in the cache, ErrNotStored will
+	// be returned.
+	Add(ctx context.Context, key string, value []byte, ttl time.Duration) error
+
 	// Delete deletes a key from a cache. This is a synchronous operation. If an asynchronous
 	// set operation for key is still pending to be processed, it will wait for it to complete
 	// before performing deletion.
 
@@ -17,6 +17,7 @@ import (
 // Common functionality shared between the Memcached and Redis Cache implementations
 
 const (
+	opAdd            = "add"
 	opSet            = "set"
 	opGetMulti       = "getmulti"
 	opDelete         = "delete"
@@ -29,6 +30,8 @@ const (
 	reasonMaxItemSize     = "max-item-size"
 	reasonAsyncBufferFull = "async-buffer-full"
 	reasonMalformedKey    = "malformed-key"
+	reasonInvalidTTL      = "invalid-ttl"
+	reasonNotStored       = "not-stored"
 	reasonConnectTimeout  = "connect-timeout"
 	reasonTimeout         = "request-timeout"
 	reasonServerError     = "server-error"
@@ -74,6 +77,7 @@ func newClientMetrics(reg prometheus.Registerer) *clientMetrics {
 		Help: "Total number of operations against cache.",
 	}, []string{"operation"})
 	cm.operations.WithLabelValues(opGetMulti)
+	cm.operations.WithLabelValues(opAdd)
 	cm.operations.WithLabelValues(opSet)
 	cm.operations.WithLabelValues(opDelete)
 	cm.operations.WithLabelValues(opIncrement)
@@ -85,10 +89,12 @@ func newClientMetrics(reg prometheus.Registerer) *clientMetrics {
 		Name: "operation_failures_total",
 		Help: "Total number of operations against cache that failed.",
 	}, []string{"operation", "reason"})
-	for _, op := range []string{opGetMulti, opSet, opDelete, opIncrement, opFlush, opTouch, opCompareAndSwap} {
+	for _, op := range []string{opGetMulti, opAdd, opSet, opDelete, opIncrement, opFlush, opTouch, opCompareAndSwap} {
 		cm.failures.WithLabelValues(op, reasonConnectTimeout)
 		cm.failures.WithLabelValues(op, reasonTimeout)
 		cm.failures.WithLabelValues(op, reasonMalformedKey)
+		cm.failures.WithLabelValues(op, reasonInvalidTTL)
+		cm.failures.WithLabelValues(op, reasonNotStored)
 		cm.failures.WithLabelValues(op, reasonServerError)
 		cm.failures.WithLabelValues(op, reasonNetworkError)
 		cm.failures.WithLabelValues(op, reasonOther)
@@ -99,6 +105,7 @@ func newClientMetrics(reg prometheus.Registerer) *clientMetrics {
 		Help: "Total number of operations against cache that have been skipped.",
 	}, []string{"operation", "reason"})
 	cm.skipped.WithLabelValues(opGetMulti, reasonMaxItemSize)
+	cm.skipped.WithLabelValues(opAdd, reasonMaxItemSize)
 	cm.skipped.WithLabelValues(opSet, reasonMaxItemSize)
 	cm.skipped.WithLabelValues(opSet, reasonAsyncBufferFull)
 
@@ -112,6 +119,7 @@ func newClientMetrics(reg prometheus.Registerer) *clientMetrics {
 		NativeHistogramMinResetDuration: time.Hour,
 	}, []string{"operation"})
 	cm.duration.WithLabelValues(opGetMulti)
+	cm.duration.WithLabelValues(opAdd)
 	cm.duration.WithLabelValues(opSet)
 	cm.duration.WithLabelValues(opDelete)
 	cm.duration.WithLabelValues(opIncrement)
@@ -129,6 +137,7 @@ func newClientMetrics(reg prometheus.Registerer) *clientMetrics {
 		[]string{"operation"},
 	)
 	cm.dataSize.WithLabelValues(opGetMulti)
+	cm.dataSize.WithLabelValues(opAdd)
 	cm.dataSize.WithLabelValues(opSet)
 	cm.dataSize.WithLabelValues(opCompareAndSwap)
 
@@ -172,22 +181,12 @@ func (c *baseClient) setAsync(key string, value []byte, ttl time.Duration, f fun
 	}
 
 	err := c.asyncQueue.submit(func() {
-		start := time.Now()
-		c.metrics.operations.WithLabelValues(opSet).Inc()
-
-		err := f(key, value, ttl)
-		if err != nil {
-			level.Debug(c.logger).Log(
-				"msg", "failed to store item to cache",
-				"key", key,
-				"sizeBytes", len(value),
-				"err", err,
-			)
-			c.trackError(opSet, err)
-		}
-
-		c.metrics.dataSize.WithLabelValues(opSet).Observe(float64(len(value)))
-		c.metrics.duration.WithLabelValues(opSet).Observe(time.Since(start).Seconds())
+		// Because this operation is executed in a separate goroutine: We run the operation without
+		// a context (it is expected to keep running no matter what happens) and we don't return the
+		// error (it will be tracked via metrics instead of being returned to the caller).
+		_ = c.storeOperation(context.Background(), key, value, ttl, opSet, func(_ context.Context, key string, value []byte, ttl time.Duration) error {
+			return f(key, value, ttl)
+		})
 	})
 
 	if err != nil {
@@ -196,6 +195,32 @@ func (c *baseClient) setAsync(key string, value []byte, ttl time.Duration, f fun
 	}
 }
 
+// storeOperation runs the store function f for a single key/value pair and records
+// metrics for it under the given operation label.
+//
+// When a maximum item size is configured and value exceeds it, the operation is counted
+// as skipped and nil is returned without calling f. Otherwise f is invoked; a non-nil
+// error from f is logged at debug level, tracked as a failure and returned to the caller.
+// The value size and elapsed time are observed whether or not f succeeded.
+func (c *baseClient) storeOperation(ctx context.Context, key string, value []byte, ttl time.Duration, operation string, f func(ctx context.Context, key string, value []byte, ttl time.Duration) error) error {
+	size := len(value)
+	if limit := c.maxItemSize; limit > 0 && uint64(size) > limit {
+		c.metrics.skipped.WithLabelValues(operation, reasonMaxItemSize).Inc()
+		return nil
+	}
+
+	began := time.Now()
+	c.metrics.operations.WithLabelValues(operation).Inc()
+
+	storeErr := f(ctx, key, value, ttl)
+	if storeErr != nil {
+		level.Debug(c.logger).Log(
+			"msg", "failed to store item to cache",
+			"operation", operation,
+			"key", key,
+			"sizeBytes", size,
+			"err", storeErr,
+		)
+		c.trackError(operation, storeErr)
+	}
+
+	c.metrics.dataSize.WithLabelValues(operation).Observe(float64(size))
+	c.metrics.duration.WithLabelValues(operation).Observe(time.Since(began).Seconds())
+
+	return storeErr
+}
+
 // wait submits an async task and blocks until it completes. This can be used during
 // tests to ensure that async "sets" have completed before attempting to read them.
 func (c *baseClient) wait() error {
@@ -255,6 +280,10 @@ func (c *baseClient) trackError(op string, err error) {
 		} else {
 			c.metrics.failures.WithLabelValues(op, reasonNetworkError).Inc()
 		}
+	case errors.Is(err, ErrNotStored):
+		c.metrics.failures.WithLabelValues(op, reasonNotStored).Inc()
+	case errors.Is(err, ErrInvalidTTL):
+		c.metrics.failures.WithLabelValues(op, reasonInvalidTTL).Inc()
 	case errors.Is(err, memcache.ErrMalformedKey):
 		c.metrics.failures.WithLabelValues(op, reasonMalformedKey).Inc()
 	case errors.Is(err, memcache.ErrServerError):
 
@@ -85,6 +85,14 @@ func (s *SnappyCache) SetMultiAsync(data map[string][]byte, ttl time.Duration) {
 	s.next.SetMultiAsync(encoded, ttl)
 }
 
+// Set implements Cache. The value is Snappy-encoded before being handed to the
+// wrapped cache; the key, TTL and returned error are passed through unchanged.
+func (s *SnappyCache) Set(ctx context.Context, key string, value []byte, ttl time.Duration) error {
+	encoded := snappy.Encode(nil, value)
+	return s.next.Set(ctx, key, encoded, ttl)
+}
+
+// Add implements Cache. The value is Snappy-encoded before being handed to the
+// wrapped cache; the key, TTL and returned error are passed through unchanged.
+func (s *SnappyCache) Add(ctx context.Context, key string, value []byte, ttl time.Duration) error {
+	encoded := snappy.Encode(nil, value)
+	return s.next.Add(ctx, key, encoded, ttl)
+}
+
 // GetMulti implements Cache.
 func (s *SnappyCache) GetMulti(ctx context.Context, keys []string, opts ...Option) map[string][]byte {
 	found := s.next.GetMulti(ctx, keys, opts...)
 
@@ -103,6 +103,41 @@ func (l *LRUCache) SetMultiAsync(data map[string][]byte, ttl time.Duration) {
 	}
 }
 
+// Set writes the entry to the wrapped cache and then stores it in the LRU with an
+// expiry of now+ttl. The LRU is updated regardless of the wrapped cache's result,
+// and that result is what gets returned to the caller.
+func (l *LRUCache) Set(ctx context.Context, key string, value []byte, ttl time.Duration) error {
+	sharedErr := l.c.Set(ctx, key, value, ttl)
+
+	l.mtx.Lock()
+	defer l.mtx.Unlock()
+
+	entry := &Item{
+		Data:      value,
+		ExpiresAt: time.Now().Add(ttl),
+	}
+	l.lru.Add(key, entry)
+
+	return sharedErr
+}
+
+// Add attempts to add the entry to the wrapped cache and, only when that succeeds,
+// stores it in the LRU with an expiry of now+ttl. Any error from the wrapped cache
+// is returned unchanged and leaves the LRU untouched.
+func (l *LRUCache) Add(ctx context.Context, key string, value []byte, ttl time.Duration) error {
+	// When a caller uses the Add method, the presence or absence of an entry in the
+	// cache has significance. To preserve those semantics, the LRU is only populated
+	// once the entry has been successfully added to the shared cache.
+	if err := l.c.Add(ctx, key, value, ttl); err != nil {
+		return err
+	}
+
+	l.mtx.Lock()
+	defer l.mtx.Unlock()
+
+	entry := &Item{
+		Data:      value,
+		ExpiresAt: time.Now().Add(ttl),
+	}
+	l.lru.Add(key, entry)
+
+	return nil
+}
+
 func (l *LRUCache) GetMulti(ctx context.Context, keys []string, opts ...Option) (result map[string][]byte) {
 	l.requests.Add(float64(len(keys)))
 	l.mtx.Lock()
 
@@ -94,3 +94,36 @@ func TestLRUCache_Evictions(t *testing.T) {
 		cache_memory_items_count{name="test"} 2
 	`), "cache_memory_items_count"))
 }
+
+// TestLRUCache_SetAdd checks that Set populates the LRU wrapper, that Add on an
+// existing key fails with ErrNotStored without changing the stored value, and that
+// the item count metric reflects only the entries written through Set.
+func TestLRUCache_SetAdd(t *testing.T) {
+	const maxItems = 10
+
+	ctx := context.Background()
+	reg := prometheus.NewPedanticRegistry()
+	lru, err := WrapWithLRUCache(NewMockCache(), "test", reg, maxItems, 2*time.Hour)
+	require.NoError(t, err)
+
+	// Seed the cache through Set; every call must succeed.
+	seeded := []struct{ key, value string }{
+		{"key_1", "value_1"},
+		{"key_2", "value_2"},
+		{"key_3", "value_3"},
+	}
+	want := make(map[string][]byte, len(seeded))
+	for _, kv := range seeded {
+		require.NoError(t, lru.Set(ctx, kv.key, []byte(kv.value), time.Minute))
+		want[kv.key] = []byte(kv.value)
+	}
+
+	// Adding a key that is already present must be rejected with ErrNotStored.
+	addErr := lru.Add(ctx, "key_1", []byte("value_1_2"), time.Minute)
+	require.ErrorIs(t, addErr, ErrNotStored)
+
+	require.NoError(t, testutil.GatherAndCompare(reg, strings.NewReader(`
+		# HELP cache_memory_items_count Total number of items currently in the in-memory cache.
+		# TYPE cache_memory_items_count gauge
+		cache_memory_items_count{name="test"} 3
+	`), "cache_memory_items_count"))
+
+	// The rejected Add must not have replaced the value stored for key_1.
+	got := lru.GetMulti(ctx, []string{"key_1", "key_2", "key_3"})
+	require.Equal(t, want, got)
+
+	// The entry must be present in the inner LRU with the originally set value.
+	item, ok := lru.lru.Get("key_1")
+	require.True(t, ok, "expected to fetch %s from inner LRU cache, got %+v", "key_1", item)
+	require.Equal(t, []byte("value_1"), item.Data)
+}
@@ -28,6 +28,7 @@ import (
 
 const (
 	dnsProviderUpdateInterval = 30 * time.Second
+	// maxTTL is the longest duration accepted as a TTL (30 days). toSeconds rejects
+	// anything longer because Memcached treats such values as UNIX timestamps.
+	maxTTL                    = 30 * 24 * time.Hour
 )
 
 var (
@@ -43,6 +44,7 @@ var (
 type memcachedClientBackend interface {
 	GetMulti(keys []string, opts ...memcache.Option) (map[string]*memcache.Item, error)
 	Set(item *memcache.Item) error
+	Add(item *memcache.Item) error
 	Delete(key string) error
 	Decrement(key string, delta uint64) (uint64, error)
 	Increment(key string, delta uint64) (uint64, error)
@@ -322,14 +324,47 @@ func (c *MemcachedClient) SetAsync(key string, value []byte, ttl time.Duration)
 	c.setAsync(key, value, ttl, c.setSingleItem)
 }
 
+// Set stores value under key with the given TTL. The work is routed through
+// storeOperation under the "set" operation label. If ctx is already done when the
+// store function runs, the context's error is returned and nothing is sent to the
+// backend; otherwise the result of setSingleItem is returned.
+func (c *MemcachedClient) Set(ctx context.Context, key string, value []byte, ttl time.Duration) error {
+	store := func(ctx context.Context, key string, value []byte, ttl time.Duration) error {
+		// Don't start the operation if the caller has already given up.
+		if err := ctx.Err(); err != nil {
+			return err
+		}
+		return c.setSingleItem(key, value, ttl)
+	}
+
+	return c.storeOperation(ctx, key, value, ttl, opSet, store)
+}
+
+// Add stores value under key with the given TTL only if the key does not already
+// exist in the cache. The work is routed through storeOperation under the "add"
+// operation label.
+//
+// It returns the context's error if ctx is already done when the store function
+// runs, an error wrapping ErrInvalidTTL if toSeconds rejects ttl, and an error
+// wrapping ErrNotStored if the backend reports memcache.ErrNotStored. Any other
+// backend error is returned unchanged.
+func (c *MemcachedClient) Add(ctx context.Context, key string, value []byte, ttl time.Duration) error {
+	return c.storeOperation(ctx, key, value, ttl, opAdd, func(ctx context.Context, key string, value []byte, ttl time.Duration) error {
+		// Don't start the operation if the caller has already given up.
+		if err := ctx.Err(); err != nil {
+			return err
+		}
+
+		ttlSeconds, ok := toSeconds(ttl)
+		if !ok {
+			return fmt.Errorf("%w: for add operation on %s %s", ErrInvalidTTL, key, ttl)
+		}
+
+		err := c.client.Add(&memcache.Item{
+			Key:        key,
+			Value:      value,
+			Expiration: ttlSeconds,
+		})
+
+		// Translate the backend's sentinel into this package's ErrNotStored so that
+		// callers can match on it with errors.Is.
+		if errors.Is(err, memcache.ErrNotStored) {
+			return fmt.Errorf("%w: for add operation on %s", ErrNotStored, key)
+		}
+
+		return err
+	})
+}
+
 func (c *MemcachedClient) setSingleItem(key string, value []byte, ttl time.Duration) error {
-	ttlSeconds := int32(ttl.Seconds())
-	// If a TTL of exactly 0 is passed, we honor it and pass it to Memcached which will
-	// interpret it as an infinite TTL. However, if we get a non-zero TTL that is truncated
-	// to 0 seconds, we discard the update since the caller didn't intend to set an infinite
-	// TTL.
-	if ttl != 0 && ttlSeconds <= 0 {
-		return nil
+	ttlSeconds, ok := toSeconds(ttl)
+	if !ok {
+		return fmt.Errorf("%w: for set operation on %s %s", ErrInvalidTTL, key, ttl)
 	}
 
 	return c.client.Set(&memcache.Item{
@@ -339,6 +374,24 @@ func (c *MemcachedClient) setSingleItem(key string, value []byte, ttl time.Durat
 	})
 }
 
+// toSeconds converts a time.Duration to whole seconds as an int32 and returns a
+// boolean indicating if the value is valid to be used as a TTL. On failure the
+// returned seconds value is 0 and must be ignored.
+//
+// A duration is not valid as a TTL if it is:
+//   - negative;
+//   - non-zero but less than a second long (Memcached uses seconds for TTL units
+//     but "0" to mean infinite TTL);
+//   - longer than 30 days (Memcached treats TTLs more than 30 days as UNIX
+//     timestamps).
+//
+// A duration of exactly 0 is valid and yields 0.
+func toSeconds(d time.Duration) (int32, bool) {
+	// Reject out-of-range values before converting, so the int32 conversion below
+	// is always performed on a value it can represent.
+	if d < 0 || d > maxTTL {
+		return 0, false
+	}
+
+	// Integer division truncates toward zero and cannot overflow int32 because d
+	// is bounded by maxTTL.
+	secs := int32(d / time.Second)
+	if d != 0 && secs == 0 {
+		return 0, false
+	}
+
+	return secs, true
+}
+
 func toMemcacheOptions(opts ...Option) []memcache.Option {
 	if len(opts) == 0 {
 		return nil