Skip to content

Commit

Permalink
ruler: conditionally enable caching of rule group contents
Browse files Browse the repository at this point in the history
Add a new experimental flag to enable caching of rule group contents
using the rule store cache. Rule groups are cached using the same TTL
as rule group listings.

Part of #9386

Signed-off-by: Nick Pillitteri <nick.pillitteri@grafana.com>
  • Loading branch information
56quarters committed Oct 15, 2024
1 parent 197314a commit b033be5
Show file tree
Hide file tree
Showing 7 changed files with 58 additions and 6 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
* [FEATURE] Alertmanager: limit added for maximum size of the Grafana configuration (`-alertmanager.max-config-size-bytes`). #9402
* [FEATURE] Ingester: Experimental support for ingesting out-of-order native histograms. This is disabled by default and can be enabled by setting `-ingester.ooo-native-histograms-ingestion-enabled` to `true`. #7175
* [FEATURE] Distributor: Added `-api.skip-label-count-validation-header-enabled` option to allow skipping label count validation on the HTTP write path based on `X-Mimir-SkipLabelCountValidation` header being `true` or not. #9576
* [FEATURE] Ruler: Add experimental support for caching the contents of rule groups. This is disabled by default and can be enabled by setting `-ruler-storage.cache.rule-group-enabled`. #9595
* [ENHANCEMENT] mimirtool: Adds bearer token support for mimirtool's analyze ruler/prometheus commands. #9587
* [ENHANCEMENT] Ruler: Support `exclude_alerts` parameter in `<prometheus-http-prefix>/api/v1/rules` endpoint. #9300
* [ENHANCEMENT] Distributor: add a metric to track tenants who are sending newlines in their label values called `cortex_distributor_label_values_with_newlines_total`. #9400
Expand Down
11 changes: 11 additions & 0 deletions cmd/mimir/config-descriptor.json
Original file line number Diff line number Diff line change
Expand Up @@ -13669,6 +13669,17 @@
"required": false,
"desc": "",
"blockEntries": [
{
"kind": "field",
"name": "rule_group_enabled",
"required": false,
"desc": "Enabling caching of rule group contents if a cache backend is configured.",
"fieldValue": null,
"fieldDefaultValue": false,
"fieldFlag": "ruler-storage.cache.rule-group-enabled",
"fieldType": "boolean",
"fieldCategory": "experimental"
},
{
"kind": "field",
"name": "backend",
Expand Down
2 changes: 2 additions & 0 deletions cmd/mimir/help-all.txt.tmpl
Original file line number Diff line number Diff line change
Expand Up @@ -2497,6 +2497,8 @@ Usage of ./cmd/mimir/mimir:
[deprecated] Username to use when connecting to Redis.
-ruler-storage.cache.redis.write-timeout duration
[deprecated] Client write timeout. (default 3s)
-ruler-storage.cache.rule-group-enabled
[experimental] Enabling caching of rule group contents if a cache backend is configured.
-ruler-storage.filesystem.dir string
Local filesystem storage directory. (default "ruler")
-ruler-storage.gcs.bucket-name string
Expand Down
2 changes: 2 additions & 0 deletions docs/sources/mimir/configure/about-versioning.md
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,8 @@ The following features are currently experimental:
- Allow control over rule sync intervals.
- `-ruler.outbound-sync-queue-poll-interval`
- `-ruler.inbound-sync-queue-poll-interval`
- Cache rule group contents.
- `-ruler-storage.cache.rule-group-enabled`
- Distributor
- Metrics relabeling
- `-distributor.metric-relabeling-enabled`
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2166,6 +2166,11 @@ local:
[directory: <string> | default = ""]
cache:
# (experimental) Enabling caching of rule group contents if a cache backend is
# configured.
# CLI flag: -ruler-storage.cache.rule-group-enabled
[rule_group_enabled: <boolean> | default = false]
# Backend for ruler storage cache, if not empty. The cache is supported for
# any storage backend except "local". Supported values: memcached, redis.
# CLI flag: -ruler-storage.cache.backend
Expand Down
24 changes: 19 additions & 5 deletions pkg/ruler/rulestore/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,21 @@ type Config struct {
bucket.Config `yaml:",inline"`
Local LocalStoreConfig `yaml:"local"`

// RulerCache holds the configuration used for the ruler storage cache.
RulerCache RulerCacheConfig `yaml:"cache"`
}

// RulerCacheConfig is configuration for the cache used by ruler storage as well as
// additional ruler storage specific configuration.
//
// NOTE: This is temporary while caching of rule groups is being tested. This will be removed
// in the future and cache.BackendConfig will be moved back to the Config struct above.
type RulerCacheConfig struct {
	// RuleGroupEnabled enables caching of rule group contents.
	RuleGroupEnabled bool `yaml:"rule_group_enabled" category:"experimental"`

	// Cache holds the configuration used for the ruler storage cache. It is
	// inlined so the backend settings sit directly alongside rule_group_enabled
	// rather than under an additional nested key.
	Cache cache.BackendConfig `yaml:",inline"`
}

// RegisterFlags registers the backend storage config.
Expand All @@ -41,17 +54,18 @@ func (cfg *Config) RegisterFlags(f *flag.FlagSet) {
cfg.Local.RegisterFlagsWithPrefix(prefix, f)
cfg.RegisterFlagsWithPrefixAndDefaultDirectory(prefix, "ruler", f)

f.StringVar(&cfg.Cache.Backend, prefix+"cache.backend", "", fmt.Sprintf("Backend for ruler storage cache, if not empty. The cache is supported for any storage backend except %q. Supported values: %s.", BackendLocal, strings.Join(supportedCacheBackends, ", ")))
cfg.Cache.Memcached.RegisterFlagsWithPrefix(prefix+"cache.memcached.", f)
cfg.Cache.Redis.RegisterFlagsWithPrefix(prefix+"cache.redis.", f)
f.BoolVar(&cfg.RulerCache.RuleGroupEnabled, prefix+"cache.rule-group-enabled", false, "Enabling caching of rule group contents if a cache backend is configured.")
f.StringVar(&cfg.RulerCache.Cache.Backend, prefix+"cache.backend", "", fmt.Sprintf("Backend for ruler storage cache, if not empty. The cache is supported for any storage backend except %q. Supported values: %s.", BackendLocal, strings.Join(supportedCacheBackends, ", ")))
cfg.RulerCache.Cache.Memcached.RegisterFlagsWithPrefix(prefix+"cache.memcached.", f)
cfg.RulerCache.Cache.Redis.RegisterFlagsWithPrefix(prefix+"cache.redis.", f)
}

// Validate checks the embedded bucket configuration first and, if that
// succeeds, the ruler storage cache backend configuration. It returns the
// first error encountered, or nil when both are valid.
func (cfg *Config) Validate() error {
	if err := cfg.Config.Validate(); err != nil {
		return err
	}

	// The cache backend configuration lives under RulerCache.
	return cfg.RulerCache.Cache.Validate()
}

// IsDefaults returns true if the storage options have not been set.
Expand Down
19 changes: 18 additions & 1 deletion pkg/ruler/storage.go
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ func wrapBucketWithCache(bkt objstore.Bucket, cfg rulestore.Config, cacheTTL tim

cacheCfg := bucketcache.NewCachingBucketConfig()

cacheClient, err := cache.CreateClient("ruler-storage-cache", cfg.Cache, logger, prometheus.WrapRegistererWithPrefix("thanos_", reg))
cacheClient, err := cache.CreateClient("ruler-storage-cache", cfg.RulerCache.Cache, logger, prometheus.WrapRegistererWithPrefix("thanos_", reg))
if err != nil {
return nil, errors.Wrapf(err, "ruler-storage-cache")
}
Expand All @@ -77,6 +77,12 @@ func wrapBucketWithCache(bkt objstore.Bucket, cfg rulestore.Config, cacheTTL tim
codec := bucketcache.SnappyIterCodec{IterCodec: bucketcache.JSONIterCodec{}}
cacheCfg.CacheIter("iter", cacheClient, isNotTenantsDir, cacheTTL, codec)

// Only cache the contents of rule groups if enabled. This is an experimental feature and we need to be able
// to disable it. Once this feature is validated, it will be enabled unconditionally.
if cfg.RulerCache.RuleGroupEnabled {
cacheCfg.CacheGet("rule-group", cacheClient, isRuleGroup, maxItemSize(cfg.RulerCache.Cache), cacheTTL, cacheTTL, cacheTTL)
}

return bucketcache.NewCachingBucket("ruler", bkt, cacheCfg, logger, reg)
}

Expand All @@ -87,3 +93,14 @@ func isNotTenantsDir(name string) bool {
// isRuleGroup reports whether the given object name starts with the "rules/"
// prefix.
func isRuleGroup(name string) bool {
	const rulesPrefix = "rules/"
	return strings.HasPrefix(name, rulesPrefix)
}

// maxItemSize returns the MaxItemSize configured for the selected cache
// backend (memcached or Redis). For any other backend value it returns 0.
func maxItemSize(cfg cache.BackendConfig) int {
	if cfg.Backend == cache.BackendMemcached {
		return cfg.Memcached.MaxItemSize
	}
	if cfg.Backend == cache.BackendRedis {
		return cfg.Redis.MaxItemSize
	}
	return 0
}

0 comments on commit b033be5

Please sign in to comment.