Skip to content

Commit

Permalink
feat: remote graphite retries (#1085)
Browse files Browse the repository at this point in the history
  • Loading branch information
AleksandrMatsko authored Nov 5, 2024
1 parent 63742c9 commit bbff8da
Show file tree
Hide file tree
Showing 21 changed files with 1,400 additions and 93 deletions.
4 changes: 2 additions & 2 deletions clock/clock.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,12 @@ func NewSystemClock() *SystemClock {
return &SystemClock{}
}

// Now returns now time.Time with UTC location.
// NowUTC returns now time.Time with UTC location.
func (t *SystemClock) NowUTC() time.Time {
return time.Now().UTC()
}

// Now returns now time.Time as a Unix time.
// NowUnix returns current time in a Unix time format.
func (t *SystemClock) NowUnix() int64 {
return time.Now().Unix()
}
61 changes: 52 additions & 9 deletions cmd/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import (
"strings"

"github.com/moira-alert/moira"
"github.com/moira-alert/moira/metric_source/retries"
"github.com/moira-alert/moira/metrics"

"github.com/moira-alert/moira/image_store/s3"
Expand Down Expand Up @@ -230,15 +231,54 @@ type remoteCommon interface {
getRemoteCommon() *RemoteCommonConfig
}

// RetriesConfig is a settings for retry policy when performing requests to remote sources.
// Stop retrying when ONE of the following conditions is satisfied:
// - Time passed since first try is greater than MaxElapsedTime;
// - Already MaxRetriesCount done.
type RetriesConfig struct {
// InitialInterval between requests.
InitialInterval string `yaml:"initial_interval"`
// RandomizationFactor is used in exponential backoff to add some randomization
// when calculating next interval between requests.
// It will be used in multiplication like:
// RandomizedInterval = RetryInterval * (random value in range [1 - RandomizationFactor, 1 + RandomizationFactor])
RandomizationFactor float64 `yaml:"randomization_factor"`
// Each new RetryInterval will be multiplied on Multiplier.
Multiplier float64 `yaml:"multiplier"`
// MaxInterval is the cap for RetryInterval. Note that it doesn't cap the RandomizedInterval.
MaxInterval string `yaml:"max_interval"`
// MaxElapsedTime caps the time passed from first try. If time passed is greater than MaxElapsedTime than stop retrying.
MaxElapsedTime string `yaml:"max_elapsed_time"`
// MaxRetriesCount is the amount of allowed retries. So at most MaxRetriesCount will be performed.
MaxRetriesCount uint64 `yaml:"max_retries_count"`
}

func (config RetriesConfig) getRetriesSettings() retries.Config {
return retries.Config{
InitialInterval: to.Duration(config.InitialInterval),
RandomizationFactor: config.RandomizationFactor,
Multiplier: config.Multiplier,
MaxInterval: to.Duration(config.MaxInterval),
MaxElapsedTime: to.Duration(config.MaxElapsedTime),
MaxRetriesCount: config.MaxRetriesCount,
}
}

// GraphiteRemoteConfig is remote graphite settings structure.
type GraphiteRemoteConfig struct {
RemoteCommonConfig `yaml:",inline"`
// Timeout for remote requests
// Timeout for remote requests.
Timeout string `yaml:"timeout"`
// Username for basic auth
// Username for basic auth.
User string `yaml:"user"`
// Password for basic auth
// Password for basic auth.
Password string `yaml:"password"`
// Retries configuration for general requests to remote graphite.
Retries RetriesConfig `yaml:"retries"`
// HealthcheckTimeout is timeout for remote api health check requests.
HealthcheckTimeout string `yaml:"health_check_timeout"`
// HealthCheckRetries configuration for healthcheck requests to remote graphite.
HealthCheckRetries RetriesConfig `yaml:"health_check_retries"`
}

func (config GraphiteRemoteConfig) getRemoteCommon() *RemoteCommonConfig {
Expand All @@ -248,12 +288,15 @@ func (config GraphiteRemoteConfig) getRemoteCommon() *RemoteCommonConfig {
// GetRemoteSourceSettings returns remote config parsed from moira config files.
func (config *GraphiteRemoteConfig) GetRemoteSourceSettings() *graphiteRemoteSource.Config {
return &graphiteRemoteSource.Config{
URL: config.URL,
CheckInterval: to.Duration(config.CheckInterval),
MetricsTTL: to.Duration(config.MetricsTTL),
Timeout: to.Duration(config.Timeout),
User: config.User,
Password: config.Password,
URL: config.URL,
CheckInterval: to.Duration(config.CheckInterval),
MetricsTTL: to.Duration(config.MetricsTTL),
Timeout: to.Duration(config.Timeout),
User: config.User,
Password: config.Password,
Retries: config.Retries.getRetriesSettings(),
HealthcheckTimeout: to.Duration(config.HealthcheckTimeout),
HealthcheckRetries: config.HealthCheckRetries.getRetriesSettings(),
}
}

Expand Down
1 change: 1 addition & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,7 @@ require (
github.com/Masterminds/goutils v1.1.1 // indirect
github.com/Masterminds/semver/v3 v3.2.1 // indirect
github.com/blang/semver/v4 v4.0.0 // indirect
github.com/cenkalti/backoff/v4 v4.3.0 // indirect
github.com/dyatlov/go-opengraph/opengraph v0.0.0-20220524092352-606d7b1e5f8a // indirect
github.com/fatih/color v1.16.0 // indirect
github.com/go-openapi/jsonpointer v0.20.0 // indirect
Expand Down
2 changes: 2 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,8 @@ github.com/bwmarrin/discordgo v0.25.0 h1:NXhdfHRNxtwso6FPdzW2i3uBvvU7UIQTghmV2T4
github.com/bwmarrin/discordgo v0.25.0/go.mod h1:NJZpH+1AfhIcyQsPeuBKsUtYrRnjkyu0kIVMCHkZtRY=
github.com/carlosdp/twiliogo v0.0.0-20161027183705-b26045ebb9d1 h1:hXakhQtPnXH839q1pBl/GqfTSchqE+R5Fqn98Iu7UQM=
github.com/carlosdp/twiliogo v0.0.0-20161027183705-b26045ebb9d1/go.mod h1:pAxCBpjl/0JxYZlWGP/Dyi8f/LQSCQD2WAsG/iNzqQ8=
github.com/cenkalti/backoff/v4 v4.3.0 h1:MyRJ/UdXutAwSAT+s3wNd7MfTIcy71VQueUuFK343L8=
github.com/cenkalti/backoff/v4 v4.3.0/go.mod h1:Y3VNntkOUPxTVeUxJ/G5vcM//AlwfmyYozVcomhLiZE=
github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU=
github.com/cespare/xxhash v1.1.0/go.mod h1:XrSqR1VqqWfGrhpAt58auRo0WTKS1nRRg3ghfAqPWnc=
github.com/cespare/xxhash/v2 v2.1.1/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
Expand Down
15 changes: 14 additions & 1 deletion local/api.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,21 @@ graphite_remote:
cluster_name: Graphite 1
url: "http://graphite:80/render"
check_interval: 60s
timeout: 60s
metrics_ttl: 168h
timeout: 60s
retries:
initial_interval: 60s
randomization_factor: 0.5
multiplier: 1.5
max_interval: 120s
max_retries_count: 3
health_check_timeout: 6s
health_check_retries:
initial_interval: 20s
randomization_factor: 0.5
multiplier: 1.5
max_interval: 80s
max_retries_count: 3
prometheus_remote:
- cluster_id: default
cluster_name: Prometheus 1
Expand Down
15 changes: 14 additions & 1 deletion local/checker.yml
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,21 @@ graphite_remote:
cluster_name: Graphite 1
url: "http://graphite:80/render"
check_interval: 60s
timeout: 60s
metrics_ttl: 168h
timeout: 60s
retries:
initial_interval: 60s
randomization_factor: 0.5
multiplier: 1.5
max_interval: 120s
max_retries_count: 3
health_check_timeout: 6s
health_check_retries:
initial_interval: 20s
randomization_factor: 0.5
multiplier: 1.5
max_interval: 80s
max_retries_count: 3
prometheus_remote:
- cluster_id: default
cluster_name: Prometheus 1
Expand Down
15 changes: 14 additions & 1 deletion local/notifier.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,21 @@ graphite_remote:
cluster_name: Graphite 1
url: "http://graphite:80/render"
check_interval: 60s
timeout: 60s
metrics_ttl: 168h
timeout: 60s
retries:
initial_interval: 60s
randomization_factor: 0.5
multiplier: 1.5
max_interval: 120s
max_retries_count: 3
health_check_timeout: 6s
health_check_retries:
initial_interval: 20s
randomization_factor: 0.5
multiplier: 1.5
max_interval: 80s
max_retries_count: 3
prometheus_remote:
- cluster_id: default
cluster_name: Prometheus 1
Expand Down
21 changes: 14 additions & 7 deletions metric_source/remote/config.go
Original file line number Diff line number Diff line change
@@ -1,13 +1,20 @@
package remote

import "time"
import (
"time"

"github.com/moira-alert/moira/metric_source/retries"
)

// Config represents config from remote storage.
type Config struct {
URL string
CheckInterval time.Duration
MetricsTTL time.Duration
Timeout time.Duration
User string
Password string
URL string `validate:"required,url"`
CheckInterval time.Duration
MetricsTTL time.Duration
Timeout time.Duration `validate:"required,gt=0s"`
User string
Password string
HealthcheckTimeout time.Duration `validate:"required,gt=0s"`
Retries retries.Config
HealthcheckRetries retries.Config
}
96 changes: 96 additions & 0 deletions metric_source/remote/config_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
package remote

import (
"errors"
"fmt"
"testing"
"time"

"github.com/go-playground/validator/v10"
"github.com/moira-alert/moira"

"github.com/moira-alert/moira/metric_source/retries"

. "github.com/smartystreets/goconvey/convey"
)

func TestConfigWithValidateStruct(t *testing.T) {
Convey("Test validating retries config", t, func() {
type testcase struct {
caseDesc string
conf Config
errIsNil bool
}

var (
testInitialInterval = time.Second * 5
testMaxInterval = time.Second * 10
testRetriesCount uint64 = 10
validatorErr = validator.ValidationErrors{}
)

testRetriesConf := retries.Config{
InitialInterval: testInitialInterval,
MaxInterval: testMaxInterval,
MaxRetriesCount: testRetriesCount,
}

cases := []testcase{
{
caseDesc: "with empty config",
conf: Config{},
errIsNil: false,
},
{
caseDesc: "with retries config set",
conf: Config{
Retries: testRetriesConf,
HealthcheckRetries: testRetriesConf,
},
errIsNil: false,
},
{
caseDesc: "with retries config set and some url",
conf: Config{
URL: "http://test-graphite",
Retries: testRetriesConf,
HealthcheckRetries: testRetriesConf,
},
errIsNil: false,
},
{
caseDesc: "with retries config set, some url, timeout",
conf: Config{
Timeout: time.Second,
URL: "http://test-graphite",
Retries: testRetriesConf,
HealthcheckRetries: testRetriesConf,
},
errIsNil: false,
},
{
caseDesc: "with valid config",
conf: Config{
Timeout: time.Second,
HealthcheckTimeout: time.Millisecond,
URL: "http://test-graphite",
Retries: testRetriesConf,
HealthcheckRetries: testRetriesConf,
},
errIsNil: true, // nil,
},
}

for i := range cases {
Convey(fmt.Sprintf("Case %d: %s", i+1, cases[i].caseDesc), func() {
err := moira.ValidateStruct(cases[i].conf)

if cases[i].errIsNil {
So(err, ShouldBeNil)
} else {
So(errors.As(err, &validatorErr), ShouldBeTrue)
}
})
}
})
}
Loading

0 comments on commit bbff8da

Please sign in to comment.