Skip to content

Commit

Permalink
enhance health util so counters no longer need a New constructor (config is passed per call)
Browse files Browse the repository at this point in the history
  • Loading branch information
boqiu committed May 9, 2024
1 parent d116da2 commit 431e287
Show file tree
Hide file tree
Showing 4 changed files with 68 additions and 88 deletions.
20 changes: 6 additions & 14 deletions health/counter.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,17 +8,9 @@ type CounterConfig struct {
// Counter represents an error-tolerant health counter that allows failures within a short time
// and periodically reminds of the unhealthy status if not recovered in time.
type Counter struct {
CounterConfig

failures uint64
}

func NewCounter(config CounterConfig) *Counter {
return &Counter{
CounterConfig: config,
}
}

// IsSuccess indicates whether any failure occurred.
func (counter *Counter) IsSuccess() bool {
return counter.failures == 0
Expand All @@ -29,14 +21,14 @@ func (counter *Counter) IsSuccess() bool {
// `recovered`: indicates if recovered from unhealthy status.
//
// `failures`: indicates the number of failures before success.
func (counter *Counter) OnSuccess() (recovered bool, failures uint64) {
func (counter *Counter) OnSuccess(config CounterConfig) (recovered bool, failures uint64) {
// last time was success status
if counter.failures == 0 {
return
}

// report health now after a long time
if failures = counter.failures; failures > counter.Threshold {
if failures = counter.failures; failures > config.Threshold {
recovered = true
}

Expand All @@ -53,17 +45,17 @@ func (counter *Counter) OnSuccess() (recovered bool, failures uint64) {
// `unrecovered`: indicates continuous failures that remain unrecovered for a long time.
//
// `failures`: indicates the number of failures so far.
func (counter *Counter) OnFailure() (unhealthy bool, unrecovered bool, failures uint64) {
func (counter *Counter) OnFailure(config CounterConfig) (unhealthy bool, unrecovered bool, failures uint64) {
counter.failures++

// error tolerant in short time
if failures = counter.failures; failures <= counter.Threshold {
if failures = counter.failures; failures <= config.Threshold {
return
}

if delta := failures - counter.Threshold - 1; delta == 0 {
if delta := failures - config.Threshold - 1; delta == 0 {
unhealthy = true
} else if delta%counter.Remind == 0 {
} else if delta%config.Remind == 0 {
unrecovered = true
}

Expand Down
52 changes: 25 additions & 27 deletions health/counter_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,90 +6,88 @@ import (
"github.com/stretchr/testify/assert"
)

func newTestCounter() *Counter {
return NewCounter(CounterConfig{
Threshold: 5,
Remind: 10,
})
var testCounterConfig = CounterConfig{
Threshold: 5,
Remind: 10,
}

func TestCounterContinousSuccess(t *testing.T) {
counter := newTestCounter()
var counter Counter

recovered, failures := counter.OnSuccess()
recovered, failures := counter.OnSuccess(testCounterConfig)
assert.False(t, recovered)
assert.Equal(t, uint64(0), failures)
}

func TestCounterFailedShortTime(t *testing.T) {
counter := newTestCounter()
var counter Counter

// first failure
unhealthy, unrecovered, failures := counter.OnFailure()
unhealthy, unrecovered, failures := counter.OnFailure(testCounterConfig)
assert.False(t, unhealthy)
assert.False(t, unrecovered)
assert.Equal(t, uint64(1), failures)

// continuous failure in short time
unhealthy, unrecovered, failures = counter.OnFailure()
unhealthy, unrecovered, failures = counter.OnFailure(testCounterConfig)
assert.False(t, unhealthy)
assert.False(t, unrecovered)
assert.Equal(t, uint64(2), failures)

// recovered
recovered, failures := counter.OnSuccess()
recovered, failures := counter.OnSuccess(testCounterConfig)
assert.False(t, recovered)
assert.Equal(t, uint64(2), failures)
}

func TestCounterThreshold(t *testing.T) {
counter := newTestCounter()
var counter Counter

// continuous failure in short time
for i := uint64(1); i <= counter.Threshold; i++ {
unhealthy, unrecovered, failures := counter.OnFailure()
for i := uint64(1); i <= testCounterConfig.Threshold; i++ {
unhealthy, unrecovered, failures := counter.OnFailure(testCounterConfig)
assert.False(t, unhealthy)
assert.False(t, unrecovered)
assert.Equal(t, i, failures)

}

// continuous failure in long time
unhealthy, unrecovered, failures := counter.OnFailure()
unhealthy, unrecovered, failures := counter.OnFailure(testCounterConfig)
assert.True(t, unhealthy)
assert.False(t, unrecovered)
assert.Equal(t, counter.Threshold+1, failures)
assert.Equal(t, testCounterConfig.Threshold+1, failures)

// continuous failure in long time, but the remind count is not yet reached
unhealthy, unrecovered, failures = counter.OnFailure()
unhealthy, unrecovered, failures = counter.OnFailure(testCounterConfig)
assert.False(t, unhealthy)
assert.False(t, unrecovered)
assert.Equal(t, counter.Threshold+2, failures)
assert.Equal(t, testCounterConfig.Threshold+2, failures)

// recovered
recovered, failures := counter.OnSuccess()
recovered, failures := counter.OnSuccess(testCounterConfig)
assert.True(t, recovered)
assert.Equal(t, counter.Threshold+2, failures)
assert.Equal(t, testCounterConfig.Threshold+2, failures)
}

func TestCounterRemind(t *testing.T) {
counter := newTestCounter()
var counter Counter

// continuous failure in short time
for i := uint64(1); i <= counter.Threshold+counter.Remind; i++ {
_, unrecovered, failures := counter.OnFailure()
for i := uint64(1); i <= testCounterConfig.Threshold+testCounterConfig.Remind; i++ {
_, unrecovered, failures := counter.OnFailure(testCounterConfig)
assert.False(t, unrecovered)
assert.Equal(t, i, failures)
}

// continuous failure and reached remind time
unhealthy, unrecovered, failures := counter.OnFailure()
unhealthy, unrecovered, failures := counter.OnFailure(testCounterConfig)
assert.False(t, unhealthy)
assert.True(t, unrecovered)
assert.Equal(t, counter.Threshold+counter.Remind+1, failures)
assert.Equal(t, testCounterConfig.Threshold+testCounterConfig.Remind+1, failures)

// recovered
recovered, failures := counter.OnSuccess()
recovered, failures := counter.OnSuccess(testCounterConfig)
assert.True(t, recovered)
assert.Equal(t, counter.Threshold+counter.Remind+1, failures)
assert.Equal(t, testCounterConfig.Threshold+testCounterConfig.Remind+1, failures)
}
26 changes: 9 additions & 17 deletions health/timed_counter.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,18 +10,10 @@ type TimedCounterConfig struct {
// TimedCounter represents an error-tolerant health counter that allows failures within a short time
// and periodically reminds of the unhealthy status if not recovered in time.
type TimedCounter struct {
TimedCounterConfig

failedAt time.Time // first failure time
reports int // number of times to report unhealthy
}

func NewTimedCounter(config TimedCounterConfig) *TimedCounter {
return &TimedCounter{
TimedCounterConfig: config,
}
}

// IsSuccess indicates whether any failure occurred.
func (counter *TimedCounter) IsSuccess() bool {
return counter.failedAt.IsZero()
Expand All @@ -32,18 +24,18 @@ func (counter *TimedCounter) IsSuccess() bool {
// `recovered`: indicates if recovered from unhealthy status.
//
// `elapsed`: indicates the duration since the first failure time.
func (counter *TimedCounter) OnSuccess() (recovered bool, elapsed time.Duration) {
return counter.onSuccessAt(time.Now())
func (counter *TimedCounter) OnSuccess(config TimedCounterConfig) (recovered bool, elapsed time.Duration) {
return counter.onSuccessAt(config, time.Now())
}

func (counter *TimedCounter) onSuccessAt(now time.Time) (recovered bool, elapsed time.Duration) {
func (counter *TimedCounter) onSuccessAt(config TimedCounterConfig, now time.Time) (recovered bool, elapsed time.Duration) {
// last time was success status
if counter.failedAt.IsZero() {
return
}

// report health now after a long time
if elapsed = now.Sub(counter.failedAt); elapsed > counter.Threshold {
if elapsed = now.Sub(counter.failedAt); elapsed > config.Threshold {
recovered = true
}

Expand All @@ -61,18 +53,18 @@ func (counter *TimedCounter) onSuccessAt(now time.Time) (recovered bool, elapsed
// `unrecovered`: indicates continuous failures that remain unrecovered for a long time.
//
// `elapsed`: indicates the duration since the first failure time.
func (counter *TimedCounter) OnFailure() (unhealthy bool, unrecovered bool, elapsed time.Duration) {
return counter.onFailureAt(time.Now())
func (counter *TimedCounter) OnFailure(config TimedCounterConfig) (unhealthy bool, unrecovered bool, elapsed time.Duration) {
return counter.onFailureAt(config, time.Now())
}

func (counter *TimedCounter) onFailureAt(now time.Time) (unhealthy bool, unrecovered bool, elapsed time.Duration) {
func (counter *TimedCounter) onFailureAt(config TimedCounterConfig, now time.Time) (unhealthy bool, unrecovered bool, elapsed time.Duration) {
// record the first failure time
if counter.failedAt.IsZero() {
counter.failedAt = now
}

// error tolerant in short time
if elapsed = now.Sub(counter.failedAt); elapsed <= counter.Threshold {
if elapsed = now.Sub(counter.failedAt); elapsed <= config.Threshold {
return
}

Expand All @@ -84,7 +76,7 @@ func (counter *TimedCounter) onFailureAt(now time.Time) (unhealthy bool, unrecov
}

// remind time not reached
if remind := counter.Threshold + counter.Remind*time.Duration(counter.reports); elapsed <= remind {
if remind := config.Threshold + config.Remind*time.Duration(counter.reports); elapsed <= remind {
return
}

Expand Down
58 changes: 28 additions & 30 deletions health/timed_counter_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,92 +7,90 @@ import (
"github.com/stretchr/testify/assert"
)

func newTestTimedCounter() *TimedCounter {
return NewTimedCounter(TimedCounterConfig{
Threshold: time.Minute,
Remind: 5 * time.Minute,
})
var testTimedCounterConfig = TimedCounterConfig{
Threshold: time.Minute,
Remind: 5 * time.Minute,
}

func TestTimedCounterContinousSuccess(t *testing.T) {
counter := newTestTimedCounter()
var counter TimedCounter

recovered, elapsed := counter.onSuccessAt(time.Now().Add(counter.Threshold + 1))
recovered, elapsed := counter.onSuccessAt(testTimedCounterConfig, time.Now().Add(testTimedCounterConfig.Threshold+1))
assert.False(t, recovered)
assert.Equal(t, time.Duration(0), elapsed)
}

func TestTimedCounterFailedShortTime(t *testing.T) {
counter := newTestTimedCounter()
var counter TimedCounter
now := time.Now()

// first failure
unhealthy, unrecovered, elapsed := counter.onFailureAt(now)
unhealthy, unrecovered, elapsed := counter.onFailureAt(testTimedCounterConfig, now)
assert.False(t, unhealthy)
assert.False(t, unrecovered)
assert.Equal(t, time.Duration(0), elapsed)

// continuous failure in short time
unhealthy, unrecovered, elapsed = counter.onFailureAt(now.Add(counter.Threshold - 2))
unhealthy, unrecovered, elapsed = counter.onFailureAt(testTimedCounterConfig, now.Add(testTimedCounterConfig.Threshold-2))
assert.False(t, unhealthy)
assert.False(t, unrecovered)
assert.Equal(t, counter.Threshold-2, elapsed)
assert.Equal(t, testTimedCounterConfig.Threshold-2, elapsed)

// recovered
recovered, elapsed := counter.onSuccessAt(now.Add(counter.Threshold - 1))
recovered, elapsed := counter.onSuccessAt(testTimedCounterConfig, now.Add(testTimedCounterConfig.Threshold-1))
assert.False(t, recovered)
assert.Equal(t, counter.Threshold-1, elapsed)
assert.Equal(t, testTimedCounterConfig.Threshold-1, elapsed)
}

func TestTimedCounterThreshold(t *testing.T) {
counter := newTestTimedCounter()
var counter TimedCounter
now := time.Now()

// first failure
counter.onFailureAt(now)
counter.onFailureAt(testTimedCounterConfig, now)

// continuous failure in short time
counter.onFailureAt(now.Add(counter.Threshold - 1))
counter.onFailureAt(testTimedCounterConfig, now.Add(testTimedCounterConfig.Threshold-1))

// continuous failure in long time
unhealthy, unrecovered, elapsed := counter.onFailureAt(now.Add(counter.Threshold + 1))
unhealthy, unrecovered, elapsed := counter.onFailureAt(testTimedCounterConfig, now.Add(testTimedCounterConfig.Threshold+1))
assert.True(t, unhealthy)
assert.False(t, unrecovered)
assert.Equal(t, counter.Threshold+1, elapsed)
assert.Equal(t, testTimedCounterConfig.Threshold+1, elapsed)

// recovered
recovered, elapsed := counter.onSuccessAt(now.Add(counter.Threshold + 2))
recovered, elapsed := counter.onSuccessAt(testTimedCounterConfig, now.Add(testTimedCounterConfig.Threshold+2))
assert.True(t, recovered)
assert.Equal(t, counter.Threshold+2, elapsed)
assert.Equal(t, testTimedCounterConfig.Threshold+2, elapsed)
}

func TestTimedCounterRemind(t *testing.T) {
counter := newTestTimedCounter()
var counter TimedCounter
now := time.Now()

// first failure
counter.onFailureAt(now)
counter.onFailureAt(testTimedCounterConfig, now)

// continuous failure in short time
counter.onFailureAt(now.Add(counter.Threshold - 1))
counter.onFailureAt(testTimedCounterConfig, now.Add(testTimedCounterConfig.Threshold-1))

// continuous failure in long time
counter.onFailureAt(now.Add(counter.Threshold + 1))
counter.onFailureAt(testTimedCounterConfig, now.Add(testTimedCounterConfig.Threshold+1))

// continuous failure in long time, but the remind time is not yet reached
unhealthy, unrecovered, elapsed := counter.onFailureAt(now.Add(counter.Threshold + 2))
unhealthy, unrecovered, elapsed := counter.onFailureAt(testTimedCounterConfig, now.Add(testTimedCounterConfig.Threshold+2))
assert.False(t, unhealthy)
assert.False(t, unrecovered)
assert.Equal(t, counter.Threshold+2, elapsed)
assert.Equal(t, testTimedCounterConfig.Threshold+2, elapsed)

// continuous failure and reached remind time
unhealthy, unrecovered, elapsed = counter.onFailureAt(now.Add(counter.Threshold + 2 + counter.Remind))
unhealthy, unrecovered, elapsed = counter.onFailureAt(testTimedCounterConfig, now.Add(testTimedCounterConfig.Threshold+2+testTimedCounterConfig.Remind))
assert.False(t, unhealthy)
assert.True(t, unrecovered)
assert.Equal(t, counter.Threshold+2+counter.Remind, elapsed)
assert.Equal(t, testTimedCounterConfig.Threshold+2+testTimedCounterConfig.Remind, elapsed)

// recovered
recovered, elapsed := counter.onSuccessAt(now.Add(counter.Threshold + 3 + counter.Remind))
recovered, elapsed := counter.onSuccessAt(testTimedCounterConfig, now.Add(testTimedCounterConfig.Threshold+3+testTimedCounterConfig.Remind))
assert.True(t, recovered)
assert.Equal(t, counter.Threshold+3+counter.Remind, elapsed)
assert.Equal(t, testTimedCounterConfig.Threshold+3+testTimedCounterConfig.Remind, elapsed)
}

0 comments on commit 431e287

Please sign in to comment.