Skip to content

Commit

Permalink
Partitions in health checks (#129)
Browse files Browse the repository at this point in the history
  • Loading branch information
qrnvttrl authored Mar 5, 2024
1 parent 88d2feb commit 0ac6699
Show file tree
Hide file tree
Showing 2 changed files with 59 additions and 17 deletions.
29 changes: 16 additions & 13 deletions rest/health.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ type HealthCheck interface {
// ServiceName returns the name of the service that is health checked.
ServiceName() string
// Check is a function returning the health result of the service and an error.
Check(ctx context.Context) (HealthStatus, error)
Check(ctx context.Context) (HealthResult, error)
}

// HealthResponse is returned by the API when executing a health check.
Expand All @@ -41,17 +41,16 @@ type HealthResponse struct {
Status HealthStatus `json:"status"`
// Message gives additional information on the overall health state.
Message string `json:"message"`
// Services is map of services by name with their individual health results.
// Services contains the individual health results of the services, keyed by service name, as evaluated by the HealthCheck interface. The overall HealthStatus is then derived automatically from these results.
//
// Note that an individual HealthResult returned by a HealthCheck may itself consist of multiple services. This is optional, but it allows for creating nested health structures, which can be used for more sophisticated scenarios such as evaluating platform health by describing service availability in different locations.
//
// When using nested HealthResults, leave the parent's Status field empty to have it derived automatically from the statuses of its children. Derivation only takes place if the parent has at least one child service.
Services map[string]HealthResult `json:"services"`
}

// HealthResult holds the health state of a service.
type HealthResult struct {
// Status indicates the health of the service.
Status HealthStatus `json:"status"`
// Message gives additional information on the health of a service.
Message string `json:"message"`
}
type HealthResult HealthResponse

type healthResource struct {
log *slog.Logger
Expand Down Expand Up @@ -139,16 +138,17 @@ func (h *healthResource) check(request *restful.Request, response *restful.Respo
result := chanResult{
name: name,
HealthResult: HealthResult{
Status: HealthStatusHealthy,
Message: "",
Status: HealthStatusHealthy,
Message: "",
Services: map[string]HealthResult{},
},
}
defer func() {
resultChan <- result
}()

var err error
result.Status, err = healthCheck.Check(ctx)
result.HealthResult, err = healthCheck.Check(ctx)
if err != nil {
result.Message = err.Error()
h.log.Error("unhealthy service", "name", name, "status", result.Status, "error", err)
Expand All @@ -163,7 +163,6 @@ func (h *healthResource) check(request *restful.Request, response *restful.Respo
for r := range resultChan {
r := r
result.Services[r.name] = r.HealthResult

}
finished <- true
}()
Expand Down Expand Up @@ -194,7 +193,11 @@ func DeriveOverallHealthStatus(services map[string]HealthResult) HealthStatus {
unhealthy int
)

for _, service := range services {
for k, service := range services {
if len(service.Services) > 0 && service.Status == "" {
service.Status = DeriveOverallHealthStatus(service.Services)
}
services[k] = service
switch service.Status {
case HealthStatusHealthy:
case HealthStatusDegraded:
Expand Down
47 changes: 43 additions & 4 deletions rest/health_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,17 @@ func (e *succeedingCheck) ServiceName() string {
return "success"
}

func (e *succeedingCheck) Check(ctx context.Context) (HealthStatus, error) {
return HealthStatusHealthy, nil
// Check always succeeds: it returns a nil error together with a result whose
// only nested service, "successPartition", is healthy. The top-level Status is
// deliberately left unset in the returned value.
func (e *succeedingCheck) Check(ctx context.Context) (HealthResult, error) {
return HealthResult{
Message: "",
Services: map[string]HealthResult{
"successPartition": {
Status: HealthStatusHealthy,
Message: "",
Services: map[string]HealthResult{},
},
},
}, nil
}

type failingCheck struct{}
Expand All @@ -29,8 +38,17 @@ func (e *failingCheck) ServiceName() string {
return "fail"
}

func (e *failingCheck) Check(ctx context.Context) (HealthStatus, error) {
return HealthStatusUnhealthy, fmt.Errorf("facing an issue")
// Check always fails: it returns the error "facing an issue" together with a
// result whose only nested service, "failPartition", is unhealthy and carries
// the same message. The top-level Status is deliberately left unset in the
// returned value.
func (e *failingCheck) Check(ctx context.Context) (HealthResult, error) {
return HealthResult{
Message: "",
Services: map[string]HealthResult{
"failPartition": {
Status: HealthStatusUnhealthy,
Message: "facing an issue",
Services: map[string]HealthResult{},
},
},
}, fmt.Errorf("facing an issue")
}

func TestNewHealth(t *testing.T) {
Expand Down Expand Up @@ -74,10 +92,24 @@ func TestNewHealth(t *testing.T) {
"success": {
Status: HealthStatusHealthy,
Message: "",
Services: map[string]HealthResult{
"successPartition": {
Status: HealthStatusHealthy,
Message: "",
Services: map[string]HealthResult{},
},
},
},
"fail": {
Status: HealthStatusUnhealthy,
Message: "facing an issue",
Services: map[string]HealthResult{
"failPartition": {
Status: HealthStatusUnhealthy,
Message: "facing an issue",
Services: map[string]HealthResult{},
},
},
},
},
},
Expand All @@ -97,6 +129,13 @@ func TestNewHealth(t *testing.T) {
"success": {
Status: HealthStatusHealthy,
Message: "",
Services: map[string]HealthResult{
"successPartition": {
Status: HealthStatusHealthy,
Message: "",
Services: map[string]HealthResult{},
},
},
},
},
},
Expand Down

0 comments on commit 0ac6699

Please sign in to comment.