Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Incident muting & notification suppression #200

Merged
merged 10 commits into from
Jun 25, 2024
7 changes: 7 additions & 0 deletions cmd/icinga-notifications/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ import (
"github.com/icinga/icinga-notifications/internal/icinga2"
"github.com/icinga/icinga-notifications/internal/incident"
"github.com/icinga/icinga-notifications/internal/listener"
"github.com/icinga/icinga-notifications/internal/object"
"github.com/okzk/sdnotify"
"os"
"os/signal"
Expand Down Expand Up @@ -102,6 +103,12 @@ func main() {
logger.Fatalf("Cannot load incidents from database: %+v", err)
}

// Restore all muted objects that do not have an active incident yet, so that we neither trigger notifications
// for them while they are muted nor override the actual mute reason with a made-up one.
if err := object.RestoreMutedObjects(ctx, db); err != nil {
logger.Fatalf("Failed to restore muted objects: %+v", err)
}

// Wait to load open incidents from the database before either starting Event Stream Clients or starting the Listener.
icinga2Launcher.Ready()

Expand Down
1 change: 1 addition & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ require (
github.com/emersion/go-smtp v0.21.2
github.com/goccy/go-yaml v1.11.3
github.com/google/uuid v1.6.0
github.com/hashicorp/golang-lru/v2 v2.0.7
github.com/icinga/icinga-go-library v0.2.0
github.com/jhillyerd/enmime v1.2.0
github.com/jmoiron/sqlx v1.4.0
Expand Down
2 changes: 2 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,8 @@ github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI=
github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/hashicorp/golang-lru/v2 v2.0.7 h1:a+bsQ5rvGLjzHuww6tVxozPZFVghXaHOwFs4luLUK2k=
github.com/hashicorp/golang-lru/v2 v2.0.7/go.mod h1:QeFd9opnmA6QUJc5vARoKUSoFhyfM2/ZepoAG6RGpeM=
github.com/icinga/icinga-go-library v0.2.0 h1:1or5s3KMEJGdhFbMzlN8NPw1NCd/3ntsKLw5et4/9XI=
github.com/icinga/icinga-go-library v0.2.0/go.mod h1:YN7XJN3W0FodD+j4kirO89zk2tgvanXWt1RMV8UgOLo=
github.com/jaytaylor/html2text v0.0.0-20230321000545-74c2419ad056 h1:iCHtR9CQyktQ5+f3dMVZfwD2KWJUgm7M0gdL9NGr8KA=
Expand Down
58 changes: 44 additions & 14 deletions internal/event/event.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,10 @@ import (
// ErrSuperfluousStateChange indicates a superfluous state change being ignored and stopping further processing.
var ErrSuperfluousStateChange = errors.New("ignoring superfluous state change")

// ErrSuperfluousMuteUnmuteEvent indicates that a superfluous mute or unmute event is being ignored. It is
// triggered when trying to mute an already muted incident or to unmute an incident that is not muted.
var ErrSuperfluousMuteUnmuteEvent = errors.New("ignoring superfluous (un)mute event")

// Event received of a specified Type for internal processing.
//
// The JSON struct tags are being used to unmarshal a JSON representation received from the listener.Listener. Some
Expand All @@ -34,6 +38,9 @@ type Event struct {
Username string `json:"username"`
Message string `json:"message"`

Mute types.Bool `json:"mute"`
MuteReason string `json:"mute_reason"`
yhabteab marked this conversation as resolved.
Show resolved Hide resolved

ID int64 `json:"-"`
}

Expand All @@ -49,7 +56,9 @@ const (
TypeFlappingEnd = "flapping-end"
TypeFlappingStart = "flapping-start"
TypeIncidentAge = "incident-age"
TypeMute = "mute"
TypeState = "state"
TypeUnmute = "unmute"
)

// Validate validates the current event state.
Expand All @@ -66,6 +75,15 @@ func (e *Event) Validate() error {
if e.Severity != SeverityNone && e.Type != TypeState {
return fmt.Errorf("invalid event: if 'severity' is set, 'type' must be set to %q", TypeState)
}
if e.Type == TypeMute && (!e.Mute.Valid || !e.Mute.Bool) {
return fmt.Errorf("invalid event: 'mute' must be true if 'type' is set to %q", TypeMute)
}
if e.Type == TypeUnmute && (!e.Mute.Valid || e.Mute.Bool) {
return fmt.Errorf("invalid event: 'mute' must be false if 'type' is set to %q", TypeUnmute)
}
if e.Mute.Valid && e.Mute.Bool && e.MuteReason == "" {
return fmt.Errorf("invalid event: 'mute_reason' must not be empty if 'mute' is set")
}

switch e.Type {
case "":
Expand All @@ -80,13 +98,21 @@ func (e *Event) Validate() error {
TypeFlappingEnd,
TypeFlappingStart,
TypeIncidentAge,
TypeState:
TypeMute,
TypeState,
TypeUnmute:
return nil
default:
return fmt.Errorf("invalid event: unsupported event type %q", e.Type)
}
}

// SetMute overwrites the mute state and the mute reason of the event.
//
// MuteReason is replaced by reason as given (even if it is empty), and Mute is
// replaced by a value whose Valid flag is true and whose Bool is the muted argument.
func (e *Event) SetMute(muted bool, reason string) {
	e.MuteReason = reason
	e.Mute = types.Bool{Bool: muted, Valid: true}
}

// String returns a short textual summary of the event consisting of its time,
// its (quoted) type and the string form of its severity.
func (e *Event) String() string {
	severity := e.Severity.String()

	return fmt.Sprintf("[time=%s type=%q severity=%s]", e.Time, e.Type, severity)
}
Expand Down Expand Up @@ -146,13 +172,15 @@ func (e *Event) Sync(ctx context.Context, tx *sqlx.Tx, db *database.DB, objectId

// EventRow represents a single event database row and isn't an in-memory representation of an event.
type EventRow struct {
ID int64 `db:"id"`
Time types.UnixMilli `db:"time"`
ObjectID types.Binary `db:"object_id"`
Type types.String `db:"type"`
Severity Severity `db:"severity"`
Username types.String `db:"username"`
Message types.String `db:"message"`
ID int64 `db:"id"`
Time types.UnixMilli `db:"time"`
ObjectID types.Binary `db:"object_id"`
Type types.String `db:"type"`
Severity Severity `db:"severity"`
Username types.String `db:"username"`
Message types.String `db:"message"`
Mute types.Bool `db:"mute"`
MuteReason types.String `db:"mute_reason"`
}

// TableName implements the contracts.TableNamer interface.
Expand All @@ -162,11 +190,13 @@ func (er *EventRow) TableName() string {

func NewEventRow(e *Event, objectId types.Binary) *EventRow {
return &EventRow{
Time: types.UnixMilli(e.Time),
ObjectID: objectId,
Type: utils.ToDBString(e.Type),
Severity: e.Severity,
Username: utils.ToDBString(e.Username),
Message: utils.ToDBString(e.Message),
Time: types.UnixMilli(e.Time),
ObjectID: objectId,
Type: utils.ToDBString(e.Type),
Severity: e.Severity,
Username: utils.ToDBString(e.Username),
Message: utils.ToDBString(e.Message),
Mute: e.Mute,
MuteReason: utils.ToDBString(e.MuteReason),
}
}
75 changes: 45 additions & 30 deletions internal/icinga2/api_responses.go
Original file line number Diff line number Diff line change
Expand Up @@ -84,13 +84,16 @@ type CheckResult struct {
//
// NOTE:
// - An empty Service field indicates a host downtime.
// - If a downtime was added by a ScheduledDowntime object, ConfigOwner is set to the name of that object and the
// downtime can only be cancelled by its owner. Otherwise, ConfigOwner is empty, which indicates a user-created
// downtime (via API and/or UI).
//
// https://icinga.com/docs/icinga-2/latest/doc/09-object-types/#objecttype-downtime
type Downtime struct {
Host string `json:"host_name"`
Service string `json:"service_name"`
Author string `json:"author"`
Comment string `json:"comment"`
Host string `json:"host_name"`
Service string `json:"service_name"`
Author string `json:"author"`
Comment string `json:"comment"`
ConfigOwner string `json:"config_owner"`

// RemoveTime is used to indicate whether a downtime was ended automatically or cancelled prematurely by a user.
// It is set to zero time for the former case, otherwise to the timestamp at which the downtime was cancelled.
Expand Down Expand Up @@ -132,6 +135,7 @@ type HostServiceRuntimeAttributes struct {
LastStateChange UnixFloat `json:"last_state_change"`
DowntimeDepth int `json:"downtime_depth"`
Acknowledgement int `json:"acknowledgement"`
IsFlapping bool `json:"flapping"`
AcknowledgementLastChange UnixFloat `json:"acknowledgement_last_change"`
}

Expand All @@ -157,6 +161,8 @@ const (
typeDowntimeStarted = "DowntimeStarted"
typeDowntimeTriggered = "DowntimeTriggered"
typeFlapping = "Flapping"
typeObjectCreated = "ObjectCreated"
typeObjectDeleted = "ObjectDeleted"
)

// StateChange represents the Icinga 2 API Event Stream StateChange response for host/service state changes.
Expand All @@ -178,38 +184,27 @@ type StateChange struct {
Acknowledgement bool `json:"acknowledgement"`
}

// AcknowledgementSet represents the Icinga 2 API Event Stream AcknowledgementSet response for acknowledgements set on hosts/services.
// Acknowledgement represents the Icinga 2 API Event Stream AcknowledgementSet or AcknowledgementCleared
// response for acknowledgements set/cleared on/from hosts/services.
//
// NOTE:
// - An empty Service field indicates a host acknowledgement.
// - State might be StateHost{Up,Down} for hosts or StateService{Ok,Warning,Critical,Unknown} for services.
// - StateType might be StateTypeSoft or StateTypeHard.
// - EventType is either set to typeAcknowledgementSet or typeAcknowledgementCleared.
// - Author and Comment fields are always empty when EventType is set to typeAcknowledgementCleared.
//
// https://icinga.com/docs/icinga-2/latest/doc/12-icinga2-api/#event-stream-type-acknowledgementset
type AcknowledgementSet struct {
// https://icinga.com/docs/icinga-2/latest/doc/12-icinga2-api/#event-stream-type-acknowledgementcleared
type Acknowledgement struct {
Timestamp UnixFloat `json:"timestamp"`
Host string `json:"host"`
Service string `json:"service"`
State int `json:"state"`
StateType int `json:"state_type"`
Author string `json:"author"`
Comment string `json:"comment"`
}

// AcknowledgementCleared represents the Icinga 2 API Event Stream AcknowledgementCleared response for acknowledgements cleared on hosts/services.
//
// NOTE:
// - An empty Service field indicates a host acknowledgement.
// - State might be StateHost{Up,Down} for hosts or StateService{Ok,Warning,Critical,Unknown} for services.
// - StateType might be StateTypeSoft or StateTypeHard.
//
// https://icinga.com/docs/icinga-2/latest/doc/12-icinga2-api/#event-stream-type-acknowledgementcleared
type AcknowledgementCleared struct {
Timestamp UnixFloat `json:"timestamp"`
Host string `json:"host"`
Service string `json:"service"`
State int `json:"state"`
StateType int `json:"state_type"`
EventType string `json:"type"`
}

// CommentAdded represents the Icinga 2 API Event Stream CommentAdded response for added host/service comments.
Expand Down Expand Up @@ -264,13 +259,33 @@ type DowntimeTriggered struct {
//
// NOTE:
// - An empty Service field indicates a host being in flapping state.
// - State includes the current state of the Checkable at the point in time at which it enters or exits the flapping state.
// - CurrentFlapping indicates the current flapping value of a Checkable in percent.
// - ThresholdLow is the low/min flapping threshold value set by the user (CurrentFlapping < ThresholdLow = flapping end).
// - ThresholdHigh is the high/max flapping threshold value set by the user (CurrentFlapping > ThresholdHigh = flapping start).
//
yhabteab marked this conversation as resolved.
Show resolved Hide resolved
// https://icinga.com/docs/icinga-2/latest/doc/12-icinga2-api/#event-stream-type-flapping
type Flapping struct {
Timestamp UnixFloat `json:"timestamp"`
Host string `json:"host"`
Service string `json:"service"`
IsFlapping bool `json:"is_flapping"`
Timestamp UnixFloat `json:"timestamp"`
Host string `json:"host"`
Service string `json:"service"`
IsFlapping bool `json:"is_flapping"`
State int `json:"state"`
CurrentFlapping int `json:"current_flapping"`
ThresholdLow int `json:"threshold_low"`
ThresholdHigh int `json:"threshold_high"`
}

// ObjectCreatedDeleted represents the Icinga 2 API Event Stream ObjectCreated or ObjectDeleted response.
//
// A single type is used for both responses; the EventType field tells them apart.
//
// NOTE:
// - The ObjectName field already contains the composed name of the checkable if the ObjectType is `Service`.
// - The EventType field indicates which event type is currently being streamed and is either
// set to typeObjectCreated or typeObjectDeleted.
type ObjectCreatedDeleted struct {
// ObjectName is the name of the created/deleted object, decoded from the "object_name" JSON key.
ObjectName string `json:"object_name"`
// ObjectType is the type of the created/deleted object (e.g. `Service`), decoded from the "object_type" JSON key.
ObjectType string `json:"object_type"`
// EventType is the streamed event type, decoded from the "type" JSON key.
EventType string `json:"type"`
}

// UnmarshalEventStreamResponse unmarshals a JSON response line from the Icinga 2 API Event Stream.
Expand Down Expand Up @@ -305,10 +320,8 @@ func UnmarshalEventStreamResponse(bytes []byte) (any, error) {
switch responseType {
case typeStateChange:
resp = new(StateChange)
case typeAcknowledgementSet:
resp = new(AcknowledgementSet)
case typeAcknowledgementCleared:
resp = new(AcknowledgementCleared)
case typeAcknowledgementSet, typeAcknowledgementCleared:
resp = new(Acknowledgement)
case typeCommentAdded:
resp = new(CommentAdded)
case typeCommentRemoved:
Expand All @@ -323,6 +336,8 @@ func UnmarshalEventStreamResponse(bytes []byte) (any, error) {
resp = new(DowntimeTriggered)
case typeFlapping:
resp = new(Flapping)
case typeObjectCreated, typeObjectDeleted:
resp = new(ObjectCreatedDeleted)
default:
return nil, fmt.Errorf("unsupported type %q", responseType)
}
Expand Down
Loading
Loading