diff --git a/.travis.yml b/.travis.yml
new file mode 100644
index 0000000..cf8ecd0
--- /dev/null
+++ b/.travis.yml
@@ -0,0 +1,12 @@
+language: go
+
+go:
+ - 1.8.1
+
+# Unconditionally place the repo at GOPATH/src/${go_import_path} to support
+# forks.
+go_import_path: github.com/m-lab/alertmanager-github-receiver
+
+script:
+# Run query "unit tests".
+- go test -v github.com/m-lab/alertmanager-github-receiver/...
diff --git a/alerts/handler.go b/alerts/handler.go
new file mode 100644
index 0000000..78ef147
--- /dev/null
+++ b/alerts/handler.go
@@ -0,0 +1,150 @@
+// Copyright 2017 alertmanager-github-receiver Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//////////////////////////////////////////////////////////////////////////////
+package alerts
+
+import (
+	"encoding/json"
+
+	"github.com/google/go-github/github"
+	//"github.com/kr/pretty"
+	"io/ioutil"
+	"log"
+	"net/http"
+
+	"github.com/prometheus/alertmanager/notify"
+)
+
+// ReceiverClient defines the github issue operations that ReceiverHandler
+// needs: listing the open issues, creating an issue and closing an issue.
+type ReceiverClient interface {
+	CloseIssue(issue *github.Issue) (*github.Issue, error)
+	CreateIssue(title, body string) (*github.Issue, error)
+	ListOpenIssues() ([]*github.Issue, error)
+}
+
+// ReceiverHandler is an http.Handler that translates alertmanager webhook
+// notifications into issue operations on Client.
+type ReceiverHandler struct {
+	Client ReceiverClient
+}
+
+// ServeHTTP receives and processes alertmanager notifications. If the alert
+// is firing and a github issue does not yet exist, one is created. If the
+// alert is resolved and a github issue exists, then it is closed.
+//
+// Only POST is accepted; other methods get 405. A body that cannot be read or
+// an alert that cannot be processed gets 500. A body that is not a JSON
+// webhook message, or that carries no alert data, gets 400. On success the
+// response is an empty 200.
+func (rh *ReceiverHandler) ServeHTTP(rw http.ResponseWriter, req *http.Request) {
+	// Verify that request is a POST.
+	if req.Method != http.MethodPost {
+		log.Printf("Client used unsupported method: %s: %s", req.Method, req.RemoteAddr)
+		rw.WriteHeader(http.StatusMethodNotAllowed)
+		return
+	}
+
+	// Read request body.
+	alertBytes, err := ioutil.ReadAll(req.Body)
+	if err != nil {
+		log.Printf("Failed to read request body: %s", err)
+		rw.WriteHeader(http.StatusInternalServerError)
+		return
+	}
+
+	// The WebhookMessage is dependent on alertmanager version. Parse it.
+	msg := &notify.WebhookMessage{}
+	if err := json.Unmarshal(alertBytes, msg); err != nil {
+		log.Printf("Failed to parse webhook message from %s: %s", req.RemoteAddr, err)
+		log.Printf("%s", string(alertBytes))
+		rw.WriteHeader(http.StatusBadRequest)
+		return
+	}
+	// log.Print(pretty.Sprint(msg))
+
+	// A message that decodes without alert data (e.g. "{}") leaves msg.Data
+	// nil; reject it here because formatTitle and processAlert dereference it.
+	if msg.Data == nil {
+		log.Printf("Webhook message from %s has no alert data", req.RemoteAddr)
+		rw.WriteHeader(http.StatusBadRequest)
+		return
+	}
+
+	// Handle the webhook message.
+	log.Printf("Handling alert: %s", id(msg))
+	if err := rh.processAlert(msg); err != nil {
+		log.Printf("Failed to handle alert: %s: %s", id(msg), err)
+		rw.WriteHeader(http.StatusInternalServerError)
+		return
+	}
+	log.Printf("Completed alert: %s", id(msg))
+	rw.WriteHeader(http.StatusOK)
+	// Empty response.
+}
+
+// processAlert processes an alertmanager webhook message. A firing message
+// with no open issue of the same title creates one; a resolved message with
+// a matching open issue closes it; any other combination is a no-op. Errors
+// from the client are returned as-is. msg.Data must be non-nil.
+func (rh *ReceiverHandler) processAlert(msg *notify.WebhookMessage) error {
+	// TODO(dev): replace list-and-search with search using labels.
+	// TODO(dev): Cache list results.
+	// List known issues from github.
+	issues, err := rh.Client.ListOpenIssues()
+	if err != nil {
+		return err
+	}
+
+	// Search for an issue that matches the notification message from AM.
+	msgTitle := formatTitle(msg)
+	var foundIssue *github.Issue
+	for _, issue := range issues {
+		// Skip entries without a title instead of dereferencing nil.
+		if issue == nil || issue.Title == nil {
+			continue
+		}
+		if msgTitle == *issue.Title {
+			log.Printf("Found matching issue: %s\n", msgTitle)
+			foundIssue = issue
+			break
+		}
+	}
+
+	// The message is currently firing and we did not find a matching
+	// issue from github, so create a new issue.
+	if msg.Data.Status == "firing" && foundIssue == nil {
+		msgBody := formatIssueBody(msg)
+		_, err := rh.Client.CreateIssue(msgTitle, msgBody)
+		return err
+	}
+
+	// TODO(dev): every alert is an incident. So, open issues are a sign of
+	// either a real problem or a bad alert. Stop auto-closing issues once we
+	// are confident that the github receiver is well behaved.
+
+	// The message is resolved and we found a matching open issue from github,
+	// so close the issue.
+	if msg.Data.Status == "resolved" && foundIssue != nil {
+		// NOTE: there can be multiple "resolved" messages for the same
+		// alert. Prometheus evaluates rules every `evaluation_interval`.
+		// And, alertmanager preserves an alert until `resolve_timeout`. So
+		// expect (resolve_timeout / evaluation_interval) messages.
+		_, err := rh.Client.CloseIssue(foundIssue)
+		return err
+	}
+
+	// log.Printf("Unsupported WebhookMessage.Data.Status: %s", msg.Data.Status)
+	return nil
+}
diff --git a/alerts/handler_test.go b/alerts/handler_test.go
new file mode 100644
index 0000000..d2b3a14
--- /dev/null
+++ b/alerts/handler_test.go
@@ -0,0 +1,162 @@
+// Copyright 2017 alertmanager-github-receiver Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//////////////////////////////////////////////////////////////////////////////
+package alerts_test
+
+import (
+	"bytes"
+	"encoding/json"
+	"fmt"
+	"github.com/google/go-github/github"
+	"io/ioutil"
+	"net/http"
+	"net/http/httptest"
+	"testing"
+	"time"
+
+	"github.com/prometheus/alertmanager/notify"
+	"github.com/prometheus/alertmanager/template"
+	"github.com/m-lab/alertmanager-github-receiver/alerts"
+)
+
+// fakeClient stubs the handler's client and records created/closed issues.
+type fakeClient struct {
+	listIssues   []*github.Issue
+	createdIssue *github.Issue
+	closedIssue  *github.Issue
+}
+
+func (f *fakeClient) ListOpenIssues() ([]*github.Issue, error) {
+	fmt.Println("list open issues")
+	return f.listIssues, nil
+}
+
+func (f *fakeClient) CreateIssue(title, body string) (*github.Issue, error) {
+	fmt.Println("create issue")
+	f.createdIssue = createIssue(title, body)
+	return f.createdIssue, nil
+}
+
+func (f *fakeClient) CloseIssue(issue *github.Issue) (*github.Issue, error) {
+	fmt.Println("close issue")
+	f.closedIssue = issue
+	return issue, nil
+}
+
+// createWebhookMessage returns the JSON body of a one-alert webhook message.
+func createWebhookMessage(alertname, status string) *bytes.Buffer {
+	msg := &notify.WebhookMessage{
+		Data: &template.Data{
+			Receiver: "webhook",
+			Status:   status,
+			Alerts: template.Alerts{
+				template.Alert{
+					Status:       status,
+					Labels:       template.KV{"dev": "sda3", "instance": "example4", "alertname": alertname},
+					Annotations:  template.KV{"description": "This is how to handle the alert"},
+					StartsAt:     time.Unix(1498614000, 0),
+					GeneratorURL: "http://generator.url/",
+				},
+			},
+			GroupLabels:  template.KV{"alertname": alertname},
+			CommonLabels: template.KV{"alertname": alertname},
+			ExternalURL:  "http://localhost:9093",
+		},
+		Version:  "4",
+		GroupKey: fmt.Sprintf("{}:{alertname=\"%s\"}", alertname),
+	}
+	if status == "resolved" {
+		msg.Data.Alerts[0].EndsAt = time.Unix(1498618000, 0)
+	}
+	b, _ := json.Marshal(msg)
+	return bytes.NewBuffer(b)
+}
+
+// createIssue returns an issue with only its title and body set.
+func createIssue(title, body string) *github.Issue {
+	return &github.Issue{
+		Title: github.String(title),
+		Body:  github.String(body),
+	}
+}
+
+func TestReceiverHandler(t *testing.T) {
+	// Test: resolve an existing issue.
+	// * msg is "resolved"
+	// * issue returned by list
+	// * issue is closed
+	postBody := createWebhookMessage("DiskRunningFull", "resolved")
+	// Record the response to a synthetic POST request for ServeHTTP.
+	rw := httptest.NewRecorder()
+	req, err := http.NewRequest("POST", "/v1/receiver", postBody)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	// Provide a pre-existing issue to close.
+	f := &fakeClient{
+		listIssues: []*github.Issue{
+			createIssue("DiskRunningFull", "body1"),
+		},
+	}
+	handler := alerts.ReceiverHandler{Client: f}
+	handler.ServeHTTP(rw, req)
+	resp := rw.Result()
+
+	// Check the results.
+	body, _ := ioutil.ReadAll(resp.Body)
+	if resp.StatusCode != http.StatusOK {
+		t.Errorf("ReceiverHandler got %d; want %d", resp.StatusCode, http.StatusOK)
+	}
+	if f.closedIssue == nil {
+		t.Fatalf("ReceiverHandler failed to close issue")
+	}
+	if *f.closedIssue.Title != "DiskRunningFull" {
+		t.Errorf("ReceiverHandler closed wrong issue; got %q want \"DiskRunningFull\"",
+			*f.closedIssue.Title)
+	}
+	t.Logf("body: %s", body)
+
+	// Test: create a new issue.
+	// * msg is "firing"
+	// * issue list is empty.
+	// * issue is created
+	postBody = createWebhookMessage("DiskRunningFull", "firing")
+	// Record the response to a synthetic POST request for ServeHTTP.
+	rw = httptest.NewRecorder()
+	req, err = http.NewRequest("POST", "/v1/receiver", postBody)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	// No pre-existing issues to close.
+	f = &fakeClient{}
+	handler = alerts.ReceiverHandler{Client: f}
+	handler.ServeHTTP(rw, req)
+	resp = rw.Result()
+
+	// Check the results.
+	body, _ = ioutil.ReadAll(resp.Body)
+	if resp.StatusCode != http.StatusOK {
+		t.Errorf("ReceiverHandler got %d; want %d", resp.StatusCode, http.StatusOK)
+	}
+	if f.createdIssue == nil {
+		t.Fatalf("ReceiverHandler failed to create issue")
+	}
+	if *f.createdIssue.Title != "DiskRunningFull" {
+		t.Errorf("ReceiverHandler created wrong issue; got %q want \"DiskRunningFull\"",
+			*f.createdIssue.Title)
+	}
+	t.Logf("body: %s", body)
+}
diff --git a/alerts/template.go b/alerts/template.go
new file mode 100644
index 0000000..e2a5538
--- /dev/null
+++ b/alerts/template.go
@@ -0,0 +1,105 @@
+// Copyright 2017 alertmanager-github-receiver Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//////////////////////////////////////////////////////////////////////////////
+
+package alerts
+
+import (
+	"bytes"
+	"fmt"
+	"github.com/prometheus/alertmanager/notify"
+	"html/template"
+	"log"
+)
+
+const (
+	// alertMD reports all alert labels and annotations in a markdown format
+	// that renders correctly in github issues.
+	//
+	// Example:
+	//
+	//    Alertmanager URL: http://localhost:9093
+	//
+	//      * firing
+	//
+	//        Labels:
+	//
+	//         - alertname = DiskRunningFull
+	//         - dev = sda1
+	//         - instance = example1
+	//
+	//        Annotations:
+	//
+	//         - test = value
+	//
+	//      * firing
+	//
+	//        Labels:
+	//
+	//         - alertname = DiskRunningFull
+	//         - dev = sda2
+	//         - instance = example2
+	alertMD = `
+Alertmanager URL: {{.Data.ExternalURL}}
+{{range .Data.Alerts}}
+  * {{.Status}} {{.GeneratorURL}}
+  {{if .Labels}}
+    Labels:
+  {{- end}}
+  {{range $key, $value := .Labels}}
+    - {{$key}} = {{$value -}}
+  {{end}}
+  {{if .Annotations}}
+    Annotations:
+  {{- end}}
+  {{range $key, $value := .Annotations}}
+    - {{$key}} = {{$value -}}
+  {{end}}
+{{end}}
+
+TODO: add graph url from annotations.
+`
+)
+
+var (
+	// alertTemplate is alertMD parsed once at package initialization;
+	// template.Must panics if the template text is invalid.
+	alertTemplate = template.Must(template.New("alert").Parse(alertMD))
+)
+
+// id returns the message's GroupKey formatted with "0x%x". It labels the alert
+// in log lines and in the generated issue body.
+func id(msg *notify.WebhookMessage) string {
+	return fmt.Sprintf("0x%x", msg.GroupKey)
+}
+
+// formatTitle constructs an issue title from a webhook message: the value of
+// the "alertname" group label.
+func formatTitle(msg *notify.WebhookMessage) string {
+	return fmt.Sprintf("%s", msg.Data.GroupLabels["alertname"])
+}
+
+// formatIssueBody constructs an issue body from a webhook message. The body is
+// the alert ID in an HTML comment followed by the rendered alertMD template.
+// If the template fails to execute, the error is logged and "" is returned.
+func formatIssueBody(msg *notify.WebhookMessage) string {
+	var buf bytes.Buffer
+	if err := alertTemplate.Execute(&buf, msg); err != nil {
+		log.Printf("Error executing template: %s", err)
+		return ""
+	}
+	// The format must consume both arguments. With a single verb the rendered
+	// body was reported by fmt as %!(EXTRA ...) instead of being appended.
+	return fmt.Sprintf("<!-- ID: %s -->\n%s", id(msg), buf.String())
+}
diff --git a/cmd/github_receiver/main.go b/cmd/github_receiver/main.go
index 6a4b154..4d18eba 100644
--- a/cmd/github_receiver/main.go
+++ b/cmd/github_receiver/main.go
@@ -20,10 +20,10 @@ package main
 import (
 	"flag"
 	"fmt"
+	"github.com/m-lab/alertmanager-github-receiver/alerts"
+	"github.com/m-lab/alertmanager-github-receiver/issues"
 	"net/http"
 	"os"
-
-	"github.com/stephen-soltesz/alertmanager-github-receiver/issues"
 )
 
 var (
@@ -51,8 +51,7 @@ func init() {
 
 func serveListener(client *issues.Client) {
 	http.Handle("/", &issues.ListHandler{client})
-	// TODO: enable alert receiver.
-	// http.Handle("/v1/receiver", &alerts.ReceiverHandler{client})
+	http.Handle("/v1/receiver", &alerts.ReceiverHandler{client})
 	http.ListenAndServe(":9393", nil)
 }
 
diff --git a/issues/handler_test.go b/issues/handler_test.go
index 93ef500..1903ae1 100644
--- a/issues/handler_test.go
+++ b/issues/handler_test.go
@@ -21,7 +21,7 @@ import (
 	"net/http/httptest"
 	"testing"
 
-	"github.com/stephen-soltesz/alertmanager-github-receiver/issues"
+	"github.com/m-lab/alertmanager-github-receiver/issues"
 )
 
 type fakeClient struct {
diff --git a/issues/issues_test.go b/issues/issues_test.go
index 9aeeb61..0dae5e7 100644
--- a/issues/issues_test.go
+++ b/issues/issues_test.go
@@ -27,7 +27,7 @@ import (
 	"github.com/kr/pretty"
 
 	"github.com/google/go-github/github"
-	"github.com/stephen-soltesz/alertmanager-github-receiver/issues"
+	"github.com/m-lab/alertmanager-github-receiver/issues"
 )
 
 // Global vars for tests.