From 4d1f96fdb36038ff63bb0b9addb825a782d54dce Mon Sep 17 00:00:00 2001 From: Jack Lindamood Date: Mon, 11 Mar 2024 15:53:23 -0700 Subject: [PATCH] fix: fix for temporary errors (#201) * fix: fix for temporary errors The atlantis API isn't very graceful when errors happen. Best we can do is log it and move on. * fix: ready for v1 --- .github/workflows/build.yml | 2 +- action.yml | 2 +- cmd/atlantis-drift-detection/main.go | 13 +++++++++---- internal/atlantis/client.go | 20 +++++++++++++++++--- internal/notification/multi.go | 9 +++++++++ internal/notification/notification.go | 2 ++ internal/notification/slackwebhook.go | 4 ++++ internal/notification/workflow.go | 12 +++++++++--- internal/notification/zap.go | 6 ++++++ 9 files changed, 58 insertions(+), 12 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 266ed1b9..d7c65c67 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -54,7 +54,7 @@ jobs: type=ref,event=branch type=ref,event=tag type=ref,event=pr - type=semver,pattern={{major}},enable=${{ github.event_name == 'push' && contains(github.ref, 'refs/tags/') }} + type=semver,pattern=v{{major}},enable=${{ github.event_name == 'push' && contains(github.ref, 'refs/tags/') }} images: | ghcr.io/cresta/atlantis-drift-detection - name: Build and push diff --git a/action.yml b/action.yml index 7dac7ecd..d217e7ce 100644 --- a/action.yml +++ b/action.yml @@ -7,4 +7,4 @@ runs: using: 'docker' # TODO: Figure out a way to auto update this. It's very useful for speeding up the action to not have it build the # container each run - image: 'docker://ghcr.io/cresta/atlantis-drift-detection:v0.0.11' \ No newline at end of file + image: 'docker://ghcr.io/cresta/atlantis-drift-detection:v1' \ No newline at end of file diff --git a/cmd/atlantis-drift-detection/main.go b/cmd/atlantis-drift-detection/main.go index 2bec8852..e71dc18d 100644 --- a/cmd/atlantis-drift-detection/main.go +++ b/cmd/atlantis-drift-detection/main.go @@ -3,6 +3,10 @@ package main import ( "context" "fmt" + "net/http" + "os" + "time" + "github.com/cresta/atlantis-drift-detection/internal/atlantis" "github.com/cresta/atlantis-drift-detection/internal/drifter" "github.com/cresta/atlantis-drift-detection/internal/notification" @@ -11,9 +15,6 @@ import ( "github.com/cresta/gogit" "github.com/cresta/gogithub" "github.com/joho/godotenv" - "net/http" - "os" - "time" // Empty import allows pinning to version atlantis uses _ "github.com/nlopes/slack" @@ -100,7 +101,11 @@ func main() { logger.Info("setting up slack webhook notification") notif.Notifications = append(notif.Notifications, slackClient) } - ghClient, err := gogithub.NewGQLClient(ctx, logger, nil) + var existingConfig *gogithub.NewGQLClientConfig + if os.Getenv("GITHUB_TOKEN") != "" { + existingConfig = &gogithub.NewGQLClientConfig{Token: os.Getenv("GITHUB_TOKEN")} + } + ghClient, err := gogithub.NewGQLClient(ctx, logger, existingConfig) if err != nil { logger.Panic("failed to create github client", zap.Error(err)) } diff --git a/internal/atlantis/client.go b/internal/atlantis/client.go index 9f07af13..324b781e 100644 --- a/internal/atlantis/client.go +++ b/internal/atlantis/client.go @@ -5,11 +5,12 @@ import ( "context" "encoding/json" "fmt" - "github.com/runatlantis/atlantis/server/controllers" - "github.com/runatlantis/atlantis/server/events/command" "io" "net/http" "strings" + + "github.com/runatlantis/atlantis/server/controllers" + "github.com/runatlantis/atlantis/server/events/command" ) type Client struct { @@ -65,6 +66,10 @@ type TemporaryError interface { error } +type errorResponse struct { + Error string `json:"error"` +} + func (p *possiblyTemporaryError) Temporary() bool { return true } @@ -107,11 +112,20 @@ func (c *Client) PlanSummary(ctx context.Context, req *PlanSummaryRequest) (*Pla if err := resp.Body.Close(); err != nil { return nil, fmt.Errorf("unable to close response body: %w", err) } + if resp.StatusCode == http.StatusUnauthorized { + var errResp errorResponse + if err := json.NewDecoder(&fullBody).Decode(&errResp); err != nil { + return nil, fmt.Errorf("unauthorized request to %s: %w", destination, err) + } + return nil, fmt.Errorf("unauthorized request to %s: %s", destination, errResp.Error) + } var bodyResult command.Result if err := json.NewDecoder(&fullBody).Decode(&bodyResult); err != nil { retErr := fmt.Errorf("error decoding plan response(code:%d)(status:%s)(body:%s): %w", resp.StatusCode, resp.Status, fullBody.String(), err) - if resp.StatusCode == http.StatusServiceUnavailable { + if resp.StatusCode == http.StatusServiceUnavailable || resp.StatusCode == http.StatusInternalServerError { + // This is a bit of a hack, but atlantis sometimes returns errors we can't fully process. These could be + // because the workspace won't apply, or because the service is just overloaded. We cannot tell. return nil, &possiblyTemporaryError{retErr} } return nil, retErr diff --git a/internal/notification/multi.go b/internal/notification/multi.go index 26b92495..2e93b5da 100644 --- a/internal/notification/multi.go +++ b/internal/notification/multi.go @@ -6,6 +6,15 @@ type Multi struct { Notifications []Notification } +func (m *Multi) TemporaryError(ctx context.Context, dir string, workspace string, err error) error { + for _, n := range m.Notifications { + if err := n.TemporaryError(ctx, dir, workspace, err); err != nil { + return err + } + } + return nil +} + func (m *Multi) ExtraWorkspaceInRemote(ctx context.Context, dir string, workspace string) error { for _, n := range m.Notifications { if err := n.ExtraWorkspaceInRemote(ctx, dir, workspace); err != nil { diff --git a/internal/notification/notification.go b/internal/notification/notification.go index e8c4cada..2f65d3c7 100644 --- a/internal/notification/notification.go +++ b/internal/notification/notification.go @@ -22,4 +22,6 @@ type Notification interface { ExtraWorkspaceInRemote(ctx context.Context, dir string, workspace string) error MissingWorkspaceInRemote(ctx context.Context, dir string, workspace string) error PlanDrift(ctx context.Context, dir string, workspace string) error + // TemporaryError is called when an error occurs but we can't really tell what it means + TemporaryError(ctx context.Context, dir string, workspace string, err error) error } diff --git a/internal/notification/slackwebhook.go b/internal/notification/slackwebhook.go index aadfd50a..790b86b3 100644 --- a/internal/notification/slackwebhook.go +++ b/internal/notification/slackwebhook.go @@ -13,6 +13,10 @@ type SlackWebhook struct { HTTPClient *http.Client } +func (s *SlackWebhook) TemporaryError(ctx context.Context, dir string, workspace string, err error) error { + return s.sendSlackMessage(ctx, fmt.Sprintf("Unknown error in remote\nDirectory: %s\nWorkspace: %s\nError: %s", dir, workspace, err.Error())) +} + func NewSlackWebhook(webhookURL string, HTTPClient *http.Client) *SlackWebhook { if webhookURL == "" { return nil diff --git a/internal/notification/workflow.go b/internal/notification/workflow.go index 87d33bfb..70f685f0 100644 --- a/internal/notification/workflow.go +++ b/internal/notification/workflow.go @@ -2,8 +2,9 @@ package notification import ( "context" - "github.com/cresta/gogithub" "sync" + + "github.com/cresta/gogithub" ) func NewWorkflow(ghClient gogithub.GitHub, owner string, repo string, id string, ref string) *Workflow { @@ -30,11 +31,16 @@ type Workflow struct { directoriesDone map[string]struct{} } -func (w *Workflow) ExtraWorkspaceInRemote(ctx context.Context, dir string, workspace string) error { +func (w *Workflow) TemporaryError(_ context.Context, _ string, _ string, _ error) error { + // Ignored + return nil +} + +func (w *Workflow) ExtraWorkspaceInRemote(_ context.Context, _ string, _ string) error { return nil } -func (w *Workflow) MissingWorkspaceInRemote(ctx context.Context, dir string, workspace string) error { +func (w *Workflow) MissingWorkspaceInRemote(_ context.Context, _ string, _ string) error { return nil } diff --git a/internal/notification/zap.go b/internal/notification/zap.go index 7ca8206b..f8dc4daa 100644 --- a/internal/notification/zap.go +++ b/internal/notification/zap.go @@ -2,6 +2,7 @@ package notification import ( "context" + "go.uber.org/zap" ) @@ -9,6 +10,11 @@ type Zap struct { Logger *zap.Logger } +func (I *Zap) TemporaryError(_ context.Context, dir string, workspace string, err error) error { + I.Logger.Error("Unknown error in remote", zap.String("dir", dir), zap.String("workspace", workspace), zap.Error(err)) + return nil +} + func (I *Zap) PlanDrift(_ context.Context, dir string, workspace string) error { I.Logger.Info("Plan has drifted", zap.String("dir", dir), zap.String("workspace", workspace)) return nil