From e2b552d6951eb802969e9023f7145bbb78180cf5 Mon Sep 17 00:00:00 2001 From: Oded Ben Ozer Date: Mon, 24 Jun 2024 13:54:10 +0200 Subject: [PATCH] Handle/mitigate crash and context issues (#11) * Use new context (not a child) * Remove calls to a function that exits the process. * Add more "context"(info, like for humans) to errors * Address another err with no context issue * Respond the webhook only based on payload parsing. Actual Event processing is move to background thread. This require some refactoring, created ReciveWebhook and ReciveEventFile functions to represent the different behavior in Web Server VS CLI triggering while keeping to the GH stuff in the GH package * Cancel whole drift work on context deadline * Move error function return value to the standard position Handle cases where GetContents returns nil HTTP response (like in Context cancellation) --- cmd/telefonistka/bump-version-overwrite.go | 2 +- cmd/telefonistka/bump-version-regex.go | 2 +- cmd/telefonistka/bump-version-yaml.go | 2 +- cmd/telefonistka/event.go | 25 +------- cmd/telefonistka/server.go | 11 ++-- internal/pkg/argocd/argocd.go | 24 +++++--- .../pkg/argocd/argocd_copied_from_upstream.go | 22 ++++--- internal/pkg/githubapi/github.go | 60 ++++++++++++++++--- internal/pkg/githubapi/promotion.go | 3 + 9 files changed, 94 insertions(+), 57 deletions(-) diff --git a/cmd/telefonistka/bump-version-overwrite.go b/cmd/telefonistka/bump-version-overwrite.go index fa6b4f5..e2df86d 100644 --- a/cmd/telefonistka/bump-version-overwrite.go +++ b/cmd/telefonistka/bump-version-overwrite.go @@ -73,7 +73,7 @@ func bumpVersionOverwrite(targetRepo string, targetFile string, file string, git ghPrClientDetails.PrLogger = log.WithFields(log.Fields{}) // TODO what fields should be here? defaultBranch, _ := ghPrClientDetails.GetDefaultBranch() - initialFileContent, err, statusCode := githubapi.GetFileContent(ghPrClientDetails, defaultBranch, targetFile) + initialFileContent, statusCode, err := githubapi.GetFileContent(ghPrClientDetails, defaultBranch, targetFile) if statusCode == 404 { ghPrClientDetails.PrLogger.Infof("File %s was not found\n", targetFile) } else if err != nil { diff --git a/cmd/telefonistka/bump-version-regex.go b/cmd/telefonistka/bump-version-regex.go index 0e9fa25..fc3d50f 100644 --- a/cmd/telefonistka/bump-version-regex.go +++ b/cmd/telefonistka/bump-version-regex.go @@ -71,7 +71,7 @@ func bumpVersionRegex(targetRepo string, targetFile string, regex string, replac r := regexp.MustCompile(regex) defaultBranch, _ := ghPrClientDetails.GetDefaultBranch() - initialFileContent, err, _ := githubapi.GetFileContent(ghPrClientDetails, defaultBranch, targetFile) + initialFileContent, _, err := githubapi.GetFileContent(ghPrClientDetails, defaultBranch, targetFile) if err != nil { ghPrClientDetails.PrLogger.Errorf("Fail to fetch file content:%s\n", err) os.Exit(1) diff --git a/cmd/telefonistka/bump-version-yaml.go b/cmd/telefonistka/bump-version-yaml.go index 7642662..c130b77 100644 --- a/cmd/telefonistka/bump-version-yaml.go +++ b/cmd/telefonistka/bump-version-yaml.go @@ -74,7 +74,7 @@ func bumpVersionYaml(targetRepo string, targetFile string, address string, value defaultBranch, _ := ghPrClientDetails.GetDefaultBranch() - initialFileContent, err, _ := githubapi.GetFileContent(ghPrClientDetails, defaultBranch, targetFile) + initialFileContent, _, err := githubapi.GetFileContent(ghPrClientDetails, defaultBranch, targetFile) if err != nil { ghPrClientDetails.PrLogger.Errorf("Fail to fetch file content:%s\n", err) os.Exit(1) diff --git a/cmd/telefonistka/event.go b/cmd/telefonistka/event.go index 885fb42..3ff94ca 100644 --- a/cmd/telefonistka/event.go +++ b/cmd/telefonistka/event.go @@ -1,14 +1,9 @@ package telefonistka import ( - "bytes" - "context" - "io" - "net/http" "os" lru "github.com/hashicorp/golang-lru/v2" - log "github.com/sirupsen/logrus" "github.com/spf13/cobra" "github.com/wayfair-incubator/telefonistka/internal/pkg/githubapi" ) @@ -32,27 +27,9 @@ func init() { //nolint:gochecknoinits } func event(eventType string, eventFilePath string) { - ctx := context.Background() - - log.Infof("Event type: %s", eventType) - log.Infof("Proccesing file: %s", eventFilePath) - - payload, err := os.ReadFile(eventFilePath) - if err != nil { - panic(err) - } - - // To use the same code path as for Webhook I'm creating an http request with the payload from the file. - // This might not be very smart. - - h, _ := http.NewRequest("POST", "", nil) //nolint:noctx - h.Body = io.NopCloser(bytes.NewReader(payload)) - h.Header.Set("Content-Type", "application/json") - h.Header.Set("X-GitHub-Event", eventType) - mainGhClientCache, _ := lru.New[string, githubapi.GhClientPair](128) prApproverGhClientCache, _ := lru.New[string, githubapi.GhClientPair](128) - githubapi.HandleEvent(h, ctx, mainGhClientCache, prApproverGhClientCache, nil) + githubapi.ReciveEventFile(eventFilePath, eventType, mainGhClientCache, prApproverGhClientCache) } func getEnv(key, fallback string) string { diff --git a/cmd/telefonistka/server.go b/cmd/telefonistka/server.go index 6f53483..076b28f 100644 --- a/cmd/telefonistka/server.go +++ b/cmd/telefonistka/server.go @@ -1,7 +1,6 @@ package telefonistka import ( - "context" "net/http" "os" "time" @@ -39,9 +38,13 @@ func init() { //nolint:gochecknoinits func handleWebhook(githubWebhookSecret []byte, mainGhClientCache *lru.Cache[string, githubapi.GhClientPair], prApproverGhClientCache *lru.Cache[string, githubapi.GhClientPair]) func(http.ResponseWriter, *http.Request) { return func(w http.ResponseWriter, r *http.Request) { - ctx, cancel := context.WithTimeout(r.Context(), 30*time.Second) - defer cancel() - githubapi.HandleEvent(r, ctx, mainGhClientCache, prApproverGhClientCache, githubWebhookSecret) + err := githubapi.ReciveWebhook(r, mainGhClientCache, prApproverGhClientCache, githubWebhookSecret) + if err != nil { + log.Errorf("error handling webhook: %v", err) + http.Error(w, "Internal server error", http.StatusInternalServerError) + return + } + w.WriteHeader(http.StatusOK) } } diff --git a/internal/pkg/argocd/argocd.go b/internal/pkg/argocd/argocd.go index 14ebc50..dc746f7 100644 --- a/internal/pkg/argocd/argocd.go +++ b/internal/pkg/argocd/argocd.go @@ -51,7 +51,7 @@ type DiffResult struct { func generateArgocdAppDiff(ctx context.Context, app *argoappv1.Application, proj *argoappv1.AppProject, resources *application.ManagedResourcesResponse, argoSettings *settings.Settings, diffOptions *DifferenceOption) (foundDiffs bool, diffElements []DiffElement, err error) { liveObjs, err := cmdutil.LiveObjects(resources.Items) if err != nil { - return false, nil, err + return false, nil, fmt.Errorf("Failed to get live objects: %v", err) } items := make([]objKeyLiveTarget, 0) @@ -59,12 +59,18 @@ func generateArgocdAppDiff(ctx context.Context, app *argoappv1.Application, proj for _, mfst := range diffOptions.res.Manifests { obj, err := argoappv1.UnmarshalToUnstructured(mfst) if err != nil { - return false, nil, err + return false, nil, fmt.Errorf("Failed to unmarshal manifest: %v", err) } unstructureds = append(unstructureds, obj) } - groupedObjs := groupObjsByKey(unstructureds, liveObjs, app.Spec.Destination.Namespace) - items = groupObjsForDiff(resources, groupedObjs, items, argoSettings, app.InstanceName(argoSettings.ControllerNamespace), app.Spec.Destination.Namespace) + groupedObjs, err := groupObjsByKey(unstructureds, liveObjs, app.Spec.Destination.Namespace) + if err != nil { + return false, nil, fmt.Errorf("Failed to group objects by key: %v", err) + } + items, err = groupObjsForDiff(resources, groupedObjs, items, argoSettings, app.InstanceName(argoSettings.ControllerNamespace), app.Spec.Destination.Namespace) + if err != nil { + return false, nil, fmt.Errorf("Failed to group objects for diff: %v", err) + } for _, item := range items { var diffElement DiffElement @@ -85,11 +91,11 @@ func generateArgocdAppDiff(ctx context.Context, app *argoappv1.Application, proj WithNoCache(). Build() if err != nil { - return false, nil, err + return false, nil, fmt.Errorf("Failed to build diff config: %v", err) } diffRes, err := argodiff.StateDiff(item.live, item.target, diffConfig) if err != nil { - return false, nil, err + return false, nil, fmt.Errorf("Failed to diff objects: %v", err) } if diffRes.Modified || item.target == nil || item.live == nil { @@ -105,7 +111,7 @@ func generateArgocdAppDiff(ctx context.Context, app *argoappv1.Application, proj live = item.live err = json.Unmarshal(diffRes.PredictedLive, target) if err != nil { - return false, nil, err + return false, nil, fmt.Errorf("Failed to unmarshal predicted live object: %v", err) } } else { live = item.live @@ -117,7 +123,7 @@ func generateArgocdAppDiff(ctx context.Context, app *argoappv1.Application, proj diffElement.Diff, err = diffLiveVsTargetObject(live, target) if err != nil { - return false, nil, err + return false, nil, fmt.Errorf("Failed to diff live objects: %v", err) } } diffElements = append(diffElements, diffElement) @@ -151,7 +157,7 @@ func createArgoCdClient() (apiclient.Client, error) { clientset, err := apiclient.NewClient(opts) if err != nil { - return nil, err + return nil, fmt.Errorf("Error creating ArgoCD API client: %v", err) } return clientset, nil } diff --git a/internal/pkg/argocd/argocd_copied_from_upstream.go b/internal/pkg/argocd/argocd_copied_from_upstream.go index 303479e..d78b6b8 100644 --- a/internal/pkg/argocd/argocd_copied_from_upstream.go +++ b/internal/pkg/argocd/argocd_copied_from_upstream.go @@ -2,6 +2,7 @@ package argocd import ( "encoding/json" + "fmt" "github.com/argoproj/argo-cd/v2/controller" "github.com/argoproj/argo-cd/v2/pkg/apiclient/application" @@ -9,7 +10,6 @@ import ( argoappv1 "github.com/argoproj/argo-cd/v2/pkg/apis/application/v1alpha1" repoapiclient "github.com/argoproj/argo-cd/v2/reposerver/apiclient" "github.com/argoproj/argo-cd/v2/util/argo" - "github.com/argoproj/argo-cd/v2/util/errors" "github.com/argoproj/gitops-engine/pkg/sync/hook" "github.com/argoproj/gitops-engine/pkg/sync/ignore" "github.com/argoproj/gitops-engine/pkg/utils/kube" @@ -42,7 +42,7 @@ func (p *resourceInfoProvider) IsNamespaced(gk schema.GroupKind) (bool, error) { // This function creates a map of objects by key(object name/kind/ns) from the rendered manifests. // That map is used to compare the objects in the application with the objects in the cluster. // copied from https://github.com/argoproj/argo-cd/blob/4f6a8dce80f0accef7ed3b5510e178a6b398b331/cmd/argocd/commands/app.go#L1091-L1109 -func groupObjsByKey(localObs []*unstructured.Unstructured, liveObjs []*unstructured.Unstructured, appNamespace string) map[kube.ResourceKey]*unstructured.Unstructured { +func groupObjsByKey(localObs []*unstructured.Unstructured, liveObjs []*unstructured.Unstructured, appNamespace string) (map[kube.ResourceKey]*unstructured.Unstructured, error) { namespacedByGk := make(map[schema.GroupKind]bool) for i := range liveObjs { if liveObjs[i] != nil { @@ -51,7 +51,9 @@ func groupObjsByKey(localObs []*unstructured.Unstructured, liveObjs []*unstructu } } localObs, _, err := controller.DeduplicateTargetObjects(appNamespace, localObs, &resourceInfoProvider{namespacedByGk: namespacedByGk}) - errors.CheckError(err) + if err != nil { + return nil, fmt.Errorf("Failed to DeDuplicate target objects: %v", err) + } objByKey := make(map[kube.ResourceKey]*unstructured.Unstructured) for i := range localObs { obj := localObs[i] @@ -59,17 +61,19 @@ func groupObjsByKey(localObs []*unstructured.Unstructured, liveObjs []*unstructu objByKey[kube.GetResourceKey(obj)] = obj } } - return objByKey + return objByKey, nil } // This function create a slice of objects to be "diff'ed", each element contains the key, live(in-cluster API state) and target(rended manifest from git) object. // Copied from https://github.com/argoproj/argo-cd/blob/4f6a8dce80f0accef7ed3b5510e178a6b398b331/cmd/argocd/commands/app.go#L1341-L1372 -func groupObjsForDiff(resources *application.ManagedResourcesResponse, objs map[kube.ResourceKey]*unstructured.Unstructured, items []objKeyLiveTarget, argoSettings *settings.Settings, appName, namespace string) []objKeyLiveTarget { +func groupObjsForDiff(resources *application.ManagedResourcesResponse, objs map[kube.ResourceKey]*unstructured.Unstructured, items []objKeyLiveTarget, argoSettings *settings.Settings, appName, namespace string) ([]objKeyLiveTarget, error) { resourceTracking := argo.NewResourceTracking() for _, res := range resources.Items { live := &unstructured.Unstructured{} err := json.Unmarshal([]byte(res.NormalizedLiveState), &live) - errors.CheckError(err) + if err != nil { + return nil, fmt.Errorf("Failed to unmarshal live object(%v): %v", res.Name, err) + } key := kube.ResourceKey{Name: res.Name, Namespace: res.Namespace, Group: res.Group, Kind: res.Kind} if key.Kind == kube.SecretKind && key.Group == "" { @@ -80,7 +84,9 @@ func groupObjsForDiff(resources *application.ManagedResourcesResponse, objs map[ if local, ok := objs[key]; ok || live != nil { if local != nil && !kube.IsCRD(local) { err = resourceTracking.SetAppInstance(local, argoSettings.AppLabelKey, appName, namespace, argoappv1.TrackingMethod(argoSettings.GetTrackingMethod())) - errors.CheckError(err) + if err != nil { + return nil, fmt.Errorf("Failed to set app instance label: %v", err) + } } items = append(items, objKeyLiveTarget{key, live, local}) @@ -95,5 +101,5 @@ func groupObjsForDiff(resources *application.ManagedResourcesResponse, objs map[ } items = append(items, objKeyLiveTarget{key, nil, local}) } - return items + return items, nil } diff --git a/internal/pkg/githubapi/github.go b/internal/pkg/githubapi/github.go index e5136b6..bf75c41 100644 --- a/internal/pkg/githubapi/github.go +++ b/internal/pkg/githubapi/github.go @@ -8,12 +8,15 @@ import ( "encoding/hex" "encoding/json" "fmt" + "io" "net/http" + "os" "path" "regexp" "sort" "strings" "text/template" + "time" "github.com/cenkalti/backoff/v4" "github.com/google/go-github/v62/github" @@ -185,12 +188,36 @@ func HandlePREvent(eventPayload *github.PullRequestEvent, ghPrClientDetails GhPr } } -func HandleEvent(r *http.Request, ctx context.Context, mainGhClientCache *lru.Cache[string, GhClientPair], prApproverGhClientCache *lru.Cache[string, GhClientPair], githubWebhookSecret []byte) { +// ReciveEventFile this one is similar to ReciveWebhook but it's used for CLI triggering, i simulates a webhook event to use the same code path as the webhook handler. +func ReciveEventFile(eventType string, eventFilePath string, mainGhClientCache *lru.Cache[string, GhClientPair], prApproverGhClientCache *lru.Cache[string, GhClientPair]) { + log.Infof("Event type: %s", eventType) + log.Infof("Proccesing file: %s", eventFilePath) + + payload, err := os.ReadFile(eventFilePath) + if err != nil { + panic(err) + } + eventPayloadInterface, err := github.ParseWebHook(eventType, payload) + if err != nil { + log.Errorf("could not parse webhook: err=%s\n", err) + prom.InstrumentWebhookHit("parsing_failed") + return + } + r, _ := http.NewRequest("POST", "", nil) //nolint:noctx + r.Body = io.NopCloser(bytes.NewReader(payload)) + r.Header.Set("Content-Type", "application/json") + r.Header.Set("X-GitHub-Event", eventType) + + handleEvent(eventPayloadInterface, mainGhClientCache, prApproverGhClientCache, r, payload, eventType) +} + +// ReciveWebhook is the main entry point for the webhook handling it starts parases the webhook payload and start a thread to handle the event success/failure are dependant on the payload parsing only +func ReciveWebhook(r *http.Request, mainGhClientCache *lru.Cache[string, GhClientPair], prApproverGhClientCache *lru.Cache[string, GhClientPair], githubWebhookSecret []byte) error { payload, err := github.ValidatePayload(r, githubWebhookSecret) if err != nil { log.Errorf("error reading request body: err=%s\n", err) prom.InstrumentWebhookHit("validation_failed") - return + return err } eventType := github.WebHookType(r) @@ -198,9 +225,19 @@ func HandleEvent(r *http.Request, ctx context.Context, mainGhClientCache *lru.Ca if err != nil { log.Errorf("could not parse webhook: err=%s\n", err) prom.InstrumentWebhookHit("parsing_failed") - return + return err } prom.InstrumentWebhookHit("successful") + + go handleEvent(eventPayloadInterface, mainGhClientCache, prApproverGhClientCache, r, payload, eventType) + return nil +} + +func handleEvent(eventPayloadInterface interface{}, mainGhClientCache *lru.Cache[string, GhClientPair], prApproverGhClientCache *lru.Cache[string, GhClientPair], r *http.Request, payload []byte, eventType string) { + // We don't use the request context as it might have a short deadline and we don't want to stop event handling based on that + // But we do want to stop the event handling after a certain point, so: + ctx, cancel := context.WithTimeout(context.Background(), 120*time.Second) + defer cancel() var mainGithubClientPair GhClientPair var approverGithubClientPair GhClientPair @@ -971,7 +1008,7 @@ func ApprovePr(approverClient *github.Client, ghPrClientDetails GhPrClientDetail } func GetInRepoConfig(ghPrClientDetails GhPrClientDetails, defaultBranch string) (*cfg.Config, error) { - inRepoConfigFileContentString, err, _ := GetFileContent(ghPrClientDetails, defaultBranch, "telefonistka.yaml") + inRepoConfigFileContentString, _, err := GetFileContent(ghPrClientDetails, defaultBranch, "telefonistka.yaml") if err != nil { ghPrClientDetails.PrLogger.Errorf("Could not get in-repo configuration: err=%s\n", err) return nil, err @@ -983,18 +1020,23 @@ func GetInRepoConfig(ghPrClientDetails GhPrClientDetails, defaultBranch string) return c, err } -func GetFileContent(ghPrClientDetails GhPrClientDetails, branch string, filePath string) (string, error, int) { +func GetFileContent(ghPrClientDetails GhPrClientDetails, branch string, filePath string) (string, int, error) { rGetContentOps := github.RepositoryContentGetOptions{Ref: branch} fileContent, _, resp, err := ghPrClientDetails.GhClientPair.v3Client.Repositories.GetContents(ghPrClientDetails.Ctx, ghPrClientDetails.Owner, ghPrClientDetails.Repo, filePath, &rGetContentOps) - prom.InstrumentGhCall(resp) if err != nil { ghPrClientDetails.PrLogger.Errorf("Fail to get file:%s\n%v\n", err, resp) - return "", err, resp.StatusCode + if resp == nil { + return "", 0, err + } + prom.InstrumentGhCall(resp) + return "", resp.StatusCode, err + } else { + prom.InstrumentGhCall(resp) } fileContentString, err := fileContent.GetContent() if err != nil { ghPrClientDetails.PrLogger.Errorf("Fail to serlize file:%s\n", err) - return "", err, resp.StatusCode + return "", resp.StatusCode, err } - return fileContentString, nil, resp.StatusCode + return fileContentString, resp.StatusCode, nil } diff --git a/internal/pkg/githubapi/promotion.go b/internal/pkg/githubapi/promotion.go index 44ebd07..b9b013f 100644 --- a/internal/pkg/githubapi/promotion.go +++ b/internal/pkg/githubapi/promotion.go @@ -51,6 +51,9 @@ func contains(s []string, str string) bool { } func DetectDrift(ghPrClientDetails GhPrClientDetails) error { + if ghPrClientDetails.Ctx.Err() != nil { + return ghPrClientDetails.Ctx.Err() + } diffOutputMap := make(map[string]string) defaultBranch, _ := ghPrClientDetails.GetDefaultBranch() config, err := GetInRepoConfig(ghPrClientDetails, defaultBranch)