Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add prometheus alerts in support bundle #94

Merged
merged 2 commits into from
Jul 15, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -182,7 +182,7 @@ require (
github.com/opencontainers/selinux v1.10.0 // indirect
github.com/pelletier/go-toml v1.9.4 // indirect
github.com/pquerna/cachecontrol v0.1.0 // indirect
github.com/prometheus/client_golang v1.14.0 // indirect
github.com/prometheus/client_golang v1.14.0
github.com/prometheus/client_model v0.3.0 // indirect
github.com/prometheus/common v0.37.0 // indirect
github.com/prometheus/procfs v0.8.0 // indirect
Expand Down
2 changes: 2 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -417,6 +417,7 @@ github.com/jonboulle/clockwork v0.2.2 h1:UOGuzwb1PwsrDAObMuhUnj0p5ULPj8V/xJ7Kx9q
github.com/jonboulle/clockwork v0.2.2/go.mod h1:Pkfl5aHPm1nk2H9h0bjmnJD/BcgbGXUBGnn1kMkgxc8=
github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY=
github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y=
github.com/jpillora/backoff v1.0.0 h1:uvFg412JmmHBHw7iwprIxkPMI+sGQ4kzOWsMeHnm2EA=
github.com/jpillora/backoff v1.0.0/go.mod h1:J/6gKK9jxlEcS3zixgDgUAsiuZ7yrSoa/FX5e0EB2j4=
github.com/json-iterator/go v1.1.6/go.mod h1:+SdeFBvtyEkXs7REEP0seUULqWtbJapLOCVDaaPEHmU=
github.com/json-iterator/go v1.1.10/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4=
Expand Down Expand Up @@ -490,6 +491,7 @@ github.com/munnerz/goautoneg v0.0.0-20120707110453-a547fc61f48d/go.mod h1:+n7T8m
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA=
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ=
github.com/mwitkow/go-conntrack v0.0.0-20161129095857-cc309e4a2223/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U=
github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f h1:KUppIJq7/+SVif2QVs3tOP0zanoHgBEVAwHxUSIzRqU=
github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U=
github.com/mxk/go-flowrate v0.0.0-20140419014527-cca7078d478f h1:y5//uYreIhSUg3J1GEMiLbxo1LJaP8RfCpH6pymGZus=
github.com/mxk/go-flowrate v0.0.0-20140419014527-cca7078d478f/go.mod h1:ZdcZmHo+o7JKHSa8/e818NopupXU1YMK5fe1lsApnBw=
Expand Down
125 changes: 107 additions & 18 deletions pkg/manager/manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package manager
import (
"archive/zip"
"context"
"encoding/json"
"fmt"
"os"
"os/exec"
Expand All @@ -12,15 +13,14 @@ import (
"time"

"github.com/pkg/errors"
"github.com/rancher/wrangler/pkg/signals"
"github.com/sirupsen/logrus"

appsv1 "k8s.io/api/apps/v1"
v1 "k8s.io/api/core/v1"
apierrors "k8s.io/apimachinery/pkg/api/errors"
"k8s.io/apimachinery/pkg/runtime/schema"
"k8s.io/client-go/rest"

"github.com/rancher/wrangler/pkg/signals"

"github.com/rancher/support-bundle-kit/pkg/manager/client"
"github.com/rancher/support-bundle-kit/pkg/types"
"github.com/rancher/support-bundle-kit/pkg/utils"
Expand Down Expand Up @@ -66,6 +66,11 @@ type SupportBundleManager struct {
expectedNodes map[string]string
}

// RunPhase pairs a manager phase name with the function that executes it.
type RunPhase struct {
// Name identifies the phase; runPhase logs it and passes it to m.status.SetPhase.
Name types.ManagerPhase
// Run executes the phase. A non-nil error is treated as a phase failure.
Run func() error
}

func (m *SupportBundleManager) check() error {
if len(m.Namespaces) == 0 || len(m.Namespaces[0]) == 0 {
return errors.New("namespace is not specified")
Expand Down Expand Up @@ -108,10 +113,7 @@ func (m *SupportBundleManager) getBundlefilesize() (int64, error) {
}

func (m *SupportBundleManager) Run() error {
phases := []struct {
Name types.ManagerPhase
Run func() error
}{
requiredPhases := []RunPhase{
{
types.ManagerPhaseInit,
m.phaseInit,
Expand All @@ -120,10 +122,23 @@ func (m *SupportBundleManager) Run() error {
types.ManagerPhaseClusterBundle,
m.phaseCollectClusterBundle,
},

{
types.ManagerPhaseNodeBundle,
m.phaseCollectNodeBundles,
},
}

// optionalPhases should have independent phases
// if logic is dependent, put it into one function
optionalPhases := []RunPhase{
{
types.ManagerPhasePrometheusBundle,
m.phaseCollectPrometheusBundle,
},
}

postPhases := []RunPhase{
{
types.ManagerPhasePackaging,
m.phasePackaging,
Expand All @@ -134,21 +149,51 @@ func (m *SupportBundleManager) Run() error {
},
}

for i, phase := range phases {
logrus.Infof("Running phase %s", phase.Name)
m.status.SetPhase(phase.Name)
if err := phase.Run(); err != nil {
m.status.SetError(err.Error())
logrus.Errorf("Failed to run phase %s: %s", phase.Name, err.Error())
break
m.runAllPhases(requiredPhases, optionalPhases, postPhases)

<-m.context.Done()
return nil
}

func (m *SupportBundleManager) runAllPhases(requiredPhases []RunPhase, optionalPhases []RunPhase, postPhases []RunPhase) {
progressCount := 0
maxProgressCount := len(requiredPhases) + len(optionalPhases) + len(postPhases)

for _, phase := range requiredPhases {
if err := m.runPhase(phase, &progressCount, maxProgressCount); err != nil {
logrus.Errorf("Failed to run requiredPhases %s: %s", phase.Name, err.Error())
return
}
}

progress := 100 * (i + 1) / len(phases)
m.status.SetProgress(progress)
logrus.Infof("Succeed to run phase %s. Progress (%d).", phase.Name, progress)
for _, phase := range optionalPhases {
if err := m.runPhase(phase, &progressCount, maxProgressCount); err != nil {
logrus.Errorf("Failed to run optionalPhases %s: %s", phase.Name, err.Error())
// Since it's optional, don't return error.
c3y1huang marked this conversation as resolved.
Show resolved Hide resolved
continue
}
}

<-m.context.Done()
for _, phase := range postPhases {
if err := m.runPhase(phase, &progressCount, maxProgressCount); err != nil {
logrus.Errorf("Failed to run postPhases %s: %s", phase.Name, err.Error())
return
}
}
}

// runPhase executes a single phase and publishes its outcome on m.status.
//
// The phase name is recorded before the phase runs. On failure the error text
// is stored via SetError, the error is logged and returned, and the progress
// counter is left untouched. On success *progressCount is incremented and the
// integer percentage of completed phases (out of maxProgressCount) is stored
// via SetProgress.
func (m *SupportBundleManager) runPhase(phase RunPhase, progressCount *int, maxProgressCount int) error {
	name := phase.Name

	logrus.Infof("Running phase %s", name)
	m.status.SetPhase(name)

	runErr := phase.Run()
	if runErr != nil {
		m.status.SetError(runErr.Error())
		logrus.Errorf("Failed to run phase %s: %s", name, runErr.Error())
		return runErr
	}

	completed := *progressCount + 1
	*progressCount = completed

	percent := 100 * completed / maxProgressCount
	m.status.SetProgress(percent)
	logrus.Infof("Succeed to run phase %s. Progress (%d).", name, percent)

	return nil
}

Expand Down Expand Up @@ -210,6 +255,50 @@ func (m *SupportBundleManager) phaseCollectClusterBundle() error {
return nil
}

func (m *SupportBundleManager) phaseCollectPrometheusBundle() error {
pods, err := m.k8s.GetPodsListByLabels("cattle-monitoring-system", "app.kubernetes.io/name=prometheus")
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@c3y1huang Any objection to this feature? this feature adds a phase and checks as if the cluster has a Prometheus pod (especially run in the cattle-monitoring-system ns). If yes, it will try to extract the current alerts.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No objections since it's non-blocking (optionalPhase). Additionally, Longhorn could potentially benefit from this.

if err != nil {
if apierrors.IsNotFound(err) {
logrus.Info("prometheus pods not found")
return nil
}

return errors.Wrap(err, "failed to get prometheus pods")
}

if len(pods.Items) == 0 {
logrus.Info("prometheus pods not found")
return nil
}

if len(pods.Items) > 1 {
return fmt.Errorf("multiple %d prometheus pods found", len(pods.Items))
}

targetPod := pods.Items[0]
p, err := utils.NewPrometheus(targetPod.Status.PodIP)
if err != nil {
logrus.Debugf("host: %s, port: %d", targetPod.Status.PodIP, utils.PrometheusPort)
return errors.Wrap(err, "failed to new prometheus")
}

alerts, err := p.GetAlerts(m.context)
if err != nil {
return errors.Wrap(err, "failed to get prometheus alert")
}

b, err := json.MarshalIndent(alerts, "", "\t")
if err != nil {
return errors.Wrap(err, "failed to marshal prometheus alert")
}

if err := os.WriteFile(fmt.Sprintf("%s/prometheus-alerts.json", m.getWorkingDir()), b, 0644); err != nil {
return errors.Wrap(err, "failed to write prometheus alert")
}

return nil
}

func (m *SupportBundleManager) phaseCollectNodeBundles() error {
err := m.collectNodeBundles()
if err != nil {
Expand Down
11 changes: 6 additions & 5 deletions pkg/types/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,11 +29,12 @@ const (
type ManagerPhase string

const (
ManagerPhaseInit = ManagerPhase("init")
ManagerPhaseClusterBundle = ManagerPhase("cluster bundle")
ManagerPhaseNodeBundle = ManagerPhase("node bundle")
ManagerPhasePackaging = ManagerPhase("package")
ManagerPhaseDone = ManagerPhase("done")
ManagerPhaseInit = ManagerPhase("init")
ManagerPhaseClusterBundle = ManagerPhase("cluster bundle")
ManagerPhasePrometheusBundle = ManagerPhase("prometheus bundle")
ManagerPhaseNodeBundle = ManagerPhase("node bundle")
ManagerPhasePackaging = ManagerPhase("package")
ManagerPhaseDone = ManagerPhase("done")
)

type ManagerStatus struct {
Expand Down
37 changes: 37 additions & 0 deletions pkg/utils/prometheus.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
package utils

import (
	"context"
	"fmt"
	"net"
	"strconv"

	"github.com/prometheus/client_golang/api"
	"github.com/prometheus/client_golang/api/prometheus/v1"
)

// PrometheusPort is the TCP port NewPrometheus uses when building the
// Prometheus API address.
const PrometheusPort = 9090

// Prometheus wraps a Prometheus HTTP API (v1) client.
type Prometheus struct {
// api is the v1 API handle created in NewPrometheus and used by GetAlerts.
api v1.API
}

// NewPrometheus returns a Prometheus client that talks plain HTTP to host on
// PrometheusPort.
//
// host may be a hostname, an IPv4 address or an unbracketed IPv6 address. The
// host/port pair is assembled with net.JoinHostPort so that IPv6 literals are
// bracketed ("http://[fd00::1]:9090"); plain "%s:%d" formatting would produce
// an invalid URL for them.
//
// It returns the error from api.NewClient when the client cannot be created.
func NewPrometheus(host string) (*Prometheus, error) {
	address := fmt.Sprintf("http://%s", net.JoinHostPort(host, strconv.Itoa(PrometheusPort)))

	client, err := api.NewClient(api.Config{Address: address})
	if err != nil {
		return nil, err
	}

	return &Prometheus{api: v1.NewAPI(client)}, nil
}

// GetAlerts asks the Prometheus API for its alerts and returns them.
//
// When the API call fails, an empty (non-nil) slice is returned together with
// the error.
func (p *Prometheus) GetAlerts(ctx context.Context) ([]v1.Alert, error) {
	response, err := p.api.Alerts(ctx)
	if err == nil {
		return response.Alerts, nil
	}

	return []v1.Alert{}, err
}
Loading