From b54dd5971b513b31d4119448804e5b11ca2da4e9 Mon Sep 17 00:00:00 2001 From: Jack Yu Date: Thu, 25 Jan 2024 16:16:51 +0800 Subject: [PATCH 1/2] feat: add prometheus alerts in support bundle Signed-off-by: Jack Yu --- go.mod | 2 +- go.sum | 2 + pkg/manager/manager.go | 54 +- pkg/types/types.go | 11 +- pkg/utils/prometheus.go | 37 + .../prometheus/client_golang/api/client.go | 156 +++ .../client_golang/api/prometheus/v1/api.go | 1198 +++++++++++++++++ vendor/modules.txt | 2 + 8 files changed, 1453 insertions(+), 9 deletions(-) create mode 100644 pkg/utils/prometheus.go create mode 100644 vendor/github.com/prometheus/client_golang/api/client.go create mode 100644 vendor/github.com/prometheus/client_golang/api/prometheus/v1/api.go diff --git a/go.mod b/go.mod index 2f23f996..5777caa1 100644 --- a/go.mod +++ b/go.mod @@ -182,7 +182,7 @@ require ( github.com/opencontainers/selinux v1.10.0 // indirect github.com/pelletier/go-toml v1.9.4 // indirect github.com/pquerna/cachecontrol v0.1.0 // indirect - github.com/prometheus/client_golang v1.14.0 // indirect + github.com/prometheus/client_golang v1.14.0 github.com/prometheus/client_model v0.3.0 // indirect github.com/prometheus/common v0.37.0 // indirect github.com/prometheus/procfs v0.8.0 // indirect diff --git a/go.sum b/go.sum index 0af43e97..c758ced8 100644 --- a/go.sum +++ b/go.sum @@ -417,6 +417,7 @@ github.com/jonboulle/clockwork v0.2.2 h1:UOGuzwb1PwsrDAObMuhUnj0p5ULPj8V/xJ7Kx9q github.com/jonboulle/clockwork v0.2.2/go.mod h1:Pkfl5aHPm1nk2H9h0bjmnJD/BcgbGXUBGnn1kMkgxc8= github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY= github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y= +github.com/jpillora/backoff v1.0.0 h1:uvFg412JmmHBHw7iwprIxkPMI+sGQ4kzOWsMeHnm2EA= github.com/jpillora/backoff v1.0.0/go.mod h1:J/6gKK9jxlEcS3zixgDgUAsiuZ7yrSoa/FX5e0EB2j4= github.com/json-iterator/go v1.1.6/go.mod h1:+SdeFBvtyEkXs7REEP0seUULqWtbJapLOCVDaaPEHmU= github.com/json-iterator/go v1.1.10/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4= @@ -490,6 +491,7 @@ github.com/munnerz/goautoneg v0.0.0-20120707110453-a547fc61f48d/go.mod h1:+n7T8m github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA= github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ= github.com/mwitkow/go-conntrack v0.0.0-20161129095857-cc309e4a2223/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U= +github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f h1:KUppIJq7/+SVif2QVs3tOP0zanoHgBEVAwHxUSIzRqU= github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U= github.com/mxk/go-flowrate v0.0.0-20140419014527-cca7078d478f h1:y5//uYreIhSUg3J1GEMiLbxo1LJaP8RfCpH6pymGZus= github.com/mxk/go-flowrate v0.0.0-20140419014527-cca7078d478f/go.mod h1:ZdcZmHo+o7JKHSa8/e818NopupXU1YMK5fe1lsApnBw= diff --git a/pkg/manager/manager.go b/pkg/manager/manager.go index 87e7b147..18e53bfc 100644 --- a/pkg/manager/manager.go +++ b/pkg/manager/manager.go @@ -3,6 +3,7 @@ package manager import ( "archive/zip" "context" + "encoding/json" "fmt" "os" "os/exec" @@ -12,15 +13,14 @@ import ( "time" "github.com/pkg/errors" + "github.com/rancher/wrangler/pkg/signals" "github.com/sirupsen/logrus" - appsv1 "k8s.io/api/apps/v1" v1 "k8s.io/api/core/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" "k8s.io/apimachinery/pkg/runtime/schema" "k8s.io/client-go/rest" - "github.com/rancher/wrangler/pkg/signals" - "github.com/rancher/support-bundle-kit/pkg/manager/client" "github.com/rancher/support-bundle-kit/pkg/types" "github.com/rancher/support-bundle-kit/pkg/utils" @@ -120,6 +120,10 @@ func (m *SupportBundleManager) Run() error { types.ManagerPhaseClusterBundle, m.phaseCollectClusterBundle, }, + { + types.ManagerPhasePrometheusBundle, + m.phaseCollectPrometheusBundle, + }, { types.ManagerPhaseNodeBundle, m.phaseCollectNodeBundles, @@ -210,6 +214,50 @@ func (m *SupportBundleManager) phaseCollectClusterBundle() error { return nil } +func (m *SupportBundleManager) phaseCollectPrometheusBundle() error { + pods, err := m.k8s.GetPodsListByLabels("cattle-monitoring-system", "app.kubernetes.io/name=prometheus") + if err != nil { + if apierrors.IsNotFound(err) { + logrus.Info("prometheus pods not found") + return nil + } + + return errors.Wrap(err, "failed to get prometheus pods") + } + + if len(pods.Items) == 0 { + logrus.Info("prometheus pods not found") + return nil + } + + if len(pods.Items) > 1 { + return fmt.Errorf("multiple %d prometheus pods found", len(pods.Items)) + } + + targetPod := pods.Items[0] + p, err := utils.NewPrometheus(targetPod.Status.PodIP) + if err != nil { + logrus.Debugf("host: %s, port: %d", targetPod.Status.PodIP, utils.PrometheusPort) + return errors.Wrap(err, "failed to new prometheus") + } + + alerts, err := p.GetAlerts(m.context) + if err != nil { + return errors.Wrap(err, "failed to get prometheus alert") + } + + b, err := json.MarshalIndent(alerts, "", "\t") + if err != nil { + return errors.Wrap(err, "failed to marshal prometheus alert") + } + + if err := os.WriteFile(fmt.Sprintf("%s/prometheus-alerts.json", m.getWorkingDir()), b, 0644); err != nil { + return errors.Wrap(err, "failed to write prometheus alert") + } + + return nil +} + func (m *SupportBundleManager) phaseCollectNodeBundles() error { err := m.collectNodeBundles() if err != nil { diff --git a/pkg/types/types.go b/pkg/types/types.go index c14e2fc5..957c9c8f 100644 --- a/pkg/types/types.go +++ b/pkg/types/types.go @@ -29,11 +29,12 @@ const ( type ManagerPhase string const ( - ManagerPhaseInit = ManagerPhase("init") - ManagerPhaseClusterBundle = ManagerPhase("cluster bundle") - ManagerPhaseNodeBundle = ManagerPhase("node bundle") - ManagerPhasePackaging = ManagerPhase("package") - ManagerPhaseDone = ManagerPhase("done") + ManagerPhaseInit = ManagerPhase("init") + ManagerPhaseClusterBundle = ManagerPhase("cluster bundle") + ManagerPhasePrometheusBundle = ManagerPhase("prometheus bundle") + ManagerPhaseNodeBundle = ManagerPhase("node bundle") + ManagerPhasePackaging = ManagerPhase("package") + ManagerPhaseDone = ManagerPhase("done") ) type ManagerStatus struct { diff --git a/pkg/utils/prometheus.go b/pkg/utils/prometheus.go new file mode 100644 index 00000000..6765e615 --- /dev/null +++ b/pkg/utils/prometheus.go @@ -0,0 +1,37 @@ +package utils + +import ( + "context" + "fmt" + + "github.com/prometheus/client_golang/api" + "github.com/prometheus/client_golang/api/prometheus/v1" +) + +const PrometheusPort = 9090 + +type Prometheus struct { + api v1.API +} + +func NewPrometheus(host string) (*Prometheus, error) { + client, err := api.NewClient(api.Config{ + Address: fmt.Sprintf("http://%s:%d", host, PrometheusPort), + }) + + if err != nil { + return nil, err + } + + return &Prometheus{api: v1.NewAPI(client)}, nil +} + +func (p *Prometheus) GetAlerts(ctx context.Context) ([]v1.Alert, error) { + result, err := p.api.Alerts(ctx) + + if err != nil { + return []v1.Alert{}, err + } + + return result.Alerts, nil +} diff --git a/vendor/github.com/prometheus/client_golang/api/client.go b/vendor/github.com/prometheus/client_golang/api/client.go new file mode 100644 index 00000000..72a01309 --- /dev/null +++ b/vendor/github.com/prometheus/client_golang/api/client.go @@ -0,0 +1,156 @@ +// Copyright 2015 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package api provides clients for the HTTP APIs. +package api + +import ( + "bytes" + "context" + "errors" + "net" + "net/http" + "net/url" + "path" + "strings" + "time" +) + +// DefaultRoundTripper is used if no RoundTripper is set in Config. +var DefaultRoundTripper http.RoundTripper = &http.Transport{ + Proxy: http.ProxyFromEnvironment, + DialContext: (&net.Dialer{ + Timeout: 30 * time.Second, + KeepAlive: 30 * time.Second, + }).DialContext, + TLSHandshakeTimeout: 10 * time.Second, +} + +// Config defines configuration parameters for a new client. +type Config struct { + // The address of the Prometheus to connect to. + Address string + + // Client is used by the Client to drive HTTP requests. If not provided, + // a new one based on the provided RoundTripper (or DefaultRoundTripper) will be used. + Client *http.Client + + // RoundTripper is used by the Client to drive HTTP requests. If not + // provided, DefaultRoundTripper will be used. + RoundTripper http.RoundTripper +} + +func (cfg *Config) roundTripper() http.RoundTripper { + if cfg.RoundTripper == nil { + return DefaultRoundTripper + } + return cfg.RoundTripper +} + +func (cfg *Config) client() http.Client { + if cfg.Client == nil { + return http.Client{ + Transport: cfg.roundTripper(), + } + } + return *cfg.Client +} + +func (cfg *Config) validate() error { + if cfg.Client != nil && cfg.RoundTripper != nil { + return errors.New("api.Config.RoundTripper and api.Config.Client are mutually exclusive") + } + return nil +} + +// Client is the interface for an API client. +type Client interface { + URL(ep string, args map[string]string) *url.URL + Do(context.Context, *http.Request) (*http.Response, []byte, error) +} + +// NewClient returns a new Client. +// +// It is safe to use the returned Client from multiple goroutines. +func NewClient(cfg Config) (Client, error) { + u, err := url.Parse(cfg.Address) + if err != nil { + return nil, err + } + u.Path = strings.TrimRight(u.Path, "/") + + if err := cfg.validate(); err != nil { + return nil, err + } + + return &httpClient{ + endpoint: u, + client: cfg.client(), + }, nil +} + +type httpClient struct { + endpoint *url.URL + client http.Client +} + +func (c *httpClient) URL(ep string, args map[string]string) *url.URL { + p := path.Join(c.endpoint.Path, ep) + + for arg, val := range args { + arg = ":" + arg + p = strings.ReplaceAll(p, arg, val) + } + + u := *c.endpoint + u.Path = p + + return &u +} + +func (c *httpClient) Do(ctx context.Context, req *http.Request) (*http.Response, []byte, error) { + if ctx != nil { + req = req.WithContext(ctx) + } + resp, err := c.client.Do(req) + defer func() { + if resp != nil { + resp.Body.Close() + } + }() + + if err != nil { + return nil, nil, err + } + + var body []byte + done := make(chan struct{}) + go func() { + var buf bytes.Buffer + _, err = buf.ReadFrom(resp.Body) + body = buf.Bytes() + close(done) + }() + + select { + case <-ctx.Done(): + <-done + err = resp.Body.Close() + if err == nil { + err = ctx.Err() + } + case <-done: + } + + return resp, body, err +} diff --git a/vendor/github.com/prometheus/client_golang/api/prometheus/v1/api.go b/vendor/github.com/prometheus/client_golang/api/prometheus/v1/api.go new file mode 100644 index 00000000..f74139c7 --- /dev/null +++ b/vendor/github.com/prometheus/client_golang/api/prometheus/v1/api.go @@ -0,0 +1,1198 @@ +// Copyright 2017 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package v1 provides bindings to the Prometheus HTTP API v1: +// http://prometheus.io/docs/querying/api/ +package v1 + +import ( + "context" + "errors" + "fmt" + "math" + "net/http" + "net/url" + "strconv" + "strings" + "time" + "unsafe" + + json "github.com/json-iterator/go" + + "github.com/prometheus/common/model" + + "github.com/prometheus/client_golang/api" +) + +func init() { + json.RegisterTypeEncoderFunc("model.SamplePair", marshalPointJSON, marshalPointJSONIsEmpty) + json.RegisterTypeDecoderFunc("model.SamplePair", unMarshalPointJSON) +} + +func unMarshalPointJSON(ptr unsafe.Pointer, iter *json.Iterator) { + p := (*model.SamplePair)(ptr) + if !iter.ReadArray() { + iter.ReportError("unmarshal model.SamplePair", "SamplePair must be [timestamp, value]") + return + } + t := iter.ReadNumber() + if err := p.Timestamp.UnmarshalJSON([]byte(t)); err != nil { + iter.ReportError("unmarshal model.SamplePair", err.Error()) + return + } + if !iter.ReadArray() { + iter.ReportError("unmarshal model.SamplePair", "SamplePair missing value") + return + } + + f, err := strconv.ParseFloat(iter.ReadString(), 64) + if err != nil { + iter.ReportError("unmarshal model.SamplePair", err.Error()) + return + } + p.Value = model.SampleValue(f) + + if iter.ReadArray() { + iter.ReportError("unmarshal model.SamplePair", "SamplePair has too many values, must be [timestamp, value]") + return + } +} + +func marshalPointJSON(ptr unsafe.Pointer, stream *json.Stream) { + p := *((*model.SamplePair)(ptr)) + stream.WriteArrayStart() + // Write out the timestamp as a float divided by 1000. + // This is ~3x faster than converting to a float. + t := int64(p.Timestamp) + if t < 0 { + stream.WriteRaw(`-`) + t = -t + } + stream.WriteInt64(t / 1000) + fraction := t % 1000 + if fraction != 0 { + stream.WriteRaw(`.`) + if fraction < 100 { + stream.WriteRaw(`0`) + } + if fraction < 10 { + stream.WriteRaw(`0`) + } + stream.WriteInt64(fraction) + } + stream.WriteMore() + stream.WriteRaw(`"`) + + // Taken from https://github.com/json-iterator/go/blob/master/stream_float.go#L71 as a workaround + // to https://github.com/json-iterator/go/issues/365 (jsoniter, to follow json standard, doesn't allow inf/nan) + buf := stream.Buffer() + abs := math.Abs(float64(p.Value)) + fmt := byte('f') + // Note: Must use float32 comparisons for underlying float32 value to get precise cutoffs right. + if abs != 0 { + if abs < 1e-6 || abs >= 1e21 { + fmt = 'e' + } + } + buf = strconv.AppendFloat(buf, float64(p.Value), fmt, -1, 64) + stream.SetBuffer(buf) + + stream.WriteRaw(`"`) + stream.WriteArrayEnd() +} + +func marshalPointJSONIsEmpty(ptr unsafe.Pointer) bool { + return false +} + +const ( + apiPrefix = "/api/v1" + + epAlerts = apiPrefix + "/alerts" + epAlertManagers = apiPrefix + "/alertmanagers" + epQuery = apiPrefix + "/query" + epQueryRange = apiPrefix + "/query_range" + epQueryExemplars = apiPrefix + "/query_exemplars" + epLabels = apiPrefix + "/labels" + epLabelValues = apiPrefix + "/label/:name/values" + epSeries = apiPrefix + "/series" + epTargets = apiPrefix + "/targets" + epTargetsMetadata = apiPrefix + "/targets/metadata" + epMetadata = apiPrefix + "/metadata" + epRules = apiPrefix + "/rules" + epSnapshot = apiPrefix + "/admin/tsdb/snapshot" + epDeleteSeries = apiPrefix + "/admin/tsdb/delete_series" + epCleanTombstones = apiPrefix + "/admin/tsdb/clean_tombstones" + epConfig = apiPrefix + "/status/config" + epFlags = apiPrefix + "/status/flags" + epBuildinfo = apiPrefix + "/status/buildinfo" + epRuntimeinfo = apiPrefix + "/status/runtimeinfo" + epTSDB = apiPrefix + "/status/tsdb" + epWalReplay = apiPrefix + "/status/walreplay" +) + +// AlertState models the state of an alert. +type AlertState string + +// ErrorType models the different API error types. +type ErrorType string + +// HealthStatus models the health status of a scrape target. +type HealthStatus string + +// RuleType models the type of a rule. +type RuleType string + +// RuleHealth models the health status of a rule. +type RuleHealth string + +// MetricType models the type of a metric. +type MetricType string + +const ( + // Possible values for AlertState. + AlertStateFiring AlertState = "firing" + AlertStateInactive AlertState = "inactive" + AlertStatePending AlertState = "pending" + + // Possible values for ErrorType. + ErrBadData ErrorType = "bad_data" + ErrTimeout ErrorType = "timeout" + ErrCanceled ErrorType = "canceled" + ErrExec ErrorType = "execution" + ErrBadResponse ErrorType = "bad_response" + ErrServer ErrorType = "server_error" + ErrClient ErrorType = "client_error" + + // Possible values for HealthStatus. + HealthGood HealthStatus = "up" + HealthUnknown HealthStatus = "unknown" + HealthBad HealthStatus = "down" + + // Possible values for RuleType. + RuleTypeRecording RuleType = "recording" + RuleTypeAlerting RuleType = "alerting" + + // Possible values for RuleHealth. + RuleHealthGood = "ok" + RuleHealthUnknown = "unknown" + RuleHealthBad = "err" + + // Possible values for MetricType + MetricTypeCounter MetricType = "counter" + MetricTypeGauge MetricType = "gauge" + MetricTypeHistogram MetricType = "histogram" + MetricTypeGaugeHistogram MetricType = "gaugehistogram" + MetricTypeSummary MetricType = "summary" + MetricTypeInfo MetricType = "info" + MetricTypeStateset MetricType = "stateset" + MetricTypeUnknown MetricType = "unknown" +) + +// Error is an error returned by the API. +type Error struct { + Type ErrorType + Msg string + Detail string +} + +func (e *Error) Error() string { + return fmt.Sprintf("%s: %s", e.Type, e.Msg) +} + +// Range represents a sliced time range. +type Range struct { + // The boundaries of the time range. + Start, End time.Time + // The maximum time between two slices within the boundaries. + Step time.Duration +} + +// API provides bindings for Prometheus's v1 API. +type API interface { + // Alerts returns a list of all active alerts. + Alerts(ctx context.Context) (AlertsResult, error) + // AlertManagers returns an overview of the current state of the Prometheus alert manager discovery. + AlertManagers(ctx context.Context) (AlertManagersResult, error) + // CleanTombstones removes the deleted data from disk and cleans up the existing tombstones. + CleanTombstones(ctx context.Context) error + // Config returns the current Prometheus configuration. + Config(ctx context.Context) (ConfigResult, error) + // DeleteSeries deletes data for a selection of series in a time range. + DeleteSeries(ctx context.Context, matches []string, startTime, endTime time.Time) error + // Flags returns the flag values that Prometheus was launched with. + Flags(ctx context.Context) (FlagsResult, error) + // LabelNames returns the unique label names present in the block in sorted order by given time range and matchers. + LabelNames(ctx context.Context, matches []string, startTime, endTime time.Time) ([]string, Warnings, error) + // LabelValues performs a query for the values of the given label, time range and matchers. + LabelValues(ctx context.Context, label string, matches []string, startTime, endTime time.Time) (model.LabelValues, Warnings, error) + // Query performs a query for the given time. + Query(ctx context.Context, query string, ts time.Time, opts ...Option) (model.Value, Warnings, error) + // QueryRange performs a query for the given range. + QueryRange(ctx context.Context, query string, r Range, opts ...Option) (model.Value, Warnings, error) + // QueryExemplars performs a query for exemplars by the given query and time range. + QueryExemplars(ctx context.Context, query string, startTime, endTime time.Time) ([]ExemplarQueryResult, error) + // Buildinfo returns various build information properties about the Prometheus server + Buildinfo(ctx context.Context) (BuildinfoResult, error) + // Runtimeinfo returns the various runtime information properties about the Prometheus server. + Runtimeinfo(ctx context.Context) (RuntimeinfoResult, error) + // Series finds series by label matchers. + Series(ctx context.Context, matches []string, startTime, endTime time.Time) ([]model.LabelSet, Warnings, error) + // Snapshot creates a snapshot of all current data into snapshots/- + // under the TSDB's data directory and returns the directory as response. + Snapshot(ctx context.Context, skipHead bool) (SnapshotResult, error) + // Rules returns a list of alerting and recording rules that are currently loaded. + Rules(ctx context.Context) (RulesResult, error) + // Targets returns an overview of the current state of the Prometheus target discovery. + Targets(ctx context.Context) (TargetsResult, error) + // TargetsMetadata returns metadata about metrics currently scraped by the target. + TargetsMetadata(ctx context.Context, matchTarget, metric, limit string) ([]MetricMetadata, error) + // Metadata returns metadata about metrics currently scraped by the metric name. + Metadata(ctx context.Context, metric, limit string) (map[string][]Metadata, error) + // TSDB returns the cardinality statistics. + TSDB(ctx context.Context) (TSDBResult, error) + // WalReplay returns the current replay status of the wal. + WalReplay(ctx context.Context) (WalReplayStatus, error) +} + +// AlertsResult contains the result from querying the alerts endpoint. +type AlertsResult struct { + Alerts []Alert `json:"alerts"` +} + +// AlertManagersResult contains the result from querying the alertmanagers endpoint. +type AlertManagersResult struct { + Active []AlertManager `json:"activeAlertManagers"` + Dropped []AlertManager `json:"droppedAlertManagers"` +} + +// AlertManager models a configured Alert Manager. +type AlertManager struct { + URL string `json:"url"` +} + +// ConfigResult contains the result from querying the config endpoint. +type ConfigResult struct { + YAML string `json:"yaml"` +} + +// FlagsResult contains the result from querying the flag endpoint. +type FlagsResult map[string]string + +// BuildinfoResult contains the results from querying the buildinfo endpoint. +type BuildinfoResult struct { + Version string `json:"version"` + Revision string `json:"revision"` + Branch string `json:"branch"` + BuildUser string `json:"buildUser"` + BuildDate string `json:"buildDate"` + GoVersion string `json:"goVersion"` +} + +// RuntimeinfoResult contains the result from querying the runtimeinfo endpoint. +type RuntimeinfoResult struct { + StartTime time.Time `json:"startTime"` + CWD string `json:"CWD"` + ReloadConfigSuccess bool `json:"reloadConfigSuccess"` + LastConfigTime time.Time `json:"lastConfigTime"` + CorruptionCount int `json:"corruptionCount"` + GoroutineCount int `json:"goroutineCount"` + GOMAXPROCS int `json:"GOMAXPROCS"` + GOGC string `json:"GOGC"` + GODEBUG string `json:"GODEBUG"` + StorageRetention string `json:"storageRetention"` +} + +// SnapshotResult contains the result from querying the snapshot endpoint. +type SnapshotResult struct { + Name string `json:"name"` +} + +// RulesResult contains the result from querying the rules endpoint. +type RulesResult struct { + Groups []RuleGroup `json:"groups"` +} + +// RuleGroup models a rule group that contains a set of recording and alerting rules. +type RuleGroup struct { + Name string `json:"name"` + File string `json:"file"` + Interval float64 `json:"interval"` + Rules Rules `json:"rules"` +} + +// Recording and alerting rules are stored in the same slice to preserve the order +// that rules are returned in by the API. +// +// Rule types can be determined using a type switch: +// +// switch v := rule.(type) { +// case RecordingRule: +// fmt.Print("got a recording rule") +// case AlertingRule: +// fmt.Print("got a alerting rule") +// default: +// fmt.Printf("unknown rule type %s", v) +// } +type Rules []interface{} + +// AlertingRule models a alerting rule. +type AlertingRule struct { + Name string `json:"name"` + Query string `json:"query"` + Duration float64 `json:"duration"` + Labels model.LabelSet `json:"labels"` + Annotations model.LabelSet `json:"annotations"` + Alerts []*Alert `json:"alerts"` + Health RuleHealth `json:"health"` + LastError string `json:"lastError,omitempty"` + EvaluationTime float64 `json:"evaluationTime"` + LastEvaluation time.Time `json:"lastEvaluation"` + State string `json:"state"` +} + +// RecordingRule models a recording rule. +type RecordingRule struct { + Name string `json:"name"` + Query string `json:"query"` + Labels model.LabelSet `json:"labels,omitempty"` + Health RuleHealth `json:"health"` + LastError string `json:"lastError,omitempty"` + EvaluationTime float64 `json:"evaluationTime"` + LastEvaluation time.Time `json:"lastEvaluation"` +} + +// Alert models an active alert. +type Alert struct { + ActiveAt time.Time `json:"activeAt"` + Annotations model.LabelSet + Labels model.LabelSet + State AlertState + Value string +} + +// TargetsResult contains the result from querying the targets endpoint. +type TargetsResult struct { + Active []ActiveTarget `json:"activeTargets"` + Dropped []DroppedTarget `json:"droppedTargets"` +} + +// ActiveTarget models an active Prometheus scrape target. +type ActiveTarget struct { + DiscoveredLabels map[string]string `json:"discoveredLabels"` + Labels model.LabelSet `json:"labels"` + ScrapePool string `json:"scrapePool"` + ScrapeURL string `json:"scrapeUrl"` + GlobalURL string `json:"globalUrl"` + LastError string `json:"lastError"` + LastScrape time.Time `json:"lastScrape"` + LastScrapeDuration float64 `json:"lastScrapeDuration"` + Health HealthStatus `json:"health"` +} + +// DroppedTarget models a dropped Prometheus scrape target. +type DroppedTarget struct { + DiscoveredLabels map[string]string `json:"discoveredLabels"` +} + +// MetricMetadata models the metadata of a metric with its scrape target and name. +type MetricMetadata struct { + Target map[string]string `json:"target"` + Metric string `json:"metric,omitempty"` + Type MetricType `json:"type"` + Help string `json:"help"` + Unit string `json:"unit"` +} + +// Metadata models the metadata of a metric. +type Metadata struct { + Type MetricType `json:"type"` + Help string `json:"help"` + Unit string `json:"unit"` +} + +// queryResult contains result data for a query. +type queryResult struct { + Type model.ValueType `json:"resultType"` + Result interface{} `json:"result"` + + // The decoded value. + v model.Value +} + +// TSDBResult contains the result from querying the tsdb endpoint. +type TSDBResult struct { + HeadStats TSDBHeadStats `json:"headStats"` + SeriesCountByMetricName []Stat `json:"seriesCountByMetricName"` + LabelValueCountByLabelName []Stat `json:"labelValueCountByLabelName"` + MemoryInBytesByLabelName []Stat `json:"memoryInBytesByLabelName"` + SeriesCountByLabelValuePair []Stat `json:"seriesCountByLabelValuePair"` +} + +// TSDBHeadStats contains TSDB stats +type TSDBHeadStats struct { + NumSeries int `json:"numSeries"` + NumLabelPairs int `json:"numLabelPairs"` + ChunkCount int `json:"chunkCount"` + MinTime int `json:"minTime"` + MaxTime int `json:"maxTime"` +} + +// WalReplayStatus represents the wal replay status. +type WalReplayStatus struct { + Min int `json:"min"` + Max int `json:"max"` + Current int `json:"current"` +} + +// Stat models information about statistic value. +type Stat struct { + Name string `json:"name"` + Value uint64 `json:"value"` +} + +func (rg *RuleGroup) UnmarshalJSON(b []byte) error { + v := struct { + Name string `json:"name"` + File string `json:"file"` + Interval float64 `json:"interval"` + Rules []json.RawMessage `json:"rules"` + }{} + + if err := json.Unmarshal(b, &v); err != nil { + return err + } + + rg.Name = v.Name + rg.File = v.File + rg.Interval = v.Interval + + for _, rule := range v.Rules { + alertingRule := AlertingRule{} + if err := json.Unmarshal(rule, &alertingRule); err == nil { + rg.Rules = append(rg.Rules, alertingRule) + continue + } + recordingRule := RecordingRule{} + if err := json.Unmarshal(rule, &recordingRule); err == nil { + rg.Rules = append(rg.Rules, recordingRule) + continue + } + return errors.New("failed to decode JSON into an alerting or recording rule") + } + + return nil +} + +func (r *AlertingRule) UnmarshalJSON(b []byte) error { + v := struct { + Type string `json:"type"` + }{} + if err := json.Unmarshal(b, &v); err != nil { + return err + } + if v.Type == "" { + return errors.New("type field not present in rule") + } + if v.Type != string(RuleTypeAlerting) { + return fmt.Errorf("expected rule of type %s but got %s", string(RuleTypeAlerting), v.Type) + } + + rule := struct { + Name string `json:"name"` + Query string `json:"query"` + Duration float64 `json:"duration"` + Labels model.LabelSet `json:"labels"` + Annotations model.LabelSet `json:"annotations"` + Alerts []*Alert `json:"alerts"` + Health RuleHealth `json:"health"` + LastError string `json:"lastError,omitempty"` + EvaluationTime float64 `json:"evaluationTime"` + LastEvaluation time.Time `json:"lastEvaluation"` + State string `json:"state"` + }{} + if err := json.Unmarshal(b, &rule); err != nil { + return err + } + r.Health = rule.Health + r.Annotations = rule.Annotations + r.Name = rule.Name + r.Query = rule.Query + r.Alerts = rule.Alerts + r.Duration = rule.Duration + r.Labels = rule.Labels + r.LastError = rule.LastError + r.EvaluationTime = rule.EvaluationTime + r.LastEvaluation = rule.LastEvaluation + r.State = rule.State + + return nil +} + +func (r *RecordingRule) UnmarshalJSON(b []byte) error { + v := struct { + Type string `json:"type"` + }{} + if err := json.Unmarshal(b, &v); err != nil { + return err + } + if v.Type == "" { + return errors.New("type field not present in rule") + } + if v.Type != string(RuleTypeRecording) { + return fmt.Errorf("expected rule of type %s but got %s", string(RuleTypeRecording), v.Type) + } + + rule := struct { + Name string `json:"name"` + Query string `json:"query"` + Labels model.LabelSet `json:"labels,omitempty"` + Health RuleHealth `json:"health"` + LastError string `json:"lastError,omitempty"` + EvaluationTime float64 `json:"evaluationTime"` + LastEvaluation time.Time `json:"lastEvaluation"` + }{} + if err := json.Unmarshal(b, &rule); err != nil { + return err + } + r.Health = rule.Health + r.Labels = rule.Labels + r.Name = rule.Name + r.LastError = rule.LastError + r.Query = rule.Query + r.EvaluationTime = rule.EvaluationTime + r.LastEvaluation = rule.LastEvaluation + + return nil +} + +func (qr *queryResult) UnmarshalJSON(b []byte) error { + v := struct { + Type model.ValueType `json:"resultType"` + Result json.RawMessage `json:"result"` + }{} + + err := json.Unmarshal(b, &v) + if err != nil { + return err + } + + switch v.Type { + case model.ValScalar: + var sv model.Scalar + err = json.Unmarshal(v.Result, &sv) + qr.v = &sv + + case model.ValVector: + var vv model.Vector + err = json.Unmarshal(v.Result, &vv) + qr.v = vv + + case model.ValMatrix: + var mv model.Matrix + err = json.Unmarshal(v.Result, &mv) + qr.v = mv + + default: + err = fmt.Errorf("unexpected value type %q", v.Type) + } + return err +} + +// Exemplar is additional information associated with a time series. +type Exemplar struct { + Labels model.LabelSet `json:"labels"` + Value model.SampleValue `json:"value"` + Timestamp model.Time `json:"timestamp"` +} + +type ExemplarQueryResult struct { + SeriesLabels model.LabelSet `json:"seriesLabels"` + Exemplars []Exemplar `json:"exemplars"` +} + +// NewAPI returns a new API for the client. +// +// It is safe to use the returned API from multiple goroutines. +func NewAPI(c api.Client) API { + return &httpAPI{ + client: &apiClientImpl{ + client: c, + }, + } +} + +type httpAPI struct { + client apiClient +} + +func (h *httpAPI) Alerts(ctx context.Context) (AlertsResult, error) { + u := h.client.URL(epAlerts, nil) + + req, err := http.NewRequest(http.MethodGet, u.String(), nil) + if err != nil { + return AlertsResult{}, err + } + + _, body, _, err := h.client.Do(ctx, req) + if err != nil { + return AlertsResult{}, err + } + + var res AlertsResult + return res, json.Unmarshal(body, &res) +} + +func (h *httpAPI) AlertManagers(ctx context.Context) (AlertManagersResult, error) { + u := h.client.URL(epAlertManagers, nil) + + req, err := http.NewRequest(http.MethodGet, u.String(), nil) + if err != nil { + return AlertManagersResult{}, err + } + + _, body, _, err := h.client.Do(ctx, req) + if err != nil { + return AlertManagersResult{}, err + } + + var res AlertManagersResult + return res, json.Unmarshal(body, &res) +} + +func (h *httpAPI) CleanTombstones(ctx context.Context) error { + u := h.client.URL(epCleanTombstones, nil) + + req, err := http.NewRequest(http.MethodPost, u.String(), nil) + if err != nil { + return err + } + + _, _, _, err = h.client.Do(ctx, req) + return err +} + +func (h *httpAPI) Config(ctx context.Context) (ConfigResult, error) { + u := h.client.URL(epConfig, nil) + + req, err := http.NewRequest(http.MethodGet, u.String(), nil) + if err != nil { + return ConfigResult{}, err + } + + _, body, _, err := h.client.Do(ctx, req) + if err != nil { + return ConfigResult{}, err + } + + var res ConfigResult + return res, json.Unmarshal(body, &res) +} + +func (h *httpAPI) DeleteSeries(ctx context.Context, matches []string, startTime, endTime time.Time) error { + u := h.client.URL(epDeleteSeries, nil) + q := u.Query() + + for _, m := range matches { + q.Add("match[]", m) + } + + q.Set("start", formatTime(startTime)) + q.Set("end", formatTime(endTime)) + + u.RawQuery = q.Encode() + + req, err := http.NewRequest(http.MethodPost, u.String(), nil) + if err != nil { + return err + } + + _, _, _, err = h.client.Do(ctx, req) + return err +} + +func (h *httpAPI) Flags(ctx context.Context) (FlagsResult, error) { + u := h.client.URL(epFlags, nil) + + req, err := http.NewRequest(http.MethodGet, u.String(), nil) + if err != nil { + return FlagsResult{}, err + } + + _, body, _, err := h.client.Do(ctx, req) + if err != nil { + return FlagsResult{}, err + } + + var res FlagsResult + return res, json.Unmarshal(body, &res) +} + +func (h *httpAPI) Buildinfo(ctx context.Context) (BuildinfoResult, error) { + u := h.client.URL(epBuildinfo, nil) + + req, err := http.NewRequest(http.MethodGet, u.String(), nil) + if err != nil { + return BuildinfoResult{}, err + } + + _, body, _, err := h.client.Do(ctx, req) + if err != nil { + return BuildinfoResult{}, err + } + + var res BuildinfoResult + return res, json.Unmarshal(body, &res) +} + +func (h *httpAPI) Runtimeinfo(ctx context.Context) (RuntimeinfoResult, error) { + u := h.client.URL(epRuntimeinfo, nil) + + req, err := http.NewRequest(http.MethodGet, u.String(), nil) + if err != nil { + return RuntimeinfoResult{}, err + } + + _, body, _, err := h.client.Do(ctx, req) + if err != nil { + return RuntimeinfoResult{}, err + } + + var res RuntimeinfoResult + return res, json.Unmarshal(body, &res) +} + +func (h *httpAPI) LabelNames(ctx context.Context, matches []string, startTime, endTime time.Time) ([]string, Warnings, error) { + u := h.client.URL(epLabels, nil) + q := u.Query() + q.Set("start", formatTime(startTime)) + q.Set("end", formatTime(endTime)) + for _, m := range matches { + q.Add("match[]", m) + } + + u.RawQuery = q.Encode() + + req, err := http.NewRequest(http.MethodGet, u.String(), nil) + if err != nil { + return nil, nil, err + } + _, body, w, err := h.client.Do(ctx, req) + if err != nil { + return nil, w, err + } + var labelNames []string + return labelNames, w, json.Unmarshal(body, &labelNames) +} + +func (h *httpAPI) LabelValues(ctx context.Context, label string, matches []string, startTime, endTime time.Time) (model.LabelValues, Warnings, error) { + u := h.client.URL(epLabelValues, map[string]string{"name": label}) + q := u.Query() + q.Set("start", formatTime(startTime)) + q.Set("end", formatTime(endTime)) + for _, m := range matches { + q.Add("match[]", m) + } + + u.RawQuery = q.Encode() + + req, err := http.NewRequest(http.MethodGet, u.String(), nil) + if err != nil { + return nil, nil, err + } + _, body, w, err := h.client.Do(ctx, req) + if err != nil { + return nil, w, err + } + var labelValues model.LabelValues + return labelValues, w, json.Unmarshal(body, &labelValues) +} + +type apiOptions struct { + timeout time.Duration +} + +type Option func(c *apiOptions) + +// WithTimeout can be used to provide an optional query evaluation timeout for Query and QueryRange. +// https://prometheus.io/docs/prometheus/latest/querying/api/#instant-queries +func WithTimeout(timeout time.Duration) Option { + return func(o *apiOptions) { + o.timeout = timeout + } +} + +func (h *httpAPI) Query(ctx context.Context, query string, ts time.Time, opts ...Option) (model.Value, Warnings, error) { + u := h.client.URL(epQuery, nil) + q := u.Query() + + opt := &apiOptions{} + for _, o := range opts { + o(opt) + } + + d := opt.timeout + if d > 0 { + q.Set("timeout", d.String()) + } + + q.Set("query", query) + if !ts.IsZero() { + q.Set("time", formatTime(ts)) + } + + _, body, warnings, err := h.client.DoGetFallback(ctx, u, q) + if err != nil { + return nil, warnings, err + } + + var qres queryResult + return qres.v, warnings, json.Unmarshal(body, &qres) +} + +func (h *httpAPI) QueryRange(ctx context.Context, query string, r Range, opts ...Option) (model.Value, Warnings, error) { + u := h.client.URL(epQueryRange, nil) + q := u.Query() + + q.Set("query", query) + q.Set("start", formatTime(r.Start)) + q.Set("end", formatTime(r.End)) + q.Set("step", strconv.FormatFloat(r.Step.Seconds(), 'f', -1, 64)) + + opt := &apiOptions{} + for _, o := range opts { + o(opt) + } + + d := opt.timeout + if d > 0 { + q.Set("timeout", d.String()) + } + + _, body, warnings, err := h.client.DoGetFallback(ctx, u, q) + if err != nil { + return nil, warnings, err + } + + var qres queryResult + + return qres.v, warnings, json.Unmarshal(body, &qres) +} + +func (h *httpAPI) Series(ctx context.Context, matches []string, startTime, endTime time.Time) ([]model.LabelSet, Warnings, error) { + u := h.client.URL(epSeries, nil) + q := u.Query() + + for _, m := range matches { + q.Add("match[]", m) + } + + q.Set("start", formatTime(startTime)) + q.Set("end", formatTime(endTime)) + + u.RawQuery = q.Encode() + + req, err := http.NewRequest(http.MethodGet, u.String(), nil) + if err != nil { + return nil, nil, err + } + + _, body, warnings, err := h.client.Do(ctx, req) + if err != nil { + return nil, warnings, err + } + + var mset []model.LabelSet + return mset, warnings, json.Unmarshal(body, &mset) +} + +func (h *httpAPI) Snapshot(ctx context.Context, skipHead bool) (SnapshotResult, error) { + u := h.client.URL(epSnapshot, nil) + q := u.Query() + + q.Set("skip_head", strconv.FormatBool(skipHead)) + + u.RawQuery = q.Encode() + + req, err := http.NewRequest(http.MethodPost, u.String(), nil) + if err != nil { + return SnapshotResult{}, err + } + + _, body, _, err := h.client.Do(ctx, req) + if err != nil { + return SnapshotResult{}, err + } + + var res SnapshotResult + return res, json.Unmarshal(body, &res) +} + +func (h *httpAPI) Rules(ctx context.Context) (RulesResult, error) { + u := h.client.URL(epRules, nil) + + req, err := http.NewRequest(http.MethodGet, u.String(), nil) + if err != nil { + return RulesResult{}, err + } + + _, body, _, err := h.client.Do(ctx, req) + if err != nil { + return RulesResult{}, err + } + + var res RulesResult + return res, json.Unmarshal(body, &res) +} + +func (h *httpAPI) Targets(ctx context.Context) (TargetsResult, error) { + u := h.client.URL(epTargets, nil) + + req, err := http.NewRequest(http.MethodGet, u.String(), nil) + if err != nil { + return TargetsResult{}, err + } + + _, body, _, err := h.client.Do(ctx, req) + if err != nil { + return TargetsResult{}, err + } + + var res TargetsResult + return res, json.Unmarshal(body, &res) +} + +func (h *httpAPI) TargetsMetadata(ctx context.Context, matchTarget, metric, limit string) ([]MetricMetadata, error) { + u := h.client.URL(epTargetsMetadata, nil) + q := u.Query() + + q.Set("match_target", matchTarget) + q.Set("metric", metric) + q.Set("limit", limit) + + u.RawQuery = q.Encode() + + req, err := http.NewRequest(http.MethodGet, u.String(), nil) + if err != nil { + return nil, err + } + + _, body, _, err := h.client.Do(ctx, req) + if err != nil { + return nil, err + } + + var res []MetricMetadata + return res, json.Unmarshal(body, &res) +} + +func (h *httpAPI) Metadata(ctx context.Context, metric, limit string) (map[string][]Metadata, error) { + u := h.client.URL(epMetadata, nil) + q := u.Query() + + q.Set("metric", metric) + q.Set("limit", limit) + + u.RawQuery = q.Encode() + + req, err := http.NewRequest(http.MethodGet, u.String(), nil) + if err != nil { + return nil, err + } + + _, body, _, err := h.client.Do(ctx, req) + if err != nil { + return nil, err + } + + var res map[string][]Metadata + return res, json.Unmarshal(body, &res) +} + +func (h *httpAPI) TSDB(ctx context.Context) (TSDBResult, error) { + u := h.client.URL(epTSDB, nil) + + req, err := http.NewRequest(http.MethodGet, u.String(), nil) + if err != nil { + return TSDBResult{}, err + } + + _, body, _, err := h.client.Do(ctx, req) + if err != nil { + return TSDBResult{}, err + } + + var res TSDBResult + return res, json.Unmarshal(body, &res) +} + +func (h *httpAPI) WalReplay(ctx context.Context) (WalReplayStatus, error) { + u := h.client.URL(epWalReplay, nil) + + req, err := http.NewRequest(http.MethodGet, u.String(), nil) + if err != nil { + return WalReplayStatus{}, err + } + + _, body, _, err := h.client.Do(ctx, req) + if err != nil { + return WalReplayStatus{}, err + } + + var res WalReplayStatus + return res, json.Unmarshal(body, &res) +} + +func (h *httpAPI) QueryExemplars(ctx context.Context, query string, startTime, endTime time.Time) ([]ExemplarQueryResult, error) { + u := h.client.URL(epQueryExemplars, nil) + q := u.Query() + + q.Set("query", query) + q.Set("start", formatTime(startTime)) + q.Set("end", formatTime(endTime)) + u.RawQuery = q.Encode() + + req, err := http.NewRequest(http.MethodGet, u.String(), nil) + if err != nil { + return nil, err + } + + _, body, _, err := h.client.Do(ctx, req) + if err != nil { + return nil, err + } + + var res []ExemplarQueryResult + return res, json.Unmarshal(body, &res) +} + +// Warnings is an array of non critical errors +type Warnings []string + +// apiClient wraps a regular client and processes successful API responses. +// Successful also includes responses that errored at the API level. +type apiClient interface { + URL(ep string, args map[string]string) *url.URL + Do(context.Context, *http.Request) (*http.Response, []byte, Warnings, error) + DoGetFallback(ctx context.Context, u *url.URL, args url.Values) (*http.Response, []byte, Warnings, error) +} + +type apiClientImpl struct { + client api.Client +} + +type apiResponse struct { + Status string `json:"status"` + Data json.RawMessage `json:"data"` + ErrorType ErrorType `json:"errorType"` + Error string `json:"error"` + Warnings []string `json:"warnings,omitempty"` +} + +func apiError(code int) bool { + // These are the codes that Prometheus sends when it returns an error. + return code == http.StatusUnprocessableEntity || code == http.StatusBadRequest +} + +func errorTypeAndMsgFor(resp *http.Response) (ErrorType, string) { + switch resp.StatusCode / 100 { + case 4: + return ErrClient, fmt.Sprintf("client error: %d", resp.StatusCode) + case 5: + return ErrServer, fmt.Sprintf("server error: %d", resp.StatusCode) + } + return ErrBadResponse, fmt.Sprintf("bad response code %d", resp.StatusCode) +} + +func (h *apiClientImpl) URL(ep string, args map[string]string) *url.URL { + return h.client.URL(ep, args) +} + +func (h *apiClientImpl) Do(ctx context.Context, req *http.Request) (*http.Response, []byte, Warnings, error) { + resp, body, err := h.client.Do(ctx, req) + if err != nil { + return resp, body, nil, err + } + + code := resp.StatusCode + + if code/100 != 2 && !apiError(code) { + errorType, errorMsg := errorTypeAndMsgFor(resp) + return resp, body, nil, &Error{ + Type: errorType, + Msg: errorMsg, + Detail: string(body), + } + } + + var result apiResponse + + if http.StatusNoContent != code { + if jsonErr := json.Unmarshal(body, &result); jsonErr != nil { + return resp, body, nil, &Error{ + Type: ErrBadResponse, + Msg: jsonErr.Error(), + } + } + } + + if apiError(code) && result.Status == "success" { + err = &Error{ + Type: ErrBadResponse, + Msg: "inconsistent body for response code", + } + } + + if result.Status == "error" { + err = &Error{ + Type: result.ErrorType, + Msg: result.Error, + } + } + + return resp, []byte(result.Data), result.Warnings, err +} + +// DoGetFallback will attempt to do the request as-is, and on a 405 or 501 it +// will fallback to a GET request. +func (h *apiClientImpl) DoGetFallback(ctx context.Context, u *url.URL, args url.Values) (*http.Response, []byte, Warnings, error) { + encodedArgs := args.Encode() + req, err := http.NewRequest(http.MethodPost, u.String(), strings.NewReader(encodedArgs)) + if err != nil { + return nil, nil, nil, err + } + req.Header.Set("Content-Type", "application/x-www-form-urlencoded") + // Following comment originates from https://pkg.go.dev/net/http#Transport + // Transport only retries a request upon encountering a network error if the request is + // idempotent and either has no body or has its Request.GetBody defined. HTTP requests + // are considered idempotent if they have HTTP methods GET, HEAD, OPTIONS, or TRACE; or + // if their Header map contains an "Idempotency-Key" or "X-Idempotency-Key" entry. If the + // idempotency key value is a zero-length slice, the request is treated as idempotent but + // the header is not sent on the wire. + req.Header["Idempotency-Key"] = nil + + resp, body, warnings, err := h.Do(ctx, req) + if resp != nil && (resp.StatusCode == http.StatusMethodNotAllowed || resp.StatusCode == http.StatusNotImplemented) { + u.RawQuery = encodedArgs + req, err = http.NewRequest(http.MethodGet, u.String(), nil) + if err != nil { + return nil, nil, warnings, err + } + return h.Do(ctx, req) + } + return resp, body, warnings, err +} + +func formatTime(t time.Time) string { + return strconv.FormatFloat(float64(t.Unix())+float64(t.Nanosecond())/1e9, 'f', -1, 64) +} diff --git a/vendor/modules.txt b/vendor/modules.txt index e38d78f2..4c6692cb 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -362,6 +362,8 @@ github.com/pquerna/cachecontrol github.com/pquerna/cachecontrol/cacheobject # github.com/prometheus/client_golang v1.14.0 ## explicit; go 1.17 +github.com/prometheus/client_golang/api +github.com/prometheus/client_golang/api/prometheus/v1 github.com/prometheus/client_golang/prometheus github.com/prometheus/client_golang/prometheus/collectors github.com/prometheus/client_golang/prometheus/internal From 7389081a94b66a084a3c16775b7096af22a63bd6 Mon Sep 17 00:00:00 2001 From: Jack Yu Date: Fri, 12 Jul 2024 15:53:37 +0800 Subject: [PATCH 2/2] feat: make phaseCollectPrometheusBundle optional Signed-off-by: Jack Yu --- pkg/manager/manager.go | 79 ++++++++++++++++++++++++++++++++---------- 1 file changed, 60 insertions(+), 19 deletions(-) diff --git a/pkg/manager/manager.go b/pkg/manager/manager.go index 18e53bfc..faa1026d 100644 --- a/pkg/manager/manager.go +++ b/pkg/manager/manager.go @@ -66,6 +66,11 @@ type SupportBundleManager struct { expectedNodes map[string]string } +type RunPhase struct { + Name types.ManagerPhase + Run func() error +} + func (m *SupportBundleManager) check() error { if len(m.Namespaces) == 0 || len(m.Namespaces[0]) == 0 { return errors.New("namespace is not specified") @@ -108,10 +113,7 @@ func (m *SupportBundleManager) getBundlefilesize() (int64, error) { } func (m *SupportBundleManager) Run() error { - phases := []struct { - Name types.ManagerPhase - Run func() error - }{ + requiredPhases := []RunPhase{ { types.ManagerPhaseInit, m.phaseInit, @@ -120,14 +122,23 @@ func (m *SupportBundleManager) Run() error { types.ManagerPhaseClusterBundle, m.phaseCollectClusterBundle, }, - { - types.ManagerPhasePrometheusBundle, - m.phaseCollectPrometheusBundle, - }, + { types.ManagerPhaseNodeBundle, m.phaseCollectNodeBundles, }, + } + + // optionalPhases should have independent phases + // if logic is dependent, put it into one function + optionalPhases := []RunPhase{ + { + types.ManagerPhasePrometheusBundle, + m.phaseCollectPrometheusBundle, + }, + } + + postPhases := []RunPhase{ { types.ManagerPhasePackaging, m.phasePackaging, @@ -138,21 +149,51 @@ func (m *SupportBundleManager) Run() error { }, } - for i, phase := range phases { - logrus.Infof("Running phase %s", phase.Name) - m.status.SetPhase(phase.Name) - if err := phase.Run(); err != nil { - m.status.SetError(err.Error()) - logrus.Errorf("Failed to run phase %s: %s", phase.Name, err.Error()) - break + m.runAllPhases(requiredPhases, optionalPhases, postPhases) + + <-m.context.Done() + return nil +} + +func (m *SupportBundleManager) runAllPhases(requiredPhases []RunPhase, optionalPhases []RunPhase, postPhases []RunPhase) { + progressCount := 0 + maxProgressCount := len(requiredPhases) + len(optionalPhases) + len(postPhases) + + for _, phase := range requiredPhases { + if err := m.runPhase(phase, &progressCount, maxProgressCount); err != nil { + logrus.Errorf("Failed to run requiredPhases %s: %s", phase.Name, err.Error()) + return } + } - progress := 100 * (i + 1) / len(phases) - m.status.SetProgress(progress) - logrus.Infof("Succeed to run phase %s. Progress (%d).", phase.Name, progress) + for _, phase := range optionalPhases { + if err := m.runPhase(phase, &progressCount, maxProgressCount); err != nil { + logrus.Errorf("Failed to run optionalPhases %s: %s", phase.Name, err.Error()) + // Since it's optional, don't return error. + continue + } } - <-m.context.Done() + for _, phase := range postPhases { + if err := m.runPhase(phase, &progressCount, maxProgressCount); err != nil { + logrus.Errorf("Failed to run postPhases %s: %s", phase.Name, err.Error()) + return + } + } +} + +func (m *SupportBundleManager) runPhase(phase RunPhase, progressCount *int, maxProgressCount int) error { + logrus.Infof("Running phase %s", phase.Name) + m.status.SetPhase(phase.Name) + if err := phase.Run(); err != nil { + m.status.SetError(err.Error()) + logrus.Errorf("Failed to run phase %s: %s", phase.Name, err.Error()) + return err + } + *progressCount++ + progress := 100 * (*progressCount) / maxProgressCount + m.status.SetProgress(progress) + logrus.Infof("Succeed to run phase %s. Progress (%d).", phase.Name, progress) return nil }