diff --git a/CONFIGURATION.md b/CONFIGURATION.md index 34665d4..fce4543 100644 --- a/CONFIGURATION.md +++ b/CONFIGURATION.md @@ -49,7 +49,6 @@ The `prometheus` section configures Prometheus settings. | Key | Description | Required | Default Values | |------------------------------------------|-------------------|----------|----------------| | kube_state_metrics_service_endpoint | The endpoint for kube-state-metrics service | Mandatory | | -| prometheus_node_exporter_service_endpoint| The endpoint for node-exporter service | Mandatory | | | configurations | List of one or more configuration files locations for prometheus to validate | Mandatory | | ## Diagnostics @@ -76,7 +75,6 @@ The following table describes the available checkers: | `k8s_version` | Checks the Kubernetes compatability | | `egress_reachable` | Checks pod can communicate with the Cloudzero API | | `kube_state_metrics_reachable` | Checks the kubernetes state metrics service is reachable | -| `node_exporter_reachable` | Checks the prometheus node exporter service is reachable | | `scrape_cfg` | Checks the prometheus configurations exist and contain the necessary scrape configuration | ## Example diff --git a/pkg/cmd/config/internal/template.yml b/pkg/cmd/config/internal/template.yml index d21c8aa..9cbd3b6 100644 --- a/pkg/cmd/config/internal/template.yml +++ b/pkg/cmd/config/internal/template.yml @@ -17,7 +17,6 @@ cloudzero: prometheus: kube_state_metrics_service_endpoint: {{ .KubeStateMetricsURL }} - prometheus_node_exporter_service_endpoint: {{ .PromNodeExporterURL }} configurations: - /etc/config/prometheus/configmaps/prometheus.yml @@ -33,7 +32,6 @@ diagnostics: checks: - k8s_version - kube_state_metrics_reachable - - node_exporter_reachable - scrape_cfg - name: pre-stop enforce: false diff --git a/pkg/config/diagnostics.go b/pkg/config/diagnostics.go index 48aa3cf..69f354f 100644 --- a/pkg/config/diagnostics.go +++ b/pkg/config/diagnostics.go @@ -10,7 +10,6 @@ const ( DiagnosticK8sVersion string = "k8s_version" DiagnosticEgressAccess string = "egress_reachable" DiagnosticKMS string = "kube_state_metrics_reachable" - DiagnosticNodeExporter string = "node_exporter_reachable" DiagnosticPrometheusVersion string = "prometheus_version" DiagnosticScrapeConfig string = "scrape_cfg" ) @@ -27,7 +26,7 @@ func IsValidDiagnostic(d string) bool { d = strings.ToLower(strings.TrimSpace(d)) switch d { case DiagnosticAPIKey, DiagnosticK8sVersion, DiagnosticEgressAccess, - DiagnosticKMS, DiagnosticNodeExporter, DiagnosticScrapeConfig, + DiagnosticKMS, DiagnosticScrapeConfig, DiagnosticPrometheusVersion: return true } diff --git a/pkg/config/diagnostics_test.go b/pkg/config/diagnostics_test.go index 93881ee..26885c9 100644 --- a/pkg/config/diagnostics_test.go +++ b/pkg/config/diagnostics_test.go @@ -33,11 +33,6 @@ func TestDiagnostics_IsValidDiagnostics(t *testing.T) { diagnostic: config.DiagnosticKMS, expected: true, }, - { - name: "DiagnosticNodeExporter", - diagnostic: config.DiagnosticNodeExporter, - expected: true, - }, { name: "DiagnosticScrapeConfig", diagnostic: config.DiagnosticScrapeConfig, diff --git a/pkg/config/error.go b/pkg/config/error.go index de3899a..c456fa9 100644 --- a/pkg/config/error.go +++ b/pkg/config/error.go @@ -13,5 +13,4 @@ const ( ErrNoChartVersionMsg = "missing Chart Version" ErrNoScrapeConfigLocationMsg = "missing Scrape Config Location" ErrNoKubeStateMetricsServiceEndpointMsg = "missing Kube State Metrics Service Endpoint" - ErrNoPrometheusNodeExporterServiceEndpointMsg = "missing Prometheus Node Exporter Service Endpoint" ) diff --git a/pkg/config/prometheus.go b/pkg/config/prometheus.go index ed9aed3..654949c 100644 --- a/pkg/config/prometheus.go +++ b/pkg/config/prometheus.go @@ -10,7 +10,6 @@ import ( type Prometheus struct { Executable string `yaml:"executable" default:"/bin/prometheus" env:"PROMETHEUS_EXECUTABLE" env-description:"Prometheus Executable Path"` KubeStateMetricsServiceEndpoint string `yaml:"kube_state_metrics_service_endpoint" env:"KMS_EP_URL" required:"true" env-description:"Kube State Metrics Service Endpoint"` - PrometheusNodeExporterServiceEndpoint string `yaml:"prometheus_node_exporter_service_endpoint" env:"NODE_EXPORTER_EP_URL" required:"true" env-description:"Prometheus Node Exporter Service Endpoint"` Configurations []string `yaml:"configurations"` } @@ -22,13 +21,6 @@ func (s *Prometheus) Validate() error { return fmt.Errorf("invalid %s", s.KubeStateMetricsServiceEndpoint) } - if s.PrometheusNodeExporterServiceEndpoint == "" { - return errors.New(ErrNoPrometheusNodeExporterServiceEndpointMsg) - } - if !isValidURL(s.PrometheusNodeExporterServiceEndpoint) { - return fmt.Errorf("URL format invalid: %s", s.PrometheusNodeExporterServiceEndpoint) - } - if len(s.Configurations) == 0 { s.Configurations = []string{ "/etc/prometheus/prometheus.yml", diff --git a/pkg/config/prometheus_test.go b/pkg/config/prometheus_test.go index c05b4e5..fb06514 100644 --- a/pkg/config/prometheus_test.go +++ b/pkg/config/prometheus_test.go @@ -1,66 +1,55 @@ package config_test import ( - "os" - "testing" + "os" + "testing" - "github.com/cloudzero/cloudzero-agent-validator/pkg/config" - "github.com/pkg/errors" - "github.com/stretchr/testify/assert" + "github.com/cloudzero/cloudzero-agent-validator/pkg/config" + "github.com/pkg/errors" + "github.com/stretchr/testify/assert" ) func TestPrometheus_Validate(t *testing.T) { - wd, err := os.Getwd() - assert.NoError(t, err) - scrapeConfigFile := wd + "/testdata/prometheus.yml" - tests := []struct { - name string - prom config.Prometheus - expected error - }{ - { - name: "ValidPrometheus", - prom: config.Prometheus{ - KubeStateMetricsServiceEndpoint: kmsServiceEndpoint, - PrometheusNodeExporterServiceEndpoint: promNodeExporterServiceEndpoint, - Configurations: []string{scrapeConfigFile}, - }, - expected: nil, - }, - { - name: "MissingKubeStateMetricsServiceEndpoint", - prom: config.Prometheus{ - PrometheusNodeExporterServiceEndpoint: promNodeExporterServiceEndpoint, - Configurations: []string{scrapeConfigFile}, - }, - expected: errors.New(config.ErrNoKubeStateMetricsServiceEndpointMsg), - }, - { - name: "MissingPrometheusNodeExporterServiceEndpoint", - prom: config.Prometheus{ - KubeStateMetricsServiceEndpoint: kmsServiceEndpoint, - Configurations: []string{scrapeConfigFile}, - }, - expected: errors.New(config.ErrNoPrometheusNodeExporterServiceEndpointMsg), - }, - { - name: "MissingScrapeConfigLocation", - prom: config.Prometheus{ - KubeStateMetricsServiceEndpoint: kmsServiceEndpoint, - PrometheusNodeExporterServiceEndpoint: promNodeExporterServiceEndpoint, - }, - expected: nil, - }, - } + wd, err := os.Getwd() + assert.NoError(t, err) + scrapeConfigFile := wd + "/testdata/prometheus.yml" + tests := []struct { + name string + prom config.Prometheus + expected error + }{ + { + name: "ValidPrometheus", + prom: config.Prometheus{ + KubeStateMetricsServiceEndpoint: kmsServiceEndpoint, + Configurations: []string{scrapeConfigFile}, + }, + expected: nil, + }, + { + name: "MissingKubeStateMetricsServiceEndpoint", + prom: config.Prometheus{ + Configurations: []string{scrapeConfigFile}, + }, + expected: errors.New(config.ErrNoKubeStateMetricsServiceEndpointMsg), + }, + { + name: "MissingScrapeConfigLocation", + prom: config.Prometheus{ + KubeStateMetricsServiceEndpoint: kmsServiceEndpoint, + }, + expected: nil, + }, + } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - err := tt.prom.Validate() - if tt.expected == nil { - assert.NoError(t, err) - return - } - assert.Equal(t, tt.expected.Error(), err.Error()) - }) - } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + err := tt.prom.Validate() + if tt.expected == nil { + assert.NoError(t, err) + return + } + assert.Equal(t, tt.expected.Error(), err.Error()) + }) + } } diff --git a/pkg/config/settings_test.go b/pkg/config/settings_test.go index 7df2bb9..4489acd 100644 --- a/pkg/config/settings_test.go +++ b/pkg/config/settings_test.go @@ -17,7 +17,6 @@ const ( apiKey = "my-cloudzero-token" kmsServiceEndpoint = "http://kube-state-metrics:8080" - promNodeExporterServiceEndpoint = "http://node-exporter:8080" ) func TestSettings_NewSettings(t *testing.T) { @@ -50,7 +49,6 @@ func TestSettings_NewSettings(t *testing.T) { // verify Prometheus assert.Equal(t, kmsServiceEndpoint, settings.Prometheus.KubeStateMetricsServiceEndpoint) - assert.Equal(t, promNodeExporterServiceEndpoint, settings.Prometheus.PrometheusNodeExporterServiceEndpoint) assert.Equal(t, []string{"prometheus.yml"}, settings.Prometheus.Configurations) // verify Diagnostics diff --git a/pkg/config/testdata/cloudzero-agent-validator.yml b/pkg/config/testdata/cloudzero-agent-validator.yml index 8326f04..26053b3 100644 --- a/pkg/config/testdata/cloudzero-agent-validator.yml +++ b/pkg/config/testdata/cloudzero-agent-validator.yml @@ -18,7 +18,6 @@ cloudzero: prometheus: kube_state_metrics_service_endpoint: http://kube-state-metrics:8080 - prometheus_node_exporter_service_endpoint: http://node-exporter:8080 configurations: - prometheus.yml @@ -35,7 +34,6 @@ diagnostics: - k8s_version - egress_reachable - kube_state_metrics_reachable - - node_exporter_reachable - scrape_cfg - name: pre-stop enforce: false diff --git a/pkg/config/testdata/file.env b/pkg/config/testdata/file.env index b0dbfb9..b4ef5f7 100644 --- a/pkg/config/testdata/file.env +++ b/pkg/config/testdata/file.env @@ -11,4 +11,3 @@ LOG_LEVEL=info LOG_LOCATION=cloudzero-agent-validator.log KMS_EP_URL='http://cloudzero-agent-kube-state-metrics:8080/' -NODE_EXPORTER_EP_URL='http://node-exporter.monitoring.svc.cluster.local:9100/' diff --git a/pkg/diagnostic/catalog/catalog.go b/pkg/diagnostic/catalog/catalog.go index b1d6cb8..9f27c8c 100644 --- a/pkg/diagnostic/catalog/catalog.go +++ b/pkg/diagnostic/catalog/catalog.go @@ -10,7 +10,6 @@ import ( "github.com/cloudzero/cloudzero-agent-validator/pkg/diagnostic/egress" "github.com/cloudzero/cloudzero-agent-validator/pkg/diagnostic/k8s" "github.com/cloudzero/cloudzero-agent-validator/pkg/diagnostic/kms" - "github.com/cloudzero/cloudzero-agent-validator/pkg/diagnostic/pne" promcfg "github.com/cloudzero/cloudzero-agent-validator/pkg/diagnostic/prom/config" promver "github.com/cloudzero/cloudzero-agent-validator/pkg/diagnostic/prom/version" "github.com/cloudzero/cloudzero-agent-validator/pkg/diagnostic/stage" @@ -42,7 +41,6 @@ func NewCatalog(ctx context.Context, c *config.Settings) Registry { r.add(config.DiagnosticEgressAccess, false, egress.NewProvider(ctx, c)) r.add(config.DiagnosticK8sVersion, false, k8s.NewProvider(ctx, c)) r.add(config.DiagnosticKMS, false, kms.NewProvider(ctx, c)) - r.add(config.DiagnosticNodeExporter, false, pne.NewProvider(ctx, c)) r.add(config.DiagnosticScrapeConfig, false, promcfg.NewProvider(ctx, c)) r.add(config.DiagnosticPrometheusVersion, false, promver.NewProvider(ctx, c)) diff --git a/pkg/diagnostic/catalog/catalog_test.go b/pkg/diagnostic/catalog/catalog_test.go index f39c3cf..a37b51b 100644 --- a/pkg/diagnostic/catalog/catalog_test.go +++ b/pkg/diagnostic/catalog/catalog_test.go @@ -1,52 +1,52 @@ package catalog_test import ( - "context" - "testing" + "context" + "testing" - "github.com/cloudzero/cloudzero-agent-validator/pkg/config" - "github.com/cloudzero/cloudzero-agent-validator/pkg/diagnostic/catalog" - "github.com/stretchr/testify/assert" + "github.com/cloudzero/cloudzero-agent-validator/pkg/config" + "github.com/cloudzero/cloudzero-agent-validator/pkg/diagnostic/catalog" + "github.com/stretchr/testify/assert" ) func TestRegistry_Get(t *testing.T) { - ctx := context.Background() - c := &config.Settings{} - r := catalog.NewCatalog(ctx, c) + ctx := context.Background() + c := &config.Settings{} + r := catalog.NewCatalog(ctx, c) - // Test getting providers with existing IDs - providers := r.Get(config.DiagnosticAPIKey, config.DiagnosticK8sVersion) - assert.Len(t, providers, 2) + // Test getting providers with existing IDs + providers := r.Get(config.DiagnosticAPIKey, config.DiagnosticK8sVersion) + assert.Len(t, providers, 2) - // Test getting providers with non-existing IDs - providers = r.Get("non-existing-id") - assert.Empty(t, providers) + // Test getting providers with non-existing IDs + providers = r.Get("non-existing-id") + assert.Empty(t, providers) - // Test getting providers with empty IDs - providers = r.Get() - assert.Empty(t, providers) + // Test getting providers with empty IDs + providers = r.Get() + assert.Empty(t, providers) } func TestRegistry_Has(t *testing.T) { - ctx := context.Background() - c := &config.Settings{} - r := catalog.NewCatalog(ctx, c) + ctx := context.Background() + c := &config.Settings{} + r := catalog.NewCatalog(ctx, c) - // Test checking for existing ID - has := r.Has(config.DiagnosticAPIKey) - assert.True(t, has) + // Test checking for existing ID + has := r.Has(config.DiagnosticAPIKey) + assert.True(t, has) - // Test checking for non-existing ID - has = r.Has("non-existing-id") - assert.False(t, has) + // Test checking for non-existing ID + has = r.Has("non-existing-id") + assert.False(t, has) } func TestRegistry_List(t *testing.T) { - ctx := context.Background() - c := &config.Settings{} - r := catalog.NewCatalog(ctx, c) + ctx := context.Background() + c := &config.Settings{} + r := catalog.NewCatalog(ctx, c) - // Test listing providers - providers := r.List() - assert.Len(t, providers, 7) + // Test listing providers + providers := r.List() + assert.Len(t, providers, 6) // Update the expected length to 6 } diff --git a/pkg/diagnostic/pne/check.go b/pkg/diagnostic/pne/check.go deleted file mode 100644 index 303b2d0..0000000 --- a/pkg/diagnostic/pne/check.go +++ /dev/null @@ -1,69 +0,0 @@ -package pne - -import ( - "context" - "fmt" - net "net/http" - "time" - - "github.com/cloudzero/cloudzero-agent-validator/pkg/config" - "github.com/cloudzero/cloudzero-agent-validator/pkg/diagnostic" - "github.com/cloudzero/cloudzero-agent-validator/pkg/http" - "github.com/cloudzero/cloudzero-agent-validator/pkg/logging" - "github.com/cloudzero/cloudzero-agent-validator/pkg/status" - "github.com/sirupsen/logrus" -) - -const DiagnosticNodeExporter = config.DiagnosticNodeExporter - -var ( - // Exported so that it can be overridden in tests - MaxRetry = 12 - RetryInterval = 10 * time.Second -) - -type checker struct { - cfg *config.Settings - logger *logrus.Entry -} - -func NewProvider(ctx context.Context, cfg *config.Settings) diagnostic.Provider { - return &checker{ - cfg: cfg, - logger: logging.NewLogger(). - WithContext(ctx).WithField(logging.OpField, "pne"), - } -} - -func (c *checker) Check(ctx context.Context, client *net.Client, accessor status.Accessor) error { - var ( - err error - retriesRemaining = MaxRetry - url = fmt.Sprintf("%s/", c.cfg.Prometheus.PrometheusNodeExporterServiceEndpoint) - ) - - // We need to build in a retry here because the prometheus-node-exporter - // service can take a few seconds to start up - // If it is deploying with the cloudzero-agent chart - for { - _, err = http.Do(ctx, client, net.MethodGet, nil, nil, url, nil) - if err == nil { - break - } - if retriesRemaining == 0 { - break - } - - retriesRemaining-- - time.Sleep(RetryInterval) - } - - if err != nil { - accessor.AddCheck(&status.StatusCheck{Name: DiagnosticNodeExporter, Passing: false, Error: err.Error()}) - return nil - } - - accessor.AddCheck(&status.StatusCheck{Name: DiagnosticNodeExporter, Passing: true}) - return nil - -} diff --git a/pkg/diagnostic/pne/check_test.go b/pkg/diagnostic/pne/check_test.go deleted file mode 100644 index e1d7ff7..0000000 --- a/pkg/diagnostic/pne/check_test.go +++ /dev/null @@ -1,107 +0,0 @@ -package pne_test - -import ( - "context" - "net/http" - "testing" - "time" - - "github.com/stretchr/testify/assert" - - "github.com/cloudzero/cloudzero-agent-validator/pkg/config" - "github.com/cloudzero/cloudzero-agent-validator/pkg/diagnostic/pne" - "github.com/cloudzero/cloudzero-agent-validator/pkg/status" - "github.com/cloudzero/cloudzero-agent-validator/test" -) - -const ( - mockURL = "http://example.com" -) - -func makeReport() status.Accessor { - return status.NewAccessor(&status.ClusterStatus{}) -} - -func TestChecker_CheckOK(t *testing.T) { - cfg := &config.Settings{ - Prometheus: config.Prometheus{ - PrometheusNodeExporterServiceEndpoint: mockURL, - }, - } - provider := pne.NewProvider(context.Background(), cfg) - - mock := test.NewHTTPMock() - mock.Expect(http.MethodGet, "Hello World", http.StatusOK, nil) - client := mock.HTTPClient() - - accessor := makeReport() - - err := provider.Check(context.Background(), client, accessor) - assert.NoError(t, err) - - accessor.ReadFromReport(func(s *status.ClusterStatus) { - assert.Len(t, s.Checks, 1) - for _, c := range s.Checks { - assert.True(t, c.Passing) - } - }) -} - -func TestChecker_CheckRetry(t *testing.T) { - cfg := &config.Settings{ - Prometheus: config.Prometheus{ - PrometheusNodeExporterServiceEndpoint: mockURL, - }, - } - provider := pne.NewProvider(context.Background(), cfg) - - // Update the test sleep interval to accellerate the test - pne.RetryInterval = 10 * time.Millisecond - pne.MaxRetry = 1 - - mock := test.NewHTTPMock() - mock.Expect(http.MethodGet, "", http.StatusNotFound, nil) - mock.Expect(http.MethodGet, "Hello World", http.StatusOK, nil) - client := mock.HTTPClient() - - accessor := makeReport() - - err := provider.Check(context.Background(), client, accessor) - assert.NoError(t, err) - - accessor.ReadFromReport(func(s *status.ClusterStatus) { - assert.Len(t, s.Checks, 1) - for _, c := range s.Checks { - assert.True(t, c.Passing) - } - }) -} - -func TestChecker_CheckRetryFailure(t *testing.T) { - cfg := &config.Settings{ - Prometheus: config.Prometheus{ - PrometheusNodeExporterServiceEndpoint: mockURL, - }, - } - provider := pne.NewProvider(context.Background(), cfg) - - // Update the test sleep interval to accellerate the test - pne.RetryInterval = 10 * time.Millisecond - pne.MaxRetry = 0 - - mock := test.NewHTTPMock() - mock.Expect(http.MethodGet, "", http.StatusNotFound, nil) - client := mock.HTTPClient() - - accessor := makeReport() - - err := provider.Check(context.Background(), client, accessor) - assert.NoError(t, err) - - accessor.ReadFromReport(func(s *status.ClusterStatus) { - assert.Len(t, s.Checks, 1) - for _, c := range s.Checks { - assert.False(t, c.Passing) - } - }) -}