Skip to content

CP-22073: Remove Service Discovery for Node Exporter #36

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Sep 24, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 0 additions & 2 deletions CONFIGURATION.md
Original file line number Diff line number Diff line change
@@ -49,7 +49,6 @@ The `prometheus` section configures Prometheus settings.
| Key | Description | Required | Default Values |
|------------------------------------------|-------------------|----------|----------------|
| kube_state_metrics_service_endpoint | The endpoint for kube-state-metrics service | Mandatory | |
| prometheus_node_exporter_service_endpoint| The endpoint for node-exporter service | Mandatory | |
| configurations | List of one or more configuration file locations for Prometheus to validate | Mandatory | |

## Diagnostics
@@ -76,7 +75,6 @@ The following table describes the available checkers:
| `k8s_version` | Checks the Kubernetes compatibility |
| `egress_reachable` | Checks that the pod can communicate with the CloudZero API |
| `kube_state_metrics_reachable` | Checks the kubernetes state metrics service is reachable |
| `node_exporter_reachable` | Checks the prometheus node exporter service is reachable |
| `scrape_cfg` | Checks the prometheus configurations exist and contain the necessary scrape configuration |

## Example
8 changes: 6 additions & 2 deletions pkg/cmd/config/command.go
Original file line number Diff line number Diff line change
@@ -28,6 +28,7 @@ type ScrapeConfigData struct {
ClusterName string
CloudAccountID string
Region string
Host string
}

func NewCommand(ctx context.Context) *cli.Command {
@@ -46,28 +47,31 @@ func NewCommand(ctx context.Context) *cli.Command {
&cli.StringFlag{Name: "namespace", Usage: "namespace of the cloudzero-agent pod", Required: true},
&cli.StringFlag{Name: "configmap", Usage: "name of the ConfigMap", Required: true},
&cli.StringFlag{Name: "pod", Usage: "name of the cloudzero-agent pod", Required: true},
&cli.StringFlag{Name: "host", Usage: "host for the prometheus remote write endpoint", Required: true},
},
Action: func(c *cli.Context) error {
kubeconfigPath := c.String("kubeconfig")
namespace := c.String("namespace")
configMapName := c.String("configmap")
host := c.String("host")

clientset, err := k8s.BuildKubeClient(kubeconfigPath)
if err != nil {
return err
}

kubeStateMetricsURL, nodeExporterURL, err := k8s.GetServiceURLs(ctx, clientset, namespace)
kubeStateMetricsURL, err := k8s.GetKubeStateMetricsURL(ctx, clientset, namespace)
if err != nil {
return err
}

targets := []string{kubeStateMetricsURL, nodeExporterURL}
targets := []string{kubeStateMetricsURL}
scrapeConfigData := ScrapeConfigData{
Targets: targets,
ClusterName: c.String(config.FlagClusterName),
CloudAccountID: c.String(config.FlagAccountID),
Region: c.String(config.FlagRegion),
Host: host,
}

configContent, err := Generate(scrapeConfigData)
20 changes: 5 additions & 15 deletions pkg/cmd/config/command_test.go
Original file line number Diff line number Diff line change
@@ -31,31 +31,21 @@ func TestGenerate(t *testing.T) {
},
},
},
&corev1.Service{
ObjectMeta: metav1.ObjectMeta{
Name: "node-exporter",
Namespace: namespace,
},
Spec: corev1.ServiceSpec{
Ports: []corev1.ServicePort{
{Port: 9100},
},
},
},
)

ctx, _ := context.WithCancel(context.Background())

// Fetch service URLs
kubeStateMetricsURL, nodeExporterURL, err := k8s.GetServiceURLs(ctx, clientset, namespace)
// Fetch the Kube State Metrics URL
kubeStateMetricsURL, err := k8s.GetKubeStateMetricsURL(ctx, clientset, namespace)
assert.NoError(t, err)

// Define the scrape config data
scrapeConfigData := config.ScrapeConfigData{
Targets: []string{kubeStateMetricsURL, nodeExporterURL},
Targets: []string{kubeStateMetricsURL},
ClusterName: "test-cluster",
CloudAccountID: "123456789",
Region: "us-west-2",
Host: "test-host",
}

// Generate the configuration content
@@ -65,10 +55,10 @@ func TestGenerate(t *testing.T) {

// Validate the dynamically populated values
assert.Contains(t, configContent, kubeStateMetricsURL)
assert.Contains(t, configContent, nodeExporterURL)
assert.Contains(t, configContent, "cluster_name=test-cluster")
assert.Contains(t, configContent, "cloud_account_id=123456789")
assert.Contains(t, configContent, "region=us-west-2")
assert.Contains(t, configContent, "test-host")

// Define the ConfigMap data
configMapData := map[string]string{
6 changes: 3 additions & 3 deletions pkg/cmd/config/internal/scrape_config.tmpl
Original file line number Diff line number Diff line change
@@ -104,7 +104,7 @@ scrape_configs:
metric_relabel_configs:
- source_labels: [__name__]
separator: ;
regex: ^(kube_node_info|kube_node_status_capacity|kube_pod_container_resource_limits|kube_pod_container_resource_requests|kube_pod_labels|kube_pod_info|node_dmi_info)$
regex: ^(kube_node_info|kube_node_status_capacity|kube_pod_container_resource_limits|kube_pod_container_resource_requests|kube_pod_labels|kube_pod_info)$
replacement: $1
action: keep
- separator: ;
@@ -117,12 +117,12 @@ scrape_configs:
- {{ . }}
{{- end }}
remote_write:
- url: https://api.cloudzero.com/v1/container-metrics?cluster_name={{ .ClusterName }}&cloud_account_id={{ .CloudAccountID }}&region={{ .Region }}
- url: https://{{ .Host }}/v1/container-metrics?cluster_name={{ .ClusterName }}&cloud_account_id={{ .CloudAccountID }}&region={{ .Region }}
remote_timeout: 30s
write_relabel_configs:
- source_labels: [__name__]
separator: ;
regex: ^(kube_node_info|kube_node_status_capacity|kube_pod_container_resource_limits|kube_pod_container_resource_requests|kube_pod_labels|kube_pod_info|node_dmi_info|container_cpu_usage_seconds_total|container_memory_working_set_bytes|container_network_receive_bytes_total|container_network_transmit_bytes_total)$
regex: ^(kube_node_info|kube_node_status_capacity|kube_pod_container_resource_limits|kube_pod_container_resource_requests|kube_pod_labels|kube_pod_info|container_cpu_usage_seconds_total|container_memory_working_set_bytes|container_network_receive_bytes_total|container_network_transmit_bytes_total)$
replacement: $1
action: keep
authorization:
2 changes: 0 additions & 2 deletions pkg/cmd/config/internal/template.yml
Original file line number Diff line number Diff line change
@@ -17,7 +17,6 @@ cloudzero:

prometheus:
kube_state_metrics_service_endpoint: {{ .KubeStateMetricsURL }}
prometheus_node_exporter_service_endpoint: {{ .PromNodeExporterURL }}
configurations:
- /etc/config/prometheus/configmaps/prometheus.yml

@@ -33,7 +32,6 @@ diagnostics:
checks:
- k8s_version
- kube_state_metrics_reachable
- node_exporter_reachable
- scrape_cfg
- name: pre-stop
enforce: false
3 changes: 1 addition & 2 deletions pkg/config/diagnostics.go
Original file line number Diff line number Diff line change
@@ -10,7 +10,6 @@ const (
DiagnosticK8sVersion string = "k8s_version"
DiagnosticEgressAccess string = "egress_reachable"
DiagnosticKMS string = "kube_state_metrics_reachable"
DiagnosticNodeExporter string = "node_exporter_reachable"
DiagnosticPrometheusVersion string = "prometheus_version"
DiagnosticScrapeConfig string = "scrape_cfg"
)
@@ -27,7 +26,7 @@ func IsValidDiagnostic(d string) bool {
d = strings.ToLower(strings.TrimSpace(d))
switch d {
case DiagnosticAPIKey, DiagnosticK8sVersion, DiagnosticEgressAccess,
DiagnosticKMS, DiagnosticNodeExporter, DiagnosticScrapeConfig,
DiagnosticKMS, DiagnosticScrapeConfig,
DiagnosticPrometheusVersion:
return true
}
5 changes: 0 additions & 5 deletions pkg/config/diagnostics_test.go
Original file line number Diff line number Diff line change
@@ -33,11 +33,6 @@ func TestDiagnostics_IsValidDiagnostics(t *testing.T) {
diagnostic: config.DiagnosticKMS,
expected: true,
},
{
name: "DiagnosticNodeExporter",
diagnostic: config.DiagnosticNodeExporter,
expected: true,
},
{
name: "DiagnosticScrapeConfig",
diagnostic: config.DiagnosticScrapeConfig,
25 changes: 12 additions & 13 deletions pkg/config/error.go
Original file line number Diff line number Diff line change
@@ -1,17 +1,16 @@
package config

const (
ErrNoLifecycleStageMsg = "missing Lifecycle Stage"
ErrNoEnvFileMsg = "missing Env File"
ErrNoKeyFileMsg = "missing Key File"
ErrNoCloudZeroHostMsg = "missing CloudZero Host"
ErrNoAccountIDMsg = "missing AWS Account ID"
ErrNoClusterNameMsg = "missing Cluster Name"
ErrNoRegionMsg = "missing AWS Region"
ErrNoSecretFilePathMsg = "missing Secret File"
ErrNoAgentVersionMsg = "missing Agent Version"
ErrNoChartVersionMsg = "missing Chart Version"
ErrNoScrapeConfigLocationMsg = "missing Scrape Config Location"
ErrNoKubeStateMetricsServiceEndpointMsg = "missing Kube State Metrics Service Endpoint"
ErrNoPrometheusNodeExporterServiceEndpointMsg = "missing Prometheus Node Exporter Service Endpoint"
ErrNoLifecycleStageMsg = "missing Lifecycle Stage"
ErrNoEnvFileMsg = "missing Env File"
ErrNoKeyFileMsg = "missing Key File"
ErrNoCloudZeroHostMsg = "missing CloudZero Host"
ErrNoAccountIDMsg = "missing AWS Account ID"
ErrNoClusterNameMsg = "missing Cluster Name"
ErrNoRegionMsg = "missing AWS Region"
ErrNoSecretFilePathMsg = "missing Secret File"
ErrNoAgentVersionMsg = "missing Agent Version"
ErrNoChartVersionMsg = "missing Chart Version"
ErrNoScrapeConfigLocationMsg = "missing Scrape Config Location"
ErrNoKubeStateMetricsServiceEndpointMsg = "missing Kube State Metrics Service Endpoint"
)
14 changes: 3 additions & 11 deletions pkg/config/prometheus.go
Original file line number Diff line number Diff line change
@@ -8,10 +8,9 @@ import (
)

type Prometheus struct {
Executable string `yaml:"executable" default:"/bin/prometheus" env:"PROMETHEUS_EXECUTABLE" env-description:"Prometheus Executable Path"`
KubeStateMetricsServiceEndpoint string `yaml:"kube_state_metrics_service_endpoint" env:"KMS_EP_URL" required:"true" env-description:"Kube State Metrics Service Endpoint"`
PrometheusNodeExporterServiceEndpoint string `yaml:"prometheus_node_exporter_service_endpoint" env:"NODE_EXPORTER_EP_URL" required:"true" env-description:"Prometheus Node Exporter Service Endpoint"`
Configurations []string `yaml:"configurations"`
Executable string `yaml:"executable" default:"/bin/prometheus" env:"PROMETHEUS_EXECUTABLE" env-description:"Prometheus Executable Path"`
KubeStateMetricsServiceEndpoint string `yaml:"kube_state_metrics_service_endpoint" env:"KMS_EP_URL" required:"true" env-description:"Kube State Metrics Service Endpoint"`
Configurations []string `yaml:"configurations"`
}

func (s *Prometheus) Validate() error {
@@ -22,13 +21,6 @@ func (s *Prometheus) Validate() error {
return fmt.Errorf("invalid %s", s.KubeStateMetricsServiceEndpoint)
}

if s.PrometheusNodeExporterServiceEndpoint == "" {
return errors.New(ErrNoPrometheusNodeExporterServiceEndpointMsg)
}
if !isValidURL(s.PrometheusNodeExporterServiceEndpoint) {
return fmt.Errorf("URL format invalid: %s", s.PrometheusNodeExporterServiceEndpoint)
}

if len(s.Configurations) == 0 {
s.Configurations = []string{
"/etc/prometheus/prometheus.yml",
19 changes: 4 additions & 15 deletions pkg/config/prometheus_test.go
Original file line number Diff line number Diff line change
@@ -21,33 +21,22 @@ func TestPrometheus_Validate(t *testing.T) {
{
name: "ValidPrometheus",
prom: config.Prometheus{
KubeStateMetricsServiceEndpoint: kmsServiceEndpoint,
PrometheusNodeExporterServiceEndpoint: promNodeExporterServiceEndpoint,
Configurations: []string{scrapeConfigFile},
KubeStateMetricsServiceEndpoint: kmsServiceEndpoint,
Configurations: []string{scrapeConfigFile},
},
expected: nil,
},
{
name: "MissingKubeStateMetricsServiceEndpoint",
prom: config.Prometheus{
PrometheusNodeExporterServiceEndpoint: promNodeExporterServiceEndpoint,
Configurations: []string{scrapeConfigFile},
Configurations: []string{scrapeConfigFile},
},
expected: errors.New(config.ErrNoKubeStateMetricsServiceEndpointMsg),
},
{
name: "MissingPrometheusNodeExporterServiceEndpoint",
prom: config.Prometheus{
KubeStateMetricsServiceEndpoint: kmsServiceEndpoint,
Configurations: []string{scrapeConfigFile},
},
expected: errors.New(config.ErrNoPrometheusNodeExporterServiceEndpointMsg),
},
{
name: "MissingScrapeConfigLocation",
prom: config.Prometheus{
KubeStateMetricsServiceEndpoint: kmsServiceEndpoint,
PrometheusNodeExporterServiceEndpoint: promNodeExporterServiceEndpoint,
KubeStateMetricsServiceEndpoint: kmsServiceEndpoint,
},
expected: nil,
},
4 changes: 1 addition & 3 deletions pkg/config/settings_test.go
Original file line number Diff line number Diff line change
@@ -16,8 +16,7 @@ const (
apiHost = "https://api.cloudzero.com"
apiKey = "my-cloudzero-token"

kmsServiceEndpoint = "http://kube-state-metrics:8080"
promNodeExporterServiceEndpoint = "http://node-exporter:8080"
kmsServiceEndpoint = "http://kube-state-metrics:8080"
)

func TestSettings_NewSettings(t *testing.T) {
@@ -50,7 +49,6 @@ func TestSettings_NewSettings(t *testing.T) {

// verify Prometheus
assert.Equal(t, kmsServiceEndpoint, settings.Prometheus.KubeStateMetricsServiceEndpoint)
assert.Equal(t, promNodeExporterServiceEndpoint, settings.Prometheus.PrometheusNodeExporterServiceEndpoint)
assert.Equal(t, []string{"prometheus.yml"}, settings.Prometheus.Configurations)

// verify Diagnostics
2 changes: 0 additions & 2 deletions pkg/config/testdata/cloudzero-agent-validator.yml
Original file line number Diff line number Diff line change
@@ -18,7 +18,6 @@ cloudzero:

prometheus:
kube_state_metrics_service_endpoint: http://kube-state-metrics:8080
prometheus_node_exporter_service_endpoint: http://node-exporter:8080
configurations:
- prometheus.yml

@@ -35,7 +34,6 @@ diagnostics:
- k8s_version
- egress_reachable
- kube_state_metrics_reachable
- node_exporter_reachable
- scrape_cfg
- name: pre-stop
enforce: false
1 change: 0 additions & 1 deletion pkg/config/testdata/file.env
Original file line number Diff line number Diff line change
@@ -11,4 +11,3 @@ LOG_LEVEL=info
LOG_LOCATION=cloudzero-agent-validator.log

KMS_EP_URL='http://cloudzero-agent-kube-state-metrics:8080/'
NODE_EXPORTER_EP_URL='http://node-exporter.monitoring.svc.cluster.local:9100/'
2 changes: 0 additions & 2 deletions pkg/diagnostic/catalog/catalog.go
Original file line number Diff line number Diff line change
@@ -10,7 +10,6 @@ import (
"github.com/cloudzero/cloudzero-agent-validator/pkg/diagnostic/egress"
"github.com/cloudzero/cloudzero-agent-validator/pkg/diagnostic/k8s"
"github.com/cloudzero/cloudzero-agent-validator/pkg/diagnostic/kms"
"github.com/cloudzero/cloudzero-agent-validator/pkg/diagnostic/pne"
promcfg "github.com/cloudzero/cloudzero-agent-validator/pkg/diagnostic/prom/config"
promver "github.com/cloudzero/cloudzero-agent-validator/pkg/diagnostic/prom/version"
"github.com/cloudzero/cloudzero-agent-validator/pkg/diagnostic/stage"
@@ -42,7 +41,6 @@ func NewCatalog(ctx context.Context, c *config.Settings) Registry {
r.add(config.DiagnosticEgressAccess, false, egress.NewProvider(ctx, c))
r.add(config.DiagnosticK8sVersion, false, k8s.NewProvider(ctx, c))
r.add(config.DiagnosticKMS, false, kms.NewProvider(ctx, c))
r.add(config.DiagnosticNodeExporter, false, pne.NewProvider(ctx, c))
r.add(config.DiagnosticScrapeConfig, false, promcfg.NewProvider(ctx, c))
r.add(config.DiagnosticPrometheusVersion, false, promver.NewProvider(ctx, c))

2 changes: 1 addition & 1 deletion pkg/diagnostic/catalog/catalog_test.go
Original file line number Diff line number Diff line change
@@ -48,5 +48,5 @@ func TestRegistry_List(t *testing.T) {

// Test listing providers
providers := r.List()
assert.Len(t, providers, 7)
assert.Len(t, providers, 6) // Update the expected length to 6
}
Loading