From 5746e4b2dad02eaf6cca47172bbbd434ca2ee855 Mon Sep 17 00:00:00 2001 From: Pete Wall Date: Fri, 3 Jan 2025 17:43:56 -0600 Subject: [PATCH] Add DPM checks for control plane integration test and add a test for annotation autodiscovery Signed-off-by: Pete Wall --- .../.rendered/output.yaml | 618 ++++++++++++++++++ .../deployments/cert-manager.yaml | 36 + .../deployments/grafana.yaml | 39 ++ .../deployments/prometheus.yaml | 61 ++ .../deployments/query-test.yaml | 49 ++ .../anntoation-autodiscovery/values.yaml | 18 + .../.rendered/output.yaml | 74 +-- .../deployments/query-test.yaml | 7 + .../control-plane-monitoring/values.yaml | 12 +- 9 files changed, 875 insertions(+), 39 deletions(-) create mode 100644 charts/k8s-monitoring/tests/integration/anntoation-autodiscovery/.rendered/output.yaml create mode 100644 charts/k8s-monitoring/tests/integration/anntoation-autodiscovery/deployments/cert-manager.yaml create mode 100644 charts/k8s-monitoring/tests/integration/anntoation-autodiscovery/deployments/grafana.yaml create mode 100644 charts/k8s-monitoring/tests/integration/anntoation-autodiscovery/deployments/prometheus.yaml create mode 100644 charts/k8s-monitoring/tests/integration/anntoation-autodiscovery/deployments/query-test.yaml create mode 100644 charts/k8s-monitoring/tests/integration/anntoation-autodiscovery/values.yaml diff --git a/charts/k8s-monitoring/tests/integration/anntoation-autodiscovery/.rendered/output.yaml b/charts/k8s-monitoring/tests/integration/anntoation-autodiscovery/.rendered/output.yaml new file mode 100644 index 000000000..f52ff40bc --- /dev/null +++ b/charts/k8s-monitoring/tests/integration/anntoation-autodiscovery/.rendered/output.yaml @@ -0,0 +1,618 @@ +--- +# Source: k8s-monitoring/charts/alloy-metrics/templates/serviceaccount.yaml +apiVersion: v1 +kind: ServiceAccount +metadata: + name: k8smon-alloy-metrics + namespace: default + labels: + helm.sh/chart: alloy-metrics-0.10.1 + app.kubernetes.io/name: alloy-metrics + app.kubernetes.io/instance: k8smon + + app.kubernetes.io/version: "v1.5.1" + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/part-of: alloy + app.kubernetes.io/component: rbac +--- +# Source: k8s-monitoring/templates/alloy-config.yaml +apiVersion: v1 +kind: ConfigMap +metadata: + name: k8smon-alloy-metrics + namespace: default +data: + config.alloy: |- + // Destination: prometheus (prometheus) + otelcol.exporter.prometheus "prometheus" { + add_metric_suffixes = true + forward_to = [prometheus.remote_write.prometheus.receiver] + } + + prometheus.remote_write "prometheus" { + endpoint { + url = "http://prometheus-server.prometheus.svc:9090/api/v1/write" + headers = { + } + tls_config { + insecure_skip_verify = false + } + send_native_histograms = false + + queue_config { + capacity = 10000 + min_shards = 1 + max_shards = 50 + max_samples_per_send = 2000 + batch_send_deadline = "5s" + min_backoff = "30ms" + max_backoff = "5s" + retry_on_http_429 = true + sample_age_limit = "0s" + } + + write_relabel_config { + source_labels = ["cluster"] + regex = "" + replacement = "annotation-autodiscovery-test" + target_label = "cluster" + } + write_relabel_config { + source_labels = ["k8s.cluster.name"] + regex = "" + replacement = "annotation-autodiscovery-test" + target_label = "cluster" + } + } + + wal { + truncate_frequency = "2h" + min_keepalive_time = "5m" + max_keepalive_time = "8h" + } + } + + // Feature: Annotation Autodiscovery + declare "annotation_autodiscovery" { + argument "metrics_destinations" { + comment = "Must be a list of metric 
destinations where collected metrics should be forwarded to" + } + + discovery.kubernetes "pods" { + role = "pod" + } + + discovery.relabel "annotation_autodiscovery_pods" { + targets = discovery.kubernetes.pods.targets + rule { + source_labels = ["__meta_kubernetes_pod_annotation_k8s_grafana_com_scrape"] + regex = "true" + action = "keep" + } + rule { + source_labels = ["__meta_kubernetes_pod_annotation_k8s_grafana_com_job"] + action = "replace" + target_label = "job" + } + rule { + source_labels = ["__meta_kubernetes_pod_annotation_k8s_grafana_com_instance"] + action = "replace" + target_label = "instance" + } + rule { + source_labels = ["__meta_kubernetes_pod_annotation_k8s_grafana_com_metrics_path"] + action = "replace" + target_label = "__metrics_path__" + } + + // Choose the pod port + // The discovery generates a target for each declared container port of the pod. + // If the metricsPortName annotation has value, keep only the target where the port name matches the one of the annotation. + rule { + source_labels = ["__meta_kubernetes_pod_container_port_name"] + target_label = "__tmp_port" + } + rule { + source_labels = ["__meta_kubernetes_pod_annotation_k8s_grafana_com_metrics_portName"] + regex = "(.+)" + target_label = "__tmp_port" + } + rule { + source_labels = ["__meta_kubernetes_pod_container_port_name"] + action = "keepequal" + target_label = "__tmp_port" + } + + // If the metrics port number annotation has a value, override the target address to use it, regardless whether it is + // one of the declared ports on that Pod. + rule { + source_labels = ["__meta_kubernetes_pod_annotation_k8s_grafana_com_metrics_portNumber", "__meta_kubernetes_pod_ip"] + regex = "(\\d+);(([A-Fa-f0-9]{1,4}::?){1,7}[A-Fa-f0-9]{1,4})" + replacement = "[$2]:$1" // IPv6 + target_label = "__address__" + } + rule { + source_labels = ["__meta_kubernetes_pod_annotation_k8s_grafana_com_metrics_portNumber", "__meta_kubernetes_pod_ip"] + regex = "(\\d+);((([0-9]+?)(\\.|$)){4})" // IPv4, takes priority over IPv6 when both exists + replacement = "$2:$1" + target_label = "__address__" + } + + rule { + source_labels = ["__meta_kubernetes_pod_annotation_k8s_grafana_com_metrics_scheme"] + action = "replace" + target_label = "__scheme__" + } + + rule { + source_labels = ["__meta_kubernetes_pod_annotation_k8s_grafana_com_metrics_scrapeInterval"] + action = "replace" + target_label = "__scrape_interval__" + } + } + + discovery.kubernetes "services" { + role = "service" + } + + discovery.relabel "annotation_autodiscovery_services" { + targets = discovery.kubernetes.services.targets + rule { + source_labels = ["__meta_kubernetes_service_annotation_k8s_grafana_com_scrape"] + regex = "true" + action = "keep" + } + rule { + source_labels = ["__meta_kubernetes_service_annotation_k8s_grafana_com_job"] + action = "replace" + target_label = "job" + } + rule { + source_labels = ["__meta_kubernetes_service_annotation_k8s_grafana_com_instance"] + action = "replace" + target_label = "instance" + } + rule { + source_labels = ["__meta_kubernetes_service_annotation_k8s_grafana_com_metrics_path"] + action = "replace" + target_label = "__metrics_path__" + } + + // Choose the service port + rule { + source_labels = ["__meta_kubernetes_service_port_name"] + target_label = "__tmp_port" + } + rule { + source_labels = ["__meta_kubernetes_service_annotation_k8s_grafana_com_metrics_portName"] + regex = "(.+)" + target_label = "__tmp_port" + } + rule { + source_labels = ["__meta_kubernetes_service_port_name"] + action = "keepequal" + 
target_label = "__tmp_port" + } + + rule { + source_labels = ["__meta_kubernetes_service_port_number"] + target_label = "__tmp_port" + } + rule { + source_labels = ["__meta_kubernetes_service_annotation_k8s_grafana_com_metrics_portNumber"] + regex = "(.+)" + target_label = "__tmp_port" + } + rule { + source_labels = ["__meta_kubernetes_service_port_number"] + action = "keepequal" + target_label = "__tmp_port" + } + + rule { + source_labels = ["__meta_kubernetes_service_annotation_k8s_grafana_com_metrics_scheme"] + action = "replace" + target_label = "__scheme__" + } + + rule { + source_labels = ["__meta_kubernetes_service_annotation_k8s_grafana_com_metrics_scrapeInterval"] + action = "replace" + target_label = "__scrape_interval__" + } + } + + discovery.relabel "annotation_autodiscovery_http" { + targets = concat(discovery.relabel.annotation_autodiscovery_pods.output, discovery.relabel.annotation_autodiscovery_services.output) + rule { + source_labels = ["__scheme__"] + regex = "https" + action = "drop" + } + } + + discovery.relabel "annotation_autodiscovery_https" { + targets = concat(discovery.relabel.annotation_autodiscovery_pods.output, discovery.relabel.annotation_autodiscovery_services.output) + rule { + source_labels = ["__scheme__"] + regex = "https" + action = "keep" + } + } + + prometheus.scrape "annotation_autodiscovery_http" { + targets = discovery.relabel.annotation_autodiscovery_http.output + scrape_interval = "60s" + honor_labels = true + bearer_token_file = "/var/run/secrets/kubernetes.io/serviceaccount/token" + clustering { + enabled = true + } + + forward_to = argument.metrics_destinations.value + } + + prometheus.scrape "annotation_autodiscovery_https" { + targets = discovery.relabel.annotation_autodiscovery_https.output + scrape_interval = "60s" + honor_labels = true + bearer_token_file = "/var/run/secrets/kubernetes.io/serviceaccount/token" + tls_config { + insecure_skip_verify = true + } + clustering { + enabled = true + } + + forward_to = argument.metrics_destinations.value + } + } + annotation_autodiscovery "feature" { + metrics_destinations = [ + prometheus.remote_write.prometheus.receiver, + ] + } + + // Self Reporting + prometheus.exporter.unix "kubernetes_monitoring_telemetry" { + set_collectors = ["textfile"] + textfile { + directory = "/etc/alloy" + } + } + + discovery.relabel "kubernetes_monitoring_telemetry" { + targets = prometheus.exporter.unix.kubernetes_monitoring_telemetry.targets + rule { + target_label = "instance" + action = "replace" + replacement = "k8smon" + } + rule { + target_label = "job" + action = "replace" + replacement = "integrations/kubernetes/kubernetes_monitoring_telemetry" + } + } + + prometheus.scrape "kubernetes_monitoring_telemetry" { + job_name = "integrations/kubernetes/kubernetes_monitoring_telemetry" + targets = discovery.relabel.kubernetes_monitoring_telemetry.output + scrape_interval = "1m" + clustering { + enabled = true + } + forward_to = [prometheus.relabel.kubernetes_monitoring_telemetry.receiver] + } + + prometheus.relabel "kubernetes_monitoring_telemetry" { + rule { + source_labels = ["__name__"] + regex = "grafana_kubernetes_monitoring_.*" + action = "keep" + } + forward_to = [ + prometheus.remote_write.prometheus.receiver, + ] + } + + + + + self-reporting-metric.prom: | + # HELP grafana_kubernetes_monitoring_build_info A metric to report the version of the Kubernetes Monitoring Helm chart + # TYPE grafana_kubernetes_monitoring_build_info gauge + grafana_kubernetes_monitoring_build_info{version="2.0.0-rc.12", 
namespace="default"} 1 + # HELP grafana_kubernetes_monitoring_feature_info A metric to report the enabled features of the Kubernetes Monitoring Helm chart + # TYPE grafana_kubernetes_monitoring_feature_info gauge + grafana_kubernetes_monitoring_feature_info{feature="annotationAutodiscovery", version="1.0.0"} 1 +--- +# Source: k8s-monitoring/charts/alloy-metrics/templates/rbac.yaml +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: k8smon-alloy-metrics + labels: + helm.sh/chart: alloy-metrics-0.10.1 + app.kubernetes.io/name: alloy-metrics + app.kubernetes.io/instance: k8smon + + app.kubernetes.io/version: "v1.5.1" + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/part-of: alloy + app.kubernetes.io/component: rbac +rules: + # Rules which allow discovery.kubernetes to function. + - apiGroups: + - "" + - "discovery.k8s.io" + - "networking.k8s.io" + resources: + - endpoints + - endpointslices + - ingresses + - nodes + - nodes/proxy + - nodes/metrics + - pods + - services + verbs: + - get + - list + - watch + # Rules which allow loki.source.kubernetes and loki.source.podlogs to work. + - apiGroups: + - "" + resources: + - pods + - pods/log + - namespaces + verbs: + - get + - list + - watch + - apiGroups: + - "monitoring.grafana.com" + resources: + - podlogs + verbs: + - get + - list + - watch + # Rules which allow mimir.rules.kubernetes to work. + - apiGroups: ["monitoring.coreos.com"] + resources: + - prometheusrules + verbs: + - get + - list + - watch + - nonResourceURLs: + - /metrics + verbs: + - get + # Rules for prometheus.kubernetes.* + - apiGroups: ["monitoring.coreos.com"] + resources: + - podmonitors + - servicemonitors + - probes + verbs: + - get + - list + - watch + # Rules which allow eventhandler to work. + - apiGroups: + - "" + resources: + - events + verbs: + - get + - list + - watch + # needed for remote.kubernetes.* + - apiGroups: [""] + resources: + - "configmaps" + - "secrets" + verbs: + - get + - list + - watch + # needed for otelcol.processor.k8sattributes + - apiGroups: ["apps"] + resources: ["replicasets"] + verbs: ["get", "list", "watch"] + - apiGroups: ["extensions"] + resources: ["replicasets"] + verbs: ["get", "list", "watch"] +--- +# Source: k8s-monitoring/charts/alloy-metrics/templates/rbac.yaml +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: k8smon-alloy-metrics + labels: + helm.sh/chart: alloy-metrics-0.10.1 + app.kubernetes.io/name: alloy-metrics + app.kubernetes.io/instance: k8smon + + app.kubernetes.io/version: "v1.5.1" + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/part-of: alloy + app.kubernetes.io/component: rbac +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: k8smon-alloy-metrics +subjects: + - kind: ServiceAccount + name: k8smon-alloy-metrics + namespace: default +--- +# Source: k8s-monitoring/charts/alloy-metrics/templates/cluster_service.yaml +apiVersion: v1 +kind: Service +metadata: + name: k8smon-alloy-metrics-cluster + labels: + helm.sh/chart: alloy-metrics-0.10.1 + app.kubernetes.io/name: alloy-metrics + app.kubernetes.io/instance: k8smon + + app.kubernetes.io/version: "v1.5.1" + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/part-of: alloy + app.kubernetes.io/component: networking +spec: + type: ClusterIP + clusterIP: 'None' + publishNotReadyAddresses: true + selector: + app.kubernetes.io/name: alloy-metrics + app.kubernetes.io/instance: k8smon + ports: + # Do not include the -metrics suffix in the port name, otherwise metrics + # 
can be double-collected with the non-headless Service if it's also + # enabled. + # + # This service should only be used for clustering, and not metric + # collection. + - name: http + port: 12345 + targetPort: 12345 + protocol: "TCP" +--- +# Source: k8s-monitoring/charts/alloy-metrics/templates/service.yaml +apiVersion: v1 +kind: Service +metadata: + name: k8smon-alloy-metrics + labels: + helm.sh/chart: alloy-metrics-0.10.1 + app.kubernetes.io/name: alloy-metrics + app.kubernetes.io/instance: k8smon + + app.kubernetes.io/version: "v1.5.1" + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/part-of: alloy + app.kubernetes.io/component: networking +spec: + type: ClusterIP + selector: + app.kubernetes.io/name: alloy-metrics + app.kubernetes.io/instance: k8smon + internalTrafficPolicy: Cluster + ports: + - name: http-metrics + port: 12345 + targetPort: 12345 + protocol: "TCP" +--- +# Source: k8s-monitoring/charts/alloy-metrics/templates/controllers/statefulset.yaml +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: k8smon-alloy-metrics + labels: + helm.sh/chart: alloy-metrics-0.10.1 + app.kubernetes.io/name: alloy-metrics + app.kubernetes.io/instance: k8smon + + app.kubernetes.io/version: "v1.5.1" + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/part-of: alloy +spec: + replicas: 2 + podManagementPolicy: Parallel + minReadySeconds: 10 + serviceName: k8smon-alloy-metrics + selector: + matchLabels: + app.kubernetes.io/name: alloy-metrics + app.kubernetes.io/instance: k8smon + template: + metadata: + annotations: + kubectl.kubernetes.io/default-container: alloy + k8s.grafana.com/logs.job: integrations/alloy + labels: + app.kubernetes.io/name: alloy-metrics + app.kubernetes.io/instance: k8smon + spec: + serviceAccountName: k8smon-alloy-metrics + containers: + - name: alloy + image: docker.io/grafana/alloy:v1.5.1 + imagePullPolicy: IfNotPresent + args: + - run + - /etc/alloy/config.alloy + - --storage.path=/tmp/alloy + - --server.http.listen-addr=0.0.0.0:12345 + - --server.http.ui-path-prefix=/ + - --cluster.enabled=true + - --cluster.join-addresses=k8smon-alloy-metrics-cluster + - --cluster.name=alloy-metrics + - --stability.level=generally-available + env: + - name: ALLOY_DEPLOY_MODE + value: "helm" + - name: HOSTNAME + valueFrom: + fieldRef: + fieldPath: spec.nodeName + ports: + - containerPort: 12345 + name: http-metrics + readinessProbe: + httpGet: + path: /-/ready + port: 12345 + scheme: HTTP + initialDelaySeconds: 10 + timeoutSeconds: 1 + securityContext: + allowPrivilegeEscalation: false + capabilities: + add: + - CHOWN + - DAC_OVERRIDE + - FOWNER + - FSETID + - KILL + - SETGID + - SETUID + - SETPCAP + - NET_BIND_SERVICE + - NET_RAW + - SYS_CHROOT + - MKNOD + - AUDIT_WRITE + - SETFCAP + drop: + - ALL + seccompProfile: + type: RuntimeDefault + volumeMounts: + - name: config + mountPath: /etc/alloy + - name: config-reloader + image: ghcr.io/jimmidyson/configmap-reload:v0.12.0 + args: + - --volume-dir=/etc/alloy + - --webhook-url=http://localhost:12345/-/reload + volumeMounts: + - name: config + mountPath: /etc/alloy + resources: + requests: + cpu: 1m + memory: 5Mi + dnsPolicy: ClusterFirst + nodeSelector: + kubernetes.io/os: linux + volumes: + - name: config + configMap: + name: k8smon-alloy-metrics diff --git a/charts/k8s-monitoring/tests/integration/anntoation-autodiscovery/deployments/cert-manager.yaml b/charts/k8s-monitoring/tests/integration/anntoation-autodiscovery/deployments/cert-manager.yaml new file mode 100644 index 000000000..d73d1ec1a --- /dev/null +++ 
b/charts/k8s-monitoring/tests/integration/anntoation-autodiscovery/deployments/cert-manager.yaml @@ -0,0 +1,36 @@ +--- +apiVersion: v1 +kind: Namespace +metadata: + name: cert-manager +--- +apiVersion: source.toolkit.fluxcd.io/v1 +kind: HelmRepository +metadata: + name: cert-manager + namespace: cert-manager +spec: + interval: 1m + url: https://charts.jetstack.io +--- +apiVersion: helm.toolkit.fluxcd.io/v2 +kind: HelmRelease +metadata: + name: cert-manager + namespace: cert-manager +spec: + interval: 1m + chart: + spec: + chart: cert-manager + sourceRef: + kind: HelmRepository + name: cert-manager + namespace: cert-manager + interval: 1m + values: + installCRDs: true + serviceAnnotations: + k8s.grafana.com/scrape: "true" + k8s.grafana.com/job: "integrations/cert-manager" + k8s.grafana.com/metrics.portNumber: "9402" diff --git a/charts/k8s-monitoring/tests/integration/anntoation-autodiscovery/deployments/grafana.yaml b/charts/k8s-monitoring/tests/integration/anntoation-autodiscovery/deployments/grafana.yaml new file mode 100644 index 000000000..3743a01d4 --- /dev/null +++ b/charts/k8s-monitoring/tests/integration/anntoation-autodiscovery/deployments/grafana.yaml @@ -0,0 +1,39 @@ +--- +apiVersion: v1 +kind: Namespace +metadata: + name: grafana +--- +apiVersion: source.toolkit.fluxcd.io/v1 +kind: HelmRepository +metadata: + name: grafana + namespace: grafana +spec: + interval: 1m + url: https://grafana.github.io/helm-charts +--- +apiVersion: helm.toolkit.fluxcd.io/v2 +kind: HelmRelease +metadata: + name: grafana + namespace: grafana +spec: + interval: 1m + chart: + spec: + chart: grafana + sourceRef: + kind: HelmRepository + name: grafana + namespace: grafana + interval: 1m + values: + datasources: + datasources.yaml: + apiVersion: 1 + datasources: + - name: Prometheus + type: prometheus + url: http://prometheus-server.prometheus.svc:9090 + isDefault: true diff --git a/charts/k8s-monitoring/tests/integration/anntoation-autodiscovery/deployments/prometheus.yaml b/charts/k8s-monitoring/tests/integration/anntoation-autodiscovery/deployments/prometheus.yaml new file mode 100644 index 000000000..e0efeabe3 --- /dev/null +++ b/charts/k8s-monitoring/tests/integration/anntoation-autodiscovery/deployments/prometheus.yaml @@ -0,0 +1,61 @@ +--- +apiVersion: v1 +kind: Namespace +metadata: + name: prometheus +--- +apiVersion: source.toolkit.fluxcd.io/v1 +kind: HelmRepository +metadata: + name: prometheus-community + namespace: prometheus +spec: + interval: 1m + url: https://prometheus-community.github.io/helm-charts +--- +apiVersion: helm.toolkit.fluxcd.io/v2 +kind: HelmRelease +metadata: + name: prometheus + namespace: prometheus +spec: + interval: 1m + chart: + spec: + chart: prometheus + version: "^25" + sourceRef: + kind: HelmRepository + name: prometheus-community + namespace: prometheus + interval: 1m + values: + server: + extraFlags: + - enable-feature=remote-write-receiver + + persistentVolume: + enabled: false + + service: + servicePort: 9090 + + serverFiles: + prometheus.yml: + scrape_configs: [] + + configmapReload: + prometheus: + enabled: false + + alertmanager: + enabled: false + + kube-state-metrics: + enabled: false + + prometheus-node-exporter: + enabled: false + + prometheus-pushgateway: + enabled: false diff --git a/charts/k8s-monitoring/tests/integration/anntoation-autodiscovery/deployments/query-test.yaml b/charts/k8s-monitoring/tests/integration/anntoation-autodiscovery/deployments/query-test.yaml new file mode 100644 index 000000000..761fcf1c6 --- /dev/null +++ 
b/charts/k8s-monitoring/tests/integration/anntoation-autodiscovery/deployments/query-test.yaml @@ -0,0 +1,49 @@ +--- +apiVersion: source.toolkit.fluxcd.io/v1 +kind: GitRepository +metadata: + name: k8s-monitoring-test +spec: + interval: 1m + url: https://github.com/grafana/k8s-monitoring-helm + ref: + branch: main + ignore: | + /* + !/charts/k8s-monitoring-test +--- +apiVersion: helm.toolkit.fluxcd.io/v2 +kind: HelmRelease +metadata: + name: k8s-monitoring-test +spec: + interval: 1m + chart: + spec: + chart: charts/k8s-monitoring-test + sourceRef: + kind: GitRepository + name: k8s-monitoring-test + interval: 1m + values: + tests: + - env: + CLUSTER: annotation-autodiscovery-test + PROMETHEUS_URL: http://prometheus-server.prometheus.svc:9090/api/v1/query + queries: + # Self reporting metrics + - query: grafana_kubernetes_monitoring_build_info{cluster="$CLUSTER"} + type: promql + - query: grafana_kubernetes_monitoring_feature_info{cluster="$CLUSTER", feature="annotationAutodiscovery"} + type: promql + + # Annotation Autodiscovery by Service + - query: certmanager_clock_time_seconds{cluster="$CLUSTER", job="integrations/cert-manager"} + type: promql + + # DPM check + - query: avg(count_over_time(scrape_samples_scraped{cluster="$CLUSTER"}[1m])) + type: promql + expect: + value: 1 + operator: == diff --git a/charts/k8s-monitoring/tests/integration/anntoation-autodiscovery/values.yaml b/charts/k8s-monitoring/tests/integration/anntoation-autodiscovery/values.yaml new file mode 100644 index 000000000..83e4a4990 --- /dev/null +++ b/charts/k8s-monitoring/tests/integration/anntoation-autodiscovery/values.yaml @@ -0,0 +1,18 @@ +--- +cluster: + name: annotation-autodiscovery-test + +destinations: + - name: prometheus + type: prometheus + url: http://prometheus-server.prometheus.svc:9090/api/v1/write + +annotationAutodiscovery: + enabled: true + +selfReporting: {scrapeInterval: 1m} # Force self-report to be generated within test time + +alloy-metrics: + enabled: true + controller: + replicas: 2 diff --git a/charts/k8s-monitoring/tests/integration/control-plane-monitoring/.rendered/output.yaml b/charts/k8s-monitoring/tests/integration/control-plane-monitoring/.rendered/output.yaml index 6b9ec2043..50454669a 100644 --- a/charts/k8s-monitoring/tests/integration/control-plane-monitoring/.rendered/output.yaml +++ b/charts/k8s-monitoring/tests/integration/control-plane-monitoring/.rendered/output.yaml @@ -101,7 +101,7 @@ metadata: apiVersion: v1 kind: Secret metadata: - name: "localloki-k8smon-k8s-monitoring" + name: "loki-k8smon-k8s-monitoring" namespace: "default" type: Opaque data: @@ -140,13 +140,13 @@ metadata: namespace: default data: config.alloy: |- - // Destination: localPrometheus (prometheus) - otelcol.exporter.prometheus "localprometheus" { + // Destination: prometheus (prometheus) + otelcol.exporter.prometheus "prometheus" { add_metric_suffixes = true - forward_to = [prometheus.remote_write.localprometheus.receiver] + forward_to = [prometheus.remote_write.prometheus.receiver] } - prometheus.remote_write "localprometheus" { + prometheus.remote_write "prometheus" { endpoint { url = "http://prometheus-server.prometheus.svc:9090/api/v1/write" headers = { @@ -206,7 +206,7 @@ data: kubernetes.kubelet "scrape" { clustering = true - keep_metrics = 
"up|go_goroutines|kubelet_certificate_manager_client_expiration_renew_errors|kubelet_certificate_manager_client_ttl_seconds|kubelet_certificate_manager_server_ttl_seconds|kubelet_cgroup_manager_duration_seconds_bucket|kubelet_cgroup_manager_duration_seconds_count|kubelet_node_config_error|kubelet_node_name|kubelet_pleg_relist_duration_seconds_bucket|kubelet_pleg_relist_duration_seconds_count|kubelet_pleg_relist_interval_seconds_bucket|kubelet_pod_start_duration_seconds_bucket|kubelet_pod_start_duration_seconds_count|kubelet_pod_worker_duration_seconds_bucket|kubelet_pod_worker_duration_seconds_count|kubelet_running_container_count|kubelet_running_containers|kubelet_running_pod_count|kubelet_running_pods|kubelet_runtime_operations_errors_total|kubelet_runtime_operations_total|kubelet_server_expiration_renew_errors|kubelet_volume_stats_available_bytes|kubelet_volume_stats_capacity_bytes|kubelet_volume_stats_inodes|kubelet_volume_stats_inodes_free|kubelet_volume_stats_inodes_used|kubelet_volume_stats_used_bytes|kubernetes_build_info|namespace_workload_pod|process_cpu_seconds_total|process_resident_memory_bytes|rest_client_requests_total|storage_operation_duration_seconds_count|storage_operation_errors_total|volume_manager_total_volumes" + keep_metrics = "up|go_goroutines|kubelet_certificate_manager_client_expiration_renew_errors|kubelet_certificate_manager_client_ttl_seconds|kubelet_certificate_manager_server_ttl_seconds|kubelet_cgroup_manager_duration_seconds_bucket|kubelet_cgroup_manager_duration_seconds_count|kubelet_node_config_error|kubelet_node_name|kubelet_pleg_relist_duration_seconds_bucket|kubelet_pleg_relist_duration_seconds_count|kubelet_pleg_relist_interval_seconds_bucket|kubelet_pod_start_duration_seconds_bucket|kubelet_pod_start_duration_seconds_count|kubelet_pod_worker_duration_seconds_bucket|kubelet_pod_worker_duration_seconds_count|kubelet_running_container_count|kubelet_running_containers|kubelet_running_pod_count|kubelet_running_pods|kubelet_runtime_operations_errors_total|kubelet_runtime_operations_total|kubelet_server_expiration_renew_errors|kubelet_volume_stats_available_bytes|kubelet_volume_stats_capacity_bytes|kubelet_volume_stats_inodes|kubelet_volume_stats_inodes_free|kubelet_volume_stats_inodes_used|kubelet_volume_stats_used_bytes|kubernetes_build_info|namespace_workload_pod|process_cpu_seconds_total|process_resident_memory_bytes|rest_client_requests_total|storage_operation_duration_seconds_count|storage_operation_errors_total|volume_manager_total_volumes|scrape_samples_scraped" scrape_interval = "60s" max_cache_size = 100000 forward_to = argument.metrics_destinations.value @@ -215,7 +215,7 @@ data: kubernetes.resources "scrape" { clustering = true job_label = "integrations/kubernetes/resources" - keep_metrics = "up|node_cpu_usage_seconds_total|node_memory_working_set_bytes" + keep_metrics = "up|node_cpu_usage_seconds_total|node_memory_working_set_bytes|scrape_samples_scraped" scrape_interval = "60s" max_cache_size = 100000 forward_to = argument.metrics_destinations.value @@ -223,7 +223,7 @@ data: kubernetes.cadvisor "scrape" { clustering = true - keep_metrics = 
"up|container_cpu_cfs_periods_total|container_cpu_cfs_throttled_periods_total|container_cpu_usage_seconds_total|container_fs_reads_bytes_total|container_fs_reads_total|container_fs_writes_bytes_total|container_fs_writes_total|container_memory_cache|container_memory_rss|container_memory_swap|container_memory_working_set_bytes|container_network_receive_bytes_total|container_network_receive_packets_dropped_total|container_network_receive_packets_total|container_network_transmit_bytes_total|container_network_transmit_packets_dropped_total|container_network_transmit_packets_total|machine_memory_bytes" + keep_metrics = "up|container_cpu_cfs_periods_total|container_cpu_cfs_throttled_periods_total|container_cpu_usage_seconds_total|container_fs_reads_bytes_total|container_fs_reads_total|container_fs_writes_bytes_total|container_fs_writes_total|container_memory_cache|container_memory_rss|container_memory_swap|container_memory_working_set_bytes|container_network_receive_bytes_total|container_network_receive_packets_dropped_total|container_network_receive_packets_total|container_network_transmit_bytes_total|container_network_transmit_packets_dropped_total|container_network_transmit_packets_total|machine_memory_bytes|scrape_samples_scraped" scrape_interval = "60s" max_cache_size = 100000 forward_to = [prometheus.relabel.cadvisor.receiver] @@ -436,7 +436,7 @@ data: kube_state_metrics.scrape "metrics" { targets = kube_state_metrics.kubernetes.targets.output clustering = true - keep_metrics = "up|kube_daemonset.*|kube_deployment_metadata_generation|kube_deployment_spec_replicas|kube_deployment_status_condition|kube_deployment_status_observed_generation|kube_deployment_status_replicas_available|kube_deployment_status_replicas_updated|kube_horizontalpodautoscaler_spec_max_replicas|kube_horizontalpodautoscaler_spec_min_replicas|kube_horizontalpodautoscaler_status_current_replicas|kube_horizontalpodautoscaler_status_desired_replicas|kube_job.*|kube_namespace_status_phase|kube_node.*|kube_persistentvolume_status_phase|kube_persistentvolumeclaim_access_mode|kube_persistentvolumeclaim_info|kube_persistentvolumeclaim_labels|kube_persistentvolumeclaim_resource_requests_storage_bytes|kube_persistentvolumeclaim_status_phase|kube_pod_container_info|kube_pod_container_resource_limits|kube_pod_container_resource_requests|kube_pod_container_status_last_terminated_reason|kube_pod_container_status_restarts_total|kube_pod_container_status_waiting_reason|kube_pod_info|kube_pod_owner|kube_pod_spec_volumes_persistentvolumeclaims_info|kube_pod_start_time|kube_pod_status_phase|kube_pod_status_reason|kube_replicaset.*|kube_resourcequota|kube_statefulset.*" + keep_metrics = 
"up|kube_daemonset.*|kube_deployment_metadata_generation|kube_deployment_spec_replicas|kube_deployment_status_condition|kube_deployment_status_observed_generation|kube_deployment_status_replicas_available|kube_deployment_status_replicas_updated|kube_horizontalpodautoscaler_spec_max_replicas|kube_horizontalpodautoscaler_spec_min_replicas|kube_horizontalpodautoscaler_status_current_replicas|kube_horizontalpodautoscaler_status_desired_replicas|kube_job.*|kube_namespace_status_phase|kube_node.*|kube_persistentvolume_status_phase|kube_persistentvolumeclaim_access_mode|kube_persistentvolumeclaim_info|kube_persistentvolumeclaim_labels|kube_persistentvolumeclaim_resource_requests_storage_bytes|kube_persistentvolumeclaim_status_phase|kube_pod_container_info|kube_pod_container_resource_limits|kube_pod_container_resource_requests|kube_pod_container_status_last_terminated_reason|kube_pod_container_status_restarts_total|kube_pod_container_status_waiting_reason|kube_pod_info|kube_pod_owner|kube_pod_spec_volumes_persistentvolumeclaims_info|kube_pod_start_time|kube_pod_status_phase|kube_pod_status_reason|kube_replicaset.*|kube_resourcequota|kube_statefulset.*|scrape_samples_scraped" scrape_interval = "60s" max_cache_size = 100000 forward_to = argument.metrics_destinations.value @@ -471,7 +471,7 @@ data: targets = discovery.relabel.node_exporter.output job_label = "integrations/node_exporter" clustering = true - keep_metrics = "up|node_cpu.*|node_exporter_build_info|node_filesystem.*|node_memory.*|node_network_receive_bytes_total|node_network_receive_drop_total|node_network_transmit_bytes_total|node_network_transmit_drop_total|process_cpu_seconds_total|process_resident_memory_bytes" + keep_metrics = "up|node_cpu.*|node_exporter_build_info|node_filesystem.*|node_memory.*|node_network_receive_bytes_total|node_network_receive_drop_total|node_network_transmit_bytes_total|node_network_transmit_drop_total|process_cpu_seconds_total|process_resident_memory_bytes|scrape_samples_scraped" scrape_interval = "60s" max_cache_size = 100000 forward_to = argument.metrics_destinations.value @@ -519,7 +519,7 @@ data: } cluster_metrics "feature" { metrics_destinations = [ - prometheus.remote_write.localprometheus.receiver, + prometheus.remote_write.prometheus.receiver, ] } --- @@ -531,13 +531,13 @@ metadata: namespace: default data: config.alloy: |- - // Destination: localPrometheus (prometheus) - otelcol.exporter.prometheus "localprometheus" { + // Destination: prometheus (prometheus) + otelcol.exporter.prometheus "prometheus" { add_metric_suffixes = true - forward_to = [prometheus.remote_write.localprometheus.receiver] + forward_to = [prometheus.remote_write.prometheus.receiver] } - prometheus.remote_write "localprometheus" { + prometheus.remote_write "prometheus" { endpoint { url = "http://prometheus-server.prometheus.svc:9090/api/v1/write" headers = { @@ -579,18 +579,18 @@ data: max_keepalive_time = "8h" } } - // Destination: localLoki (loki) - otelcol.exporter.loki "localloki" { - forward_to = [loki.write.localloki.receiver] + // Destination: loki (loki) + otelcol.exporter.loki "loki" { + forward_to = [loki.write.loki.receiver] } - loki.write "localloki" { + loki.write "loki" { endpoint { url = "http://loki.loki.svc:3100/loki/api/v1/push" - tenant_id = nonsensitive(remote.kubernetes.secret.localloki.data["tenantId"]) + tenant_id = nonsensitive(remote.kubernetes.secret.loki.data["tenantId"]) basic_auth { - username = nonsensitive(remote.kubernetes.secret.localloki.data["username"]) - password = 
remote.kubernetes.secret.localloki.data["password"] + username = nonsensitive(remote.kubernetes.secret.loki.data["username"]) + password = remote.kubernetes.secret.loki.data["password"] } tls_config { insecure_skip_verify = false @@ -602,8 +602,8 @@ data: } } - remote.kubernetes.secret "localloki" { - name = "localloki-k8smon-k8s-monitoring" + remote.kubernetes.secret "loki" { + name = "loki-k8smon-k8s-monitoring" namespace = "default" } @@ -681,7 +681,7 @@ data: } cluster_events "feature" { logs_destinations = [ - loki.write.localloki.receiver, + loki.write.loki.receiver, ] } @@ -724,7 +724,7 @@ data: action = "keep" } forward_to = [ - prometheus.remote_write.localprometheus.receiver, + prometheus.remote_write.prometheus.receiver, ] } @@ -749,18 +749,18 @@ metadata: namespace: default data: config.alloy: |- - // Destination: localLoki (loki) - otelcol.exporter.loki "localloki" { - forward_to = [loki.write.localloki.receiver] + // Destination: loki (loki) + otelcol.exporter.loki "loki" { + forward_to = [loki.write.loki.receiver] } - loki.write "localloki" { + loki.write "loki" { endpoint { url = "http://loki.loki.svc:3100/loki/api/v1/push" - tenant_id = nonsensitive(remote.kubernetes.secret.localloki.data["tenantId"]) + tenant_id = nonsensitive(remote.kubernetes.secret.loki.data["tenantId"]) basic_auth { - username = nonsensitive(remote.kubernetes.secret.localloki.data["username"]) - password = remote.kubernetes.secret.localloki.data["password"] + username = nonsensitive(remote.kubernetes.secret.loki.data["username"]) + password = remote.kubernetes.secret.loki.data["password"] } tls_config { insecure_skip_verify = false @@ -772,8 +772,8 @@ data: } } - remote.kubernetes.secret "localloki" { - name = "localloki-k8smon-k8s-monitoring" + remote.kubernetes.secret "loki" { + name = "loki-k8smon-k8s-monitoring" namespace = "default" } @@ -916,7 +916,7 @@ data: } pod_logs "feature" { logs_destinations = [ - loki.write.localloki.receiver, + loki.write.loki.receiver, ] } --- @@ -3669,7 +3669,7 @@ metadata: app.kubernetes.io/managed-by: Helm app.kubernetes.io/part-of: alloy spec: - replicas: 1 + replicas: 2 podManagementPolicy: Parallel minReadySeconds: 10 serviceName: k8smon-alloy-metrics diff --git a/charts/k8s-monitoring/tests/integration/control-plane-monitoring/deployments/query-test.yaml b/charts/k8s-monitoring/tests/integration/control-plane-monitoring/deployments/query-test.yaml index b035f7b7d..13088b92a 100644 --- a/charts/k8s-monitoring/tests/integration/control-plane-monitoring/deployments/query-test.yaml +++ b/charts/k8s-monitoring/tests/integration/control-plane-monitoring/deployments/query-test.yaml @@ -78,3 +78,10 @@ spec: # Pod logs - query: count_over_time({cluster="$CLUSTER", job!="integrations/kubernetes/eventhandler"}[1h]) type: logql + + # DPM check + - query: avg(count_over_time(scrape_samples_scraped{cluster="$CLUSTER"}[1m])) + type: promql + expect: + value: 1 + operator: == diff --git a/charts/k8s-monitoring/tests/integration/control-plane-monitoring/values.yaml b/charts/k8s-monitoring/tests/integration/control-plane-monitoring/values.yaml index 9992131ed..4dc8d9c44 100644 --- a/charts/k8s-monitoring/tests/integration/control-plane-monitoring/values.yaml +++ b/charts/k8s-monitoring/tests/integration/control-plane-monitoring/values.yaml @@ -3,10 +3,10 @@ cluster: name: control-plane-monitoring-test destinations: - - name: localPrometheus + - name: prometheus type: prometheus url: http://prometheus-server.prometheus.svc:9090/api/v1/write - - name: localLoki + - name: loki 
type: loki url: http://loki.loki.svc:3100/loki/api/v1/push tenantId: "1" @@ -20,6 +20,12 @@ clusterMetrics: controlPlane: enabled: true + kubelet: {metricsTuning: {includeMetrics: [scrape_samples_scraped]}} + kubeletResource: {metricsTuning: {includeMetrics: [scrape_samples_scraped]}} + cadvisor: {metricsTuning: {includeMetrics: [scrape_samples_scraped]}} + kube-state-metrics: {metricsTuning: {includeMetrics: [scrape_samples_scraped]}} + node-exporter: {metricsTuning: {includeMetrics: [scrape_samples_scraped]}} + clusterEvents: enabled: true @@ -30,6 +36,8 @@ selfReporting: {scrapeInterval: 1m} # Force self-report to be generated within alloy-metrics: enabled: true + controller: + replicas: 2 alloy-singleton: enabled: true
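
Note on the DPM (data points per minute) checks added above: both query-test.yaml files assert that avg(count_over_time(scrape_samples_scraped{cluster="$CLUSTER"}[1m])) evaluates to exactly 1. With every scrape job configured at a 60s interval, each target should deliver one sample per minute, so a higher average typically means a target is being scraped by more than one job or at a shorter interval than intended. Below is a minimal standalone sketch of the same check for manual debugging; it is not part of the chart or the test harness, and the localhost URL assumes a port-forward (for example, kubectl -n prometheus port-forward svc/prometheus-server 9090:9090), whereas the tests query http://prometheus-server.prometheus.svc:9090/api/v1/query from inside the cluster.

#!/usr/bin/env python3
# Hand-run version of the DPM check added in this patch (sketch, not part of the tests).
# Assumptions: Prometheus is reachable on localhost:9090 via a port-forward, and the
# cluster label matches the test values ("annotation-autodiscovery-test" or
# "control-plane-monitoring-test").
import json
import urllib.parse
import urllib.request

PROMETHEUS_URL = "http://localhost:9090/api/v1/query"
CLUSTER = "annotation-autodiscovery-test"

# Average number of samples each target delivered over the last minute.
# With the 60s scrape interval used in these tests this should be exactly 1 DPM.
query = 'avg(count_over_time(scrape_samples_scraped{cluster="%s"}[1m]))' % CLUSTER

with urllib.request.urlopen(PROMETHEUS_URL + "?" + urllib.parse.urlencode({"query": query})) as resp:
    result = json.load(resp)["data"]["result"]

if not result:
    raise SystemExit("no scrape_samples_scraped series found for cluster %r" % CLUSTER)

dpm = float(result[0]["value"][1])
print("average DPM:", dpm)
assert dpm == 1, "expected exactly 1 DPM, got %s (duplicate scrapes or wrong interval?)" % dpm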