diff --git a/Header.md b/Header.md index 072b5b9..3d3def1 100644 --- a/Header.md +++ b/Header.md @@ -7,6 +7,8 @@ The goal of this project is to export ephemeral storage metric usage per pod to Prometheus that is address in this issue [Here](https://github.com/kubernetes/kubernetes/issues/69507) +It accomplishes this by creating a Service Monitor + ![main image](img/screenshot.png) diff --git a/Makefile b/Makefile index b5bb54e..2683ed2 100644 --- a/Makefile +++ b/Makefile @@ -12,7 +12,7 @@ $(LOCALBIN): ginkgo: - test -s $(LOCALBIN)/ginkgo || GOBIN=$(LOCALBIN) go install github.com/onsi/ginkgo/v2/ginkgo@v2.9.7 + test -s $(LOCALBIN)/ginkgo || GOBIN=$(LOCALBIN) go install github.com/onsi/ginkgo/v2/ginkgo@latest .PHONY: fmt fmt: ## Run go fmt against code. @@ -58,10 +58,10 @@ release-helm: cd .. release: github_login release-docker release-helm helm-docs - # ex. make VERSION=1.1.1 release + # ex. make VERSION=1.2.0 release release-github: github_login - # ex. make VERSION=1.1.1 release-github + # ex. make VERSION=1.2.0 release-github gh release create ${VERSION} --generate-notes gh release upload ${VERSION} "chart/k8s-ephemeral-storage-metrics-${VERSION}.tgz" rm chart/k8s-ephemeral-storage-metrics-*.tgz diff --git a/README.md b/README.md index bea8000..5dbd23e 100644 --- a/README.md +++ b/README.md @@ -7,6 +7,8 @@ The goal of this project is to export ephemeral storage metric usage per pod to Prometheus that is address in this issue [Here](https://github.com/kubernetes/kubernetes/issues/69507) +It accomplishes this by creating a Service Monitor + ![main image](img/screenshot.png) @@ -28,9 +30,10 @@ helm upgrade --install my-deployment k8s-ephemeral-storage-metrics/k8s-ephemeral | extra.adjusted_polling_rate | bool | `false` | Create the ephemeral_storage_adjusted_polling_rate metrics to report Adjusted Poll Rate in milliseconds. Typically used for testing. | | image.imagePullPolicy | string | `"IfNotPresent"` | | | image.repository | string | `"ghcr.io/jmcgrath207/k8s-ephemeral-storage-metrics"` | | -| image.tag | string | `"1.1.1"` | | -| interval | int | `15` | Polling rate for exporter | +| image.tag | string | `"1.2.0"` | | +| interval | int | `15` | Polling node rate for exporter | | log_level | string | `"info"` | | +| max_node_concurrency | int | `10` | Max amount of concurrent query requests at a time. | | prometheus.release | string | `"kube-prometheus-stack"` | | ## Contribute diff --git a/chart/Chart.yaml b/chart/Chart.yaml index d776ed1..ce14327 100644 --- a/chart/Chart.yaml +++ b/chart/Chart.yaml @@ -1,7 +1,7 @@ apiVersion: v2 name: k8s-ephemeral-storage-metrics -version: 1.1.1 -appVersion: 1.1.1 +version: 1.2.0 +appVersion: 1.2.0 kubeVersion: ">=1.21.0-0" description: Ephemeral storage metrics for prometheus operator. home: https://github.com/jmcgrath207/k8s-ephemeral-storage-metrics diff --git a/chart/README.md b/chart/README.md index 1e41d39..72ae4aa 100644 --- a/chart/README.md +++ b/chart/README.md @@ -15,9 +15,10 @@ helm upgrade --install my-deployment k8s-ephemeral-storage-metrics/k8s-ephemeral | extra.adjusted_polling_rate | bool | `false` | Create the ephemeral_storage_adjusted_polling_rate metrics to report Adjusted Poll Rate in milliseconds. Typically used for testing. | | image.imagePullPolicy | string | `"IfNotPresent"` | | | image.repository | string | `"ghcr.io/jmcgrath207/k8s-ephemeral-storage-metrics"` | | -| image.tag | string | `"1.1.1"` | | -| interval | int | `15` | Polling rate for exporter | +| image.tag | string | `"1.2.0"` | | +| interval | int | `15` | Polling node rate for exporter | | log_level | string | `"info"` | | +| max_node_concurrency | int | `10` | Max amount of concurrent query requests at a time. | | prometheus.release | string | `"kube-prometheus-stack"` | | ## Contribute diff --git a/chart/index.yaml b/chart/index.yaml index 73b6f8f..287adc5 100644 --- a/chart/index.yaml +++ b/chart/index.yaml @@ -1,6 +1,28 @@ apiVersion: v1 entries: k8s-ephemeral-storage-metrics: + - annotations: + artifacthub.io/license: MIT + artifacthub.io/links: | + - name: Documentation + url: https://github.com/jmcgrath207/k8s-ephemeral-storage-metrics + artifacthub.io/prerelease: "false" + apiVersion: v2 + appVersion: 1.2.0 + created: "2023-10-31T00:47:30.805699609-05:00" + description: Ephemeral storage metrics for prometheus operator. + digest: 949f15224740d31b963f4ae5b74ef0a9002877f667b9f30f7443cb9894d96d76 + home: https://github.com/jmcgrath207/k8s-ephemeral-storage-metrics + keywords: + - kubernetes + - metrics + kubeVersion: '>=1.21.0-0' + name: k8s-ephemeral-storage-metrics + sources: + - https://github.com/jmcgrath207/k8s-ephemeral-storage-metrics + urls: + - https://github.com/jmcgrath207/k8s-ephemeral-storage-metrics/releases/download/1.2.0/k8s-ephemeral-storage-metrics-1.2.0.tgz + version: 1.2.0 - annotations: artifacthub.io/license: MIT artifacthub.io/links: | @@ -111,4 +133,4 @@ entries: urls: - https://github.com/jmcgrath207/k8s-ephemeral-storage-metrics/releases/download/1.0.0/k8s-ephemeral-storage-metrics-1.0.0.tgz version: 1.0.0 -generated: "2023-10-26T18:13:39.692332223-05:00" +generated: "2023-10-31T00:47:30.804384991-05:00" diff --git a/chart/templates/DeployType.yaml b/chart/templates/DeployType.yaml index b9234df..bcdd0df 100644 --- a/chart/templates/DeployType.yaml +++ b/chart/templates/DeployType.yaml @@ -57,6 +57,8 @@ spec: value: "{{ .Values.deploy_type }}" - name: SCRAPE_INTERVAL value: "{{ .Values.interval }}" + - name: MAX_NODE_CONCURRENCY + value: "{{ .Values.max_node_concurrency }}" - name: LOG_LEVEL value: "{{ .Values.log_level }}" {{ if .Values.extra.adjusted_polling_rate }} diff --git a/chart/values.yaml b/chart/values.yaml index b55b269..52072fd 100644 --- a/chart/values.yaml +++ b/chart/values.yaml @@ -1,13 +1,15 @@ image: repository: ghcr.io/jmcgrath207/k8s-ephemeral-storage-metrics - tag: 1.1.1 + tag: 1.2.0 imagePullPolicy: IfNotPresent log_level: info # -- Set as Deployment for single controller to query all nodes or Daemonset deploy_type: Deployment # Note in testing, Kube API does not refresh faster than 10 seconds -# -- Polling rate for exporter +# -- Polling node rate for exporter interval: 15 # Seconds +# -- Max amount of concurrent query requests at a time. +max_node_concurrency: 10 prometheus: release: kube-prometheus-stack diff --git a/go.mod b/go.mod index 98f764a..608fd5c 100644 --- a/go.mod +++ b/go.mod @@ -3,12 +3,16 @@ module k8s-ephemeral-storage-metrics go 1.21 require ( + github.com/cenkalti/backoff/v4 v4.2.1 + github.com/deckarep/golang-set/v2 v2.3.1 github.com/onsi/ginkgo/v2 v2.13.0 - github.com/onsi/gomega v1.28.0 + github.com/onsi/gomega v1.29.0 + github.com/panjf2000/ants/v2 v2.8.2 github.com/prometheus/client_golang v1.17.0 github.com/rs/zerolog v1.31.0 - k8s.io/apimachinery v0.28.2 - k8s.io/client-go v0.28.2 + k8s.io/apimachinery v0.28.3 + k8s.io/client-go v0.28.3 + ) require ( @@ -24,7 +28,7 @@ require ( github.com/gogo/protobuf v1.3.2 // indirect github.com/golang/protobuf v1.5.3 // indirect github.com/google/gnostic-models v0.6.8 // indirect - github.com/google/go-cmp v0.5.9 // indirect + github.com/google/go-cmp v0.6.0 // indirect github.com/google/gofuzz v1.2.0 // indirect github.com/google/pprof v0.0.0-20230926050212-f7f687d19a98 // indirect github.com/google/uuid v1.3.1 // indirect @@ -43,11 +47,11 @@ require ( github.com/prometheus/common v0.44.0 // indirect github.com/prometheus/procfs v0.11.1 // indirect github.com/spf13/pflag v1.0.5 // indirect - golang.org/x/net v0.14.0 // indirect + golang.org/x/net v0.17.0 // indirect golang.org/x/oauth2 v0.10.0 // indirect - golang.org/x/sys v0.12.0 // indirect - golang.org/x/term v0.11.0 // indirect - golang.org/x/text v0.12.0 // indirect + golang.org/x/sys v0.13.0 // indirect + golang.org/x/term v0.13.0 // indirect + golang.org/x/text v0.13.0 // indirect golang.org/x/time v0.3.0 // indirect golang.org/x/tools v0.12.0 // indirect google.golang.org/appengine v1.6.8 // indirect @@ -55,7 +59,7 @@ require ( gopkg.in/inf.v0 v0.9.1 // indirect gopkg.in/yaml.v2 v2.4.0 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect - k8s.io/api v0.28.2 // indirect + k8s.io/api v0.28.3 // indirect k8s.io/klog/v2 v2.100.1 // indirect k8s.io/kube-openapi v0.0.0-20231010175941-2dd684a91f00 // indirect k8s.io/utils v0.0.0-20230726121419-3b25d923346b // indirect diff --git a/go.sum b/go.sum index 9b43899..07c23c4 100644 --- a/go.sum +++ b/go.sum @@ -1,5 +1,7 @@ github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw= +github.com/cenkalti/backoff/v4 v4.2.1 h1:y4OZtCnogmCPw98Zjyt5a6+QwPLGkiQsYW5oUqylYbM= +github.com/cenkalti/backoff/v4 v4.2.1/go.mod h1:Y3VNntkOUPxTVeUxJ/G5vcM//AlwfmyYozVcomhLiZE= github.com/cespare/xxhash/v2 v2.2.0 h1:DC2CZ1Ep5Y4k3ZQ899DldepgrayRUGE6BBZ/cd9Cj44= github.com/cespare/xxhash/v2 v2.2.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= github.com/coreos/go-systemd/v22 v22.5.0/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSVTIJ3seZv2GcEnc= @@ -8,6 +10,8 @@ github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSs github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1VwoXQT9A3Wy9MM3WgvqSxFWenqJduM= github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/deckarep/golang-set/v2 v2.3.1 h1:vjmkvJt/IV27WXPyYQpAh4bRyWJc5Y435D17XQ9QU5A= +github.com/deckarep/golang-set/v2 v2.3.1/go.mod h1:VAky9rY/yGXJOLEDv3OMci+7wtDpOF4IN+y82NBOac4= github.com/emicklei/go-restful/v3 v3.10.2 h1:hIovbnmBTLjHXkqEBUz3HGpXZdM7ZrE9fJIZIqlJLqE= github.com/emicklei/go-restful/v3 v3.10.2/go.mod h1:6n3XBCmQQb25CM2LCACGz8ukIrRry+4bhvbpWn3mrbc= github.com/go-logr/logr v1.2.0/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= @@ -34,8 +38,9 @@ github.com/golang/protobuf v1.5.3/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiu github.com/google/gnostic-models v0.6.8 h1:yo/ABAfM5IMRsS1VnXjTBvUb61tFIHozhlYvRgGre9I= github.com/google/gnostic-models v0.6.8/go.mod h1:5n7qKqH0f5wFt+aWF8CW6pZLLNOfYuF5OpfBSENuI8U= github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= -github.com/google/go-cmp v0.5.9 h1:O2Tfq5qg4qc4AmwVlvv0oLiVAGB7enBSJ2x2DqQFi38= github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= +github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI= +github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= github.com/google/gofuzz v1.2.0 h1:xRy4A+RhZaiKjJ1bPfwQ8sedCA+YS2YcCHW6ec7JMi0= github.com/google/gofuzz v1.2.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= @@ -77,8 +82,10 @@ github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ= github.com/onsi/ginkgo/v2 v2.13.0 h1:0jY9lJquiL8fcf3M4LAXN5aMlS/b2BV86HFFPCPMgE4= github.com/onsi/ginkgo/v2 v2.13.0/go.mod h1:TE309ZR8s5FsKKpuB1YAQYBzCaAfUgatB/xlT/ETL/o= -github.com/onsi/gomega v1.28.0 h1:i2rg/p9n/UqIDAMFUJ6qIUUMcsqOuUHgbpbu235Vr1c= -github.com/onsi/gomega v1.28.0/go.mod h1:A1H2JE76sI14WIP57LMKj7FVfCHx3g3BcZVjJG8bjX8= +github.com/onsi/gomega v1.29.0 h1:KIA/t2t5UBzoirT4H9tsML45GEbo3ouUnBHsCfD2tVg= +github.com/onsi/gomega v1.29.0/go.mod h1:9sxs+SwGrKI0+PWe4Fxa9tFQQBG5xSsSbMXOI8PPpoQ= +github.com/panjf2000/ants/v2 v2.8.2 h1:D1wfANttg8uXhC9149gRt1PDQ+dLVFjNXkCEycMcvQQ= +github.com/panjf2000/ants/v2 v2.8.2/go.mod h1:7ZxyxsqE4vvW0M7LSD8aI3cKwgFhBHbxnlN8mDqHa1I= github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 h1:Jamvg5psRIccs7FGNTlIRMkT8wgtp5eCXdBlqhYGL6U= @@ -106,6 +113,7 @@ github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/ github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= +github.com/stretchr/testify v1.8.2/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk= github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= @@ -126,8 +134,8 @@ golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLL golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= -golang.org/x/net v0.14.0 h1:BONx9s002vGdD9umnlX1Po8vOZmrgH34qlHcD1MfK14= -golang.org/x/net v0.14.0/go.mod h1:PpSgVXXLK0OxS0F31C1/tv6XNguvCrnXIDrFMspZIUI= +golang.org/x/net v0.17.0 h1:pVaXccu2ozPjCXewfr1S7xza/zcXTity9cCdXQYSjIM= +golang.org/x/net v0.17.0/go.mod h1:NxSsAGuq816PNPmqtQdLE42eU2Fs7NoRIZrHJAlaCOE= golang.org/x/oauth2 v0.10.0 h1:zHCpF2Khkwy4mMB4bv0U37YtJdTGW8jI0glAApi0Kh8= golang.org/x/oauth2 v0.10.0/go.mod h1:kTpgurOux7LqtuxjuyZa4Gj2gdezIt/jQtGnNFfypQI= golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= @@ -135,6 +143,8 @@ golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJ golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.3.0 h1:ftCYgMx6zT/asHUrPw8BLLscYtGznsLAnjq5RH9P66E= +golang.org/x/sync v0.3.0/go.mod h1:FU7BRWz2tNW+3quACPkgCx/L+uEAv1htQ0V83Z9Rj+Y= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= @@ -144,18 +154,19 @@ golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBc golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.12.0 h1:CM0HF96J0hcLAwsHPJZjfdNzs0gftsLfgKt57wWHJ0o= golang.org/x/sys v0.12.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.13.0 h1:Af8nKPmuFypiUBjVoU9V20FiaFXOcuZI21p0ycVYYGE= +golang.org/x/sys v0.13.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= -golang.org/x/term v0.11.0 h1:F9tnn/DA/Im8nCwm+fX+1/eBwi4qFjRT++MhtVC4ZX0= -golang.org/x/term v0.11.0/go.mod h1:zC9APTIj3jG3FdV/Ons+XE1riIZXG4aZ4GTHiPZJPIU= +golang.org/x/term v0.13.0 h1:bb+I9cTfFazGW51MZqBVmZy7+JEJMouUHTUSKVQLBek= +golang.org/x/term v0.13.0/go.mod h1:LTmsnFJwVN6bCy1rVCoS+qHT1HhALEFxKncY3WNNh4U= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= golang.org/x/text v0.3.8/go.mod h1:E6s5w1FMmriuDzIBO73fBruAKo1PCIq6d2Q6DHfQ8WQ= -golang.org/x/text v0.12.0 h1:k+n5B8goJNdU7hSvEtMUz3d1Q6D/XW4COJSJR6fN0mc= -golang.org/x/text v0.12.0/go.mod h1:TvPlkZtksWOMsz7fbANvkp4WM8x/WCo/om8BMLbz+aE= +golang.org/x/text v0.13.0 h1:ablQoSUd0tRdKxZewP80B+BaqeKJuVhuRxj/dkrun3k= +golang.org/x/text v0.13.0/go.mod h1:TvPlkZtksWOMsz7fbANvkp4WM8x/WCo/om8BMLbz+aE= golang.org/x/time v0.3.0 h1:rg5rLMjNzMS1RkNLzCG38eapWhnYLFYXDXj2gOlr8j4= golang.org/x/time v0.3.0/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= @@ -186,12 +197,12 @@ gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ= gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= -k8s.io/api v0.28.2 h1:9mpl5mOb6vXZvqbQmankOfPIGiudghwCoLl1EYfUZbw= -k8s.io/api v0.28.2/go.mod h1:RVnJBsjU8tcMq7C3iaRSGMeaKt2TWEUXcpIt/90fjEg= -k8s.io/apimachinery v0.28.2 h1:KCOJLrc6gu+wV1BYgwik4AF4vXOlVJPdiqn0yAWWwXQ= -k8s.io/apimachinery v0.28.2/go.mod h1:RdzF87y/ngqk9H4z3EL2Rppv5jj95vGS/HaFXrLDApU= -k8s.io/client-go v0.28.2 h1:DNoYI1vGq0slMBN/SWKMZMw0Rq+0EQW6/AK4v9+3VeY= -k8s.io/client-go v0.28.2/go.mod h1:sMkApowspLuc7omj1FOSUxSoqjr+d5Q0Yc0LOFnYFJY= +k8s.io/api v0.28.3 h1:Gj1HtbSdB4P08C8rs9AR94MfSGpRhJgsS+GF9V26xMM= +k8s.io/api v0.28.3/go.mod h1:MRCV/jr1dW87/qJnZ57U5Pak65LGmQVkKTzf3AtKFHc= +k8s.io/apimachinery v0.28.3 h1:B1wYx8txOaCQG0HmYF6nbpU8dg6HvA06x5tEffvOe7A= +k8s.io/apimachinery v0.28.3/go.mod h1:uQTKmIqs+rAYaq+DFaoD2X7pcjLOqbQX2AOiO0nIpb8= +k8s.io/client-go v0.28.3 h1:2OqNb72ZuTZPKCl+4gTKvqao0AMOl9f3o2ijbAj3LI4= +k8s.io/client-go v0.28.3/go.mod h1:LTykbBp9gsA7SwqirlCXBWtK0guzfhpoW4qSm7i9dxo= k8s.io/klog/v2 v2.100.1 h1:7WCHKK6K8fNhTqfBhISHQ97KrnJNFZMcQvKp7gP/tmg= k8s.io/klog/v2 v2.100.1/go.mod h1:y1WjHnz7Dj687irZUWR/WLkLc5N1YHtjLdmgWjndZn0= k8s.io/kube-openapi v0.0.0-20231010175941-2dd684a91f00 h1:aVUu9fTY98ivBPKR9Y5w/AuzbMm96cd3YHRTU83I780= diff --git a/main.go b/main.go index 7f3ce1d..823a287 100644 --- a/main.go +++ b/main.go @@ -5,6 +5,9 @@ import ( "encoding/json" "flag" "fmt" + "github.com/cenkalti/backoff/v4" + mapset "github.com/deckarep/golang-set/v2" + "github.com/panjf2000/ants/v2" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/promhttp" "github.com/rs/zerolog" @@ -24,14 +27,17 @@ import ( ) var ( - inCluster string - clientset *kubernetes.Clientset - sampleInterval int64 - adjustedPollingRate bool - adjustedTimeGauge prometheus.Gauge - deployType string - nodeSlice []string - nodeWaitGroup sync.WaitGroup + inCluster string + clientset *kubernetes.Clientset + sampleInterval int64 + sampleIntervalMill int64 + adjustedPollingRate bool + adjustedTimeGaugeVec *prometheus.GaugeVec + deployType string + nodeWaitGroup sync.WaitGroup + podGaugeVec *prometheus.GaugeVec + nodeSlice []string + maxNodeConcurrency int ) func getEnv(key, fallback string) string { @@ -102,34 +108,132 @@ type ephemeralStorageMetrics struct { } func getNodes() { + oldNodeSet := mapset.NewSet[string]() + nodeSet := mapset.NewSet[string]() nodeWaitGroup.Add(1) if deployType != "Deployment" { - nodeSlice = append(nodeSlice, getEnv("CURRENT_NODE_NAME", "")) + nodeSet.Add(getEnv("CURRENT_NODE_NAME", "")) nodeWaitGroup.Done() return } + + // Init Node slice + startNodes, _ := clientset.CoreV1().Nodes().List(context.TODO(), metav1.ListOptions{}) + for _, node := range startNodes.Items { + nodeSet.Add(node.Name) + } + nodeSlice = nodeSet.ToSlice() + nodeWaitGroup.Done() + + // Poll for new nodes and remove dead ones for { - nodeSlice = nil + oldNodeSet = nodeSet.Clone() + nodeSet.Clear() nodes, _ := clientset.CoreV1().Nodes().List(context.TODO(), metav1.ListOptions{}) for _, node := range nodes.Items { - nodeSlice = append(nodeSlice, node.Name) + nodeSet.Add(node.Name) } - nodeWaitGroup.Done() + deadNodesSet := nodeSet.Difference(oldNodeSet) + + // Evict Metrics where the node doesn't exist anymore. + for _, deadNode := range deadNodesSet.ToSlice() { + podGaugeVec.DeletePartialMatch(prometheus.Labels{"node_name": deadNode}) + log.Info().Msgf("Node %s does not exist. Removing from monitoring", deadNode) + } + + nodeSlice = nodeSet.ToSlice() time.Sleep(1 * time.Minute) - nodeWaitGroup.Add(1) } } +func queryNode(node string) ([]byte, error) { + var content []byte + + bo := backoff.NewExponentialBackOff() + bo.MaxInterval = 1 * time.Second + bo.MaxElapsedTime = time.Duration(sampleInterval) * time.Second + + operation := func() error { + var err error + content, err = clientset.RESTClient().Get().AbsPath(fmt.Sprintf("/api/v1/nodes/%s/proxy/stats/summary", node)).DoRaw(context.Background()) + if err != nil { + return err + } + return nil + } + + err := backoff.Retry(operation, bo) + + if err != nil { + log.Warn().Msg(fmt.Sprintf("Failed fetched proxy stats from node : %s", node)) + return nil, err + } + + return content, nil + +} + type CollectMetric struct { usedBytes float64 labels prometheus.Labels } -func getMetrics() { - nodeWaitGroup.Wait() +func setMetrics(node string) { + var labelsList []CollectMetric - opsQueued := prometheus.NewGaugeVec(prometheus.GaugeOpts{ + var data ephemeralStorageMetrics + + start := time.Now() + + content, err := queryNode(node) + if err != nil { + // Could not query node, skip + return + } + + log.Debug().Msg(fmt.Sprintf("Fetched proxy stats from node : %s", node)) + _ = json.Unmarshal(content, &data) + + nodeName := data.Node.NodeName + + for _, pod := range data.Pods { + podName := pod.PodRef.Name + podNamespace := pod.PodRef.Namespace + usedBytes := pod.EphemeralStorage.UsedBytes + if podNamespace == "" || (usedBytes == 0 && pod.EphemeralStorage.AvailableBytes == 0 && pod.EphemeralStorage.CapacityBytes == 0) { + log.Warn().Msg(fmt.Sprintf("pod %s/%s on %s has no metrics on its ephemeral storage usage", podName, podNamespace, nodeName)) + log.Warn().Msg(fmt.Sprintf("raw content %v", content)) + } + labelsList = append(labelsList, CollectMetric{ + usedBytes, + prometheus.Labels{"pod_namespace": podNamespace, + "pod_name": podName, "node_name": nodeName}, + }) + + log.Debug().Msg(fmt.Sprintf("pod %s/%s on %s with usedBytes: %f", podNamespace, podName, nodeName, usedBytes)) + } + + // Reset Metrics for this Node name to remove dead pods + podGaugeVec.DeletePartialMatch(prometheus.Labels{"node_name": nodeName}) + + // Push new metrics to exporter + for _, x := range labelsList { + podGaugeVec.With(x.labels).Set(x.usedBytes) + } + + adjustTime := sampleIntervalMill - time.Now().Sub(start).Milliseconds() + if adjustTime <= 0.0 { + log.Error().Msgf("Node %s: Polling Rate could not keep up. Adjust your Interval to a higher number than %d seconds", nodeName, sampleInterval) + } + if adjustedPollingRate { + adjustedTimeGaugeVec.With(prometheus.Labels{"node_name": nodeName}).Set(float64(adjustTime)) + } + +} + +func createMetrics() { + podGaugeVec = prometheus.NewGaugeVec(prometheus.GaugeOpts{ Name: "ephemeral_storage_pod_usage", Help: "Used to expose Ephemeral Storage metrics for pod in bytes ", }, @@ -143,75 +247,40 @@ func getMetrics() { }, ) - prometheus.MustRegister(opsQueued) - - log.Debug().Msg(fmt.Sprintf("getMetrics has been invoked")) + prometheus.MustRegister(podGaugeVec) if adjustedPollingRate { - adjustedTimeGauge = prometheus.NewGauge(prometheus.GaugeOpts{ + adjustedTimeGaugeVec = prometheus.NewGaugeVec(prometheus.GaugeOpts{ Name: "ephemeral_storage_adjusted_polling_rate", Help: "AdjustTime polling rate time after a Node API queries in Milliseconds", - }) + }, + []string{ + // Name of Node where pod is placed. + "node_name", + }) - prometheus.MustRegister(adjustedTimeGauge) + prometheus.MustRegister(adjustedTimeGaugeVec) } - sampleInterval, _ = strconv.ParseInt(getEnv("SCRAPE_INTERVAL", "15"), 10, 64) - sampleInterval = sampleInterval * 1000 - adjustTime := sampleInterval + +} + +func getMetrics() { + + nodeWaitGroup.Wait() + + p, _ := ants.NewPoolWithFunc(maxNodeConcurrency, func(node interface{}) { + setMetrics(node.(string)) + }, ants.WithExpiryDuration(time.Duration(sampleInterval)*time.Second)) + + defer p.Release() + for { - start := time.Now() for _, node := range nodeSlice { - - content, err := clientset.RESTClient().Get().AbsPath(fmt.Sprintf("/api/v1/nodes/%s/proxy/stats/summary", node)).DoRaw(context.Background()) - if err != nil { - log.Error().Msg(fmt.Sprintf("ErrorBadRequst : %s\n", err.Error())) - os.Exit(1) - } - log.Debug().Msg(fmt.Sprintf("Fetched proxy stats from node : %s", node)) - var data ephemeralStorageMetrics - _ = json.Unmarshal(content, &data) - - nodeName := data.Node.NodeName - for _, pod := range data.Pods { - podName := pod.PodRef.Name - podNamespace := pod.PodRef.Namespace - usedBytes := pod.EphemeralStorage.UsedBytes - if podNamespace == "" || (usedBytes == 0 && pod.EphemeralStorage.AvailableBytes == 0 && pod.EphemeralStorage.CapacityBytes == 0) { - log.Warn().Msg(fmt.Sprintf("pod %s/%s on %s has no metrics on its ephemeral storage usage", podName, podNamespace, nodeName)) - log.Warn().Msg(fmt.Sprintf("raw content %v", content)) - } - labelsList = append(labelsList, CollectMetric{ - usedBytes, - prometheus.Labels{"pod_namespace": podNamespace, - "pod_name": podName, "node_name": nodeName}, - }) - - log.Debug().Msg(fmt.Sprintf("pod %s/%s on %s with usedBytes: %f", podNamespace, podName, nodeName, usedBytes)) - } + _ = p.Invoke(node) } - // reset this metrics in the Exporter to flush dead pods - opsQueued.Reset() - // Push new metrics to exporter - for _, x := range labelsList { - opsQueued.With(x.labels).Set(x.usedBytes) - } - // Zero out collection list - labelsList = nil - - elapsedTime := time.Now().Sub(start).Milliseconds() - adjustTime = sampleInterval - elapsedTime - if adjustTime <= 0.0 { - log.Error().Msgf("Adjusted Poll Rate: %d ms", adjustTime) - log.Error().Msgf("Polling Rate could not keep up. Adjust your Interval to a higher number than %d", sampleInterval) - os.Exit(1) - } - if adjustedPollingRate { - adjustedTimeGauge.Set(float64(adjustTime)) - } - log.Debug().Msgf("Adjusted Poll Rate: %d ms", adjustTime) - time.Sleep(time.Duration(adjustTime) * time.Millisecond) + time.Sleep(time.Duration(sampleInterval) * time.Second) } } @@ -238,13 +307,18 @@ func setLogger() { func main() { flag.Parse() + port := getEnv("METRICS_PORT", "9100") + adjustedPollingRate, _ = strconv.ParseBool(getEnv("ADJUSTED_POLLING_RATE", "false")) + deployType = getEnv("DEPLOY_TYPE", "DaemonSet") + sampleInterval, _ = strconv.ParseInt(getEnv("SCRAPE_INTERVAL", "15"), 10, 64) + maxNodeConcurrency, _ = strconv.Atoi(getEnv("MAX_NODE_CONCURRENCY", "10")) + sampleIntervalMill = sampleInterval * 1000 + setLogger() getK8sClient() + createMetrics() go getNodes() go getMetrics() - port := getEnv("METRICS_PORT", "9100") - adjustedPollingRate, _ = strconv.ParseBool(getEnv("ADJUSTED_POLLING_RATE", "false")) - deployType = getEnv("DEPLOY_TYPE", "DaemonSet") if deployType != "Deployment" && deployType != "DaemonSet" { log.Error().Msg(fmt.Sprintf("deployType must be 'Deployment' or 'DaemonSet', got %s", deployType)) os.Exit(1) diff --git a/tests/e2e/deployment_test.go b/tests/e2e/deployment_test.go index 899b3ba..dca237c 100644 --- a/tests/e2e/deployment_test.go +++ b/tests/e2e/deployment_test.go @@ -110,7 +110,7 @@ func checkPrometheus(checkSlice []string) { func WatchPollingRate(pollRateUpper float64, pollingRateLower float64, timeout time.Duration) { status := 0 startTime := time.Now() - re := regexp.MustCompile(`ephemeral_storage_adjusted_polling_rate\s+(.+)`) + re := regexp.MustCompile(`ephemeral_storage_adjusted_polling_rate\{node_name="ephemeral-metrics-cluster-control-plane"}\s+(.+)`) for { elapsed := time.Since(startTime) if elapsed >= timeout { @@ -119,7 +119,7 @@ func WatchPollingRate(pollRateUpper float64, pollingRateLower float64, timeout t } output := requestPrometheusString() match := re.FindAllStringSubmatch(output, -1) - floatValue, _ := strconv.ParseFloat(match[2][1], 64) + floatValue, _ := strconv.ParseFloat(match[0][1], 64) if pollRateUpper >= floatValue && pollingRateLower <= floatValue { status = 1 break