From 91a24a5c0283caf74b16d9c8805fd0dc0188e1e6 Mon Sep 17 00:00:00 2001
From: Mikkel Oscar Lyderik Larsen <mikkel.larsen@zalando.de>
Date: Fri, 24 Oct 2025 16:09:00 +0200
Subject: [PATCH] Example of defining scheduled scaling for Prometheus

Signed-off-by: Mikkel Oscar Lyderik Larsen <mikkel.larsen@zalando.de>
---
 cluster/config-defaults.yaml                  |   7 +
 .../02-scheduled-scaling-vpa/cronjob.yaml     |  37 ++
 .../02-scheduled-scaling-vpa/rbac.yaml        |  44 ++
 .../zalando.org_verticalpodautoscalers.yaml   | 436 ++++++++++++++++++
 cluster/manifests/deletions.yaml              |  14 +
 .../manifests/prometheus/prometheus-vpa.yaml  |  18 +
 6 files changed, 556 insertions(+)
 create mode 100644 cluster/manifests/02-scheduled-scaling-vpa/cronjob.yaml
 create mode 100644 cluster/manifests/02-scheduled-scaling-vpa/rbac.yaml
 create mode 100644 cluster/manifests/02-scheduled-scaling-vpa/zalando.org_verticalpodautoscalers.yaml
diff --git a/cluster/config-defaults.yaml b/cluster/config-defaults.yaml
index 5a73bfd4e3..f0a4407554 100644
--- a/cluster/config-defaults.yaml
+++ b/cluster/config-defaults.yaml
@@ -534,6 +534,13 @@ prometheus_remote_max_backoff: "10s"
 # Comma-separated list of user ids allowed to access Prometheus UI
 prometheus_ui_users: ""
 
+# Scheduled scaling events for the Prometheus VPA: a comma-separated list of
+# <name>:<pre-start-minutes>:<cpu>:<memory> tuples; empty disables schedules.
+prometheus_scheduled_scaling_events: ""
+
+# Deploy the scheduled-scaling-vpa CronJob, RBAC and CRD ("true" to enable)
+scheduled_scaling_vpa_enabled: "false"
+
 metrics_service_cpu: "100m"
 metrics_service_mem_max: "4Gi"
 metrics_server_metric_resolution: "15s"
diff --git a/cluster/manifests/02-scheduled-scaling-vpa/cronjob.yaml b/cluster/manifests/02-scheduled-scaling-vpa/cronjob.yaml
new file mode 100644
index 0000000000..6057e17ef8
--- /dev/null
+++ b/cluster/manifests/02-scheduled-scaling-vpa/cronjob.yaml
@@ -0,0 +1,37 @@
+{{- if eq .Cluster.ConfigItems.scheduled_scaling_vpa_enabled "true" }}
+kind: CronJob  # periodically runs the scheduled-scaling-vpa image
+apiVersion: batch/v1
+metadata:
+  namespace: kube-system
+  name: scheduled-scaling-vpa
+  labels:
+    component: scheduled-scaling-vpa
+    application: kubernetes
+spec:
+  schedule: "*/5 * * * *"  # every five minutes
+  startingDeadlineSeconds: 30
+  concurrencyPolicy: Forbid  # never run two jobs at once
+  failedJobsHistoryLimit: 1
+  successfulJobsHistoryLimit: 1
+  jobTemplate:
+    metadata:
+      labels:
+        component: scheduled-scaling-vpa
+        application: kubernetes
+    spec:
+      backoffLimit: 1
+      activeDeadlineSeconds: 30  # same 30s budget as startingDeadlineSeconds
+      template:
+        metadata:
+          annotations:
+            logging/destination: "{{.Cluster.ConfigItems.log_destination_infra}}"
+          labels:
+            component: scheduled-scaling-vpa
+            application: kubernetes
+        spec:
+          restartPolicy: Never
+          serviceAccountName: scheduled-scaling-vpa  # defined in rbac.yaml
+          containers:
+          - image: container-registry-test.zalando.net/cloud-platform/scheduled-scaling-vpa:pr-1-4  # NOTE(review): looks like a PR build from the test registry — confirm before rollout
+            name: main
+{{- end }}
diff --git a/cluster/manifests/02-scheduled-scaling-vpa/rbac.yaml b/cluster/manifests/02-scheduled-scaling-vpa/rbac.yaml
new file mode 100644
index 0000000000..0e24fca47e
--- /dev/null
+++ b/cluster/manifests/02-scheduled-scaling-vpa/rbac.yaml
@@ -0,0 +1,44 @@
+{{- if eq .Cluster.ConfigItems.scheduled_scaling_vpa_enabled "true" }}
+kind: ServiceAccount  # identity used by the scheduled-scaling-vpa CronJob pods
+apiVersion: v1
+metadata:
+  namespace: kube-system
+  name: scheduled-scaling-vpa
+  labels:
+    component: scheduled-scaling-vpa
+    application: kubernetes
+---
+kind: ClusterRole
+apiVersion: rbac.authorization.k8s.io/v1
+metadata:
+  name: scheduled-scaling-vpa
+  labels:
+    component: scheduled-scaling-vpa
+    application: kubernetes
+rules:
+- apiGroups:  # read/write access to autoscaling.k8s.io VPAs (no delete)
+  - autoscaling.k8s.io
+  resources:
+  - verticalpodautoscalers
+  verbs: ["get", "list", "watch", "create", "update", "patch"]
+- apiGroups:  # read-only access to the zalando.org resources
+  - zalando.org
+  resources: ["verticalpodautoscalers", "scalingschedules", "clusterscalingschedules"]
+  verbs: ["get", "list", "watch"]
+---
+kind: ClusterRoleBinding  # grants the ClusterRole to the ServiceAccount cluster-wide
+apiVersion: rbac.authorization.k8s.io/v1
+metadata:
+  name: scheduled-scaling-vpa
+  labels:
+    component: scheduled-scaling-vpa
+    application: kubernetes
+subjects:
+- namespace: kube-system
+  name: scheduled-scaling-vpa
+  kind: ServiceAccount
+roleRef:
+  name: scheduled-scaling-vpa
+  kind: ClusterRole
+  apiGroup: rbac.authorization.k8s.io
+{{- end }}
diff --git a/cluster/manifests/02-scheduled-scaling-vpa/zalando.org_verticalpodautoscalers.yaml b/cluster/manifests/02-scheduled-scaling-vpa/zalando.org_verticalpodautoscalers.yaml
new file mode 100644
index 0000000000..647248cf3b
--- /dev/null
+++ b/cluster/manifests/02-scheduled-scaling-vpa/zalando.org_verticalpodautoscalers.yaml
@@ -0,0 +1,436 @@
+{{- if eq .Cluster.ConfigItems.scheduled_scaling_vpa_enabled "true" }}
+apiVersion: apiextensions.k8s.io/v1
+kind: CustomResourceDefinition
+metadata:
+  annotations:
+    controller-gen.kubebuilder.io/version: v0.17.2  # marks this CRD as controller-gen output
+  name: verticalpodautoscalers.zalando.org  # <plural>.<group>
+spec:
+  group: zalando.org  # separate API group from the autoscaling.k8s.io VPA
+  names:
+    categories:
+    - all
+    kind: VerticalPodAutoscaler
+    listKind: VerticalPodAutoscalerList
+    plural: verticalpodautoscalers
+    shortNames:
+    - zvpa
+    singular: verticalpodautoscaler
+  scope: Namespaced
+  versions:
+  - additionalPrinterColumns:
+    - jsonPath: .status.activeSchedules  # NOTE(review): status in this schema declares only conditions and recommendation — confirm activeSchedules is not pruned
+      name: Active Schedules
+      type: string
+    name: v1
+    schema:
+      openAPIV3Schema:
+        description: VerticalPodAutoscaler describes an application resource.
+        properties:
+          apiVersion:
+            description: |-
+              APIVersion defines the versioned schema of this representation of an object.
+              Servers should convert recognized schemas to the latest internal value, and
+              may reject unrecognized values.
+              More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources
+            type: string
+          kind:
+            description: |-
+              Kind is a string value representing the REST resource this object represents.
+              Servers may infer this from the endpoint the client submits requests to.
+              Cannot be updated.
+              In CamelCase.
+              More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds
+            type: string
+          metadata:
+            type: object
+          spec:
+            description: VerticalPodAutoscalerSpec is the spec part of the VerticalPodAutoscaler.
+            properties:
+              recommenders:
+                description: |-
+                  Recommender responsible for generating recommendation for this object.
+                  List should be empty (then the default recommender will generate the
+                  recommendation) or contain exactly one recommender.
+                items:
+                  description: |-
+                    VerticalPodAutoscalerRecommenderSelector points to a specific Vertical Pod Autoscaler recommender.
+                    In the future it might pass parameters to the recommender.
+                  properties:
+                    name:
+                      description: Name of the recommender responsible for generating
+                        recommendation for this object.
+                      type: string
+                  required:
+                  - name
+                  type: object
+                type: array
+              resourcePolicy:
+                description: |-
+                  Controls how the autoscaler computes recommended resources.
+                  The resource policy may be used to set constraints on the recommendations
+                  for individual containers.
+                  If any individual containers need to be excluded from getting the VPA recommendations, then
+                  it must be disabled explicitly by setting mode to "Off" under containerPolicies.
+                  If not specified, the autoscaler computes recommended resources for all containers in the pod,
+                  without additional constraints.
+                properties:
+                  containerPolicies:
+                    description: Per-container resource policies.
+                    items:
+                      description: |-
+                        ContainerResourcePolicy controls how autoscaler computes the recommended
+                        resources for a specific container.
+                      properties:
+                        containerName:
+                          description: |-
+                            Name of the container or DefaultContainerResourcePolicy, in which
+                            case the policy is used by the containers that don't have their own
+                            policy specified.
+                          type: string
+                        controlledResources:
+                          description: |-
+                            Specifies the type of recommendations that will be computed
+                            (and possibly applied) by VPA.
+                            If not specified, the default of [ResourceCPU, ResourceMemory] will be used.
+                          items:
+                            description: ResourceName is the name identifying various
+                              resources in a ResourceList.
+                            type: string
+                          type: array
+                        controlledValues:
+                          description: |-
+                            Specifies which resource values should be controlled.
+                            The default is "RequestsAndLimits".
+                          enum:
+                          - RequestsAndLimits
+                          - RequestsOnly
+                          type: string
+                        maxAllowed:
+                          additionalProperties:
+                            anyOf:
+                            - type: integer
+                            - type: string
+                            pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+                            x-kubernetes-int-or-string: true
+                          description: |-
+                            Specifies the maximum amount of resources that will be recommended
+                            for the container. The default is no maximum.
+                          type: object
+                        minAllowed:
+                          additionalProperties:
+                            anyOf:
+                            - type: integer
+                            - type: string
+                            pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+                            x-kubernetes-int-or-string: true
+                          description: |-
+                            Specifies the minimal amount of resources that will be recommended
+                            for the container. The default is no minimum.
+                          type: object
+                        mode:
+                          description: Whether autoscaler is enabled for the container.
+                            The default is "Auto".
+                          enum:
+                          - Auto
+                          - "Off"
+                          type: string
+                      type: object
+                    type: array
+                type: object
+              schedules:
+                items:
+                  properties:
+                    name:
+                      type: string
+                    namespace:
+                      type: string
+                    preStartMinutes:
+                      format: int64
+                      type: integer
+                    resourcePolicy:
+                      description: |-
+                        Controls how the autoscaler computes recommended resources.
+                        The resource policy may be used to set constraints on the recommendations
+                        for individual containers.
+                        If any individual containers need to be excluded from getting the VPA recommendations, then
+                        it must be disabled explicitly by setting mode to "Off" under containerPolicies.
+                        If not specified, the autoscaler computes recommended resources for all containers in the pod,
+                        without additional constraints.
+                      properties:
+                        containerPolicies:
+                          description: Per-container resource policies.
+                          items:
+                            description: |-
+                              ContainerResourcePolicy controls how autoscaler computes the recommended
+                              resources for a specific container.
+                            properties:
+                              containerName:
+                                description: |-
+                                  Name of the container or DefaultContainerResourcePolicy, in which
+                                  case the policy is used by the containers that don't have their own
+                                  policy specified.
+                                type: string
+                              controlledResources:
+                                description: |-
+                                  Specifies the type of recommendations that will be computed
+                                  (and possibly applied) by VPA.
+                                  If not specified, the default of [ResourceCPU, ResourceMemory] will be used.
+                                items:
+                                  description: ResourceName is the name identifying
+                                    various resources in a ResourceList.
+                                  type: string
+                                type: array
+                              controlledValues:
+                                description: |-
+                                  Specifies which resource values should be controlled.
+                                  The default is "RequestsAndLimits".
+                                enum:
+                                - RequestsAndLimits
+                                - RequestsOnly
+                                type: string
+                              maxAllowed:
+                                additionalProperties:
+                                  anyOf:
+                                  - type: integer
+                                  - type: string
+                                  pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+                                  x-kubernetes-int-or-string: true
+                                description: |-
+                                  Specifies the maximum amount of resources that will be recommended
+                                  for the container. The default is no maximum.
+                                type: object
+                              minAllowed:
+                                additionalProperties:
+                                  anyOf:
+                                  - type: integer
+                                  - type: string
+                                  pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+                                  x-kubernetes-int-or-string: true
+                                description: |-
+                                  Specifies the minimal amount of resources that will be recommended
+                                  for the container. The default is no minimum.
+                                type: object
+                              mode:
+                                description: Whether autoscaler is enabled for the
+                                  container. The default is "Auto".
+                                enum:
+                                - Auto
+                                - "Off"
+                                type: string
+                            type: object
+                          type: array
+                      type: object
+                  required:
+                  - name
+                  type: object
+                type: array
+              targetRef:
+                description: |-
+                  TargetRef points to the controller managing the set of pods for the
+                  autoscaler to control - e.g. Deployment, StatefulSet. VerticalPodAutoscaler
+                  can be targeted at controller implementing scale subresource (the pod set is
+                  retrieved from the controller's ScaleStatus) or some well known controllers
+                  (e.g. for DaemonSet the pod set is read from the controller's spec).
+                  If VerticalPodAutoscaler cannot use specified target it will report
+                  ConfigUnsupported condition.
+                  Note that VerticalPodAutoscaler does not require full implementation
+                  of scale subresource - it will not use it to modify the replica count.
+                  The only thing retrieved is a label selector matching pods grouped by
+                  the target resource.
+                properties:
+                  apiVersion:
+                    description: apiVersion is the API version of the referent
+                    type: string
+                  kind:
+                    description: 'kind is the kind of the referent; More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds'
+                    type: string
+                  name:
+                    description: 'name is the name of the referent; More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names'
+                    type: string
+                required:
+                - kind
+                - name
+                type: object
+                x-kubernetes-map-type: atomic
+              updatePolicy:
+                description: |-
+                  Describes the rules on how changes are applied to the pods.
+                  If not specified, all fields in the `PodUpdatePolicy` are set to their
+                  default values.
+                properties:
+                  evictionRequirements:
+                    description: |-
+                      EvictionRequirements is a list of EvictionRequirements that need to
+                      evaluate to true in order for a Pod to be evicted. If more than one
+                      EvictionRequirement is specified, all of them need to be fulfilled to allow eviction.
+                    items:
+                      description: |-
+                        EvictionRequirement defines a single condition which needs to be true in
+                        order to evict a Pod
+                      properties:
+                        changeRequirement:
+                          description: EvictionChangeRequirement refers to the relationship
+                            between the new target recommendation for a Pod and its
+                            current requests, what kind of change is necessary for
+                            the Pod to be evicted
+                          enum:
+                          - TargetHigherThanRequests
+                          - TargetLowerThanRequests
+                          type: string
+                        resources:
+                          description: |-
+                            Resources is a list of one or more resources that the condition applies
+                            to. If more than one resource is given, the EvictionRequirement is fulfilled
+                            if at least one resource meets `changeRequirement`.
+                          items:
+                            description: ResourceName is the name identifying various
+                              resources in a ResourceList.
+                            type: string
+                          type: array
+                      required:
+                      - changeRequirement
+                      - resources
+                      type: object
+                    type: array
+                  minReplicas:
+                    description: |-
+                      Minimal number of replicas which need to be alive for Updater to attempt
+                      pod eviction (pending other checks like PDB). Only positive values are
+                      allowed. Overrides global '--min-replicas' flag.
+                    format: int32
+                    type: integer
+                  updateMode:
+                    description: |-
+                      Controls when autoscaler applies changes to the pod resources.
+                      The default is 'Auto'.
+                    enum:
+                    - "Off"
+                    - Initial
+                    - Recreate
+                    - Auto
+                    type: string
+                type: object
+            required:
+            - targetRef
+            type: object
+          status:
+            description: VerticalPodAutoscalerStatus describes the runtime state of
+              the autoscaler.
+            properties:
+              conditions:
+                description: |-
+                  Conditions is the set of conditions required for this autoscaler to scale its target,
+                  and indicates whether or not those conditions are met.
+                items:
+                  description: |-
+                    VerticalPodAutoscalerCondition describes the state of
+                    a VerticalPodAutoscaler at a certain point.
+                  properties:
+                    lastTransitionTime:
+                      description: |-
+                        lastTransitionTime is the last time the condition transitioned from
+                        one status to another
+                      format: date-time
+                      type: string
+                    message:
+                      description: |-
+                        message is a human-readable explanation containing details about
+                        the transition
+                      type: string
+                    reason:
+                      description: reason is the reason for the condition's last transition.
+                      type: string
+                    status:
+                      description: status is the status of the condition (True, False,
+                        Unknown)
+                      type: string
+                    type:
+                      description: type describes the current condition
+                      type: string
+                  required:
+                  - status
+                  - type
+                  type: object
+                type: array
+              recommendation:
+                description: |-
+                  The most recently computed amount of resources recommended by the
+                  autoscaler for the controlled pods.
+                properties:
+                  containerRecommendations:
+                    description: Resources recommended by the autoscaler for each
+                      container.
+                    items:
+                      description: |-
+                        RecommendedContainerResources is the recommendation of resources computed by
+                        autoscaler for a specific container. Respects the container resource policy
+                        if present in the spec. In particular the recommendation is not produced for
+                        containers with `ContainerScalingMode` set to 'Off'.
+                      properties:
+                        containerName:
+                          description: Name of the container.
+                          type: string
+                        lowerBound:
+                          additionalProperties:
+                            anyOf:
+                            - type: integer
+                            - type: string
+                            pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+                            x-kubernetes-int-or-string: true
+                          description: |-
+                            Minimum recommended amount of resources. Observes ContainerResourcePolicy.
+                            This amount is not guaranteed to be sufficient for the application to operate in a stable way, however
+                            running with less resources is likely to have significant impact on performance/availability.
+                          type: object
+                        target:
+                          additionalProperties:
+                            anyOf:
+                            - type: integer
+                            - type: string
+                            pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+                            x-kubernetes-int-or-string: true
+                          description: Recommended amount of resources. Observes ContainerResourcePolicy.
+                          type: object
+                        uncappedTarget:
+                          additionalProperties:
+                            anyOf:
+                            - type: integer
+                            - type: string
+                            pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+                            x-kubernetes-int-or-string: true
+                          description: |-
+                            The most recent recommended resources target computed by the autoscaler
+                            for the controlled pods, based only on actual resource usage, not taking
+                            into account the ContainerResourcePolicy.
+                            May differ from the Recommendation if the actual resource usage causes
+                            the target to violate the ContainerResourcePolicy (lower than MinAllowed
+                            or higher that MaxAllowed).
+                            Used only as status indication, will not affect actual resource assignment.
+                          type: object
+                        upperBound:
+                          additionalProperties:
+                            anyOf:
+                            - type: integer
+                            - type: string
+                            pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+                            x-kubernetes-int-or-string: true
+                          description: |-
+                            Maximum recommended amount of resources. Observes ContainerResourcePolicy.
+                            Any resources allocated beyond this value are likely wasted. This value may be larger than the maximum
+                            amount of application is actually capable of consuming.
+                          type: object
+                      required:
+                      - target
+                      type: object
+                    type: array
+                type: object
+            type: object
+        required:
+        - spec
+        type: object
+    served: true
+    storage: true
+    subresources:
+      status: {}
+{{- end }}
diff --git a/cluster/manifests/deletions.yaml b/cluster/manifests/deletions.yaml
index 8c703ba723..3808ad9893 100644
--- a/cluster/manifests/deletions.yaml
+++ b/cluster/manifests/deletions.yaml
@@ -381,6 +381,20 @@ post_apply:
   kind: ServiceAccount
   namespace: kube-system
 {{- end}}
+{{- if ne .Cluster.ConfigItems.scheduled_scaling_vpa_enabled "true"}}
+- kind: CronJob  # remove everything 02-scheduled-scaling-vpa deploys while the feature is off
+  name: scheduled-scaling-vpa
+  namespace: kube-system
+- kind: ClusterRole
+  name: scheduled-scaling-vpa
+- kind: ClusterRoleBinding
+  name: scheduled-scaling-vpa
+- kind: ServiceAccount
+  name: scheduled-scaling-vpa
+  namespace: kube-system
+- kind: CustomResourceDefinition
+  name: verticalpodautoscalers.zalando.org
+{{- end}}
 {{- if eq .Cluster.Provider "zalando-eks"}}
 {{- if ne .Cluster.ConfigItems.aws_vpc_cni_custom_networking "true"}}
 - kind: ENIConfig
diff --git a/cluster/manifests/prometheus/prometheus-vpa.yaml b/cluster/manifests/prometheus/prometheus-vpa.yaml
index c6e25ff7d6..3953bd23c0 100644
--- a/cluster/manifests/prometheus/prometheus-vpa.yaml
+++ b/cluster/manifests/prometheus/prometheus-vpa.yaml
@@ -1,4 +1,8 @@
+{{- if and (eq .Cluster.ConfigItems.scheduled_scaling_vpa_enabled "true") (ne .Cluster.ConfigItems.prometheus_scheduled_scaling_events "") }}
+apiVersion: zalando.org/v1  # schedule-aware VPA kind; chosen only when the feature is on and events are configured
+{{- else }}
 apiVersion: autoscaling.k8s.io/v1
+{{- end }}
 kind: VerticalPodAutoscaler
 metadata:
   name: prometheus-vpa
@@ -19,3 +23,17 @@ spec:
       minAllowed:
         memory: {{.Cluster.ConfigItems.prometheus_mem_min}}
         cpu: {{.Cluster.ConfigItems.prometheus_cpu_min}}
+{{- if and (eq .Cluster.ConfigItems.scheduled_scaling_vpa_enabled "true") (ne .Cluster.ConfigItems.prometheus_scheduled_scaling_events "") }}
+  schedules:  # one entry per event in prometheus_scheduled_scaling_events
+  {{- range split .Cluster.ConfigItems.prometheus_scheduled_scaling_events "," }}
+  {{- $tuple := split . ":" }}{{/* <name>:<pre-start-minutes>:<cpu>:<memory> */}}
+  - name: "{{ index $tuple 0 }}"
+    preStartMinutes: {{ index $tuple 1 }}
+    resourcePolicy:
+      containerPolicies:
+      - containerName: prometheus
+        minAllowed:
+          cpu: {{ index $tuple 2 }}
+          memory: {{ index $tuple 3 }}
+  {{- end }}
+{{- end }}