values.yaml

# This is a YAML-formatted file.
# Declare variables to be passed into your templates.

craned:
  enabled: true
  replicaCount: 1
  nameOverride: ""
  fullnameOverride: "craned"

cost-exporter:
  enabled: true
  replicaCount: 1
  nameOverride: ""
  fullnameOverride: "fadvisor-cost-exporter"

  # image for prometheus deployment
  image:
    repository: "docker.io/gocrane/fadvisor-cost-exporter"
    pullPolicy: Always
    tag: "v0.1.0"

  service:
    type: ClusterIP
    port: 8081

  tolerations:
    - key: "node-role.kubernetes.io/master"
      operator: "Exists"
      effect: "NoSchedule"

  extraArgs:
    v: 4

#prometheus-node-exporter:
#  enabled: true
#  hostRootFsMount: false
#  nameOverride: ""
#  fullnameOverride: "prometheus-node-exporter"
#
#kube-state-metrics:
#  enabled: true
#  nameOverride: ""
#  fullnameOverride: "kube-state-metrics"

prometheus:
  enabled: true
  nameOverride: ""
  fullnameOverride: "prometheus"

  kube-state-metrics:
    enabled: false

  prometheus-node-exporter:
    enabled: false

  pushgateway:
    ## If false, pushgateway will not be installed
    ##
    enabled: false
  alertmanager:
    ## If false, alertmanager will not be installed
    ##
    enabled: false
  server:
    ## Prometheus server container name
    ##
    enabled: true

    ## Use a ClusterRole (and ClusterRoleBinding)
    ## - If set to false - we define a RoleBinding in the defined namespaces ONLY
    ##
    ## NB: because we need a Role with nonResourceURL's ("/metrics") - you must get someone with Cluster-admin privileges to define this role for you, before running with this setting enabled.
    ##     This makes prometheus work - for users who do not have ClusterAdmin privs, but wants prometheus to operate on their own namespaces, instead of clusterwide.
    ##
    ## You MUST also set namespaces to the ones you have access to and want monitored by Prometheus.
    ##
    # useExistingClusterRoleName: nameofclusterrole

    ## namespaces to monitor (instead of monitoring all - clusterwide). Needed if you want to run without Cluster-admin privileges.
    # namespaces:
    #   - yournamespace

    name: server

    # sidecarContainers - add more containers to prometheus server
    # Key/Value where Key is the sidecar `- name: <Key>`
    # Example:
    #   sidecarContainers:
    #      webserver:
    #        image: nginx
    sidecarContainers: [ ]

    # sidecarTemplateValues - context to be used in template for sidecarContainers
    # Example:
    #   sidecarTemplateValues: *your-custom-globals
    #   sidecarContainers:
    #     webserver: |-
    #       {{ include "webserver-container-template" . }}
    # Template for `webserver-container-template` might looks like this:
    #   image: "{{ .Values.server.sidecarTemplateValues.repository }}:{{ .Values.server.sidecarTemplateValues.tag }}"
    #   ...
    #
    sidecarTemplateValues: { }

    ## Prometheus server container image
    ##
    image:
      repository: quay.io/prometheus/prometheus
      tag: v2.31.1
      pullPolicy: IfNotPresent

    ## prometheus server priorityClassName
    ##
    priorityClassName: ""

    ## EnableServiceLinks indicates whether information about services should be injected
    ## into pod's environment variables, matching the syntax of Docker links.
    ## WARNING: the field is unsupported and will be skipped in K8s prior to v1.13.0.
    ##
    enableServiceLinks: true

    ## The URL prefix at which the container can be accessed. Useful in the case the '-web.external-url' includes a slug
    ## so that the various internal URLs are still able to access as they are in the default case.
    ## (Optional)
    prefixURL: ""

    ## External URL which can access prometheus
    ## Maybe same with Ingress host name
    baseURL: ""

    ## Additional server container environment variables
    ##
    ## You specify this manually like you would a raw deployment manifest.
    ## This means you can bind in environment variables from secrets.
    ##
    ## e.g. static environment variable:
    ##  - name: DEMO_GREETING
    ##    value: "Hello from the environment"
    ##
    ## e.g. secret environment variable:
    ## - name: USERNAME
    ##   valueFrom:
    ##     secretKeyRef:
    ##       name: mysecret
    ##       key: username
    env: [ ]

    extraFlags:
      - web.enable-lifecycle
      ## web.enable-admin-api flag controls access to the administrative HTTP API which includes functionality such as
      ## deleting time series. This is disabled by default.
      # - web.enable-admin-api
      ##
      ## storage.tsdb.no-lockfile flag controls BD locking
      # - storage.tsdb.no-lockfile
      ##
      ## storage.tsdb.wal-compression flag enables compression of the write-ahead log (WAL)
      # - storage.tsdb.wal-compression

    ## Path to a configuration file on prometheus server container FS
    configPath: /etc/config/prometheus.yml

    ### The data directory used by prometheus to set --storage.tsdb.path
    ### When empty server.persistentVolume.mountPath is used instead
    storagePath: ""

    global:
      ## How frequently to scrape targets by default
      ##
      scrape_interval: 1m
      ## How long until a scrape request times out
      ##
      scrape_timeout: 10s
      ## How frequently to evaluate rules
      ##
      evaluation_interval: 1m
    ## https://prometheus.io/docs/prometheus/latest/configuration/configuration/#remote_write
    ##
    remoteWrite: [ ]
    ## https://prometheus.io/docs/prometheus/latest/configuration/configuration/#remote_read
    ##
    remoteRead: [ ]

    ## Custom HTTP headers for Liveness/Readiness/Startup Probe
    ##
    ## Useful for providing HTTP Basic Auth to healthchecks
    probeHeaders: [ ]

    ## Additional Prometheus server container arguments
    ##
    extraArgs: { }

    ## Additional InitContainers to initialize the pod
    ##
    extraInitContainers: [ ]

    ## Additional Prometheus server Volume mounts
    ##
    extraVolumeMounts: [ ]

    ## Additional Prometheus server Volumes
    ##
    extraVolumes: [ ]

    ## Additional Prometheus server hostPath mounts
    ##
    extraHostPathMounts: [ ]
      # - name: certs-dir
      #   mountPath: /etc/kubernetes/certs
    #   subPath: ""
    #   hostPath: /etc/kubernetes/certs
    #   readOnly: true

    extraConfigmapMounts: [ ]
      # - name: certs-configmap
      #   mountPath: /prometheus
    #   subPath: ""
    #   configMap: certs-configmap
    #   readOnly: true

    ## Additional Prometheus server Secret mounts
    # Defines additional mounts with secrets. Secrets must be manually created in the namespace.
    extraSecretMounts: [ ]
      # - name: secret-files
      #   mountPath: /etc/secrets
    #   subPath: ""
    #   secretName: prom-secret-files
    #   readOnly: true

    ## ConfigMap override where fullname is {{.Release.Name}}-{{.Values.server.configMapOverrideName}}
    ## Defining configMapOverrideName will cause templates/server-configmap.yaml
    ## to NOT generate a ConfigMap resource
    ##
    configMapOverrideName: ""

    ingress:
      ## If true, Prometheus server Ingress will be created
      ##
      enabled: false

      # For Kubernetes >= 1.18 you should specify the ingress-controller via the field ingressClassName
      # See https://kubernetes.io/blog/2020/04/02/improvements-to-the-ingress-api-in-kubernetes-1.18/#specifying-the-class-of-an-ingress
      # ingressClassName: nginx

      ## Prometheus server Ingress annotations
      ##
      annotations: { }
      #   kubernetes.io/ingress.class: nginx
      #   kubernetes.io/tls-acme: 'true'

      ## Prometheus server Ingress additional labels
      ##
      extraLabels: { }

      ## Prometheus server Ingress hostnames with optional path
      ## Must be provided if Ingress is enabled
      ##
      hosts: [ ]
      #   - prometheus.domain.com
      #   - domain.com/prometheus

      path: /

      # pathType is only for k8s >= 1.18
      pathType: Prefix

      ## Extra paths to prepend to every host configuration. This is useful when working with annotation based services.
      extraPaths: [ ]
      # - path: /*
      #   backend:
      #     serviceName: ssl-redirect
      #     servicePort: use-annotation

      ## Prometheus server Ingress TLS configuration
      ## Secrets must be manually created in the namespace
      ##
      tls: [ ]
      #   - secretName: prometheus-server-tls
      #     hosts:
      #       - prometheus.domain.com

    ## Server Deployment Strategy type
    # strategy:
    #   type: Recreate

    ## hostAliases allows adding entries to /etc/hosts inside the containers
    hostAliases: [ ]
    #   - ip: "127.0.0.1"
    #     hostnames:
    #       - "example.com"

    ## Node tolerations for server scheduling to nodes with taints
    ## Ref: https://kubernetes.io/docs/concepts/configuration/assign-pod-node/
    ##
    tolerations: [ ]
      # - key: "key"
    #   operator: "Equal|Exists"
    #   value: "value"
    #   effect: "NoSchedule|PreferNoSchedule|NoExecute(1.6 only)"

    ## Node labels for Prometheus server pod assignment
    ## Ref: https://kubernetes.io/docs/user-guide/node-selection/
    ##
    nodeSelector: { }

    ## Pod affinity
    ##
    affinity: { }

    ## PodDisruptionBudget settings
    ## ref: https://kubernetes.io/docs/concepts/workloads/pods/disruptions/
    ##
    podDisruptionBudget:
      enabled: false
      maxUnavailable: 1

    ## Use an alternate scheduler, e.g. "stork".
    ## ref: https://kubernetes.io/docs/tasks/administer-cluster/configure-multiple-schedulers/
    ##
    # schedulerName:

    persistentVolume:
      ## If true, Prometheus server will create/use a Persistent Volume Claim
      ## If false, use emptyDir
      ##
      enabled: true

      ## Prometheus server data Persistent Volume access modes
      ## Must match those of existing PV or dynamic provisioner
      ## Ref: http://kubernetes.io/docs/user-guide/persistent-volumes/
      ##
      accessModes:
        - ReadWriteOnce

      ## Prometheus server data Persistent Volume annotations
      ##
      annotations: { }

      ## Prometheus server data Persistent Volume existing claim name
      ## Requires server.persistentVolume.enabled: true
      ## If defined, PVC must be created manually before volume will be bound
      existingClaim: ""

      ## Prometheus server data Persistent Volume mount root path
      ##
      mountPath: /data

      ## Prometheus server data Persistent Volume size
      ##
      size: 10Gi

      ## Prometheus server data Persistent Volume Storage Class
      ## If defined, storageClassName: <storageClass>
      ## If set to "-", storageClassName: "", which disables dynamic provisioning
      ## If undefined (the default) or set to null, no storageClassName spec is
      ##   set, choosing the default provisioner.  (gp2 on AWS, standard on
      ##   GKE, AWS & OpenStack)
      ##
      # storageClass: "-"

      ## Prometheus server data Persistent Volume Binding Mode
      ## If defined, volumeBindingMode: <volumeBindingMode>
      ## If undefined (the default) or set to null, no volumeBindingMode spec is
      ##   set, choosing the default mode.
      ##
      # volumeBindingMode: ""

      ## Subdirectory of Prometheus server data Persistent Volume to mount
      ## Useful if the volume's root directory is not empty
      ##
      subPath: ""

      ## Persistent Volume Claim Selector
      ## Useful if Persistent Volumes have been provisioned in advance
      ## Ref: https://kubernetes.io/docs/concepts/storage/persistent-volumes/#selector
      ##
      # selector:
      #  matchLabels:
      #    release: "stable"
      #  matchExpressions:
      #    - { key: environment, operator: In, values: [ dev ] }

    emptyDir:
      ## Prometheus server emptyDir volume size limit
      ##
      sizeLimit: ""

    ## Annotations to be added to Prometheus server pods
    ##
    podAnnotations: { }
    # iam.amazonaws.com/role: prometheus

    ## Labels to be added to Prometheus server pods
    ##
    podLabels: { }

    ## Prometheus AlertManager configuration
    ##
    alertmanagers: [ ]

    ## Specify if a Pod Security Policy for node-exporter must be created
    ## Ref: https://kubernetes.io/docs/concepts/policy/pod-security-policy/
    ##
    podSecurityPolicy:
      annotations: { }
        ## Specify pod annotations
        ## Ref: https://kubernetes.io/docs/concepts/policy/pod-security-policy/#apparmor
        ## Ref: https://kubernetes.io/docs/concepts/policy/pod-security-policy/#seccomp
        ## Ref: https://kubernetes.io/docs/concepts/policy/pod-security-policy/#sysctl
        ##
      # seccomp.security.alpha.kubernetes.io/allowedProfileNames: '*'
      # seccomp.security.alpha.kubernetes.io/defaultProfileName: 'docker/default'
      # apparmor.security.beta.kubernetes.io/defaultProfileName: 'runtime/default'

    ## Use a StatefulSet if replicaCount needs to be greater than 1 (see below)
    ##
    replicaCount: 1

    ## Annotations to be added to deployment
    ##
    deploymentAnnotations: { }

    statefulSet:
      ## If true, use a statefulset instead of a deployment for pod management.
      ## This allows to scale replicas to more than 1 pod
      ##
      enabled: false

      annotations: { }
      labels: { }
      podManagementPolicy: OrderedReady

      ## Alertmanager headless service to use for the statefulset
      ##
      headless:
        annotations: { }
        labels: { }
        servicePort: 80
        ## Enable gRPC port on service to allow auto discovery with thanos-querier
        gRPC:
          enabled: false
          servicePort: 10901
          # nodePort: 10901

    ## Prometheus server readiness and liveness probe initial delay and timeout
    ## Ref: https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/
    ##
    tcpSocketProbeEnabled: false
    probeScheme: HTTP
    readinessProbeInitialDelay: 30
    readinessProbePeriodSeconds: 5
    readinessProbeTimeout: 4
    readinessProbeFailureThreshold: 3
    readinessProbeSuccessThreshold: 1
    livenessProbeInitialDelay: 30
    livenessProbePeriodSeconds: 15
    livenessProbeTimeout: 10
    livenessProbeFailureThreshold: 3
    livenessProbeSuccessThreshold: 1
    startupProbe:
      enabled: false
      periodSeconds: 5
      failureThreshold: 30
      timeoutSeconds: 10

    ## Prometheus server resource requests and limits
    ## Ref: http://kubernetes.io/docs/user-guide/compute-resources/
    ##
    resources: { }
      # limits:
      #   cpu: 500m
      #   memory: 512Mi
    # requests:
    #   cpu: 500m
    #   memory: 512Mi

    # Required for use in managed kubernetes clusters (such as AWS EKS) with custom CNI (such as calico),
    # because control-plane managed by AWS cannot communicate with pods' IP CIDR and admission webhooks are not working
    ##
    hostNetwork: false

    # When hostNetwork is enabled, you probably want to set this to ClusterFirstWithHostNet
    dnsPolicy: ClusterFirst

    ## Vertical Pod Autoscaler config
    ## Ref: https://github.com/kubernetes/autoscaler/tree/master/vertical-pod-autoscaler
    verticalAutoscaler:
      ## If true a VPA object will be created for the controller (either StatefulSet or Deployemnt, based on above configs)
      enabled: false
      # updateMode: "Auto"
      # containerPolicies:
      # - containerName: 'prometheus-server'

    # Custom DNS configuration to be added to prometheus server pods
    dnsConfig: { }
      # nameservers:
      #   - 1.2.3.4
      # searches:
      #   - ns1.svc.cluster-domain.example
      #   - my.dns.search.suffix
    # options:
    #   - name: ndots
    #     value: "2"
    #   - name: edns0
    ## Security context to be added to server pods
    ##
    securityContext:
      runAsUser: 65534
      runAsNonRoot: true
      runAsGroup: 65534
      fsGroup: 65534

    service:
      annotations: { }
      labels: { }
      clusterIP: ""

      ## List of IP addresses at which the Prometheus server service is available
      ## Ref: https://kubernetes.io/docs/user-guide/services/#external-ips
      ##
      externalIPs: [ ]

      loadBalancerIP: ""
      loadBalancerSourceRanges: [ ]
      servicePort: 80
      nodePort: 31111
      sessionAffinity: None
      type: NodePort

      ## Enable gRPC port on service to allow auto discovery with thanos-querier
      gRPC:
        enabled: false
        servicePort: 10901
        # nodePort: 10901

      ## If using a statefulSet (statefulSet.enabled=true), configure the
      ## service to connect to a specific replica to have a consistent view
      ## of the data.
      statefulsetReplica:
        enabled: false
        replica: 0

    ## Prometheus server pod termination grace period
    ##
    terminationGracePeriodSeconds: 300

    ## Prometheus data retention period (default if not specified is 15 days)
    ##
    retention: "15d"

  serverFiles:
    ## Alerts configuration
    ## Ref: https://prometheus.io/docs/prometheus/latest/configuration/alerting_rules/
    alerting_rules.yml: {}
    # groups:
    #   - name: Instances
    #     rules:
    #       - alert: InstanceDown
    #         expr: up == 0
    #         for: 5m
    #         labels:
    #           severity: page
    #         annotations:
    #           description: '{{ $labels.instance }} of job {{ $labels.job }} has been down for more than 5 minutes.'
    #           summary: 'Instance {{ $labels.instance }} down'
    ## DEPRECATED DEFAULT VALUE, unless explicitly naming your files, please use alerting_rules.yml
    alerts: {}

    ## Records configuration
    ## Ref: https://prometheus.io/docs/prometheus/latest/configuration/recording_rules/
    recording_rules.yml:
      groups:
        - name: costs.rules
          interval: 3600s
          rules:
            - expr: |
                sum(label_replace(irate(container_cpu_usage_seconds_total{container!="POD", container!="",image!=""}[1h]), "node", "$1", "instance",  "(.*)")) by (container, pod, node, namespace) * on (node) group_left() avg(avg_over_time(node_cpu_hourly_cost[1h])) by (node)
              record: namespace:container_cpu_usage_costs_hourly:sum_rate
            - expr: |
                sum(label_replace(avg_over_time(container_memory_working_set_bytes{container!="POD",container!="",image!=""}[1h]), "node", "$1", "instance",  "(.*)")) by (container, pod, node, namespace) / 1024.0 / 1024.0 / 1024.0 * on (node) group_left() avg(avg_over_time(node_ram_hourly_cost[1h])) by (node)
              record: namespace:container_memory_usage_costs_hourly:sum_rate
            - expr: |
                avg(avg_over_time(node_cpu_hourly_cost[1h])) by (node)
              record: node:node_cpu_hourly_cost:avg
            - expr: |
                avg(avg_over_time(node_ram_hourly_cost[1h])) by (node)
              record: node:node_ram_hourly_cost:avg
            - expr: |
                avg(avg_over_time(node_total_hourly_cost[1h])) by (node)
              record: node:node_total_hourly_cost:avg

    ## DEPRECATED DEFAULT VALUE, unless explicitly naming your files, please use recording_rules.yml
    rules: {}

  # adds additional scrape configs to prometheus.yml
  # must be a string so you have to add a | after extraScrapeConfigs:
  # example adds prometheus-blackbox-exporter scrape config
  extraScrapeConfigs: |-
    - job_name: 'node-exporter'
      kubernetes_sd_configs:
        - role: node
      relabel_configs:
        - source_labels: [ __address__ ]
          regex: '(.*):10250'
          replacement: '${1}:9100'
          target_label: __address__
          action: replace
        - action: labelmap
          regex: __meta_kubernetes_node_label_(.+)

    - job_name: 'kube-state-metrics'
      static_configs:
        - targets: [ 'kube-state-metrics.crane-system.svc.cluster.local:8080' ]

    # this is used to scrape cost exporter
    - job_name: "fadvisor-cost-exporter"
      honor_timestamps: true
      scheme: http
      metrics_path: /metrics
      static_configs:
        - targets: [ 'cost-exporter.crane-system.svc.cluster.local:8081' ]


grafana:
  enabled: true
  nameOverride: ""
  fullnameOverride: "grafana"
  service:
    enabled: true
    type: NodePort
    nodePort: 31112
    port: 80
    targetPort: 3000
    # targetPort: 4181 To be used with a proxy extraContainer
    annotations: { }
    labels: { }
    portName: service
  resources: { }
  #  limits:
  #    cpu: 100m
  #    memory: 128Mi
  #  requests:
  #    cpu: 100m
  #    memory: 128Mi

  ## Node labels for pod assignment
  ## ref: https://kubernetes.io/docs/user-guide/node-selection/
  #
  nodeSelector: {}

  ## Tolerations for pod assignment
  ## ref: https://kubernetes.io/docs/concepts/configuration/taint-and-toleration/
  ##
  tolerations: []
  datasources:
    datasources.yaml:
      apiVersion: 1
      datasources:
        - name: Prometheus
          type: prometheus
          url: http://crane-prometheus-server.crane-system.svc.cluster.local
          #      url: http://kvass-thanos.kube-system:9090
          access: proxy
          isDefault: true
  dashboardProviders:
    dashboardproviders.yaml:
      apiVersion: 1
      providers:
        - name: 'default'
          orgId: 1
          folder: ''
          type: file
          disableDeletion: false
          editable: true
          options:
            path: /var/lib/grafana/dashboards/default

scheduler:
  enabled: true
  nameOverride: ""
  fullnameOverride: "crane-scheduler"