From 5150565254eaab0e9d9c0f20a65bd44e25a04ffc Mon Sep 17 00:00:00 2001 From: Pete Wall Date: Wed, 30 Oct 2024 09:11:03 -0500 Subject: [PATCH 1/6] Add image scanner Signed-off-by: Pete Wall --- .github/workflows/security-scans.yml | 60 + .../examples/features/all-features/README.md | 63 + .../features/all-features/alloy-logs.alloy | 173 + .../features/all-features/alloy-metrics.alloy | 839 ++++ .../all-features/alloy-profiles.alloy | 846 ++++ .../all-features/alloy-receiver.alloy | 168 + .../all-features/alloy-singleton.alloy | 109 + .../features/all-features/output.yaml | 4418 +++++++++++++++++ .../features/all-features/values.yaml | 54 + 9 files changed, 6730 insertions(+) create mode 100644 .github/workflows/security-scans.yml create mode 100644 charts/k8s-monitoring/docs/examples/features/all-features/README.md create mode 100644 charts/k8s-monitoring/docs/examples/features/all-features/alloy-logs.alloy create mode 100644 charts/k8s-monitoring/docs/examples/features/all-features/alloy-metrics.alloy create mode 100644 charts/k8s-monitoring/docs/examples/features/all-features/alloy-profiles.alloy create mode 100644 charts/k8s-monitoring/docs/examples/features/all-features/alloy-receiver.alloy create mode 100644 charts/k8s-monitoring/docs/examples/features/all-features/alloy-singleton.alloy create mode 100644 charts/k8s-monitoring/docs/examples/features/all-features/output.yaml create mode 100644 charts/k8s-monitoring/docs/examples/features/all-features/values.yaml diff --git a/.github/workflows/security-scans.yml b/.github/workflows/security-scans.yml new file mode 100644 index 000000000..24f9daa2d --- /dev/null +++ b/.github/workflows/security-scans.yml @@ -0,0 +1,60 @@ +--- +name: Security Scans + +on: + push: + branches: ["main"] + paths: + - 'charts/k8s-monitoring/docs/examples/features/all-features/output.yaml' + pull_request: + paths: + - 'charts/k8s-monitoring/docs/examples/features/all-features/output.yaml' + workflow_dispatch: + +jobs: + list-container-images: + name: List Container Images + runs-on: ubuntu-latest + outputs: + images: ${{ steps.list_images.outputs.images }} + steps: + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: '3.9' + check-latest: true + + - name: Install yq + run: pip install yq + + - name: List Container Images + id: list_images + run: | + file=charts/k8s-monitoring/docs/examples/features/all-features/output.yaml + yq -r -o json '. | select(.kind=="DaemonSet") | .spec.template.spec.containers[].image' "${file}" > images.txt + yq -r -o json '. | select(.kind=="Deployment") | .spec.template.spec.containers[].image' "${file}" > images.txt + yq -r -o json '. | select(.kind=="Job") | .spec.template.spec.containers[].image' "${file}" > images.txt + yq -r -o json '. | select(.kind=="Pod") | .spec.containers[].image' "${file}" > images.txt + yq -r -o json '. | select(.kind=="StatefulSet") | .spec.template.spec.containers[].image' "${file}" > images.txt + echo "images=$(cat images.txt | sort --unique | jq --raw-input --slurp --compact-output 'split("\n") | map(select(. 
!= ""))')" >> "${GITHUB_OUTPUT}" + + scan-container-images: + name: Scan Container Images + needs: list-container-images + runs-on: ubuntu-latest + strategy: + matrix: + image: ${{ fromJson(needs.list-container-images.outputs.images) }} + fail-fast: false + steps: + - name: Run Trivy + uses: aquasecurity/trivy-action + with: + image-ref: ${{ matrix.image }} + format: sarif + output: trivy-results.sarif + + - name: Upload Trivy scan results to GitHub Security tab + uses: github/codeql-action/upload-sarif@v3 + with: + sarif_file: trivy-results.sarif diff --git a/charts/k8s-monitoring/docs/examples/features/all-features/README.md b/charts/k8s-monitoring/docs/examples/features/all-features/README.md new file mode 100644 index 000000000..0f956f84b --- /dev/null +++ b/charts/k8s-monitoring/docs/examples/features/all-features/README.md @@ -0,0 +1,63 @@ + +# Example: features/all-features/values.yaml + +## Values + +```yaml +--- +cluster: + name: all-features-cluster + +destinations: + - name: otlpGateway + type: otlp + url: https://otlp.example.com:4317/v1/traces + auth: + type: basic + username: "my-username" + password: "my-password" + metrics: { enabled: true } + logs: { enabled: true } + traces: { enabled: true } + - name: pyroscope + type: pyroscope + url: http://pyroscope.example.com + +# Features +clusterMetrics: + enabled: true + kepler: { enabled: true } +clusterEvents: { enabled: true } + +podLogs: { enabled: true } +applicationObservability: + enabled: true + receivers: + grpc: + enabled: true +annotationAutodiscovery: { enabled: true } +prometheusOperatorObjects: { enabled: true } +profiling: { enabled: true } +integrations: + alloy: + instances: + - name: alloy + +# Collectors +alloy-metrics: { enabled: true } + +alloy-logs: { enabled: true } + +alloy-singleton: { enabled: true } +alloy-receiver: + enabled: true + alloy: + extraPorts: + - name: otlp-grpc + port: 4317 + targetPort: 4317 + protocol: TCP +alloy-profiles: { enabled: true }``` diff --git a/charts/k8s-monitoring/docs/examples/features/all-features/alloy-logs.alloy b/charts/k8s-monitoring/docs/examples/features/all-features/alloy-logs.alloy new file mode 100644 index 000000000..0a02fb28c --- /dev/null +++ b/charts/k8s-monitoring/docs/examples/features/all-features/alloy-logs.alloy @@ -0,0 +1,173 @@ +// Destination: otlpGateway (otlp) +otelcol.receiver.prometheus "otlpgateway" { + output { + metrics = [otelcol.processor.transform.otlpgateway.input] + } +} +otelcol.receiver.loki "otlpgateway" { + output { + logs = [otelcol.processor.transform.otlpgateway.input] + } +} +otelcol.auth.basic "otlpgateway" { + username = nonsensitive(remote.kubernetes.secret.otlpgateway.data["username"]) + password = remote.kubernetes.secret.otlpgateway.data["password"] +} + +otelcol.processor.transform "otlpgateway" { + error_mode = "ignore" + metric_statements { + context = "resource" + statements = ["set(attributes[\"k8s.cluster.name\"], \"all-features-cluster\") where attributes[\"k8s.cluster.name\"] == nil"] + } + log_statements { + context = "resource" + statements = ["set(attributes[\"k8s.cluster.name\"], \"all-features-cluster\") where attributes[\"k8s.cluster.name\"] == nil"] + } + trace_statements { + context = "resource" + statements = ["set(attributes[\"k8s.cluster.name\"], \"all-features-cluster\") where attributes[\"k8s.cluster.name\"] == nil"] + } + + output { + metrics = [otelcol.exporter.otlp.otlpgateway.input] + logs = [otelcol.exporter.otlp.otlpgateway.input] + traces = [otelcol.exporter.otlp.otlpgateway.input] + } +} 
+otelcol.exporter.otlp "otlpgateway" { + client { + endpoint = "https://otlp.example.com:4317/v1/traces" + headers = { + "X-Scope-OrgID" = nonsensitive(remote.kubernetes.secret.otlpgateway.data["tenantId"]), + } + tls { + insecure = false + insecure_skip_verify = false + ca_pem = nonsensitive(remote.kubernetes.secret.otlpgateway.data["ca"]) + cert_pem = nonsensitive(remote.kubernetes.secret.otlpgateway.data["cert"]) + key_pem = remote.kubernetes.secret.otlpgateway.data["key"] + } + } +} + +remote.kubernetes.secret "otlpgateway" { + name = "otlpgateway-ko-k8s-monitoring" + namespace = "default" +} + +// Feature: Pod Logs +declare "pod_logs" { + argument "logs_destinations" { + comment = "Must be a list of log destinations where collected logs should be forwarded to" + } + + discovery.relabel "filtered_pods" { + targets = discovery.kubernetes.pods.targets + rule { + source_labels = ["__meta_kubernetes_namespace"] + action = "replace" + target_label = "namespace" + } + rule { + source_labels = ["__meta_kubernetes_pod_name"] + action = "replace" + target_label = "pod" + } + rule { + source_labels = ["__meta_kubernetes_pod_container_name"] + action = "replace" + target_label = "container" + } + rule { + source_labels = ["__meta_kubernetes_namespace", "__meta_kubernetes_pod_container_name"] + separator = "/" + action = "replace" + replacement = "$1" + target_label = "job" + } + + // set the container runtime as a label + rule { + action = "replace" + source_labels = ["__meta_kubernetes_pod_container_id"] + regex = "^(\\S+):\\/\\/.+$" + replacement = "$1" + target_label = "tmp_container_runtime" + } + } + + discovery.kubernetes "pods" { + role = "pod" + selectors { + role = "pod" + field = "spec.nodeName=" + env("HOSTNAME") + } + } + + discovery.relabel "filtered_pods_with_paths" { + targets = discovery.relabel.filtered_pods.output + + rule { + source_labels = ["__meta_kubernetes_pod_uid", "__meta_kubernetes_pod_container_name"] + separator = "/" + action = "replace" + replacement = "/var/log/pods/*$1/*.log" + target_label = "__path__" + } + } + + local.file_match "pod_logs" { + path_targets = discovery.relabel.filtered_pods_with_paths.output + } + + loki.source.file "pod_logs" { + targets = local.file_match.pod_logs.targets + forward_to = [loki.process.pod_logs.receiver] + } + + loki.process "pod_logs" { + stage.match { + selector = "{tmp_container_runtime=~\"containerd|cri-o\"}" + // the cri processing stage extracts the following k/v pairs: log, stream, time, flags + stage.cri {} + + // Set the extract flags and stream values as labels + stage.labels { + values = { + flags = "", + stream = "", + } + } + } + + stage.match { + selector = "{tmp_container_runtime=\"docker\"}" + // the docker processing stage extracts the following k/v pairs: log, stream, time + stage.docker {} + + // Set the extract stream value as a label + stage.labels { + values = { + stream = "", + } + } + } + + // Drop the filename label, since it's not really useful in the context of Kubernetes, where we already have cluster, + // namespace, pod, and container labels. Drop any structured metadata. Also drop the temporary + // container runtime label as it is no longer needed. 
+ stage.label_drop { + values = [ + "filename", + "tmp_container_runtime", + ] + } + forward_to = argument.logs_destinations.value + } +} +pod_logs "feature" { + logs_destinations = [ + otelcol.receiver.loki.otlpgateway.receiver, + ] +} diff --git a/charts/k8s-monitoring/docs/examples/features/all-features/alloy-metrics.alloy b/charts/k8s-monitoring/docs/examples/features/all-features/alloy-metrics.alloy new file mode 100644 index 000000000..fb5558e9d --- /dev/null +++ b/charts/k8s-monitoring/docs/examples/features/all-features/alloy-metrics.alloy @@ -0,0 +1,839 @@ +// Destination: otlpGateway (otlp) +otelcol.receiver.prometheus "otlpgateway" { + output { + metrics = [otelcol.processor.transform.otlpgateway.input] + } +} +otelcol.receiver.loki "otlpgateway" { + output { + logs = [otelcol.processor.transform.otlpgateway.input] + } +} +otelcol.auth.basic "otlpgateway" { + username = nonsensitive(remote.kubernetes.secret.otlpgateway.data["username"]) + password = remote.kubernetes.secret.otlpgateway.data["password"] +} + +otelcol.processor.transform "otlpgateway" { + error_mode = "ignore" + metric_statements { + context = "resource" + statements = ["set(attributes[\"k8s.cluster.name\"], \"all-features-cluster\") where attributes[\"k8s.cluster.name\"] == nil"] + } + log_statements { + context = "resource" + statements = ["set(attributes[\"k8s.cluster.name\"], \"all-features-cluster\") where attributes[\"k8s.cluster.name\"] == nil"] + } + trace_statements { + context = "resource" + statements = ["set(attributes[\"k8s.cluster.name\"], \"all-features-cluster\") where attributes[\"k8s.cluster.name\"] == nil"] + } + + output { + metrics = [otelcol.exporter.otlp.otlpgateway.input] + logs = [otelcol.exporter.otlp.otlpgateway.input] + traces = [otelcol.exporter.otlp.otlpgateway.input] + } +} +otelcol.exporter.otlp "otlpgateway" { + client { + endpoint = "https://otlp.example.com:4317/v1/traces" + headers = { + "X-Scope-OrgID" = nonsensitive(remote.kubernetes.secret.otlpgateway.data["tenantId"]), + } + tls { + insecure = false + insecure_skip_verify = false + ca_pem = nonsensitive(remote.kubernetes.secret.otlpgateway.data["ca"]) + cert_pem = nonsensitive(remote.kubernetes.secret.otlpgateway.data["cert"]) + key_pem = remote.kubernetes.secret.otlpgateway.data["key"] + } + } +} + +remote.kubernetes.secret "otlpgateway" { + name = "otlpgateway-ko-k8s-monitoring" + namespace = "default" +} + +// Feature: Annotation Autodiscovery +declare "annotation_autodiscovery" { + argument "metrics_destinations" { + comment = "Must be a list of metric destinations where collected metrics should be forwarded to" + } + + discovery.kubernetes "pods" { + role = "pod" + } + + discovery.relabel "annotation_autodiscovery_pods" { + targets = discovery.kubernetes.pods.targets + rule { + source_labels = ["__meta_kubernetes_pod_annotation_k8s_grafana_com_scrape"] + regex = "true" + action = "keep" + } + rule { + source_labels = ["__meta_kubernetes_pod_annotation_k8s_grafana_com_job"] + action = "replace" + target_label = "job" + } + rule { + source_labels = ["__meta_kubernetes_pod_annotation_k8s_grafana_com_instance"] + action = "replace" + target_label = "instance" + } + rule { + source_labels = ["__meta_kubernetes_pod_annotation_k8s_grafana_com_metrics_path"] + action = "replace" + target_label = "__metrics_path__" + } + + // Choose the pod port + // The discovery generates a target for each declared container port of the pod. 
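+    // A Pod can narrow this down with the k8s.grafana.com/metrics.portName annotation, e.g. "metrics".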
+ // If the metricsPortName annotation has value, keep only the target where the port name matches the one of the annotation. + rule { + source_labels = ["__meta_kubernetes_pod_container_port_name"] + target_label = "__tmp_port" + } + rule { + source_labels = ["__meta_kubernetes_pod_annotation_k8s_grafana_com_metrics_portName"] + regex = "(.+)" + target_label = "__tmp_port" + } + rule { + source_labels = ["__meta_kubernetes_pod_container_port_name"] + action = "keepequal" + target_label = "__tmp_port" + } + + // If the metrics port number annotation has a value, override the target address to use it, regardless whether it is + // one of the declared ports on that Pod. + rule { + source_labels = ["__meta_kubernetes_pod_annotation_k8s_grafana_com_metrics_portNumber", "__meta_kubernetes_pod_ip"] + regex = "(\\d+);(([A-Fa-f0-9]{1,4}::?){1,7}[A-Fa-f0-9]{1,4})" + replacement = "[$2]:$1" // IPv6 + target_label = "__address__" + } + rule { + source_labels = ["__meta_kubernetes_pod_annotation_k8s_grafana_com_metrics_portNumber", "__meta_kubernetes_pod_ip"] + regex = "(\\d+);((([0-9]+?)(\\.|$)){4})" // IPv4, takes priority over IPv6 when both exists + replacement = "$2:$1" + target_label = "__address__" + } + + rule { + source_labels = ["__meta_kubernetes_pod_annotation_k8s_grafana_com_metrics_scheme"] + action = "replace" + target_label = "__scheme__" + } + + rule { + source_labels = ["__meta_kubernetes_pod_annotation_k8s_grafana_com_metrics_scrapeInterval"] + action = "replace" + target_label = "__scrape_interval__" + } + } + + discovery.kubernetes "services" { + role = "service" + } + + discovery.relabel "annotation_autodiscovery_services" { + targets = discovery.kubernetes.services.targets + rule { + source_labels = ["__meta_kubernetes_service_annotation_k8s_grafana_com_scrape"] + regex = "true" + action = "keep" + } + rule { + source_labels = ["__meta_kubernetes_service_annotation_k8s_grafana_com_job"] + action = "replace" + target_label = "job" + } + rule { + source_labels = ["__meta_kubernetes_service_annotation_k8s_grafana_com_instance"] + action = "replace" + target_label = "instance" + } + rule { + source_labels = ["__meta_kubernetes_service_annotation_k8s_grafana_com_metrics_path"] + action = "replace" + target_label = "__metrics_path__" + } + + // Choose the service port + rule { + source_labels = ["__meta_kubernetes_service_port_name"] + target_label = "__tmp_port" + } + rule { + source_labels = ["__meta_kubernetes_service_annotation_k8s_grafana_com_metrics_portName"] + regex = "(.+)" + target_label = "__tmp_port" + } + rule { + source_labels = ["__meta_kubernetes_service_port_name"] + action = "keepequal" + target_label = "__tmp_port" + } + + rule { + source_labels = ["__meta_kubernetes_service_port_number"] + target_label = "__tmp_port" + } + rule { + source_labels = ["__meta_kubernetes_service_annotation_k8s_grafana_com_metrics_portNumber"] + regex = "(.+)" + target_label = "__tmp_port" + } + rule { + source_labels = ["__meta_kubernetes_service_port_number"] + action = "keepequal" + target_label = "__tmp_port" + } + + rule { + source_labels = ["__meta_kubernetes_service_annotation_k8s_grafana_com_metrics_scheme"] + action = "replace" + target_label = "__scheme__" + } + + rule { + source_labels = ["__meta_kubernetes_service_annotation_k8s_grafana_com_metrics_scrapeInterval"] + action = "replace" + target_label = "__scrape_interval__" + } + } + + discovery.relabel "annotation_autodiscovery_http" { + targets = concat(discovery.relabel.annotation_autodiscovery_pods.output, 
discovery.relabel.annotation_autodiscovery_services.output) + rule { + source_labels = ["__scheme__"] + regex = "https" + action = "drop" + } + } + + discovery.relabel "annotation_autodiscovery_https" { + targets = concat(discovery.relabel.annotation_autodiscovery_pods.output, discovery.relabel.annotation_autodiscovery_services.output) + rule { + source_labels = ["__scheme__"] + regex = "https" + action = "keep" + } + } + + prometheus.scrape "annotation_autodiscovery_http" { + targets = discovery.relabel.annotation_autodiscovery_http.output + scrape_interval = "60s" + honor_labels = true + bearer_token_file = "/var/run/secrets/kubernetes.io/serviceaccount/token" + clustering { + enabled = true + } + + forward_to = argument.metrics_destinations.value + } + + prometheus.scrape "annotation_autodiscovery_https" { + targets = discovery.relabel.annotation_autodiscovery_https.output + scrape_interval = "60s" + honor_labels = true + bearer_token_file = "/var/run/secrets/kubernetes.io/serviceaccount/token" + tls_config { + insecure_skip_verify = true + } + clustering { + enabled = true + } + + forward_to = argument.metrics_destinations.value + } +} +annotation_autodiscovery "feature" { + metrics_destinations = [ + otelcol.receiver.prometheus.otlpgateway.receiver, + ] +} + +// Feature: Cluster Metrics +declare "cluster_metrics" { + argument "metrics_destinations" { + comment = "Must be a list of metric destinations where collected metrics should be forwarded to" + } + import.git "kubernetes" { + repository = "https://github.com/grafana/alloy-modules.git" + revision = "main" + path = "modules/kubernetes/core/metrics.alloy" + pull_frequency = "15m" + } + + kubernetes.kubelet "scrape" { + clustering = true + keep_metrics = "up|container_cpu_usage_seconds_total|kubelet_certificate_manager_client_expiration_renew_errors|kubelet_certificate_manager_client_ttl_seconds|kubelet_certificate_manager_server_ttl_seconds|kubelet_cgroup_manager_duration_seconds_bucket|kubelet_cgroup_manager_duration_seconds_count|kubelet_node_config_error|kubelet_node_name|kubelet_pleg_relist_duration_seconds_bucket|kubelet_pleg_relist_duration_seconds_count|kubelet_pleg_relist_interval_seconds_bucket|kubelet_pod_start_duration_seconds_bucket|kubelet_pod_start_duration_seconds_count|kubelet_pod_worker_duration_seconds_bucket|kubelet_pod_worker_duration_seconds_count|kubelet_running_container_count|kubelet_running_containers|kubelet_running_pod_count|kubelet_running_pods|kubelet_runtime_operations_errors_total|kubelet_runtime_operations_total|kubelet_server_expiration_renew_errors|kubelet_volume_stats_available_bytes|kubelet_volume_stats_capacity_bytes|kubelet_volume_stats_inodes|kubelet_volume_stats_inodes_used|kubernetes_build_info|namespace_workload_pod|rest_client_requests_total|storage_operation_duration_seconds_count|storage_operation_errors_total|volume_manager_total_volumes" + scrape_interval = "60s" + max_cache_size = 100000 + forward_to = argument.metrics_destinations.value + } + + kubernetes.resources "scrape" { + clustering = true + job_label = "integrations/kubernetes/resources" + keep_metrics = "up|node_cpu_usage_seconds_total|node_memory_working_set_bytes" + scrape_interval = "60s" + max_cache_size = 100000 + forward_to = argument.metrics_destinations.value + } + + kubernetes.cadvisor "scrape" { + clustering = true + keep_metrics = 
"up|container_cpu_cfs_periods_total|container_cpu_cfs_throttled_periods_total|container_cpu_usage_seconds_total|container_fs_reads_bytes_total|container_fs_reads_total|container_fs_writes_bytes_total|container_fs_writes_total|container_memory_cache|container_memory_rss|container_memory_swap|container_memory_working_set_bytes|container_network_receive_bytes_total|container_network_receive_packets_dropped_total|container_network_receive_packets_total|container_network_transmit_bytes_total|container_network_transmit_packets_dropped_total|container_network_transmit_packets_total|machine_memory_bytes" + scrape_interval = "60s" + max_cache_size = 100000 + forward_to = [prometheus.relabel.cadvisor.receiver] + } + + prometheus.relabel "cadvisor" { + max_cache_size = 100000 + // Drop empty container labels, addressing https://github.com/google/cadvisor/issues/2688 + rule { + source_labels = ["__name__","container"] + separator = "@" + regex = "(container_cpu_.*|container_fs_.*|container_memory_.*)@" + action = "drop" + } + // Drop empty image labels, addressing https://github.com/google/cadvisor/issues/2688 + rule { + source_labels = ["__name__","image"] + separator = "@" + regex = "(container_cpu_.*|container_fs_.*|container_memory_.*|container_network_.*)@" + action = "drop" + } + // Normalizing unimportant labels (not deleting to continue satisfying