From e78bf16e048d8f70f49b3f93f2240ab500cc0bad Mon Sep 17 00:00:00 2001
From: Parul Singh
Date: Thu, 5 Sep 2024 13:23:11 +0200
Subject: [PATCH] peaks-pipeline

Signed-off-by: Parul Singh
---
Review note: prometheus-query/Dockerfile, prometheus_query.py,
stress-ng/Dockerfile and stress-ng/stress-script.sh were edited after this
patch was generated; the blob ids on their "index" lines were not recomputed.

 benchmarks/peaks-pipeline/README.md           |  0
 benchmarks/peaks-pipeline/deploy-all.sh       |  0
 .../prometheus-query/Dockerfile               | 27 +++++++
 .../prometheus-query/prometheus-pvc.yaml      | 10 +++
 .../prometheus-query-configmap.yaml           | 20 +++++
 .../prometheus-query-deployment.yaml          | 66 +++++++++++++++
 .../prometheus-query/prometheus_query.py      | 81 +++++++++++++++++++
 .../peaks-pipeline/stress-ng/Dockerfile       |  5 ++
 .../stress-ng/stress-ng-configmap.yaml        |  9 +++
 .../stress-ng/stress-ng-daemonset.yaml        | 45 +++++++++++
 .../peaks-pipeline/stress-ng/stress-script.sh | 22 +++++
 .../training/ training_script.py              |  0
 .../training/Dockerfile-training              |  0
 .../training/training-configmap.yaml          |  0
 .../training/training-deployment.yaml         |  0
 .../peaks-pipeline/training/training-pvc.yaml |  0
 16 files changed, 285 insertions(+)
 create mode 100644 benchmarks/peaks-pipeline/README.md
 create mode 100644 benchmarks/peaks-pipeline/deploy-all.sh
 create mode 100644 benchmarks/peaks-pipeline/prometheus-query/Dockerfile
 create mode 100644 benchmarks/peaks-pipeline/prometheus-query/prometheus-pvc.yaml
 create mode 100644 benchmarks/peaks-pipeline/prometheus-query/prometheus-query-configmap.yaml
 create mode 100644 benchmarks/peaks-pipeline/prometheus-query/prometheus-query-deployment.yaml
 create mode 100644 benchmarks/peaks-pipeline/prometheus-query/prometheus_query.py
 create mode 100644 benchmarks/peaks-pipeline/stress-ng/Dockerfile
 create mode 100644 benchmarks/peaks-pipeline/stress-ng/stress-ng-configmap.yaml
 create mode 100644 benchmarks/peaks-pipeline/stress-ng/stress-ng-daemonset.yaml
 create mode 100644 benchmarks/peaks-pipeline/stress-ng/stress-script.sh
 create mode 100644 benchmarks/peaks-pipeline/training/ training_script.py
 create mode 100644 benchmarks/peaks-pipeline/training/Dockerfile-training
 create mode 100644 benchmarks/peaks-pipeline/training/training-configmap.yaml
 create mode 100644 benchmarks/peaks-pipeline/training/training-deployment.yaml
 create mode 100644 benchmarks/peaks-pipeline/training/training-pvc.yaml

diff --git a/benchmarks/peaks-pipeline/README.md b/benchmarks/peaks-pipeline/README.md
new file mode 100644
index 00000000..e69de29b
diff --git a/benchmarks/peaks-pipeline/deploy-all.sh b/benchmarks/peaks-pipeline/deploy-all.sh
new file mode 100644
index 00000000..e69de29b
diff --git a/benchmarks/peaks-pipeline/prometheus-query/Dockerfile b/benchmarks/peaks-pipeline/prometheus-query/Dockerfile
new file mode 100644
index 00000000..f0d0b015
--- /dev/null
+++ b/benchmarks/peaks-pipeline/prometheus-query/Dockerfile
@@ -0,0 +1,27 @@
+# Use an official Python runtime as a parent image
+FROM python:3.9-alpine
+
+# Set the working directory in the container
+WORKDIR /app
+
+# Install necessary Python libraries first so this layer stays cached when only
+# the script changes; --no-cache-dir keeps pip's download cache out of the image
+RUN pip install --no-cache-dir requests pandas
+
+# Copy the Python script into the container
+COPY prometheus_query.py /app/
+
+# Set environment variables for configurable properties (these will be overridden by Kubernetes ConfigMap)
+ENV PROMETHEUS_URL="http://localhost:9090/api/v1/query_range"
+ENV METRICS_LIST='kepler_node_dram_joules_total,kepler_node_other_joules_total,kepler_node_package_joules_total,kepler_node_platform_joules_total,node_cpu_seconds_total'
+# Keep this under the /mnt/pvc volume declared below so the CSV files persist
+ENV CSV_DIRECTORY="/mnt/pvc/data"
+ENV QUERY_INTERVAL="60"
+ENV START_TIME="2024-09-05T10:50:00Z"
+ENV END_TIME="2024-09-05T11:56:00Z"
+
+# Mount point for the PVC that receives the generated CSV files
+VOLUME ["/mnt/pvc"]
+
+# Run the Python script to query Prometheus
+CMD ["python", "prometheus_query.py"]
diff --git a/benchmarks/peaks-pipeline/prometheus-query/prometheus-pvc.yaml b/benchmarks/peaks-pipeline/prometheus-query/prometheus-pvc.yaml
new file mode 100644
index 00000000..e9f5d8a5
--- /dev/null
+++ b/benchmarks/peaks-pipeline/prometheus-query/prometheus-pvc.yaml
@@ -0,0 +1,10 @@
+apiVersion: v1
+kind: PersistentVolumeClaim
+metadata:
+  name: prometheus-pvc
+spec:
+  accessModes:
+    - ReadWriteOnce
+  resources:
+    requests:
+      storage: 10Gi
diff --git a/benchmarks/peaks-pipeline/prometheus-query/prometheus-query-configmap.yaml b/benchmarks/peaks-pipeline/prometheus-query/prometheus-query-configmap.yaml
new file mode 100644
index 00000000..478b9e14
--- /dev/null
+++ b/benchmarks/peaks-pipeline/prometheus-query/prometheus-query-configmap.yaml
@@ -0,0 +1,20 @@
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: prometheus-query-config
+data:
+  # Prometheus server URL
+  PROMETHEUS_URL: "http://prometheus-server:9090/api/v1/query_range"
+
+  # Start and end times for querying Prometheus
+  START_TIME: "2024-09-04T00:00:00Z"
+  END_TIME: "2024-09-04T12:00:00Z"
+
+  # Directory to store the CSV files (mounted from PVC)
+  CSV_DIRECTORY: "/mnt/pvc/data"
+
+  # List of metrics to query, as a comma-separated string
+  METRICS_LIST: 'kepler_node_dram_joules_total,kepler_node_other_joules_total,kepler_node_package_joules_total,kepler_node_platform_joules_total,node_cpu_seconds_total'
+
+  # Optional: Query interval (time between queries in seconds)
+  QUERY_INTERVAL: "60"
diff --git a/benchmarks/peaks-pipeline/prometheus-query/prometheus-query-deployment.yaml b/benchmarks/peaks-pipeline/prometheus-query/prometheus-query-deployment.yaml
new file mode 100644
index 00000000..a368455a
--- /dev/null
+++ b/benchmarks/peaks-pipeline/prometheus-query/prometheus-query-deployment.yaml
@@ -0,0 +1,66 @@
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: prometheus-query-deployment
+  labels:
+    app: prometheus-query
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: prometheus-query
+  template:
+    metadata:
+      labels:
+        app: prometheus-query
+    spec:
+      containers:
+      - name: prometheus-query-client
+        image: quay.io/husky_parul/prometheus-query-client:latest
+        imagePullPolicy: Always
+        volumeMounts:
+        - mountPath: "/mnt/pvc"
+          name: prometheus-pvc-volume
+        # Inject the environment variables from the ConfigMap
+        env:
+        - name: PROMETHEUS_URL
+          valueFrom:
+            configMapKeyRef:
+              name: prometheus-query-config
+              key: PROMETHEUS_URL
+        - name: CSV_DIRECTORY
+          valueFrom:
+            configMapKeyRef:
+              name: prometheus-query-config
+              key: CSV_DIRECTORY
+        - name: START_TIME
+          valueFrom:
+            configMapKeyRef:
+              name: prometheus-query-config
+              key: START_TIME
+        - name: END_TIME
+          valueFrom:
+            configMapKeyRef:
+              name: prometheus-query-config
+              key: END_TIME
+        - name: QUERY_INTERVAL
+          valueFrom:
+            configMapKeyRef:
+              name: prometheus-query-config
+              key: QUERY_INTERVAL
+        - name: METRICS_LIST
+          valueFrom:
+            configMapKeyRef:
+              name: prometheus-query-config
+              key: METRICS_LIST
+        resources:
+          requests:
+            memory: "128Mi"
+            cpu: "100m"
+          limits:
+            memory: "512Mi"
+            cpu: "500m"
+      volumes:
+      - name: prometheus-pvc-volume
+        persistentVolumeClaim:
+          claimName: prometheus-pvc
diff --git a/benchmarks/peaks-pipeline/prometheus-query/prometheus_query.py b/benchmarks/peaks-pipeline/prometheus-query/prometheus_query.py
new file mode 100644
index 00000000..7f6ac882
--- /dev/null
+++ b/benchmarks/peaks-pipeline/prometheus-query/prometheus_query.py
@@ -0,0 +1,81 @@
+import os
+import requests
+import pandas as pd
+import time
+
+# Get configuration from environment variables
+PROMETHEUS_URL = os.getenv('PROMETHEUS_URL')
+CSV_DIRECTORY = os.getenv('CSV_DIRECTORY', '/mnt/pvc/data')
+START_TIME = os.getenv('START_TIME')
+END_TIME = os.getenv('END_TIME')
+QUERY_INTERVAL = int(os.getenv('QUERY_INTERVAL', 60))  # Default to 60 seconds
+
+# Read the metrics list from environment variable (comma-separated);
+# blank entries are dropped and a missing variable yields an empty list
+METRICS_LIST = [m.strip() for m in os.getenv('METRICS_LIST', '').split(',') if m.strip()]
+
+# Function to query Prometheus with a time range
+def query_prometheus(query, start_time, end_time):
+    try:
+        params = {
+            'query': query,
+            'start': start_time,
+            'end': end_time,
+            'step': '60s'  # Adjust step size as needed
+        }
+        response = requests.get(PROMETHEUS_URL, params=params, timeout=30)  # Do not hang forever on a stalled server
+        response.raise_for_status()
+        return response.json()['data']['result']
+    except Exception as e:
+        print(f"Error querying Prometheus: {e}")
+        return []
+
+# Function to save the results to a CSV file
+def save_to_csv(data, filename):
+    df = pd.DataFrame(data)
+    # The data directory may not exist yet on a freshly provisioned PVC
+    os.makedirs(CSV_DIRECTORY, exist_ok=True)
+    csv_file_path = os.path.join(CSV_DIRECTORY, filename)
+    df.to_csv(csv_file_path, index=False)
+    print(f"Data saved to {csv_file_path}")
+
+# Main function to query Prometheus for multiple metrics
+def main():
+    for metric in METRICS_LIST:
+        metric_name = metric.replace('(', '').replace(')', '').replace('[', '').replace(']', '').replace(',', '_')
+        print(f"Querying metric: {metric}")
+
+        # Query Prometheus for the current metric
+        result = query_prometheus(metric, START_TIME, END_TIME)
+
+        print(result)
+
+        # Check if the result contains data
+        if result:
+            # Flatten the metric labels and values for better CSV format
+            data = []
+            for item in result:
+                metric_labels = item['metric']
+                values = item['values']
+                for value in values:
+                    timestamp = value[0]
+                    metric_value = value[1]
+
+                    # Create a flat record containing metric labels, timestamp, and value
+                    flat_metric = {k: v for k, v in metric_labels.items()}
+                    flat_metric['timestamp'] = timestamp
+                    flat_metric['value'] = metric_value
+                    data.append(flat_metric)
+
+            # Save the result to a CSV file named after the metric
+            timestamp = time.strftime("%Y%m%d-%H%M%S")
+            filename = f"{metric_name}_data_{timestamp}.csv"
+            save_to_csv(data, filename)
+        else:
+            print(f"No data found for the metric: {metric}")
+
+        # Wait for the query interval before the next query
+        time.sleep(QUERY_INTERVAL)
+
+if __name__ == "__main__":
+    main()
diff --git a/benchmarks/peaks-pipeline/stress-ng/Dockerfile b/benchmarks/peaks-pipeline/stress-ng/Dockerfile
new file mode 100644
index 00000000..49a79fc1
--- /dev/null
+++ b/benchmarks/peaks-pipeline/stress-ng/Dockerfile
@@ -0,0 +1,5 @@
+FROM alpine:3.20
+RUN apk add --no-cache bash stress-ng
+COPY stress-script.sh /usr/local/bin/stress-script.sh
+RUN chmod +x /usr/local/bin/stress-script.sh
+ENTRYPOINT ["/usr/local/bin/stress-script.sh"]
diff --git a/benchmarks/peaks-pipeline/stress-ng/stress-ng-configmap.yaml b/benchmarks/peaks-pipeline/stress-ng/stress-ng-configmap.yaml
new file mode 100644
index 00000000..bd16ee21
--- /dev/null
+++ b/benchmarks/peaks-pipeline/stress-ng/stress-ng-configmap.yaml
@@ -0,0 +1,9 @@
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: stress-ng-config
+data:
+  # Configurable parameters for Stress-ng
+  MAX_CPU_LOAD: "100"
+  STEP: "10"
+  DURATION: "60"
\ No newline at end of file
diff --git a/benchmarks/peaks-pipeline/stress-ng/stress-ng-daemonset.yaml b/benchmarks/peaks-pipeline/stress-ng/stress-ng-daemonset.yaml
new file mode 100644
index 00000000..7efdb345
--- /dev/null
+++ b/benchmarks/peaks-pipeline/stress-ng/stress-ng-daemonset.yaml
@@ -0,0 +1,45 @@
+apiVersion: apps/v1
+kind: DaemonSet
+metadata:
+  name: stress-ng-daemonset
+  labels:
+    app: stress-ng
+spec:
+  selector:
+    matchLabels:
+      app: stress-ng
+  template:
+    metadata:
+      labels:
+        app: stress-ng
+    spec:
+      containers:
+      - name: stress-ng
+        image: quay.io/husky_parul/stress-ng-container:latest
+        args: []
+        env:
+        - name: MAX_CPU_LOAD
+          valueFrom:
+            configMapKeyRef:
+              name: stress-ng-config
+              key: MAX_CPU_LOAD
+        - name: STEP
+          valueFrom:
+            configMapKeyRef:
+              name: stress-ng-config
+              key: STEP
+        - name: DURATION
+          valueFrom:
+            configMapKeyRef:
+              name: stress-ng-config
+              key: DURATION
+        resources:
+          requests:
+            memory: "128Mi"
+            cpu: "100m"
+          limits:
+            memory: "512Mi"
+            cpu: "500m"
+      nodeSelector:
+        kubernetes.io/os: linux
+      restartPolicy: Always
diff --git a/benchmarks/peaks-pipeline/stress-ng/stress-script.sh b/benchmarks/peaks-pipeline/stress-ng/stress-script.sh
new file mode 100644
index 00000000..e5fe1649
--- /dev/null
+++ b/benchmarks/peaks-pipeline/stress-ng/stress-script.sh
@@ -0,0 +1,22 @@
+#!/bin/bash
+
+# Read configuration values from environment variables (provided via ConfigMap)
+MAX_CPU_LOAD=${MAX_CPU_LOAD:-100}  # Default to 100% if not provided
+STEP=${STEP:-10}  # Default to 10% increment if not provided
+DURATION=${DURATION:-60}  # Default to 60 seconds if not provided
+CPU_COUNT=$(nproc)  # Number of CPU cores available
+
+# Run the stress-ng workload in incremental steps
+for i in $(seq "$STEP" "$STEP" "$MAX_CPU_LOAD"); do
+    WORKERS=$((i * CPU_COUNT / 100))
+    # stress-ng treats "--cpu 0" as "one worker per online CPU", which would turn
+    # every low-percentage step on a small node into a full-load run; clamp to 1
+    if [ "$WORKERS" -lt 1 ]; then
+        WORKERS=1
+    fi
+    echo "Stressing CPU with $WORKERS workers for $DURATION seconds"
+    stress-ng --cpu "$WORKERS" --timeout "${DURATION}s"
+    sleep 10
+done
+
+echo "CPU stress test completed."
diff --git a/benchmarks/peaks-pipeline/training/ training_script.py b/benchmarks/peaks-pipeline/training/ training_script.py
new file mode 100644
index 00000000..e69de29b
diff --git a/benchmarks/peaks-pipeline/training/Dockerfile-training b/benchmarks/peaks-pipeline/training/Dockerfile-training
new file mode 100644
index 00000000..e69de29b
diff --git a/benchmarks/peaks-pipeline/training/training-configmap.yaml b/benchmarks/peaks-pipeline/training/training-configmap.yaml
new file mode 100644
index 00000000..e69de29b
diff --git a/benchmarks/peaks-pipeline/training/training-deployment.yaml b/benchmarks/peaks-pipeline/training/training-deployment.yaml
new file mode 100644
index 00000000..e69de29b
diff --git a/benchmarks/peaks-pipeline/training/training-pvc.yaml b/benchmarks/peaks-pipeline/training/training-pvc.yaml
new file mode 100644
index 00000000..e69de29b