Merge pull request #388 from sthaha/chore-hatch-fmt

chore: hatch fmt to format all source to 120 char width
sustainable-computing-io · Aug 21, 2024 · 195609c · 195609c
2 parents d14240f + 7cc18be
commit 195609c
Show file tree

Hide file tree

Showing 86 changed files with 1,191 additions and 586 deletions.
diff --git a/cmd/main.py b/cmd/main.py
@@ -3,6 +3,7 @@
 # -*- coding: utf-8 -*-
 import re
 import sys
+
 from kepler_model.cmd.main import run
 
 if __name__ == "__main__":

diff --git a/manifests/compose/dev/compose.yaml b/manifests/compose/dev/compose.yaml
@@ -0,0 +1,109 @@
+name: model-dev
+services:
+  kepler:
+    image: quay.io/sustainable_computing_io/kepler:latest
+    ports:
+      - 9100:9100
+    privileged: true
+    pid: host
+    networks:
+      - kepler-network
+      - model-server-network
+    volumes:
+      - type: bind
+        source: /proc
+        target: /proc
+      # - type: bind
+      #   source: /sys
+      #   target: /sys
+      - type: bind
+        source: ./kepler/etc/kepler
+        target: /etc/kepler
+
+      # NOTE: use the models from the local repo
+      - type: bind
+        source: ./kepler/var/lib/kepler/data/model_weight/
+        target: /var/lib/kepler/data
+      - type: bind
+        source: ./kepler/var/lib/kepler/data/cpus.yaml
+        target: /var/lib/kepler/data/cpus.yaml
+
+      # NOTE: for estimator - kepler communication
+      - kepler-tmp:/tmp
+
+    healthcheck:
+      test: curl -f http://localhost:9100/metrics || exit 1
+      interval: ${HEALTHCHECK_INTERVAL:-50s}
+      timeout: ${HEALTHCHECK_TIMEOUT:-30s}
+      retries: ${HEALTHCHECK_RETRIES:-3}
+      start_period: ${HEALTHCHECK_START_PERIOD:-1m}
+
+    cap_add:
+      - ALL
+
+    entrypoint:
+      - /usr/bin/bash
+      - -c
+
+    command:
+      - |
+        echo "Waiting for model-server";
+        until [[ "$(curl -s -o /dev/null -w "%{http_code}" http://model-server:8100/best-models)" -eq 200 ]]; do
+          echo " ... waiting for model-server";
+          sleep 1;
+        done;
+
+        echo "Waiting for estimator socket";
+        until [[ -e /tmp/estimator.sock ]]; do
+          echo " ... waiting for estimator socket";
+          sleep 1;
+        done;
+
+        echo "starting kepler";
+        set -x;
+        /usr/bin/kepler \
+          -address="0.0.0.0:9100" \
+          -v="8"
+
+  estimator:
+    command: [estimator,  -l, debug ]
+    build: &build
+      context: ../../../
+      dockerfile: dockerfiles/Dockerfile
+
+    volumes:
+      - type: bind
+        source: ./kepler/etc/kepler
+        target: /etc/kepler
+
+      - kepler-tmp:/tmp
+      - estimator-mnt:/mnt
+    networks:
+      - kepler-network
+      - model-server-network
+
+  model-server:
+    ports:
+      - '8100:8100'
+    command: [model-server,  -l, debug ]
+    build:
+      <<: *build
+    volumes:
+      - type: bind
+        source: ./kepler/etc/kepler
+        target: /etc/kepler
+      - model-server-mnt:/mnt
+    networks:
+      - model-server-network
+
+volumes:
+  # for the kepler - estimator socket
+  kepler-tmp:
+
+  # for downloading models
+  estimator-mnt:
+  model-server-mnt:
+
+networks:
+  kepler-network:
+  model-server-network:
diff --git a/manifests/compose/dev/kepler/etc/kepler/kepler.config/ENABLE_PROCESS_METRICS b/manifests/compose/dev/kepler/etc/kepler/kepler.config/ENABLE_PROCESS_METRICS
@@ -0,0 +1 @@
+true
diff --git a/manifests/compose/dev/kepler/etc/kepler/kepler.config/EXPOSE_ESTIMATED_IDLE_POWER_METRICS b/manifests/compose/dev/kepler/etc/kepler/kepler.config/EXPOSE_ESTIMATED_IDLE_POWER_METRICS
@@ -0,0 +1 @@
+true
diff --git a/manifests/compose/dev/kepler/etc/kepler/kepler.config/MODEL_CONFIG b/manifests/compose/dev/kepler/etc/kepler/kepler.config/MODEL_CONFIG
@@ -0,0 +1,4 @@
+NODE_TOTAL_ESTIMATOR=true
+NODE_TOTAL_INIT_URL=https://raw.githubusercontent.com/sustainable-computing-io/kepler-model-db/main/models/v0.7/specpower-0.7.11/acpi/AbsPower/BPFOnly/GradientBoostingRegressorTrainer_0.zip
+NODE_COMPONENTS_ESTIMATOR=true
+NODE_COMPONENTS_INIT_URL=https://raw.githubusercontent.com/sustainable-computing-io/kepler-model-db/main/models/v0.7/ec2-0.7.11/rapl-sysfs/AbsPower/BPFOnly/GradientBoostingRegressorTrainer_0.zip
diff --git a/manifests/compose/dev/kepler/etc/kepler/kepler.config/MODEL_SERVER_ENABLE b/manifests/compose/dev/kepler/etc/kepler/kepler.config/MODEL_SERVER_ENABLE
@@ -0,0 +1 @@
+true
diff --git a/manifests/compose/dev/kepler/etc/kepler/kepler.config/MODEL_SERVER_URL b/manifests/compose/dev/kepler/etc/kepler/kepler.config/MODEL_SERVER_URL
@@ -0,0 +1 @@
+http://model-server:8100
diff --git a/manifests/compose/dev/kepler/var/lib/kepler/data/cpus.yaml b/manifests/compose/dev/kepler/var/lib/kepler/data/cpus.yaml
@@ -0,0 +1,198 @@
+##########
+# CPUS - used to lookup uarch and channels by family, model, and stepping
+#    The model and stepping fields will be interpreted as regular expressions
+#    An empty stepping field means 'any' stepping
+
+##########
+# Intel Core CPUs
+##########
+#  Haswell
+- core: HSW
+  uarch: Haswell
+  family: 6
+  model: (50|69|70)
+  stepping:
+
+#  Broadwell
+- core: BDW
+  uarch: Broadwell
+  family: 6
+  model: (61|71)
+  stepping:
+
+#  Skylake
+- core: SKL
+  uarch: Skylake
+  family: 6
+  model: (78|94)
+  stepping:
+
+#  Kaby Lake
+- core: KBL
+  uarch: Kaby Lake
+  family: 6
+  model: (142|158)
+  stepping: 9
+
+#  Coffee Lake
+- core: CFL
+  uarch: Coffee Lake
+  family: 6
+  model: (142|158)
+  stepping: (10|11|12|13)
+
+#  Rocket Lake
+- core: RKL
+  uarch: Cypress Cove
+  family: 6
+  model: 167
+  stepping:
+
+#  Tiger Lake
+- core: TGL
+  uarch: Willow Cove
+  family: 6
+  model: (140|141)
+  stepping:
+
+#  Alder Lake
+- core: ADL
+  uarch: Golden Cove
+  family: 6
+  model: (151|154)
+  stepping:
+
+#  Raptor Lake
+- core: RTL
+  uarch: Raptor Cove
+  family: 6
+  model: 183
+  stepping:
+
+##########
+# Intel Xeon CPUs
+##########
+#  Haswell
+- core: HSX
+  uarch: Haswell
+  family: 6
+  model: 63
+  stepping:
+
+#  Broadwell
+- core: BDX
+  uarch: Broadwell
+  family: 6
+  model: (79|86)
+  stepping:
+
+#  Skylake
+- core: SKX
+  uarch: Skylake
+  family: 6
+  model: 85
+  stepping: (0|1|2|3|4)
+
+#  Cascade Lake
+- core: CLX
+  uarch: Cascade Lake
+  family: 6
+  model: 85
+  stepping: (5|6|7)
+
+#  Cooper Lake
+- core: CPX
+  uarch: Cooper Lake
+  family: 6
+  model: 85
+  stepping: 11
+
+#  Ice Lake
+- core: ICX
+  uarch: Sunny Cove
+  family: 6
+  model: (106|108)
+  stepping:
+
+#  Sapphire Rapids
+- core: SPR
+  uarch: Sapphire Rapids
+  family: 6
+  model: 143
+  stepping:
+
+#  Emerald Rapids
+- core: EMR
+  uarch: Emerald Rapids
+  family: 6
+  model: 207
+  stepping:
+
+#  Granite Rapids
+- core: GNR
+  uarch: Granite Rapids
+  family: 6
+  model: 173
+  stepping:
+
+#  Sierra Forest
+- core: SRF
+  uarch: Sierra Forest
+  family: 6
+  model: 175
+  stepping:
+
+##########
+# AMD CPUs
+##########
+#  Naples
+- core: Naples
+  uarch: Zen
+  family: 23
+  model: 1
+  stepping:
+
+#  Rome
+- core: Rome
+  uarch: Zen 2
+  family: 23
+  model: 49
+  stepping:
+
+#  Milan
+- core: Milan
+  uarch: Zen 3
+  family: 25
+  model: 1
+  stepping:
+
+#  Genoa
+- core: Genoa
+  uarch: Zen 4
+  family: 25
+  model: 17
+  stepping:
+
+#  Siena
+- core: Siena
+  uarch: Zen 4c
+  family: 25
+  model: 160
+  stepping:
+
+##########
+# ARM CPUs
+##########
+#  AWS Graviton 2
+- core: Ares
+  uarch: neoverse_n1
+  family:
+  model: 1
+  stepping: r3p1
+
+#  AWS Graviton 3
+- core: Zeus
+  uarch: neoverse_v1
+  family:
+  model: 1
+  stepping: r1p1
diff --git a/manifests/compose/dev/kepler/var/lib/kepler/data/model_weight/acpi_AbsPowerModel.json b/manifests/compose/dev/kepler/var/lib/kepler/data/model_weight/acpi_AbsPowerModel.json
@@ -0,0 +1 @@
+{"platform": {"All_Weights": {"Bias_Weight": 220.9079278650894, "Categorical_Variables": {}, "Numerical_Variables": {"bpf_cpu_time_ms": {"scale": 5911.969193263386, "mean": 0, "variance": 0, "weight": 29.028228361462897}}}}}
diff --git a/manifests/compose/dev/kepler/var/lib/kepler/data/model_weight/acpi_DynPowerModel.json b/manifests/compose/dev/kepler/var/lib/kepler/data/model_weight/acpi_DynPowerModel.json
@@ -0,0 +1 @@
+{"platform": {"All_Weights": {"Bias_Weight": 49.56491877218095, "Categorical_Variables": {}, "Numerical_Variables": {"bpf_cpu_time_ms": {"scale": 5911.969193263386, "mean": 0, "variance": 0, "weight": 28.501356366108837}}}}}
diff --git a/manifests/compose/dev/kepler/var/lib/kepler/data/model_weight/intel_rapl_AbsPowerModel.json b/manifests/compose/dev/kepler/var/lib/kepler/data/model_weight/intel_rapl_AbsPowerModel.json
@@ -0,0 +1 @@
+{"package": {"All_Weights": {"Bias_Weight": 69.91739430907396, "Categorical_Variables": {}, "Numerical_Variables": {"bpf_cpu_time_ms": {"scale": 5911.969193263386, "mean": 0, "variance": 0, "weight": 22.16772409328642}}}}, "core": {"All_Weights": {"Bias_Weight": 0.0, "Categorical_Variables": {}, "Numerical_Variables": {"bpf_cpu_time_ms": {"scale": 5911.969193263386, "mean": 0, "variance": 0, "weight": 0.0}}}}, "uncore": {"All_Weights": {"Bias_Weight": 0.0, "Categorical_Variables": {}, "Numerical_Variables": {"bpf_cpu_time_ms": {"scale": 5911.969193263386, "mean": 0, "variance": 0, "weight": 0.0}}}}, "dram": {"All_Weights": {"Bias_Weight": 47.142633336743344, "Categorical_Variables": {}, "Numerical_Variables": {"bpf_cpu_time_ms": {"scale": 5911.969193263386, "mean": 0, "variance": 0, "weight": 3.57348245077466}}}}}
diff --git a/manifests/compose/dev/kepler/var/lib/kepler/data/model_weight/intel_rapl_DynPowerModel.json b/manifests/compose/dev/kepler/var/lib/kepler/data/model_weight/intel_rapl_DynPowerModel.json
@@ -0,0 +1 @@
+{"package": {"All_Weights": {"Bias_Weight": 38.856412561925055, "Categorical_Variables": {}, "Numerical_Variables": {"bpf_cpu_time_ms": {"scale": 5911.969193263386, "mean": 0, "variance": 0, "weight": 22.258830113477515}}}}, "core": {"All_Weights": {"Bias_Weight": 0.0, "Categorical_Variables": {}, "Numerical_Variables": {"bpf_cpu_time_ms": {"scale": 5911.969193263386, "mean": 0, "variance": 0, "weight": 0.0}}}}, "uncore": {"All_Weights": {"Bias_Weight": 0.0, "Categorical_Variables": {}, "Numerical_Variables": {"bpf_cpu_time_ms": {"scale": 5911.969193263386, "mean": 0, "variance": 0, "weight": 0.0}}}}, "dram": {"All_Weights": {"Bias_Weight": 9.080889901856153, "Categorical_Variables": {}, "Numerical_Variables": {"bpf_cpu_time_ms": {"scale": 5911.969193263386, "mean": 0, "variance": 0, "weight": 3.0358946796490924}}}}}
diff --git a/model_training/s3/src/s3/loader.py b/model_training/s3/src/s3/loader.py
@@ -4,6 +4,7 @@
 # <provider>_upload(client, mnt_path)
 import argparse
 import os
+
 from . import util
 
 model_dir = "models"

diff --git a/model_training/s3/src/s3/pusher.py b/model_training/s3/src/s3/pusher.py
@@ -2,8 +2,9 @@
 # client = new_<provider>_client(args)
 ## upload all files in mnt path
 # <provider>_upload(client, mnt_path)
-import os
 import argparse
+import os
+
 from . import util
 
 model_dir = "models"

diff --git a/model_training/s3/src/s3/util.py b/model_training/s3/src/s3/util.py
@@ -1,4 +1,5 @@
 import argparse
+
 import s3.__about__ as about