From f89cb3e9e891ce9adf0c18ed557f43800c6a1ce0 Mon Sep 17 00:00:00 2001 From: Sunil Thaha Date: Tue, 20 Aug 2024 15:09:53 +1000 Subject: [PATCH 1/3] chore(compose): add compose for local development Signed-off-by: Sunil Thaha --- manifests/compose/dev/compose.yaml | 109 ++++++++++ .../kepler.config/ENABLE_PROCESS_METRICS | 1 + .../EXPOSE_ESTIMATED_IDLE_POWER_METRICS | 1 + .../etc/kepler/kepler.config/MODEL_CONFIG | 4 + .../kepler/kepler.config/MODEL_SERVER_ENABLE | 1 + .../etc/kepler/kepler.config/MODEL_SERVER_URL | 1 + .../dev/kepler/var/lib/kepler/data/cpus.yaml | 198 ++++++++++++++++++ .../data/model_weight/acpi_AbsPowerModel.json | 1 + .../data/model_weight/acpi_DynPowerModel.json | 1 + .../intel_rapl_AbsPowerModel.json | 1 + .../intel_rapl_DynPowerModel.json | 1 + 11 files changed, 319 insertions(+) create mode 100644 manifests/compose/dev/compose.yaml create mode 100644 manifests/compose/dev/kepler/etc/kepler/kepler.config/ENABLE_PROCESS_METRICS create mode 100644 manifests/compose/dev/kepler/etc/kepler/kepler.config/EXPOSE_ESTIMATED_IDLE_POWER_METRICS create mode 100644 manifests/compose/dev/kepler/etc/kepler/kepler.config/MODEL_CONFIG create mode 100644 manifests/compose/dev/kepler/etc/kepler/kepler.config/MODEL_SERVER_ENABLE create mode 100644 manifests/compose/dev/kepler/etc/kepler/kepler.config/MODEL_SERVER_URL create mode 100644 manifests/compose/dev/kepler/var/lib/kepler/data/cpus.yaml create mode 100644 manifests/compose/dev/kepler/var/lib/kepler/data/model_weight/acpi_AbsPowerModel.json create mode 100644 manifests/compose/dev/kepler/var/lib/kepler/data/model_weight/acpi_DynPowerModel.json create mode 100644 manifests/compose/dev/kepler/var/lib/kepler/data/model_weight/intel_rapl_AbsPowerModel.json create mode 100644 manifests/compose/dev/kepler/var/lib/kepler/data/model_weight/intel_rapl_DynPowerModel.json diff --git a/manifests/compose/dev/compose.yaml b/manifests/compose/dev/compose.yaml new file mode 100644 index 00000000..0b629e1c --- /dev/null +++ b/manifests/compose/dev/compose.yaml @@ -0,0 +1,109 @@ +name: model-dev +services: + kepler: + image: quay.io/sustainable_computing_io/kepler:latest + ports: + - 9100:9100 + privileged: true + pid: host + networks: + - kepler-network + - model-server-network + volumes: + - type: bind + source: /proc + target: /proc + # - type: bind + # source: /sys + # target: /sys + - type: bind + source: ./kepler/etc/kepler + target: /etc/kepler + + # NOTE: use the models from the local repo + - type: bind + source: ./kepler/var/lib/kepler/data/model_weight/ + target: /var/lib/kepler/data + - type: bind + source: ./kepler/var/lib/kepler/data/cpus.yaml + target: /var/lib/kepler/data/cpus.yaml + + # NOTE: for estimator - kepler communication + - kepler-tmp:/tmp + + healthcheck: + test: curl -f http://localhost:9100/metrics || exit 1 + interval: ${HEALTHCHECK_INTERVAL:-50s} + timeout: ${HEALTHCHECK_TIMEOUT:-30s} + retries: ${HEALTHCHECK_RETRIES:-3} + start_period: ${HEALTHCHECK_START_PERIOD:-1m} + + cap_add: + - ALL + + entrypoint: + - /usr/bin/bash + - -c + + command: + - | + echo "Waiting for model-server"; + until [[ "$(curl -s -o /dev/null -w "%{http_code}" http://model-server:8100/best-models)" -eq 200 ]]; do + echo " ... waiting for model-server"; + sleep 1; + done; + + echo "Waiting for estimator socket"; + until [[ -e /tmp/estimator.sock ]]; do + echo " ... 
waiting for estimator socket"; + sleep 1; + done; + + echo "starting kepler"; + set -x; + /usr/bin/kepler \ + -address="0.0.0.0:9100" \ + -v="8" + + estimator: + command: [estimator, -l, debug ] + build: &build + context: ../../../ + dockerfile: dockerfiles/Dockerfile + + volumes: + - type: bind + source: ./kepler/etc/kepler + target: /etc/kepler + + - kepler-tmp:/tmp + - estimator-mnt:/mnt + networks: + - kepler-network + - model-server-network + + model-server: + ports: + - '8100:8100' + command: [model-server, -l, debug ] + build: + <<: *build + volumes: + - type: bind + source: ./kepler/etc/kepler + target: /etc/kepler + - model-server-mnt:/mnt + networks: + - model-server-network + +volumes: + # for kepler - estimator sock + kepler-tmp: + + # for downloading models + estimator-mnt: + model-server-mnt: + +networks: + kepler-network: + model-server-network: diff --git a/manifests/compose/dev/kepler/etc/kepler/kepler.config/ENABLE_PROCESS_METRICS b/manifests/compose/dev/kepler/etc/kepler/kepler.config/ENABLE_PROCESS_METRICS new file mode 100644 index 00000000..27ba77dd --- /dev/null +++ b/manifests/compose/dev/kepler/etc/kepler/kepler.config/ENABLE_PROCESS_METRICS @@ -0,0 +1 @@ +true diff --git a/manifests/compose/dev/kepler/etc/kepler/kepler.config/EXPOSE_ESTIMATED_IDLE_POWER_METRICS b/manifests/compose/dev/kepler/etc/kepler/kepler.config/EXPOSE_ESTIMATED_IDLE_POWER_METRICS new file mode 100644 index 00000000..27ba77dd --- /dev/null +++ b/manifests/compose/dev/kepler/etc/kepler/kepler.config/EXPOSE_ESTIMATED_IDLE_POWER_METRICS @@ -0,0 +1 @@ +true diff --git a/manifests/compose/dev/kepler/etc/kepler/kepler.config/MODEL_CONFIG b/manifests/compose/dev/kepler/etc/kepler/kepler.config/MODEL_CONFIG new file mode 100644 index 00000000..29139eb2 --- /dev/null +++ b/manifests/compose/dev/kepler/etc/kepler/kepler.config/MODEL_CONFIG @@ -0,0 +1,4 @@ +NODE_TOTAL_ESTIMATOR=true +NODE_TOTAL_INIT_URL=https://raw.githubusercontent.com/sustainable-computing-io/kepler-model-db/main/models/v0.7/specpower-0.7.11/acpi/AbsPower/BPFOnly/GradientBoostingRegressorTrainer_0.zip +NODE_COMPONENTS_ESTIMATOR=true +NODE_COMPONENTS_INIT_URL=https://raw.githubusercontent.com/sustainable-computing-io/kepler-model-db/main/models/v0.7/ec2-0.7.11/rapl-sysfs/AbsPower/BPFOnly/GradientBoostingRegressorTrainer_0.zip diff --git a/manifests/compose/dev/kepler/etc/kepler/kepler.config/MODEL_SERVER_ENABLE b/manifests/compose/dev/kepler/etc/kepler/kepler.config/MODEL_SERVER_ENABLE new file mode 100644 index 00000000..27ba77dd --- /dev/null +++ b/manifests/compose/dev/kepler/etc/kepler/kepler.config/MODEL_SERVER_ENABLE @@ -0,0 +1 @@ +true diff --git a/manifests/compose/dev/kepler/etc/kepler/kepler.config/MODEL_SERVER_URL b/manifests/compose/dev/kepler/etc/kepler/kepler.config/MODEL_SERVER_URL new file mode 100644 index 00000000..335bd84b --- /dev/null +++ b/manifests/compose/dev/kepler/etc/kepler/kepler.config/MODEL_SERVER_URL @@ -0,0 +1 @@ +http://model-server:8100 diff --git a/manifests/compose/dev/kepler/var/lib/kepler/data/cpus.yaml b/manifests/compose/dev/kepler/var/lib/kepler/data/cpus.yaml new file mode 100644 index 00000000..a7fcb6d7 --- /dev/null +++ b/manifests/compose/dev/kepler/var/lib/kepler/data/cpus.yaml @@ -0,0 +1,198 @@ +########## +# CPUS - used to lookup uarch and channels by family, model, and stepping +# The model and stepping fields will be interpreted as regular expressions +# An empty stepping field means 'any' stepping + +########## +# Intel Core CPUs +########## +# Haswell +- core: HSW + uarch: Haswell + 
family: 6 + model: (50|69|70) + stepping: + +# Broadwell +- core: BDW + uarch: Broadwell + family: 6 + model: (61|71) + stepping: + +# Skylake +- core: SKL + uarch: Skylake + family: 6 + model: (78|94) + stepping: + +# Kabylake +- core: KBL + uarch: Kaby Lake + family: 6 + model: (142|158) + stepping: 9 + +# Coffelake +- core: CFL + uarch: Coffee Lake + family: 6 + model: (142|158) + stepping: (10|11|12|13) + +# Rocket Lake +- core: RKL + uarch: Cypress Cove + family: 6 + model: 167 + stepping: + +# Tiger Lake +- core: TGL + uarch: Willow Cove + family: 6 + model: (140|141) + stepping: + +# Alder Lake +- core: ADL + uarch: Golden Cove + family: 6 + model: (151|154) + stepping: + +# Raptor Lake +- core: RTL + uarch: Raptor Cove + family: 6 + model: 183 + stepping: + +########## +# Intel Xeon CPUs +########## +# Haswell +- core: HSX + uarch: Haswell + family: 6 + model: 63 + stepping: + +# Broadwell +- core: BDX + uarch: Broadwell + family: 6 + model: (79|86) + stepping: + +# Skylake +- core: SKX + uarch: Skylake + family: 6 + model: 85 + stepping: (0|1|2|3|4) + +# Cascadelake +- core: CLX + uarch: Cascade Lake + family: 6 + model: 85 + stepping: (5|6|7) + +# Cooperlake +- core: CPX + uarch: Cooper Lake + family: 6 + model: 85 + stepping: 11 + +# Icelake +- core: ICX + uarch: Sunny Cove + family: 6 + model: (106|108) + stepping: + +# Sapphire Rapids +- core: SPR + uarch: Sapphire Rapids + family: 6 + model: 143 + stepping: + +# Emerald Rapids +- core: EMR + uarch: Emerald Rapids + family: 6 + model: 207 + stepping: + +# Granite Rapids +- core: GNR + uarch: Granite Rapids + family: 6 + model: 173 + stepping: + +# Sierra Forest +- core: SRF + uarch: Sierra Forest + family: 6 + model: 175 + stepping: + +########## +# AMD CPUs +########## +# Naples +- core: Naples + uarch: Zen + family: 23 + model: 1 + stepping: + +# Rome +- core: Rome + uarch: Zen 2 + family: 23 + model: 49 + stepping: + +# Milan +- core: Milan + uarch: Zen 3 + family: 25 + model: 1 + stepping: + +# Genoa +- core: Genoa + uarch: Zen 4 + family: 25 + model: 17 + stepping: + +# Siena +- core: Siena + uarch: Zen 4c + family: 25 + model: 160 + stepping: + +########## +# ARM CPUs +######### +# AWS Graviton 2 +- core: Ares + uarch: neoverse_n1 + family: + model: 1 + stepping: r3p1 + +# AWS Graviton 3 +- core: Zeus + uarch: neoverse_v1 + family: + model: 1 + stepping: r1p1 diff --git a/manifests/compose/dev/kepler/var/lib/kepler/data/model_weight/acpi_AbsPowerModel.json b/manifests/compose/dev/kepler/var/lib/kepler/data/model_weight/acpi_AbsPowerModel.json new file mode 100644 index 00000000..a9e3077f --- /dev/null +++ b/manifests/compose/dev/kepler/var/lib/kepler/data/model_weight/acpi_AbsPowerModel.json @@ -0,0 +1 @@ +{"platform": {"All_Weights": {"Bias_Weight": 220.9079278650894, "Categorical_Variables": {}, "Numerical_Variables": {"bpf_cpu_time_ms": {"scale": 5911.969193263386, "mean": 0, "variance": 0, "weight": 29.028228361462897}}}}} diff --git a/manifests/compose/dev/kepler/var/lib/kepler/data/model_weight/acpi_DynPowerModel.json b/manifests/compose/dev/kepler/var/lib/kepler/data/model_weight/acpi_DynPowerModel.json new file mode 100644 index 00000000..df09966a --- /dev/null +++ b/manifests/compose/dev/kepler/var/lib/kepler/data/model_weight/acpi_DynPowerModel.json @@ -0,0 +1 @@ +{"platform": {"All_Weights": {"Bias_Weight": 49.56491877218095, "Categorical_Variables": {}, "Numerical_Variables": {"bpf_cpu_time_ms": {"scale": 5911.969193263386, "mean": 0, "variance": 0, "weight": 28.501356366108837}}}}} diff --git 
a/manifests/compose/dev/kepler/var/lib/kepler/data/model_weight/intel_rapl_AbsPowerModel.json b/manifests/compose/dev/kepler/var/lib/kepler/data/model_weight/intel_rapl_AbsPowerModel.json new file mode 100644 index 00000000..7c964746 --- /dev/null +++ b/manifests/compose/dev/kepler/var/lib/kepler/data/model_weight/intel_rapl_AbsPowerModel.json @@ -0,0 +1 @@ +{"package": {"All_Weights": {"Bias_Weight": 69.91739430907396, "Categorical_Variables": {}, "Numerical_Variables": {"bpf_cpu_time_ms": {"scale": 5911.969193263386, "mean": 0, "variance": 0, "weight": 22.16772409328642}}}}, "core": {"All_Weights": {"Bias_Weight": 0.0, "Categorical_Variables": {}, "Numerical_Variables": {"bpf_cpu_time_ms": {"scale": 5911.969193263386, "mean": 0, "variance": 0, "weight": 0.0}}}}, "uncore": {"All_Weights": {"Bias_Weight": 0.0, "Categorical_Variables": {}, "Numerical_Variables": {"bpf_cpu_time_ms": {"scale": 5911.969193263386, "mean": 0, "variance": 0, "weight": 0.0}}}}, "dram": {"All_Weights": {"Bias_Weight": 47.142633336743344, "Categorical_Variables": {}, "Numerical_Variables": {"bpf_cpu_time_ms": {"scale": 5911.969193263386, "mean": 0, "variance": 0, "weight": 3.57348245077466}}}}} diff --git a/manifests/compose/dev/kepler/var/lib/kepler/data/model_weight/intel_rapl_DynPowerModel.json b/manifests/compose/dev/kepler/var/lib/kepler/data/model_weight/intel_rapl_DynPowerModel.json new file mode 100644 index 00000000..14791a9d --- /dev/null +++ b/manifests/compose/dev/kepler/var/lib/kepler/data/model_weight/intel_rapl_DynPowerModel.json @@ -0,0 +1 @@ +{"package": {"All_Weights": {"Bias_Weight": 38.856412561925055, "Categorical_Variables": {}, "Numerical_Variables": {"bpf_cpu_time_ms": {"scale": 5911.969193263386, "mean": 0, "variance": 0, "weight": 22.258830113477515}}}}, "core": {"All_Weights": {"Bias_Weight": 0.0, "Categorical_Variables": {}, "Numerical_Variables": {"bpf_cpu_time_ms": {"scale": 5911.969193263386, "mean": 0, "variance": 0, "weight": 0.0}}}}, "uncore": {"All_Weights": {"Bias_Weight": 0.0, "Categorical_Variables": {}, "Numerical_Variables": {"bpf_cpu_time_ms": {"scale": 5911.969193263386, "mean": 0, "variance": 0, "weight": 0.0}}}}, "dram": {"All_Weights": {"Bias_Weight": 9.080889901856153, "Categorical_Variables": {}, "Numerical_Variables": {"bpf_cpu_time_ms": {"scale": 5911.969193263386, "mean": 0, "variance": 0, "weight": 3.0358946796490924}}}}} From 16ef378d28135ed85bfc9007af5f44b8276abc32 Mon Sep 17 00:00:00 2001 From: Sunil Thaha Date: Wed, 21 Aug 2024 12:59:51 +1000 Subject: [PATCH 2/3] chore: add more logging Signed-off-by: Sunil Thaha --- src/kepler_model/estimate/estimator.py | 20 ++++++++------ src/kepler_model/estimate/model/model.py | 26 ++++++++++-------- src/kepler_model/util/config.py | 4 +-- src/kepler_model/util/loader.py | 29 +++++++++++++------- src/kepler_model/util/train_types.py | 35 ++++++++++++++++-------- 5 files changed, 70 insertions(+), 44 deletions(-) diff --git a/src/kepler_model/estimate/estimator.py b/src/kepler_model/estimate/estimator.py index 26e00ea6..b492ba62 100644 --- a/src/kepler_model/estimate/estimator.py +++ b/src/kepler_model/estimate/estimator.py @@ -14,7 +14,7 @@ from kepler_model.estimate.model.model import load_downloaded_model from kepler_model.util.loader import get_download_output_path from kepler_model.util.config import set_env_from_model_config, SERVE_SOCKET, download_path -from kepler_model.util.train_types import is_support_output_type, ModelOutputType +from kepler_model.util.train_types import is_output_type_supported, ModelOutputType 
############################################### # power request @@ -43,16 +43,16 @@ def __init__(self, metrics, values, output_type, source, system_features, system loaded_model = dict() -def handle_request(data): +def handle_request(data: str) -> dict: try: power_request = json.loads(data, object_hook=lambda d: PowerRequest(**d)) except Exception as e: - logger.error(f"fail to handle request: {e}") - msg = "fail to handle request: {}".format(e) + msg = f"failed to handle request: {e}" + logger.error(msg) return {"powers": dict(), "msg": msg} - if not is_support_output_type(power_request.output_type): - msg = "output type {} is not supported".format(power_request.output_type) + if not is_output_type_supported(power_request.output_type): + msg = f"output type {power_request.output_type} is not supported" logger.error(msg) return {"powers": dict(), "msg": msg} @@ -63,6 +63,7 @@ def handle_request(data): if output_type.name not in loaded_model: loaded_model[output_type.name] = dict() + output_path = "" mismatch_trainer = False if is_model_server_enabled(): @@ -87,12 +88,12 @@ def handle_request(data): msg = "failed to get model from request {}".format(data) logger.error(msg) return {"powers": dict(), "msg": msg} - else: - logger.info(f"load model from config: {output_path}") + logger.info(f"load model from config: {output_path}") else: logger.info(f"load model from model server: {output_path}") loaded_item = load_downloaded_model(power_request.energy_source, output_type) + if loaded_item is not None and loaded_item.estimator is not None: loaded_model[output_type.name][power_request.energy_source] = loaded_item logger.info(f"set model {loaded_item.model_name} for {output_type.name} ({power_request.energy_source})") @@ -103,6 +104,7 @@ def handle_request(data): logger.info(f"{model.model_name} failed to predict; removed: {msg}") if output_path != "" and os.path.exists(output_path): shutil.rmtree(output_path) + return {"powers": powers, "msg": msg} @@ -124,7 +126,7 @@ def start(self): os.remove(self.socket_path) sys.stdout.write("close socket\n") except Exception as e: - logger.error(f"fail to close socket: {e}") + logger.error(f"failed to close socket: {e}") def accepted(self, connection): data = b"" diff --git a/src/kepler_model/estimate/model/model.py b/src/kepler_model/estimate/model/model.py index 3b15f370..4b435570 100644 --- a/src/kepler_model/estimate/model/model.py +++ b/src/kepler_model/estimate/model/model.py @@ -1,4 +1,5 @@ import json +import logging import pandas as pd from kepler_model.util.loader import load_metadata, get_download_output_path @@ -10,6 +11,8 @@ from kepler_model.estimate.model.curvefit_model import CurveFitModelEstimator # from keras_model import KerasModelEstimator +logger = logging.getLogger(__name__) + # model wrapper MODELCLASS = { "scikit": ScikitModelEstimator, @@ -157,17 +160,18 @@ def append_idle_prediction(self, data, predicted_col_func=default_idle_predicted def load_model(model_path): metadata = load_metadata(model_path) - if metadata is not None: - metadata["model_path"] = model_path - metadata_str = json.dumps(metadata) - try: - model = json.loads(metadata_str, object_hook=lambda d: Model(**d)) - return model - except Exception as e: - print("fail to load: ", e) - return None - print("no metadata") - return None + if not metadata: + logger.warn(f"no metadata in {model_path}") + return None + + metadata["model_path"] = model_path + metadata_str = json.dumps(metadata) + try: + model = json.loads(metadata_str, object_hook=lambda d: Model(**d)) + return 
model + except Exception as e: + logger.error(f"fail to load: {model_path} - {e}") + return None # download model folder has no subfolder of energy source and feature group because it has been already determined by model request diff --git a/src/kepler_model/util/config.py b/src/kepler_model/util/config.py index aa2858ed..0971404e 100644 --- a/src/kepler_model/util/config.py +++ b/src/kepler_model/util/config.py @@ -14,7 +14,7 @@ import os from .loader import get_url, get_pipeline_url, base_model_url, default_pipelines, default_train_output_pipeline -from .train_types import ModelOutputType, is_support_output_type, FeatureGroup +from .train_types import ModelOutputType, is_output_type_supported, FeatureGroup # must be writable (for shared volume mount) MNT_PATH = "/mnt" @@ -150,7 +150,7 @@ def get_init_model_url(energy_source, output_type, model_topurl=model_topurl): if get_energy_source(prefix) == energy_source: modelURL = get_init_url(prefix) print("get init url", modelURL) - if modelURL == "" and is_support_output_type(output_type): + if modelURL == "" and is_output_type_supported(output_type): print("init URL is not set, try using default URL".format(output_type)) return get_url(feature_group=FeatureGroup.BPFOnly, output_type=ModelOutputType[output_type], energy_source=energy_source, model_topurl=model_topurl, pipeline_name=pipeline_name) else: diff --git a/src/kepler_model/util/loader.py b/src/kepler_model/util/loader.py index 60133e8e..65ed58c0 100644 --- a/src/kepler_model/util/loader.py +++ b/src/kepler_model/util/loader.py @@ -1,5 +1,6 @@ import os import json +import logging import joblib import pandas as pd from .saver import assure_path, METADATA_FILENAME, SCALER_FILENAME, WEIGHT_FILENAME, TRAIN_ARGS_FILENAME, NODE_TYPE_INDEX_FILENAME, MACHINE_SPEC_PATH, _pipeline_model_metadata_filename @@ -31,6 +32,9 @@ base_model_url = "https://raw.githubusercontent.com/sustainable-computing-io/kepler-model-db/main/models/v{}".format(major_version) +logger = logging.getLogger(__name__) + + def get_pipeline_url(model_topurl, pipeline_name): file_ext = ".zip" return os.path.join(model_topurl, pipeline_name + file_ext) @@ -52,21 +56,24 @@ def assure_pipeline_name(pipeline_name, energy_source, nodeCollection): default_feature_group = FeatureGroup.BPFOnly -def load_json(path, name): - if ".json" not in name: +def load_json(path: str, name: str): + if name.endswith(".json") is False: name = name + ".json" + filepath = os.path.join(path, name) try: with open(filepath) as f: res = json.load(f) return res except Exception as err: + logger.error(f"fail to load json {filepath}: {err}") return None -def load_pkl(path, name): - if ".pkl" not in name: +def load_pkl(path: str, name: str): + if name.endswith(".pkl") is False: name = name + ".pkl" + filepath = os.path.join(path, name) try: res = joblib.load(filepath) @@ -74,18 +81,20 @@ def load_pkl(path, name): except FileNotFoundError: return None except Exception as err: - print("fail to load pkl {}: {}".format(filepath, err)) + logger.error(f"failed to load pkl {filepath}: {err}") return None def load_remote_pkl(url_path): - if ".pkl" not in url_path: - url_path = url_path + ".pkl" + if url_path.endswith(".pkl") is False: + url_path += ".pkl" + try: response = urlopen(url_path) loaded_model = joblib.load(response) return loaded_model - except: + except Exception as e: + logger.error(f"failed to load pkl url {url_path}: {e}") return None @@ -133,8 +142,8 @@ def load_csv(path, name): data = pd.read_csv(file_path) data = data.apply(pd.to_numeric, 
errors="ignore") return data - except: - # print('cannot load {}'.format(file_path)) + except Exception as err: + logger.error(f"cannot load {file_path}: {err}") return None diff --git a/src/kepler_model/util/train_types.py b/src/kepler_model/util/train_types.py index 64c05a6d..8d8c5b34 100644 --- a/src/kepler_model/util/train_types.py +++ b/src/kepler_model/util/train_types.py @@ -1,6 +1,6 @@ ########################################################### ## types.py -## +## ## defines ## - collection of features ## - feature groups @@ -17,7 +17,7 @@ COUNTER_FEAUTRES = ["cache_miss", "cpu_cycles", "cpu_instructions"] BPF_FEATURES = ["bpf_cpu_time_ms", "bpf_page_cache_hit"] IRQ_FEATURES = ["bpf_block_irq", "bpf_net_rx_irq", "bpf_net_tx_irq"] -ACCELERATE_FEATURES = ['accelerator_intel_qat'] +ACCELERATE_FEATURES = ["accelerator_intel_qat"] WORKLOAD_FEATURES = COUNTER_FEAUTRES + BPF_FEATURES + IRQ_FEATURES + ACCELERATE_FEATURES BASIC_FEATURES = COUNTER_FEAUTRES + BPF_FEATURES @@ -40,8 +40,8 @@ "cpu_scaling_frequency_hertz": ["1GHz", "2GHz", "3GHz"], } -no_weight_trainers = ['PolynomialRegressionTrainer', 'GradientBoostingRegressorTrainer', 'KNeighborsRegressorTrainer', 'LinearRegressionTrainer','SVRRegressorTrainer', 'XgboostFitTrainer'] -weight_support_trainers = ['SGDRegressorTrainer', 'LogarithmicRegressionTrainer', 'LogisticRegressionTrainer', 'ExponentialRegressionTrainer'] +no_weight_trainers = ["PolynomialRegressionTrainer", "GradientBoostingRegressorTrainer", "KNeighborsRegressorTrainer", "LinearRegressionTrainer", "SVRRegressorTrainer", "XgboostFitTrainer"] +weight_support_trainers = ["SGDRegressorTrainer", "LogarithmicRegressionTrainer", "LogisticRegressionTrainer", "ExponentialRegressionTrainer"] default_trainer_names = no_weight_trainers + weight_support_trainers default_trainers = ",".join(default_trainer_names) @@ -59,24 +59,29 @@ class FeatureGroup(enum.Enum): ThirdParty = 10 Unknown = 99 + class EnergyComponentLabelGroup(enum.Enum): PackageEnergyComponentOnly = 1 DRAMEnergyComponentOnly = 2 CoreEnergyComponentOnly = 3 PackageDRAMEnergyComponents = 4 + class ModelOutputType(enum.Enum): AbsPower = 1 DynPower = 2 -def is_support_output_type(output_type_name): + +def is_output_type_supported(output_type_name): return any(output_type_name == item.name for item in ModelOutputType) + def deep_sort(elements): sorted_elements = elements.copy() sorted_elements.sort() return sorted_elements + FeatureGroups = { FeatureGroup.Full: deep_sort(WORKLOAD_FEATURES + SYSTEM_FEATURES), FeatureGroup.WorkloadOnly: deep_sort(WORKLOAD_FEATURES), @@ -90,9 +95,11 @@ def deep_sort(elements): SingleSourceFeatures = [FeatureGroup.CounterOnly.name, FeatureGroup.BPFOnly.name, FeatureGroup.BPFIRQ.name] + def is_single_source_feature_group(fg): return fg.name in SingleSourceFeatures + default_main_feature_map = { FeatureGroup.Full: "cpu_instructions", FeatureGroup.WorkloadOnly: "cpu_instructions", @@ -122,15 +129,17 @@ def main_feature(feature_group_name, energy_component): feature = default_main_feature_map[feature_group] return features.index(feature) + # XGBoostRegressionTrainType class XGBoostRegressionTrainType(enum.Enum): TrainTestSplitFit = 1 KFoldCrossValidation = 2 + # XGBoost Model Feature and Label Incompatability Exception class XGBoostModelFeatureOrLabelIncompatabilityException(Exception): - """Exception raised when a saved model's features and label is incompatable with the training data. - + """Exception raised when a saved model's features and label is incompatable with the training data. + ... 
Attributes @@ -139,7 +148,7 @@ class XGBoostModelFeatureOrLabelIncompatabilityException(Exception): expected_labels: the expected model labels actual_features: the actual model features actual_labels: the actual model labels - features_incompatible: true if expected_features == actual_features else false + features_incompatible: true if expected_features == actual_features else false labels_incompatible: true if expected_labels == actual_labels else false """ @@ -150,7 +159,6 @@ class XGBoostModelFeatureOrLabelIncompatabilityException(Exception): features_incompatible: bool labels_incompatible: bool - def __init__(self, expected_features: List[str], expected_labels: List[str], received_features: List[str], received_labels: List[str], message="expected features/labels are the not the same as the features/labels of the training data") -> None: self.expected_features = expected_features self.expected_labels = expected_labels @@ -188,12 +196,12 @@ def __init__(self, missing_model: bool, missing_model_desc: bool, message="model EnergyComponentLabelGroup.PackageEnergyComponentOnly: deep_sort(PACKAGE_ENERGY_COMPONENT_LABEL), EnergyComponentLabelGroup.DRAMEnergyComponentOnly: deep_sort(DRAM_ENERGY_COMPONENT_LABEL), EnergyComponentLabelGroup.CoreEnergyComponentOnly: deep_sort(CORE_ENERGY_COMPONENT_LABEL), - EnergyComponentLabelGroup.PackageDRAMEnergyComponents: deep_sort(PACKAGE_ENERGY_COMPONENT_LABEL + DRAM_ENERGY_COMPONENT_LABEL) - + EnergyComponentLabelGroup.PackageDRAMEnergyComponents: deep_sort(PACKAGE_ENERGY_COMPONENT_LABEL + DRAM_ENERGY_COMPONENT_LABEL), } all_feature_groups = [fg.name for fg in FeatureGroups.keys()] + def get_feature_group(features): sorted_features = deep_sort(features) for g, g_features in FeatureGroups.items(): @@ -202,6 +210,7 @@ def get_feature_group(features): return g return FeatureGroup.Unknown + def get_valid_feature_groups(features): valid_fgs = [] for fg_key, fg_features in FeatureGroups.items(): @@ -214,6 +223,7 @@ def get_valid_feature_groups(features): valid_fgs += [fg_key] return valid_fgs + def is_weight_output(output_type): if output_type == ModelOutputType.AbsModelWeight: return True @@ -225,7 +235,8 @@ def is_weight_output(output_type): return True return False -if __name__ == '__main__': + +if __name__ == "__main__": for g, g_features in FeatureGroups.items(): shuffled_features = g_features.copy() random.shuffle(shuffled_features) From 7cc18becd0a7898e55025754cf422c5a841bc8d3 Mon Sep 17 00:00:00 2001 From: Sunil Thaha Date: Wed, 21 Aug 2024 15:25:34 +1000 Subject: [PATCH 3/3] chore: run hatch fmt to format to 120 width Signed-off-by: Sunil Thaha --- cmd/main.py | 1 + model_training/s3/src/s3/loader.py | 1 + model_training/s3/src/s3/pusher.py | 3 +- model_training/s3/src/s3/util.py | 1 + src/kepler_model/cmd/cmd_plot.py | 42 ++--- src/kepler_model/cmd/cmd_util.py | 49 +++--- src/kepler_model/cmd/main.py | 149 ++++++++++++------ src/kepler_model/estimate/__init__.py | 15 +- src/kepler_model/estimate/archived_model.py | 4 +- src/kepler_model/estimate/estimator.py | 16 +- .../estimate/model/curvefit_model.py | 9 +- .../estimate/model/estimate_common.py | 7 +- .../estimate/model/keras_model.py | 8 +- src/kepler_model/estimate/model/model.py | 52 +++--- .../estimate/model/scikit_model.py | 11 +- .../estimate/model/xgboost_model.py | 8 +- .../estimate/model_server_connector.py | 17 +- src/kepler_model/server/model_server.py | 64 +++++--- src/kepler_model/train/__init__.py | 6 +- src/kepler_model/train/ec2_pipeline.py | 20 ++- 
src/kepler_model/train/exporter/exporter.py | 16 +- src/kepler_model/train/exporter/validator.py | 7 +- src/kepler_model/train/exporter/writer.py | 34 ++-- src/kepler_model/train/extractor/extractor.py | 32 +++- .../train/extractor/preprocess.py | 6 +- .../train/extractor/smooth_extractor.py | 3 +- src/kepler_model/train/isolator/isolator.py | 14 +- .../train/isolator/train_isolator.py | 24 ++- src/kepler_model/train/offline_trainer.py | 23 ++- src/kepler_model/train/online_trainer.py | 13 +- src/kepler_model/train/pipeline.py | 59 ++++--- .../train/profiler/generate_scaler.py | 6 +- .../train/profiler/node_type_index.py | 14 +- src/kepler_model/train/profiler/profiler.py | 15 +- src/kepler_model/train/prom/prom_query.py | 13 +- src/kepler_model/train/specpower_pipeline.py | 19 +-- .../ExponentialRegressionTrainer/main.py | 5 +- .../GradientBoostingRegressorTrainer/main.py | 1 + .../KNeighborsRegressorTrainer/main.py | 1 + .../trainer/LinearRegressionTrainer/main.py | 1 + .../LogarithmicRegressionTrainer/main.py | 3 +- .../trainer/LogisticRegressionTrainer/main.py | 3 +- .../train/trainer/SGDRegressorTrainer/main.py | 1 + .../train/trainer/SVRRegressorTrainer/main.py | 1 + .../train/trainer/XGBoostTrainer/main.py | 36 +++-- src/kepler_model/train/trainer/__init__.py | 51 +++--- src/kepler_model/train/trainer/curvefit.py | 9 +- src/kepler_model/train/trainer/scikit.py | 7 +- .../train/trainer/xgboost_interface.py | 13 +- src/kepler_model/util/__init__.py | 31 +++- src/kepler_model/util/config.py | 17 +- src/kepler_model/util/extract_types.py | 6 +- src/kepler_model/util/format.py | 6 +- src/kepler_model/util/loader.py | 44 ++++-- src/kepler_model/util/prom_types.py | 17 +- src/kepler_model/util/saver.py | 15 +- src/kepler_model/util/train_types.py | 13 +- tests/client_load_tester.py | 14 +- tests/common_plot.py | 31 ++-- tests/estimator_model_request_test.py | 32 ++-- tests/estimator_model_test.py | 16 +- tests/estimator_power_request_test.py | 9 +- tests/extractor_test.py | 26 +-- tests/http_server.py | 6 +- tests/isolator_test.py | 40 +++-- tests/minimal_trainer.py | 1 + tests/model_server_test.py | 14 +- tests/model_tester.py | 14 +- tests/offline_trainer_test.py | 21 ++- tests/pipeline_test.py | 13 +- tests/prom_test.py | 2 +- tests/query_test.py | 9 +- tests/trainer_test.py | 19 ++- tests/weight_model_request_test.py | 17 +- tests/xgboost_test.py | 4 +- 75 files changed, 805 insertions(+), 545 deletions(-) diff --git a/cmd/main.py b/cmd/main.py index 0f506f59..ca28d401 100644 --- a/cmd/main.py +++ b/cmd/main.py @@ -3,6 +3,7 @@ # -*- coding: utf-8 -*- import re import sys + from kepler_model.cmd.main import run if __name__ == "__main__": diff --git a/model_training/s3/src/s3/loader.py b/model_training/s3/src/s3/loader.py index 83899635..772a59ed 100644 --- a/model_training/s3/src/s3/loader.py +++ b/model_training/s3/src/s3/loader.py @@ -4,6 +4,7 @@ # _upload(client, mnt_path) import argparse import os + from . import util model_dir = "models" diff --git a/model_training/s3/src/s3/pusher.py b/model_training/s3/src/s3/pusher.py index 180cf0d1..5a69ee88 100644 --- a/model_training/s3/src/s3/pusher.py +++ b/model_training/s3/src/s3/pusher.py @@ -2,8 +2,9 @@ # client = new__client(args) ## upload all files in mnt path # _upload(client, mnt_path) -import os import argparse +import os + from . 
import util model_dir = "models" diff --git a/model_training/s3/src/s3/util.py b/model_training/s3/src/s3/util.py index 786133d1..00f8c5b5 100644 --- a/model_training/s3/src/s3/util.py +++ b/model_training/s3/src/s3/util.py @@ -1,4 +1,5 @@ import argparse + import s3.__about__ as about diff --git a/src/kepler_model/cmd/cmd_plot.py b/src/kepler_model/cmd/cmd_plot.py index fff7abb6..189bae1d 100644 --- a/src/kepler_model/cmd/cmd_plot.py +++ b/src/kepler_model/cmd/cmd_plot.py @@ -1,11 +1,11 @@ import os -from kepler_model.util.prom_types import TIMESTAMP_COL -from kepler_model.util import PowerSourceMap -from kepler_model.util.train_types import FeatureGroup, ModelOutputType, weight_support_trainers -from kepler_model.util.loader import load_metadata, load_scaler, get_model_group_path -from kepler_model.train.profiler.node_type_index import NodeTypeIndexCollection from kepler_model.estimate import load_model +from kepler_model.train.profiler.node_type_index import NodeTypeIndexCollection +from kepler_model.util import PowerSourceMap +from kepler_model.util.loader import get_model_group_path, load_metadata, load_scaler +from kepler_model.util.prom_types import TIMESTAMP_COL +from kepler_model.util.train_types import FeatureGroup, ModelOutputType, weight_support_trainers markers = ["o", "s", "^", "v", "<", ">", "p", "P", "*", "x", "+", "|", "_"] @@ -18,14 +18,14 @@ def ts_plot(data, cols, title, output_folder, name, labels=None, subtitles=None, sns.set(font_scale=1.2) fig, axes = plt.subplots(len(cols), 1, figsize=(plot_width, len(cols) * plot_height)) - for i in range(0, len(cols)): + for i in range(len(cols)): if len(cols) == 1: ax = axes else: ax = axes[i] if isinstance(cols[i], list): # multiple lines - for j in range(0, len(cols[i])): + for j in range(len(cols[i])): sns.lineplot(data=data, x=TIMESTAMP_COL, y=cols[i][j], ax=ax, label=labels[j]) ax.set_title(subtitles[i]) else: @@ -52,19 +52,18 @@ def feature_power_plot(data, model_id, output_type, energy_source, feature_cols, col_num = len(actual_power_cols) width = max(10, col_num * plot_width) fig, axes = plt.subplots(row_num, col_num, figsize=(width, row_num * plot_height)) - for xi in range(0, row_num): + for xi in range(row_num): feature_col = feature_cols[xi] - for yi in range(0, col_num): + for yi in range(col_num): if row_num == 1: if col_num == 1: ax = axes else: ax = axes[yi] + elif col_num == 1: + ax = axes[xi] else: - if col_num == 1: - ax = axes[xi] - else: - ax = axes[xi][yi] + ax = axes[xi][yi] sorted_data = data.sort_values(by=[feature_col]) sns.scatterplot(data=sorted_data, x=feature_col, y=actual_power_cols[yi], ax=ax, label="actual") sns.lineplot(data=sorted_data, x=feature_col, y=predicted_power_cols[yi], ax=ax, label="predicted", color="C1") @@ -72,7 +71,7 @@ def feature_power_plot(data, model_id, output_type, energy_source, feature_cols, ax.set_title(actual_power_cols[yi]) if yi == 0: ax.set_ylabel("Power (W)") - title = "{} {} prediction correlation \n by {}".format(energy_source, output_type, model_id) + title = f"{energy_source} {output_type} prediction correlation \n by {model_id}" plt.suptitle(title, x=0.5, y=0.99) plt.tight_layout() filename = os.path.join(output_folder, name + ".png") @@ -96,7 +95,7 @@ def summary_plot(args, energy_source, summary_df, output_folder, name): energy_components = PowerSourceMap[energy_source] col_num = len(energy_components) fig, axes = plt.subplots(col_num, 1, figsize=(plot_width, plot_height * col_num)) - for i in range(0, col_num): + for i in range(col_num): component = 
energy_components[i] data = summary_df[(summary_df["energy_source"] == energy_source) & (summary_df["energy_component"] == component)] data = data.sort_values(by=["Feature Group", "MAE"]) @@ -111,7 +110,7 @@ def summary_plot(args, energy_source, summary_df, output_folder, name): if i < col_num - 1: ax.set_xlabel("") ax.legend(bbox_to_anchor=(1.05, 1.05)) - plt.suptitle("{} {} error".format(energy_source, args.output_type)) + plt.suptitle(f"{energy_source} {args.output_type} error") plt.tight_layout() filename = os.path.join(output_folder, name + ".png") fig.savefig(filename) @@ -134,7 +133,7 @@ def metadata_plot(args, energy_source, metadata_df, output_folder, name): energy_components = PowerSourceMap[energy_source] col_num = len(energy_components) fig, axes = plt.subplots(col_num, 1, figsize=(plot_width, plot_height * col_num)) - for i in range(0, col_num): + for i in range(col_num): component = energy_components[i] metadata_df = metadata_df.sort_values(by="feature_group") if col_num == 1: @@ -149,7 +148,7 @@ def metadata_plot(args, energy_source, metadata_df, output_folder, name): if i < col_num - 1: ax.set_xlabel("") # ax.legend(bbox_to_anchor=(1.05, 1.05)) - plt.suptitle("Pipieline metadata of {} {}".format(energy_source.upper(), args.output_type)) + plt.suptitle(f"Pipieline metadata of {energy_source.upper()} {args.output_type}") plt.tight_layout() plt.legend(frameon=False) filename = os.path.join(output_folder, name + ".png") @@ -174,7 +173,7 @@ def power_curve_plot(args, data_path, energy_source, output_folder, name): def _get_model(model_toppath, trainer, model_node_type, output_type, name, energy_source): feature_group = FeatureGroup.BPFOnly - model_name = "{}_{}".format(trainer, model_node_type) + model_name = f"{trainer}_{model_node_type}" group_path = get_model_group_path(model_toppath, output_type, feature_group, energy_source, name) model_path = os.path.join(group_path, model_name) model = load_model(model_path) @@ -204,11 +203,12 @@ def _load_all_models(model_toppath, output_type, name, node_types, energy_source def _plot_models(models, cpu_ms_max, energy_source, output_folder, name, max_plot=15, cpu_time_bin_num=10, sample_num=20): - from kepler_model.util.train_types import BPF_FEATURES import numpy as np import pandas as pd import seaborn as sns + from kepler_model.util.train_types import BPF_FEATURES + sns.set_palette("Paired") import matplotlib.pyplot as plt @@ -253,7 +253,7 @@ def _plot_models(models, cpu_ms_max, energy_source, output_folder, name, max_plo ax = axes[axes_index // num_cols][axes_index % num_cols] node_type = data_with_prediction_index[0] data_with_prediction = data_with_prediction_index[1] - sns.lineplot(data=data_with_prediction, x=main_feature_col, y=predicted_col[energy_source], label="type={}".format(node_type), marker=markers[index], ax=ax) + sns.lineplot(data=data_with_prediction, x=main_feature_col, y=predicted_col[energy_source], label=f"type={node_type}", marker=markers[index], ax=ax) index += 1 index = index % len(markers) if index % max_plot == 0: diff --git a/src/kepler_model/cmd/cmd_util.py b/src/kepler_model/cmd/cmd_util.py index 49e7fb78..f5e7ec24 100644 --- a/src/kepler_model/cmd/cmd_util.py +++ b/src/kepler_model/cmd/cmd_util.py @@ -1,12 +1,17 @@ -import os import datetime -import pandas as pd +import os +import pandas as pd -from kepler_model.util.prom_types import node_info_column, prom_responses_to_results, SOURCE_COL, energy_component_to_query -from kepler_model.util.train_types import ModelOutputType, FeatureGroup, PowerSourceMap 
-from kepler_model.util.loader import load_json, get_pipeline_path, default_node_type +from kepler_model.util.loader import default_node_type, get_pipeline_path, load_json +from kepler_model.util.prom_types import ( + SOURCE_COL, + energy_component_to_query, + node_info_column, + prom_responses_to_results, +) from kepler_model.util.saver import assure_path, save_csv +from kepler_model.util.train_types import FeatureGroup, ModelOutputType, PowerSourceMap UTC_OFFSET_TIMEDELTA = datetime.datetime.utcnow() - datetime.datetime.now() @@ -14,12 +19,12 @@ def print_file_to_stdout(data_path, args): file_path = os.path.join(data_path, args.output) try: - with open(file_path, "r") as file: + with open(file_path) as file: contents = file.read() print(contents) except FileNotFoundError: print(f"Error: Output '{file_path}' not found.") - except IOError: + except OSError: print(f"Error: Unable to read output '{file_path}'.") @@ -41,7 +46,7 @@ def extract_time(data_path, benchmark_filename): def save_query_results(data_path, output_filename, query_response): query_results = prom_responses_to_results(query_response) - save_path = os.path.join(data_path, "{}_csv".format(output_filename)) + save_path = os.path.join(data_path, f"{output_filename}_csv") assure_path(save_path) for query, data in query_results.items(): save_csv(save_path, query, data) @@ -80,13 +85,13 @@ def summary_validation(validate_df): no_data_df = target_df[target_df["count"] == 0] zero_data_df = target_df[target_df[">0"] == 0] valid_df = target_df[target_df[">0"] > 0] - print("==== {} ====".format(metric)) + print(f"==== {metric} ====") if len(no_data_df) > 0: print("{} pods: \tNo data for {}".format(len(no_data_df), pd.unique(no_data_df["scenarioID"]))) if len(zero_data_df) > 0: print("{} pods: \tZero data for {}".format(len(zero_data_df), pd.unique(zero_data_df["scenarioID"]))) - print("{} pods: \tValid\n".format(len(valid_df))) + print(f"{len(valid_df)} pods: \tValid\n") print("Valid data points:") print("Empty" if len(valid_df[">0"]) == 0 else valid_df.groupby(["scenarioID"]).sum()[[">0"]]) for metric, query in metric_to_validate_power.items(): @@ -246,7 +251,7 @@ def check_ot_fg(args, valid_fg): try: fg = FeatureGroup[args.feature_group] if args.feature_group not in valid_fg_name_list: - print("feature group: {} is not available in your data. please choose from the following list: {}".format(args.feature_group, valid_fg_name_list)) + print(f"feature group: {args.feature_group} is not available in your data. please choose from the following list: {valid_fg_name_list}") exit() except KeyError: print("invalid feature group: {}. 
valid feature group are {}.".format((args.feature_group, [fg.name for fg in valid_fg]))) @@ -268,14 +273,21 @@ def assert_train(trainer, data, energy_components): try: output = trainer.predict(node_type, component, X_values) if output is not None: - assert len(output) == len(X_values), "length of predicted values != features ({}!={})".format(len(output), len(X_values)) + assert len(output) == len(X_values), f"length of predicted values != features ({len(output)}!={len(X_values)})" except sklearn.exceptions.NotFittedError: pass def get_isolator(data_path, isolator, profile, pipeline_name, target_hints, bg_hints, abs_pipeline_name, replace_node_type=default_node_type): pipeline_path = get_pipeline_path(data_path, pipeline_name=pipeline_name) - from kepler_model.train import MinIdleIsolator, NoneIsolator, DefaultProfiler, ProfileBackgroundIsolator, TrainIsolator, generate_profiles + from kepler_model.train import ( + DefaultProfiler, + MinIdleIsolator, + NoneIsolator, + ProfileBackgroundIsolator, + TrainIsolator, + generate_profiles, + ) supported_isolator = { MinIdleIsolator().get_name(): MinIdleIsolator(), @@ -306,13 +318,12 @@ def get_isolator(data_path, isolator, profile, pipeline_name, target_hints, bg_h if abs_pipeline_name != "": trainer_isolator = TrainIsolator(idle_data=idle_data, profiler=DefaultProfiler, target_hints=target_hints, bg_hints=bg_hints, abs_pipeline_name=abs_pipeline_name) supported_isolator[trainer_isolator.get_name()] = trainer_isolator - else: - if abs_pipeline_name != "": - trainer_isolator = TrainIsolator(target_hints=target_hints, bg_hints=bg_hints, abs_pipeline_name=abs_pipeline_name) - supported_isolator[trainer_isolator.get_name()] = trainer_isolator + elif abs_pipeline_name != "": + trainer_isolator = TrainIsolator(target_hints=target_hints, bg_hints=bg_hints, abs_pipeline_name=abs_pipeline_name) + supported_isolator[trainer_isolator.get_name()] = trainer_isolator if isolator not in supported_isolator: - print("isolator {} is not supported. supported isolator: {}".format(isolator, supported_isolator.keys())) + print(f"isolator {isolator} is not supported. supported isolator: {supported_isolator.keys()}") return None return supported_isolator[isolator] @@ -322,7 +333,7 @@ def get_extractor(extractor): supported_extractor = {DefaultExtractor().get_name(): DefaultExtractor(), SmoothExtractor().get_name(): SmoothExtractor()} if extractor not in supported_extractor: - print("extractor {} is not supported. supported extractor: {}".format(extractor, supported_extractor.keys())) + print(f"extractor {extractor} is not supported. 
supported extractor: {supported_extractor.keys()}") return None return supported_extractor[extractor] diff --git a/src/kepler_model/cmd/main.py b/src/kepler_model/cmd/main.py index cbafa814..f9e3aaf4 100644 --- a/src/kepler_model/cmd/main.py +++ b/src/kepler_model/cmd/main.py @@ -1,9 +1,10 @@ -import os -import sys import argparse import datetime -import pandas as pd +import os +import sys +import threading +import pandas as pd from kepler_model.train import load_class from kepler_model.train.exporter import exporter @@ -11,17 +12,66 @@ from kepler_model.train.prom.prom_query import _range_queries from kepler_model.util.config import ERROR_KEY, model_toppath from kepler_model.util.extract_types import get_expected_power_columns -from kepler_model.util.loader import default_train_output_pipeline, load_json, load_pipeline_metadata, get_pipeline_path, get_model_group_path, list_pipelines, list_model_names, load_metadata, load_csv, get_preprocess_folder, get_general_filename, load_machine_spec -from kepler_model.util.prom_types import PROM_SERVER, PROM_QUERY_INTERVAL, PROM_QUERY_STEP, PROM_QUERY_START_TIME, PROM_QUERY_END_TIME, PROM_HEADERS, PROM_SSL_DISABLE, PROM_THIRDPARTY_METRICS -from kepler_model.util.prom_types import get_valid_feature_group_from_queries -from kepler_model.util.prom_types import metric_prefix as KEPLER_METRIC_PREFIX, prom_responses_to_results, TIMESTAMP_COL, feature_to_query, update_thirdparty_metrics, node_info_column -from kepler_model.util.saver import save_json, save_csv, save_train_args, _pipeline_model_metadata_filename, _power_curve_filename -from kepler_model.util.train_types import ModelOutputType, FeatureGroups, is_single_source_feature_group, all_feature_groups, default_trainers, PowerSourceMap - -from .cmd_plot import ts_plot, feature_power_plot, summary_plot, metadata_plot, power_curve_plot -from .cmd_util import extract_time, save_query_results, get_validate_df, summary_validation, get_extractor, check_ot_fg, get_pipeline, assert_train, get_isolator, UTC_OFFSET_TIMEDELTA - -import threading +from kepler_model.util.loader import ( + default_train_output_pipeline, + get_general_filename, + get_model_group_path, + get_pipeline_path, + get_preprocess_folder, + list_model_names, + list_pipelines, + load_csv, + load_json, + load_machine_spec, + load_metadata, + load_pipeline_metadata, +) +from kepler_model.util.prom_types import ( + PROM_HEADERS, + PROM_QUERY_END_TIME, + PROM_QUERY_INTERVAL, + PROM_QUERY_START_TIME, + PROM_QUERY_STEP, + PROM_SERVER, + PROM_SSL_DISABLE, + PROM_THIRDPARTY_METRICS, + TIMESTAMP_COL, + feature_to_query, + get_valid_feature_group_from_queries, + node_info_column, + prom_responses_to_results, + update_thirdparty_metrics, +) +from kepler_model.util.prom_types import metric_prefix as KEPLER_METRIC_PREFIX +from kepler_model.util.saver import ( + _pipeline_model_metadata_filename, + _power_curve_filename, + save_csv, + save_json, + save_train_args, +) +from kepler_model.util.train_types import ( + FeatureGroups, + ModelOutputType, + PowerSourceMap, + all_feature_groups, + default_trainers, + is_single_source_feature_group, +) + +from .cmd_plot import feature_power_plot, metadata_plot, power_curve_plot, summary_plot, ts_plot +from .cmd_util import ( + UTC_OFFSET_TIMEDELTA, + assert_train, + check_ot_fg, + extract_time, + get_extractor, + get_isolator, + get_pipeline, + get_validate_df, + save_query_results, + summary_validation, +) data_path = "/data" default_output_filename = "output" @@ -67,7 +117,7 @@ def query(args): benchmark_filename = 
args.input filepath = os.path.join(data_path, benchmark_filename + ".json") if os.path.isfile(filepath): - print("Query from {}.".format(benchmark_filename)) + print(f"Query from {benchmark_filename}.") start, end = extract_time(data_path, benchmark_filename) if start is None or end is None: if args.benchmark: @@ -77,12 +127,12 @@ def query(args): exit() if args.start_time != "" and args.end_time != "": # by [start time, end time] - print("Query from start_time {} to end_time {}.".format(args.start_time, args.end_time)) + print(f"Query from start_time {args.start_time} to end_time {args.end_time}.") start = datetime.datetime.strptime(args.start_time, "%Y-%m-%dT%H:%M:%SZ") end = datetime.datetime.strptime(args.end_time, "%Y-%m-%dT%H:%M:%SZ") else: # by interval - print("Query last {} interval.".format(args.interval)) + print(f"Query last {args.interval} interval.") end = datetime.datetime.now(datetime.timezone.utc) start = end - datetime.timedelta(seconds=args.interval) # save benchmark @@ -98,12 +148,12 @@ def query(args): queries = None if args.thirdparty_metrics != "": queries = [m for m in available_metrics if args.metric_prefix in m or m in args.thirdparty_metrics] - elif PROM_THIRDPARTY_METRICS != [""]: + elif [""] != PROM_THIRDPARTY_METRICS: queries = [m for m in available_metrics if args.metric_prefix in m or m in PROM_THIRDPARTY_METRICS] else: queries = [m for m in available_metrics if args.metric_prefix in m] - print("Start {} End {}".format(start, end)) + print(f"Start {start} End {end}") response = _range_queries(prom, queries, start, end, args.step, None) save_json(path=data_path, name=args.output, data=response) if args.to_csv: @@ -163,12 +213,12 @@ def extract(args): # Inject thirdparty_metrics to FeatureGroup if args.thirdparty_metrics != "": update_thirdparty_metrics(args.thirdparty_metrics) - elif PROM_THIRDPARTY_METRICS != [""]: + elif [""] != PROM_THIRDPARTY_METRICS: update_thirdparty_metrics(PROM_THIRDPARTY_METRICS) valid_fg = get_valid_feature_group_from_queries([query for query in query_results.keys() if len(query_results[query]) > 1]) ot, fg = check_ot_fg(args, valid_fg) if fg is None or ot is None: - print("feature group {} or model output type {} is wrong. (valid feature group: {})".format(args.feature_group, args.output_type, valid_fg)) + print(f"feature group {args.feature_group} or model output type {args.output_type} is wrong. (valid feature group: {valid_fg})") exit() energy_components = PowerSourceMap[args.energy_source] @@ -281,12 +331,12 @@ def train_from_data(args): # Inject thirdparty_metrics to FeatureGroup if args.thirdparty_metrics != "": update_thirdparty_metrics(args.thirdparty_metrics) - elif PROM_THIRDPARTY_METRICS != [""]: + elif [""] != PROM_THIRDPARTY_METRICS: update_thirdparty_metrics(PROM_THIRDPARTY_METRICS) valid_fg = [fg_key for fg_key in FeatureGroups.keys()] ot, fg = check_ot_fg(args, valid_fg) if fg is None or ot is None: - print("feature group {} or model output type {} is wrong. (valid feature group: {})".format(args.feature_group, args.output_type, all_feature_groups)) + print(f"feature group {args.feature_group} or model output type {args.output_type} is wrong. 
(valid feature group: {all_feature_groups})") exit() energy_components = PowerSourceMap[args.energy_source] @@ -310,7 +360,7 @@ def train_from_data(args): new_spec = NodeTypeSpec() new_spec.load(machine_spec_json) node_type = node_collection.index_train_machine(machine_id, new_spec) - print("Replace {} with {}".format(node_info_column, node_type)) + print(f"Replace {node_info_column} with {node_type}") data[node_info_column] = int(node_type) if node_type is None: @@ -361,6 +411,7 @@ def train_from_data(args): def train(args): import warnings + from sklearn.exceptions import ConvergenceWarning warnings.filterwarnings("ignore", category=ConvergenceWarning) @@ -372,7 +423,7 @@ def train(args): # Inject thirdparty_metrics to FeatureGroup if args.thirdparty_metrics != "": update_thirdparty_metrics(args.thirdparty_metrics) - elif PROM_THIRDPARTY_METRICS != [""]: + elif [""] != PROM_THIRDPARTY_METRICS: update_thirdparty_metrics(PROM_THIRDPARTY_METRICS) pipeline_name = default_train_output_pipeline @@ -425,7 +476,7 @@ def train(args): energy_components = PowerSourceMap[energy_source] for feature_group in valid_feature_groups: success, abs_data, dyn_data = pipeline.process_multiple_query(input_query_results_list, energy_components, energy_source, feature_group=feature_group.name, replace_node_type=node_type) - assert success, "failed to process pipeline {}".format(pipeline.name) + assert success, f"failed to process pipeline {pipeline.name}" for trainer in pipeline.trainers: if trainer.feature_group == feature_group and trainer.energy_source == energy_source: if trainer.node_level and abs_data is not None: @@ -439,7 +490,7 @@ def train(args): if dyn_data is not None: save_csv(data_saved_path, get_general_filename("preprocess", energy_source, feature_group, ModelOutputType.DynPower, args.extractor, args.isolator), dyn_data) - print("=========== Train {} Summary ============".format(energy_source)) + print(f"=========== Train {energy_source} Summary ============") # save args argparse_dict = vars(args) save_train_args(pipeline.path, argparse_dict) @@ -488,12 +539,12 @@ def estimate(args): print("must give input filename (query response) via --input for estimation.") exit() - from kepler_model.estimate import load_model, default_predicted_col_func, compute_error + from kepler_model.estimate import compute_error, default_predicted_col_func, load_model # Inject thirdparty_metrics to FeatureGroup if args.thirdparty_metrics != "": update_thirdparty_metrics(args.thirdparty_metrics) - elif PROM_THIRDPARTY_METRICS != [""]: + elif [""] != PROM_THIRDPARTY_METRICS: update_thirdparty_metrics(PROM_THIRDPARTY_METRICS) inputs = args.input.split(",") @@ -527,11 +578,11 @@ def estimate(args): pipeline_path = get_pipeline_path(data_path, pipeline_name=pipeline_name) pipeline_metadata = load_metadata(pipeline_path) if pipeline_metadata is None: - print("no metadata for pipeline {}.".format(pipeline_name)) + print(f"no metadata for pipeline {pipeline_name}.") continue pipeline = get_pipeline(data_path, pipeline_name, pipeline_metadata["extractor"], args.profile, args.target_hints, args.bg_hints, args.abs_pipeline_name, pipeline_metadata["isolator"], pipeline_metadata["abs_trainers"], pipeline_metadata["dyn_trainers"], energy_sources, valid_fg) if pipeline is None: - print("cannot get pipeline {}.".format(pipeline_name)) + print(f"cannot get pipeline {pipeline_name}.") continue for fg in valid_fg: print(" Feature Group: ", fg) @@ -542,7 +593,7 @@ def estimate(args): model_names = list_model_names(group_path) if 
args.model_name: if args.model_name not in model_names: - print("model: {} is not availble in pipeline {}, continue. available models are {}".format(args.model_name, pipeline_name, model_names)) + print(f"model: {args.model_name} is not availble in pipeline {pipeline_name}, continue. available models are {model_names}") continue model_names = [args.model_name] for model_name in model_names: @@ -577,7 +628,7 @@ def estimate(args): best_result = data_with_prediction.copy() best_model_path = model_path best_mae = max_mae - print(" Model {}: ".format(model_name), max_mae) + print(f" Model {model_name}: ", max_mae) # save best result if best_model_path is not None: @@ -589,16 +640,16 @@ def estimate(args): # save model import shutil - best_model = "{}_model".format(energy_source) + best_model = f"{energy_source}_model" if not args.id: # not only for export shutil.make_archive(os.path.join(output_folder, best_model), "zip", best_model_path) # save result - estimation_result = "{}_estimation_result".format(energy_source) + estimation_result = f"{energy_source}_estimation_result" save_csv(output_folder, estimation_result, best_result) best_result_map[energy_source] = best_result path_splits = best_model_path.split("/") - best_model_id_map[energy_source] = "{} using {}".format(path_splits[-1], path_splits[-2]) + best_model_id_map[energy_source] = f"{path_splits[-1]} using {path_splits[-2]}" return best_result_map, power_labels_map, best_model_id_map, pd.DataFrame(summary_items) @@ -652,21 +703,22 @@ def plot(args): return data = load_csv(data_saved_path, data_filename) if data is None: - print("cannot load data from {}/{}".format(data_saved_path, data_filename)) + print(f"cannot load data from {data_saved_path}/{data_filename}") continue feature_plot += [fg.name] feature_cols = FeatureGroups[fg] power_cols = [col for col in data.columns if "power" in col] feature_data = data.groupby([TIMESTAMP_COL]).sum() - ts_plot(feature_data, feature_cols, "Feature group: {}".format(fg.name), output_folder, data_filename) + ts_plot(feature_data, feature_cols, f"Feature group: {fg.name}", output_folder, data_filename) if not energy_plot: power_data = data.groupby([TIMESTAMP_COL]).max() data_filename = get_general_filename(args.target_data, energy_source, None, ot, args.extractor, args.isolator) - ts_plot(power_data, power_cols, "Power source: {}".format(energy_source), output_folder, data_filename, ylabel="Power (W)") + ts_plot(power_data, power_cols, f"Power source: {energy_source}", output_folder, data_filename, ylabel="Power (W)") elif args.target_data == "estimate": - from kepler_model.estimate import default_predicted_col_func from sklearn.preprocessing import MaxAbsScaler + from kepler_model.estimate import default_predicted_col_func + best_result_map, power_labels_map, best_model_id_map, summary_df = estimate(args) print(summary_df) for energy_source, best_restult in best_result_map.items(): @@ -690,21 +742,22 @@ def plot(args): predicted_power_cols += [predicted_power_colname] data_filename = get_general_filename(args.target_data, energy_source, fg, ot, args.extractor, args.isolator) # plot prediction - ts_plot(data, cols, "{} {} Prediction Result \n by {}".format(energy_source, ot.name, model_id), output_folder, "{}_{}".format(data_filename, model_id), subtitles=subtitles, labels=plot_labels, ylabel="Power (W)") + ts_plot(data, cols, f"{energy_source} {ot.name} Prediction Result \n by {model_id}", output_folder, f"{data_filename}_{model_id}", subtitles=subtitles, labels=plot_labels, ylabel="Power (W)") # 
                if fg is not None:
                    feature_cols = FeatureGroups[fg]
                    scaler = MaxAbsScaler()
                    data[feature_cols] = best_restult[[TIMESTAMP_COL] + feature_cols].groupby([TIMESTAMP_COL]).sum().sort_index()
                    # plot raw feature data to confirm min-max value
-                    ts_plot(data, feature_cols, "Features {}".format(fg), output_folder, "{}_{}".format(data_filename, fg), labels=None, subtitles=None, ylabel=None)
+                    ts_plot(data, feature_cols, f"Features {fg}", output_folder, f"{data_filename}_{fg}", labels=None, subtitles=None, ylabel=None)
                     data[feature_cols] = scaler.fit_transform(data[feature_cols])
-                    feature_power_plot(data, model_id, ot.name, energy_source, feature_cols, actual_power_cols, predicted_power_cols, output_folder, "{}_{}_corr".format(data_filename, model_id))
+                    feature_power_plot(data, model_id, ot.name, energy_source, feature_cols, actual_power_cols, predicted_power_cols, output_folder, f"{data_filename}_{model_id}_corr")
     elif args.target_data == "error":
-        from kepler_model.estimate import default_predicted_col_func
         from sklearn.preprocessing import MaxAbsScaler
+        from kepler_model.estimate import default_predicted_col_func
+
         _, _, _, summary_df = estimate(args)
         for energy_source in energy_sources:
             data_filename = get_general_filename(args.target_data, energy_source, fg, ot, args.extractor, args.isolator)
@@ -870,11 +923,11 @@ def plot_scenario(args):
             feature_cols = FeatureGroups[fg]
             power_cols = [col for col in data.columns if "power" in col]
             feature_data = data.groupby([TIMESTAMP_COL]).sum()
-            ts_plot(feature_data, feature_cols, "Feature group: {} ({})".format(fg.name, args.scenario), output_folder, data_filename)
+            ts_plot(feature_data, feature_cols, f"Feature group: {fg.name} ({args.scenario})", output_folder, data_filename)
         if not energy_plot:
             power_data = data.groupby([TIMESTAMP_COL]).max()
             data_filename = get_general_filename(args.target_data, energy_source, None, ot, args.extractor, args.isolator) + "_" + args.scenario
-            ts_plot(power_data, power_cols, "Power source: {} ({})".format(energy_source, args.scenario), output_folder, data_filename, ylabel="Power (W)")
+            ts_plot(power_data, power_cols, f"Power source: {energy_source} ({args.scenario})", output_folder, data_filename, ylabel="Power (W)")


 def run():
@@ -946,8 +999,8 @@ def run():
     if not os.path.exists(data_path):
         if args.command == "query":
             os.makedirs(data_path)
-            print("create new folder for data: {}".format(data_path))
+            print(f"create new folder for data: {data_path}")
         else:
-            print('{0} not exists. For docker run, {0} must be mount, add -v "$(pwd)":{0}. For native run, set DATAPATH'.format(data_path))
+            print(f'{data_path} does not exist. For docker run, {data_path} must be mounted, add -v "$(pwd)":{data_path}. For native run, set DATAPATH')
             exit()

     getattr(sys.modules[__name__], args.command)(args)
diff --git a/src/kepler_model/estimate/__init__.py b/src/kepler_model/estimate/__init__.py
index a23ad142..3162a26d 100644
--- a/src/kepler_model/estimate/__init__.py
+++ b/src/kepler_model/estimate/__init__.py
@@ -1,17 +1,16 @@
 from .model.estimate_common import compute_error
-
 from .model.model import (
-    load_model,
-    get_background_containers,
+    default_idle_predicted_col_func,
     default_predicted_col_func,
-    get_predicted_power_colname,
-    get_predicted_background_power_colname,
+    get_background_containers,
     get_dynamic_power_colname,
-    get_predicted_dynamic_power_colname,
-    get_predicted_dynamic_background_power_colname,
     get_label_power_colname,
+    get_predicted_background_power_colname,
+    get_predicted_dynamic_background_power_colname,
+    get_predicted_dynamic_power_colname,
+    get_predicted_power_colname,
     get_reconstructed_power_colname,
-    default_idle_predicted_col_func,
+    load_model,
 )

 __all__ = [
diff --git a/src/kepler_model/estimate/archived_model.py b/src/kepler_model/estimate/archived_model.py
index 86d18dbb..a3f05766 100644
--- a/src/kepler_model/estimate/archived_model.py
+++ b/src/kepler_model/estimate/archived_model.py
@@ -1,9 +1,9 @@
 import requests

 from kepler_model.estimate.model_server_connector import unpack
-from kepler_model.util.train_types import ModelOutputType
 from kepler_model.util.config import get_init_model_url
 from kepler_model.util.loader import load_metadata
+from kepler_model.util.train_types import ModelOutputType

 failed_list = []
@@ -70,7 +70,7 @@ def get_achived_model(power_request):
     if url == "":
         print("no URL set for ", output_type_name, power_request.energy_source)
         return None
-    print("try getting archieved model from URL: {} for {}".format(url, output_type_name))
+    print(f"try getting archived model from URL: {url} for {output_type_name}")
     response = requests.get(url)
     print(response)
     if response.status_code != 200:
diff --git a/src/kepler_model/estimate/estimator.py b/src/kepler_model/estimate/estimator.py
index b492ba62..00a23786 100644
--- a/src/kepler_model/estimate/estimator.py
+++ b/src/kepler_model/estimate/estimator.py
@@ -1,20 +1,20 @@
 import json
+import logging
 import os
 import shutil
+import signal
+import socket
 import sys
-import click
-import logging
+import click
 import pandas as pd
-import socket
-import signal
-from kepler_model.estimate.model_server_connector import make_request, is_model_server_enabled
 from kepler_model.estimate.archived_model import get_achived_model
 from kepler_model.estimate.model.model import load_downloaded_model
+from kepler_model.estimate.model_server_connector import is_model_server_enabled, make_request
+from kepler_model.util.config import SERVE_SOCKET, download_path, set_env_from_model_config
 from kepler_model.util.loader import get_download_output_path
-from kepler_model.util.config import set_env_from_model_config, SERVE_SOCKET, download_path
-from kepler_model.util.train_types import is_output_type_supported, ModelOutputType
+from kepler_model.util.train_types import ModelOutputType, is_output_type_supported

###############################################
# power request
@@ -85,7 +85,7 @@ def handle_request(data: str) -> dict:
         # find from config
         output_path = get_achived_model(power_request)
         if output_path is None:
-            msg = "failed to get model from request {}".format(data)
+            msg = f"failed to get model from request {data}"
             logger.error(msg)
             return {"powers": dict(), "msg": msg}
         logger.info(f"load model from config: {output_path}")
diff --git a/src/kepler_model/estimate/model/curvefit_model.py b/src/kepler_model/estimate/model/curvefit_model.py
index 1e0f72f7..2bfc4806 100644
--- a/src/kepler_model/estimate/model/curvefit_model.py
+++ b/src/kepler_model/estimate/model/curvefit_model.py
@@ -1,8 +1,13 @@
 import collections.abc

+from kepler_model.estimate.model.estimate_common import (
+    is_component_model,
+    load_model_by_json,
+    load_model_by_pickle,
+    transform_and_predict,
+)
 from kepler_model.util import ModelOutputType
-from kepler_model.util.train_types import main_feature, get_valid_feature_groups
-from kepler_model.estimate.model.estimate_common import transform_and_predict, load_model_by_pickle, load_model_by_json, is_component_model
+from kepler_model.util.train_types import get_valid_feature_groups, main_feature


 class CurveFitModelEstimator:
diff --git a/src/kepler_model/estimate/model/estimate_common.py b/src/kepler_model/estimate/model/estimate_common.py
index b0e8c369..565fafe5 100644
--- a/src/kepler_model/estimate/model/estimate_common.py
+++ b/src/kepler_model/estimate/model/estimate_common.py
@@ -1,9 +1,10 @@
 import os
+
 import cpuinfo
 import numpy as np
+from sklearn.metrics import mean_absolute_error, mean_squared_error

-from sklearn.metrics import mean_squared_error, mean_absolute_error
-from kepler_model.util.loader import load_pkl, load_json
+from kepler_model.util.loader import load_json, load_pkl

 keras_enabled = True
 cpu_info = cpuinfo.get_cpu_info()
@@ -32,7 +33,7 @@ def transform_and_predict(model, datapoint):
         y[y < 0] = 0
         y = y.tolist()
     except Exception as e:
-        msg = "{}\n".format(e)
+        msg = f"{e}\n"
         y = []
     return y, msg
diff --git a/src/kepler_model/estimate/model/keras_model.py b/src/kepler_model/estimate/model/keras_model.py
index d26be159..ce3d75f6 100644
--- a/src/kepler_model/estimate/model/keras_model.py
+++ b/src/kepler_model/estimate/model/keras_model.py
@@ -1,5 +1,11 @@
+from kepler_model.estimate.model.estimate_common import (
+    is_component_model,
+    load_model_by_json,
+    load_model_by_keras,
+    load_model_by_pickle,
+    transform_and_predict,
+)
 from kepler_model.estimate.model_server_connector import ModelOutputType
-from kepler_model.estimate.model.estimate_common import load_model_by_pickle, load_model_by_keras, load_model_by_json, transform_and_predict, is_component_model


 class KerasModelEstimator:
diff --git a/src/kepler_model/estimate/model/model.py b/src/kepler_model/estimate/model/model.py
index 4b435570..2e8a657b 100644
--- a/src/kepler_model/estimate/model/model.py
+++ b/src/kepler_model/estimate/model/model.py
@@ -1,14 +1,15 @@
 import json
 import logging
+
 import pandas as pd

-from kepler_model.util.loader import load_metadata, get_download_output_path
+from kepler_model.estimate.model.curvefit_model import CurveFitModelEstimator
+from kepler_model.estimate.model.scikit_model import ScikitModelEstimator
+from kepler_model.estimate.model.xgboost_model import XgboostModelEstimator
 from kepler_model.util.config import download_path
+from kepler_model.util.loader import get_download_output_path, load_metadata
 from kepler_model.util.prom_types import valid_container_query
-from kepler_model.estimate.model.scikit_model import ScikitModelEstimator
-from kepler_model.estimate.model.xgboost_model import XgboostModelEstimator
-from kepler_model.estimate.model.curvefit_model import CurveFitModelEstimator

 # from keras_model import KerasModelEstimator

 logger = logging.getLogger(__name__)
@@ -23,11 +24,11 @@


 def default_predicted_col_func(energy_component):
-    return 
"default_{}_power".format(energy_component) + return f"default_{energy_component}_power" def default_idle_predicted_col_func(energy_component): - return "default_idle_{}_power".format(energy_component) + return f"default_idle_{energy_component}_power" def get_background_containers(idle_data): @@ -35,31 +36,31 @@ def get_background_containers(idle_data): def get_label_power_colname(energy_component): - return "node_{}_power".format(energy_component) + return f"node_{energy_component}_power" def get_predicted_power_colname(energy_component): - return "predicted_container_{}_power".format(energy_component) + return f"predicted_container_{energy_component}_power" def get_predicted_background_power_colname(energy_component): - return "predicted_container_{}_background_power".format(energy_component) + return f"predicted_container_{energy_component}_background_power" def get_dynamic_power_colname(energy_component): - return "container_{}_dynamic_power".format(energy_component) + return f"container_{energy_component}_dynamic_power" def get_predicted_dynamic_power_colname(energy_component): - return "predicted_container_{}_dynamic_power".format(energy_component) + return f"predicted_container_{energy_component}_dynamic_power" def get_predicted_dynamic_background_power_colname(energy_component): - return "predicted_container_{}_dynamic_background_power".format(energy_component) + return f"predicted_container_{energy_component}_dynamic_background_power" def get_reconstructed_power_colname(energy_component): - return "{}_reconstructed_power".format(energy_component) + return f"{energy_component}_reconstructed_power" class Model: @@ -114,18 +115,17 @@ def is_valid_model(self, filters): if attrb == "features": if not self.feature_check(val): return False + elif not hasattr(self, attrb) or getattr(self, attrb) is None: + self.print_log(f"{self.model_name} has no {attrb}") else: - if not hasattr(self, attrb) or getattr(self, attrb) is None: - self.print_log("{} has no {}".format(self.model_name, attrb)) - else: - cmp_val = getattr(self, attrb) - val = float(val) - if attrb == "abs_max_corr": # higher is better - valid = cmp_val >= val - else: # lower is better - valid = cmp_val <= val - if not valid: - return False + cmp_val = getattr(self, attrb) + val = float(val) + if attrb == "abs_max_corr": # higher is better + valid = cmp_val >= val + else: # lower is better + valid = cmp_val <= val + if not valid: + return False return True def feature_check(self, features): @@ -149,7 +149,7 @@ def append_prediction(self, data, predicted_col_func=default_predicted_col_func) return predicted_power_map, data_with_prediction def print_log(self, message): - print("{} model: {}".format(self.model_name, message)) + print(f"{self.model_name} model: {message}") def append_idle_prediction(self, data, predicted_col_func=default_idle_predicted_col_func): idle_data = data.copy() @@ -161,7 +161,7 @@ def append_idle_prediction(self, data, predicted_col_func=default_idle_predicted def load_model(model_path): metadata = load_metadata(model_path) if not metadata: - logger.warn(f"no metadata in {model_path}") + logger.warning(f"no metadata in {model_path}") return None metadata["model_path"] = model_path diff --git a/src/kepler_model/estimate/model/scikit_model.py b/src/kepler_model/estimate/model/scikit_model.py index 90e1e6ee..a6fafb1a 100644 --- a/src/kepler_model/estimate/model/scikit_model.py +++ b/src/kepler_model/estimate/model/scikit_model.py @@ -1,8 +1,13 @@ -from kepler_model.estimate.model.estimate_common import 
transform_and_predict, load_model_by_pickle, load_model_by_json, is_component_model -from kepler_model.util import ModelOutputType - import collections.abc +from kepler_model.estimate.model.estimate_common import ( + is_component_model, + load_model_by_json, + load_model_by_pickle, + transform_and_predict, +) +from kepler_model.util import ModelOutputType + class ScikitModelEstimator: def __init__(self, model_path, model_name, output_type, model_file, features, fe_files, component_init=False): diff --git a/src/kepler_model/estimate/model/xgboost_model.py b/src/kepler_model/estimate/model/xgboost_model.py index 05a42d8e..0bbec26b 100644 --- a/src/kepler_model/estimate/model/xgboost_model.py +++ b/src/kepler_model/estimate/model/xgboost_model.py @@ -1,8 +1,14 @@ import collections.abc import os + import xgboost as xgb -from kepler_model.estimate.model.estimate_common import transform_and_predict, load_model_by_pickle, load_model_by_json, is_component_model +from kepler_model.estimate.model.estimate_common import ( + is_component_model, + load_model_by_json, + load_model_by_pickle, + transform_and_predict, +) from kepler_model.util import ModelOutputType diff --git a/src/kepler_model/estimate/model_server_connector.py b/src/kepler_model/estimate/model_server_connector.py index 650fd0f5..a50ecae2 100644 --- a/src/kepler_model/estimate/model_server_connector.py +++ b/src/kepler_model/estimate/model_server_connector.py @@ -1,11 +1,16 @@ -import requests +import codecs +import json import os import shutil -import json -import codecs +import requests -from kepler_model.util.config import is_model_server_enabled, get_model_server_req_endpoint, get_model_server_list_endpoint, download_path +from kepler_model.util.config import ( + download_path, + get_model_server_list_endpoint, + get_model_server_req_endpoint, + is_model_server_enabled, +) from kepler_model.util.loader import get_download_output_path from kepler_model.util.train_types import ModelOutputType @@ -56,7 +61,7 @@ def make_request(power_request): try: response = requests.post(get_model_server_req_endpoint(), json=model_request) except Exception as err: - print("cannot make request to {}: {}".format(get_model_server_req_endpoint(), err)) + print(f"cannot make request to {get_model_server_req_endpoint()}: {err}") return None if response.status_code != 200: return None @@ -69,7 +74,7 @@ def list_all_models(): try: response = requests.get(get_model_server_list_endpoint()) except Exception as err: - print("cannot list model: {}".format(err)) + print(f"cannot list model: {err}") return dict() if response.status_code != 200: return dict() diff --git a/src/kepler_model/server/model_server.py b/src/kepler_model/server/model_server.py index c0598d6b..46add84a 100644 --- a/src/kepler_model/server/model_server.py +++ b/src/kepler_model/server/model_server.py @@ -1,18 +1,46 @@ -import sys -import os import codecs -import shutil import logging +import os +import shutil +import sys -import requests import click -from flask import Flask, request, json, make_response, send_file - -from kepler_model.util.train_types import get_valid_feature_groups, ModelOutputType, FeatureGroups, FeatureGroup, PowerSourceMap, weight_support_trainers -from kepler_model.util.config import getConfig, model_toppath, ERROR_KEY, MODEL_SERVER_MODEL_REQ_PATH, MODEL_SERVER_MODEL_LIST_PATH, initial_pipeline_urls, download_path -from kepler_model.util.loader import parse_filters, is_valid_model, load_json, load_weight, get_model_group_path, get_archived_file, METADATA_FILENAME, 
CHECKPOINT_FOLDERNAME, get_pipeline_path, any_node_type, is_matched_type, get_largest_candidates +import requests +from flask import Flask, json, make_response, request, send_file + +from kepler_model.train import NodeTypeIndexCollection, NodeTypeSpec +from kepler_model.util.config import ( + ERROR_KEY, + MODEL_SERVER_MODEL_LIST_PATH, + MODEL_SERVER_MODEL_REQ_PATH, + download_path, + getConfig, + initial_pipeline_urls, + model_toppath, +) +from kepler_model.util.loader import ( + CHECKPOINT_FOLDERNAME, + METADATA_FILENAME, + any_node_type, + get_archived_file, + get_largest_candidates, + get_model_group_path, + get_pipeline_path, + is_matched_type, + is_valid_model, + load_json, + load_weight, + parse_filters, +) from kepler_model.util.saver import WEIGHT_FILENAME -from kepler_model.train import NodeTypeSpec, NodeTypeIndexCollection +from kepler_model.util.train_types import ( + FeatureGroup, + FeatureGroups, + ModelOutputType, + PowerSourceMap, + get_valid_feature_groups, + weight_support_trainers, +) logger = logging.getLogger(__name__) @@ -82,26 +110,26 @@ def select_best_model(spec, valid_groupath, filters, energy_source, pipeline_nam candidates = get_largest_candidates(model_names, pipeline_name, nodeCollection, energy_source) logger.info(f"no matched models; selecting from large candidates: {candidates}") if candidates is None: - logger.warn("no large candidates; selecting from all available") + logger.warning("no large candidates; selecting from all available") candidates = model_names for model_name in candidates: model_savepath = os.path.join(valid_groupath, model_name) metadata = load_json(model_savepath, METADATA_FILENAME) if metadata is None or not is_valid_model(metadata, filters) or ERROR_KEY not in metadata: # invalid metadata - logger.warn(f"invalid metadata {is_valid_model(metadata, filters)} : {metadata}") + logger.warning(f"invalid metadata {is_valid_model(metadata, filters)} : {metadata}") continue if weight: response = load_weight(model_savepath) if response is None: # fail to get weight file - logger.warn(f"weight failed: {model_savepath}") + logger.warning(f"weight failed: {model_savepath}") continue else: response = get_archived_file(valid_groupath, model_name) if not os.path.exists(response): # archived model file does not exists - logger.warn(f"archive failed: {response}") + logger.warning(f"archive failed: {response}") continue if best_cadidate is None or best_cadidate[ERROR_KEY] > metadata[ERROR_KEY]: best_cadidate = metadata @@ -141,18 +169,18 @@ def get_model(): best_model = best_candidate best_response = response if best_model is None: - return make_response("cannot find model for {} at the moment".format(model_request), 400) + return make_response(f"cannot find model for {model_request} at the moment", 400) if req.weight: try: response = app.response_class(response=json.dumps(best_response), status=200, mimetype="application/json") return response except ValueError as err: - return make_response("get weight response error: {}".format(err), 400) + return make_response(f"get weight response error: {err}", 400) else: try: return send_file(best_response, as_attachment=True) except ValueError as err: - return make_response("send archived model error: {}".format(err), 400) + return make_response(f"send archived model error: {err}", 400) # get_available_models: return name list of best-candidate pipelines @@ -195,7 +223,7 @@ def get_available_models(): response = app.response_class(response=json.dumps(model_names), status=200, mimetype="application/json") return 
response except (ValueError, Exception) as err: - return make_response("failed to get best model list: {}".format(err), 400) + return make_response(f"failed to get best model list: {err}", 400) # upack_zip_files: unpack all model.zip files to model folder and copy model.json to model/weight.zip diff --git a/src/kepler_model/train/__init__.py b/src/kepler_model/train/__init__.py index fd8d1762..70b34b97 100644 --- a/src/kepler_model/train/__init__.py +++ b/src/kepler_model/train/__init__.py @@ -2,11 +2,11 @@ from .extractor.extractor import DefaultExtractor from .extractor.smooth_extractor import SmoothExtractor -from .profiler.profiler import Profiler, generate_profiles -from .profiler.node_type_index import NodeTypeIndexCollection, NodeTypeSpec -from .isolator.isolator import MinIdleIsolator, ProfileBackgroundIsolator, NoneIsolator +from .isolator.isolator import MinIdleIsolator, NoneIsolator, ProfileBackgroundIsolator from .isolator.train_isolator import TrainIsolator from .pipeline import NewPipeline, load_class +from .profiler.node_type_index import NodeTypeIndexCollection, NodeTypeSpec +from .profiler.profiler import Profiler, generate_profiles DefaultProfiler = Profiler(extractor=DefaultExtractor()) diff --git a/src/kepler_model/train/ec2_pipeline.py b/src/kepler_model/train/ec2_pipeline.py index 1bdc9622..89159d71 100644 --- a/src/kepler_model/train/ec2_pipeline.py +++ b/src/kepler_model/train/ec2_pipeline.py @@ -15,19 +15,23 @@ DATAPATH=/path/to/models python cmd/main.py export --pipeline-name ec2-0.7.11 -o /path/to/kepler-model-db/models --publisher sunya-ch --zip=true --collect-date "July 2024" """ -import os import json +import os + import boto3 -from kepler_model.train.profiler.node_type_index import NodeTypeSpec, NodeAttribute -from kepler_model.train.pipeline import NewPipeline from kepler_model.train.extractor import DefaultExtractor from kepler_model.train.isolator.isolator import MinIdleIsolator -from kepler_model.util.prom_types import node_info_column, prom_responses_to_results, get_valid_feature_group_from_queries - -from kepler_model.util.train_types import default_trainer_names, PowerSourceMap -from kepler_model.util.saver import save_json +from kepler_model.train.pipeline import NewPipeline +from kepler_model.train.profiler.node_type_index import NodeTypeSpec from kepler_model.util.config import model_toppath +from kepler_model.util.prom_types import ( + get_valid_feature_group_from_queries, + node_info_column, + prom_responses_to_results, +) +from kepler_model.util.saver import save_json +from kepler_model.util.train_types import PowerSourceMap, default_trainer_names data_path = os.path.join(model_toppath, "..", "data") @@ -52,7 +56,7 @@ def read_response_in_json(key): response = s3.get_object(Bucket=bucket_name, Key=key) global last_modified last_modified = response["LastModified"] - print("{} last modified time: {}".format(key, last_modified)) + print(f"{key} last modified time: {last_modified}") return json.loads(response["Body"].read().decode("utf-8")) diff --git a/src/kepler_model/train/exporter/exporter.py b/src/kepler_model/train/exporter/exporter.py index ac4f5f1a..ef56816f 100644 --- a/src/kepler_model/train/exporter/exporter.py +++ b/src/kepler_model/train/exporter/exporter.py @@ -1,11 +1,17 @@ import datetime -from kepler_model.train.exporter.validator import get_validated_export_items, BestModelCollection -from kepler_model.util.loader import load_metadata, load_node_type_index, get_version_path, get_export_path -from kepler_model.util.saver import 
save_pipeline_metadata, save_node_type_index -from kepler_model.util.format import time_to_str -from kepler_model.train.exporter.writer import generate_pipeline_page, generate_report_results, generate_pipeline_readme, append_version_readme, get_workload_content +from kepler_model.train.exporter.validator import BestModelCollection, get_validated_export_items +from kepler_model.train.exporter.writer import ( + append_version_readme, + generate_pipeline_page, + generate_pipeline_readme, + generate_report_results, + get_workload_content, +) from kepler_model.util.config import ERROR_KEY +from kepler_model.util.format import time_to_str +from kepler_model.util.loader import get_export_path, get_version_path, load_metadata, load_node_type_index +from kepler_model.util.saver import save_node_type_index, save_pipeline_metadata repo_url = "https://raw.githubusercontent.com/sustainable-computing-io/kepler-model-db/main/models" diff --git a/src/kepler_model/train/exporter/validator.py b/src/kepler_model/train/exporter/validator.py index 97a1809f..d97ee7ab 100644 --- a/src/kepler_model/train/exporter/validator.py +++ b/src/kepler_model/train/exporter/validator.py @@ -1,10 +1,11 @@ import os -import pandas as pd import shutil -from kepler_model.util.train_types import ModelOutputType, PowerSourceMap, FeatureGroup, weight_support_trainers -from kepler_model.util.loader import load_pipeline_metadata, get_model_group_path, load_weight, get_archived_file +import pandas as pd + +from kepler_model.util.loader import get_archived_file, get_model_group_path, load_pipeline_metadata, load_weight from kepler_model.util.saver import save_json +from kepler_model.util.train_types import FeatureGroup, ModelOutputType, PowerSourceMap, weight_support_trainers # mae and mape are exclusive thresholds to balance between absolute error value and relative error value mae_threshold = 10 diff --git a/src/kepler_model/train/exporter/writer.py b/src/kepler_model/train/exporter/writer.py index f5fce4cb..4c3ebf2c 100644 --- a/src/kepler_model/train/exporter/writer.py +++ b/src/kepler_model/train/exporter/writer.py @@ -1,10 +1,10 @@ import os -import pandas as pd +import pandas as pd -from kepler_model.util.loader import load_json, version -from kepler_model.util.saver import assure_path, _pipeline_model_metadata_filename, _power_curve_filename from kepler_model.train.exporter.validator import mae_threshold, mape_threshold +from kepler_model.util.loader import load_json, version +from kepler_model.util.saver import _pipeline_model_metadata_filename, _power_curve_filename, assure_path from kepler_model.util.train_types import ModelOutputType, PowerSourceMap error_report_foldername = "error_report" @@ -16,7 +16,7 @@ def write_markdown(markdown_filepath, markdown_content): # Write the Markdown content to the file markdown_file.write(markdown_content) print(f"Markdown file '{markdown_filepath}' has been created successfully.") - except IOError as e: + except OSError as e: print(f"Cannot write '{markdown_filepath}': {e}") @@ -64,7 +64,7 @@ def get_workload_content(data_path, inputs): # read file directly filepath = os.path.join(data_path, input) if os.path.exists(filepath): - with open(filepath, "r") as file: + with open(filepath) as file: content = file.read() workload_content += """ @@ -85,7 +85,7 @@ def get_workload_content(data_path, inputs): def format_trainer(trainers): trainer_content = "" for trainer in trainers: - trainer_content += " - {}\n".format(trainer) + trainer_content += f" - {trainer}\n" return trainer_content @@ 
-93,7 +93,7 @@ def generate_pipeline_page(version_path, pipeline_metadata, workload_content, sk doc_path = os.path.join(version_path, ".doc") assure_path(doc_path) pipeline_name = pipeline_metadata["name"] - markdown_filename = "{}.md".format(pipeline_name) + markdown_filename = f"{pipeline_name}.md" markdown_filepath = os.path.join(doc_path, markdown_filename) if skip_if_exist and os.path.exists(markdown_filepath): print(f"Markdown file '{markdown_filepath}' already exists.") @@ -131,7 +131,7 @@ def _error_report_url(export_path, node_type, assure): error_report_folder = os.path.join(export_path, error_report_foldername) if assure: assure_path(error_report_folder) - node_type_file = "node_type_{}.md".format(node_type) + node_type_file = f"node_type_{node_type}.md" return os.path.join(error_report_folder, node_type_file) @@ -164,7 +164,7 @@ def format_error_report(error_dict): for energy_source in sorted(error_dict.keys()): for outputy_type_name in sorted(error_dict[energy_source].keys()): df = error_dict[energy_source][outputy_type_name] - content += "### {} {} model\n\n".format(energy_source, outputy_type_name) + content += f"### {energy_source} {outputy_type_name} model\n\n" if len(df) == 0: content += "No model available\n\n" else: @@ -178,7 +178,7 @@ def generate_report_results(local_export_path, best_model_collections, node_type if best_model_collections[int(node_type)].has_model: markdown_filepath = _error_report_url(local_export_path, node_type, assure=True) error_dict, error_dict_with_weight = get_error_dict(remote_version_path, collection) - markdown_content = "# Validation results on node type {}\n\n".format(node_type) + markdown_content = f"# Validation results on node type {node_type}\n\n" markdown_content += data_to_markdown_table(pd.DataFrame([node_type_index_json[str(node_type)]["attrs"]])) + "\n" # add links @@ -195,8 +195,8 @@ def generate_report_results(local_export_path, best_model_collections, node_type # generate_pipeline_readme - version/pipeline_name/README.md def generate_pipeline_readme(pipeline_name, local_export_path, node_type_index_json, best_model_collections): markdown_filepath = os.path.join(local_export_path, "README.md") - markdown_content = "# {} on v{} Build\n\n".format(pipeline_name, version) - markdown_content += "MAE Threshold = {}, MAPE Threshold = {}%\n\n".format(mae_threshold, int(mape_threshold)) + markdown_content = f"# {pipeline_name} on v{version} Build\n\n" + markdown_content += f"MAE Threshold = {mae_threshold}, MAPE Threshold = {int(mape_threshold)}%\n\n" items = [] for node_type, spec_json in node_type_index_json.items(): if best_model_collections[int(node_type)].has_model: @@ -204,15 +204,15 @@ def generate_pipeline_readme(pipeline_name, local_export_path, node_type_index_j item = {"node type": node_type} item.update(spec_json["attrs"]) item["member size"] = len(spec_json["members"]) - item["error report"] = "[link]({})".format(error_file) + item["error report"] = f"[link]({error_file})" items += [item] df = pd.DataFrame(items) - markdown_content += "Available Node Type: {}\n\n".format(len(df)) + markdown_content += f"Available Node Type: {len(df)}\n\n" # add metadata figures for ot in ModelOutputType: for energy_source in PowerSourceMap.keys(): data_filename = _pipeline_model_metadata_filename(energy_source, ot.name) - markdown_content += "![]({}.png)\n".format(data_filename) + markdown_content += f"![]({data_filename}.png)\n" markdown_content += data_to_markdown_table(df.sort_values(by=["node type"])) # add power curve figures @@ -220,8 
+220,8 @@ def generate_pipeline_readme(pipeline_name, local_export_path, node_type_index_j for energy_source in PowerSourceMap.keys(): data_filename = _power_curve_filename(energy_source, ot.name) png_filename = data_filename + ".png" - markdown_content += "## {} ({})\n".format(energy_source, ot.name) - markdown_content += "![]({})\n".format(png_filename) + markdown_content += f"## {energy_source} ({ot.name})\n" + markdown_content += f"![]({png_filename})\n" write_markdown(markdown_filepath, markdown_content) return markdown_filepath diff --git a/src/kepler_model/train/extractor/extractor.py b/src/kepler_model/train/extractor/extractor.py index ebc074cc..a7741220 100644 --- a/src/kepler_model/train/extractor/extractor.py +++ b/src/kepler_model/train/extractor/extractor.py @@ -1,12 +1,30 @@ -import pandas as pd -import numpy as np from abc import ABCMeta, abstractmethod -from kepler_model.util.prom_types import TIMESTAMP_COL, SOURCE_COL, get_energy_unit, usage_ratio_query, node_info_query, energy_component_to_query, feature_to_query, pkg_id_column, container_id_cols, node_info_column -from kepler_model.util.train_types import FeatureGroups, FeatureGroup, SYSTEM_FEATURES -from kepler_model.util.loader import default_node_type -from kepler_model.util.extract_types import container_id_colname, ratio_to_col, component_to_col, get_unit_vals, accelerator_type_colname +import numpy as np +import pandas as pd + from kepler_model.train.extractor.preprocess import drop_zero_column, find_correlations +from kepler_model.util.extract_types import ( + accelerator_type_colname, + component_to_col, + container_id_colname, + get_unit_vals, + ratio_to_col, +) +from kepler_model.util.loader import default_node_type +from kepler_model.util.prom_types import ( + SOURCE_COL, + TIMESTAMP_COL, + container_id_cols, + energy_component_to_query, + feature_to_query, + get_energy_unit, + node_info_column, + node_info_query, + pkg_id_column, + usage_ratio_query, +) +from kepler_model.util.train_types import SYSTEM_FEATURES, FeatureGroup, FeatureGroups # append ratio for each unit @@ -173,7 +191,7 @@ def get_workload_feature_data(self, query_results, features): # separate based on type label grouped = aggr_query_data.groupby([accelerator_type_colname]) for group_name, group_data in grouped: - new_colname = "{}_{}".format(feature, group_name) + new_colname = f"{feature}_{group_name}" cur_accelerator_features.append(new_colname) group_data.rename(columns={query: new_colname}, inplace=True) group_data = group_data[[TIMESTAMP_COL, new_colname]] diff --git a/src/kepler_model/train/extractor/preprocess.py b/src/kepler_model/train/extractor/preprocess.py index e2b58ee4..c2b9d140 100644 --- a/src/kepler_model/train/extractor/preprocess.py +++ b/src/kepler_model/train/extractor/preprocess.py @@ -1,9 +1,9 @@ import numpy as np -from kepler_model.util.train_types import PowerSourceMap -from kepler_model.util.prom_types import TIMESTAMP_COL -from kepler_model.util.extract_types import col_to_component from kepler_model.estimate.model.model import get_label_power_colname +from kepler_model.util.extract_types import col_to_component +from kepler_model.util.prom_types import TIMESTAMP_COL +from kepler_model.util.train_types import PowerSourceMap def drop_zero_column(data, cols): diff --git a/src/kepler_model/train/extractor/smooth_extractor.py b/src/kepler_model/train/extractor/smooth_extractor.py index 9e2776ac..9944dff1 100644 --- a/src/kepler_model/train/extractor/smooth_extractor.py +++ 
b/src/kepler_model/train/extractor/smooth_extractor.py @@ -1,5 +1,6 @@ +from kepler_model.util.train_types import SYSTEM_FEATURES, FeatureGroup, FeatureGroups + from .extractor import DefaultExtractor, find_correlations -from kepler_model.util.train_types import FeatureGroups, FeatureGroup, SYSTEM_FEATURES class SmoothExtractor(DefaultExtractor): diff --git a/src/kepler_model/train/isolator/isolator.py b/src/kepler_model/train/isolator/isolator.py index 1021c4ce..4dbcdc97 100644 --- a/src/kepler_model/train/isolator/isolator.py +++ b/src/kepler_model/train/isolator/isolator.py @@ -1,12 +1,16 @@ from abc import ABCMeta, abstractmethod + import pandas as pd +from kepler_model.estimate import ( + get_background_containers, + get_predicted_background_power_colname, + get_predicted_power_colname, + get_reconstructed_power_colname, +) +from kepler_model.util.extract_types import all_container_key, col_to_component, container_id_colname, get_num_of_unit +from kepler_model.util.prom_types import TIMESTAMP_COL, get_container_name_from_id, node_info_column from kepler_model.util.train_types import PowerSourceMap -from kepler_model.util.extract_types import container_id_colname, col_to_component, get_num_of_unit, all_container_key -from kepler_model.util.prom_types import TIMESTAMP_COL, node_info_column, get_container_name_from_id - -from kepler_model.estimate import get_background_containers, get_predicted_power_colname, get_predicted_background_power_colname, get_reconstructed_power_colname - container_indexes = [TIMESTAMP_COL, container_id_colname] diff --git a/src/kepler_model/train/isolator/train_isolator.py b/src/kepler_model/train/isolator/train_isolator.py index 0924787d..09971a28 100644 --- a/src/kepler_model/train/isolator/train_isolator.py +++ b/src/kepler_model/train/isolator/train_isolator.py @@ -1,18 +1,26 @@ import os + import numpy as np import pandas as pd -from .isolator import Isolator, isolate_container - -from kepler_model.estimate import load_model, get_predicted_power_colname, get_predicted_background_power_colname, get_dynamic_power_colname, get_reconstructed_power_colname, get_label_power_colname, get_background_containers +from kepler_model.estimate import ( + get_background_containers, + get_dynamic_power_colname, + get_label_power_colname, + get_predicted_background_power_colname, + get_predicted_power_colname, + get_reconstructed_power_colname, + load_model, +) from kepler_model.train.extractor.preprocess import find_correlations, get_extracted_power_labels - from kepler_model.util import PowerSourceMap -from kepler_model.util.train_types import get_valid_feature_groups -from kepler_model.util.prom_types import TIMESTAMP_COL, get_container_name_from_id -from kepler_model.util.extract_types import container_level_index, container_id_colname, col_to_component from kepler_model.util.config import model_toppath -from kepler_model.util.loader import list_all_abs_models, default_train_output_pipeline +from kepler_model.util.extract_types import col_to_component, container_id_colname, container_level_index +from kepler_model.util.loader import default_train_output_pipeline, list_all_abs_models +from kepler_model.util.prom_types import TIMESTAMP_COL, get_container_name_from_id +from kepler_model.util.train_types import get_valid_feature_groups + +from .isolator import Isolator, isolate_container def is_better(curr_min_err, err, curr_max_corr, corr, corr_threshold=0.7): diff --git a/src/kepler_model/train/offline_trainer.py b/src/kepler_model/train/offline_trainer.py index 
c913c910..2cecddf8 100644 --- a/src/kepler_model/train/offline_trainer.py +++ b/src/kepler_model/train/offline_trainer.py @@ -9,20 +9,19 @@ # python tests/offline_trainer_test.py import importlib +import shutil + +from flask import Flask, make_response, request, send_file -from kepler_model.util.config import model_toppath -from kepler_model.util.loader import get_pipeline_path, default_pipelines -from kepler_model.util.train_types import PowerSourceMap -from kepler_model.util.prom_types import get_valid_feature_group_from_queries, prom_responses_to_results -from kepler_model.train.profiler.profiler import Profiler, generate_profiles from kepler_model.train.extractor.extractor import DefaultExtractor from kepler_model.train.isolator.isolator import ProfileBackgroundIsolator from kepler_model.train.isolator.train_isolator import TrainIsolator from kepler_model.train.pipeline import NewPipeline - -import shutil - -from flask import Flask, request, make_response, send_file +from kepler_model.train.profiler.profiler import Profiler, generate_profiles +from kepler_model.util.config import model_toppath +from kepler_model.util.loader import default_pipelines, get_pipeline_path +from kepler_model.util.prom_types import get_valid_feature_group_from_queries, prom_responses_to_results +from kepler_model.util.train_types import PowerSourceMap serve_port = 8102 @@ -115,14 +114,14 @@ def train(): train_request = request.get_json() req = TrainRequest(**train_request) model = req.get_model() - print("Get Model: {}".format(model)) + print(f"Get Model: {model}") if model is None: - return make_response("Cannot train model {}".format(req.name), 400) + return make_response(f"Cannot train model {req.name}", 400) else: try: return send_file(model, as_attachment=True) except ValueError as err: - return make_response("Send trained model error: {}".format(err), 400) + return make_response(f"Send trained model error: {err}", 400) def run(): diff --git a/src/kepler_model/train/online_trainer.py b/src/kepler_model/train/online_trainer.py index 2b627535..b5a3d76a 100644 --- a/src/kepler_model/train/online_trainer.py +++ b/src/kepler_model/train/online_trainer.py @@ -1,16 +1,15 @@ # TODO: test import time -from kepler_model.train.prom.prom_query import PrometheusClient -from kepler_model.util.prom_types import get_valid_feature_group_from_queries, PROM_QUERY_INTERVAL -from kepler_model.util.config import getConfig -from kepler_model.util.loader import default_train_output_pipeline -from kepler_model.util.train_types import PowerSourceMap, FeatureGroups -from kepler_model.train.pipeline import NewPipeline from kepler_model.train.extractor import DefaultExtractor from kepler_model.train.isolator.isolator import MinIdleIsolator, ProfileBackgroundIsolator +from kepler_model.train.pipeline import NewPipeline from kepler_model.train.profiler.profiler import load_all_profiles - +from kepler_model.train.prom.prom_query import PrometheusClient +from kepler_model.util.config import getConfig +from kepler_model.util.loader import default_train_output_pipeline +from kepler_model.util.prom_types import PROM_QUERY_INTERVAL, get_valid_feature_group_from_queries +from kepler_model.util.train_types import FeatureGroups, PowerSourceMap SAMPLING_INTERVAL = PROM_QUERY_INTERVAL SAMPLING_INTERVAL = getConfig("SAMPLING_INTERVAL", SAMPLING_INTERVAL) diff --git a/src/kepler_model/train/pipeline.py b/src/kepler_model/train/pipeline.py index 80ff6400..cbb12c1e 100644 --- a/src/kepler_model/train/pipeline.py +++ 
b/src/kepler_model/train/pipeline.py @@ -1,29 +1,26 @@ +import datetime import os -import sys +import shutil import threading -from concurrent.futures import ThreadPoolExecutor -from concurrent.futures import wait +from concurrent.futures import ThreadPoolExecutor, wait import pandas as pd -import shutil -import datetime -from kepler_model.train.profiler.node_type_index import NodeTypeIndexCollection from kepler_model.train.extractor import DefaultExtractor from kepler_model.train.isolator.isolator import MinIdleIsolator - -from kepler_model.util.train_types import PowerSourceMap, FeatureGroups, ModelOutputType +from kepler_model.train.profiler.node_type_index import NodeTypeIndexCollection +from kepler_model.util.config import ERROR_KEY, model_toppath +from kepler_model.util.format import print_bounded_multiline_message, time_to_str +from kepler_model.util.loader import get_all_metadata, get_archived_file, get_metadata_df, get_pipeline_path from kepler_model.util.prom_types import node_info_column -from kepler_model.util.config import model_toppath, ERROR_KEY -from kepler_model.util.loader import get_all_metadata, get_pipeline_path, get_metadata_df, get_archived_file from kepler_model.util.saver import save_pipeline_metadata -from kepler_model.util.format import print_bounded_multiline_message, time_to_str +from kepler_model.util.train_types import FeatureGroups, ModelOutputType, PowerSourceMap def load_class(module_name, class_name): import importlib - module_path = importlib.import_module("kepler_model.train.{}.{}.main".format(module_name, class_name)) + module_path = importlib.import_module(f"kepler_model.train.{module_name}.{class_name}.main") return getattr(module_path, class_name) @@ -66,14 +63,14 @@ def prepare_data(self, input_query_results, energy_components, energy_source, fe if extracted_data is None: self.print_log("cannot extract data") return None, None, None - self.print_log("{} extraction done.".format(feature_group)) + self.print_log(f"{feature_group} extraction done.") abs_data = extracted_data.copy() # 2. 
get dyn_data isolated_data = self.get_dyn_data(query_results, energy_components, feature_group, energy_source) if isolated_data is None: self.print_log("cannot isolate data") return abs_data, None, power_labels - self.print_log("{} isolation done.".format(feature_group)) + self.print_log(f"{feature_group} isolation done.") dyn_data = isolated_data.copy() return abs_data, dyn_data, power_labels @@ -85,14 +82,14 @@ def prepare_data_from_input_list(self, input_query_results_list, energy_componen for input_query_results in input_query_results_list: extracted_data, isolated_data, extracted_labels = self.prepare_data(input_query_results, energy_components, energy_source, feature_group, aggr) if extracted_data is None: - self.print_log("cannot extract data index={}".format(index)) + self.print_log(f"cannot extract data index={index}") continue abs_data_list += [extracted_data] if power_labels is None: # set power_labels once power_labels = extracted_labels if isolated_data is None: - self.print_log("cannot isolate data index={}".format(index)) + self.print_log(f"cannot isolate data index={index}") continue dyn_data_list += [isolated_data] index += 1 @@ -121,7 +118,7 @@ def _train(self, abs_data, dyn_data, power_labels, energy_source, feature_group) elif dyn_data is not None: future = executor.submit(run_train, trainer, dyn_data, power_labels, pipeline_lock=self.lock) futures += [future] - self.print_log("Waiting for {} trainers to complete...".format(len(futures))) + self.print_log(f"Waiting for {len(futures)} trainers to complete...") wait(futures) # Handle exceptions if any for future in futures: @@ -129,15 +126,15 @@ def _train(self, abs_data, dyn_data, power_labels, energy_source, feature_group) # Handle the exception here print(f"Exception occurred: {future.exception()}") - self.print_log("{}/{} trainers are trained from {} to {}".format(len(futures), len(self.trainers), feature_group, energy_source)) + self.print_log(f"{len(futures)}/{len(self.trainers)} trainers are trained from {feature_group} to {energy_source}") def process(self, input_query_results, energy_components, energy_source, feature_group, aggr=True, replace_node_type=None): - self.print_log("{} start processing.".format(feature_group)) + self.print_log(f"{feature_group} start processing.") abs_data, dyn_data, power_labels = self.prepare_data(input_query_results, energy_components, energy_source, feature_group, aggr) if abs_data is None and dyn_data is None: return False, None, None if replace_node_type is not None: - self.print_log("Replace Node Type: {}".format(replace_node_type)) + self.print_log(f"Replace Node Type: {replace_node_type}") abs_data[node_info_column] = replace_node_type dyn_data[node_info_column] = replace_node_type self._train(abs_data, dyn_data, power_labels, energy_source, feature_group) @@ -150,7 +147,7 @@ def process_multiple_query(self, input_query_results_list, energy_components, en if (abs_data is None or len(abs_data) == 0) and (dyn_data is None or len(dyn_data) == 0): return False, None, None if replace_node_type is not None: - self.print_log("Replace Node Type: {}".format(replace_node_type)) + self.print_log(f"Replace Node Type: {replace_node_type}") abs_data[node_info_column] = replace_node_type dyn_data[node_info_column] = replace_node_type self._train(abs_data, dyn_data, power_labels, energy_source, feature_group) @@ -159,7 +156,7 @@ def process_multiple_query(self, input_query_results_list, energy_components, en return True, abs_data, dyn_data def print_log(self, message): - print("{} pipeline: 
{}".format(self.name, message), flush=True) + print(f"{self.name} pipeline: {message}", flush=True) def save_metadata(self): all_metadata = get_all_metadata(model_toppath, self.name) @@ -177,19 +174,19 @@ def print_pipeline_process_end(self, energy_source, feature_group, abs_data, dyn node_types = pd.unique(abs_metadata_df[node_info_column]) abs_messages = [ - "Pipeline {} has finished for modeling {} power by {} feature".format(self.name, energy_source, feature_group), + f"Pipeline {self.name} has finished for modeling {energy_source} power by {feature_group} feature", " Extractor: {}".format(self.metadata["extractor"]), " Isolator: {}".format(self.metadata["isolator"]), "Absolute Power Modeling:", - " Input data size: {}".format(len(abs_data)), - " Model Trainers: {}".format(abs_trainer_names), - " Output: {}".format(abs_group_path), + f" Input data size: {len(abs_data)}", + f" Model Trainers: {abs_trainer_names}", + f" Output: {abs_group_path}", " ", ] for node_type in node_types: filtered_data = abs_metadata_df[abs_metadata_df[node_info_column] == node_type] min_mae = -1 if len(filtered_data) == 0 else filtered_data.loc[filtered_data[ERROR_KEY].idxmin()][ERROR_KEY] - abs_messages += [" NodeType {} Min {}: {}".format(node_type, ERROR_KEY, min_mae)] + abs_messages += [f" NodeType {node_type} Min {ERROR_KEY}: {min_mae}"] abs_messages += [" "] if dyn_data is not None: @@ -197,14 +194,14 @@ def print_pipeline_process_end(self, energy_source, feature_group, abs_data, dyn dyn_metadata_df, dyn_group_path = get_metadata_df(model_toppath, ModelOutputType.DynPower.name, feature_group, energy_source, self.name) dyn_messages = [ "Dynamic Power Modeling:", - " Input data size: {}".format(len(dyn_data)), - " Model Trainers: {}".format(dyn_trainer_names), - " Output: {}".format(dyn_group_path), + f" Input data size: {len(dyn_data)}", + f" Model Trainers: {dyn_trainer_names}", + f" Output: {dyn_group_path}", ] for node_type in node_types: filtered_data = dyn_metadata_df[dyn_metadata_df[node_info_column] == node_type] min_mae = -1 if len(filtered_data) == 0 else filtered_data.loc[filtered_data[ERROR_KEY].idxmin()][ERROR_KEY] - dyn_messages += [" NodeType {} Min {}: {}".format(node_type, ERROR_KEY, min_mae)] + dyn_messages += [f" NodeType {node_type} Min {ERROR_KEY}: {min_mae}"] messages = abs_messages + dyn_messages print_bounded_multiline_message(messages) diff --git a/src/kepler_model/train/profiler/generate_scaler.py b/src/kepler_model/train/profiler/generate_scaler.py index eb6bd48c..be5442c8 100644 --- a/src/kepler_model/train/profiler/generate_scaler.py +++ b/src/kepler_model/train/profiler/generate_scaler.py @@ -15,12 +15,12 @@ import os import pickle -from sklearn.preprocessing import MaxAbsScaler import pandas as pd +from sklearn.preprocessing import MaxAbsScaler from kepler_model.train import DefaultExtractor -from kepler_model.util.prom_types import node_info_column, TIMESTAMP_COL -from kepler_model.util.train_types import SYSTEM_FEATURES, FeatureGroups, FeatureGroup +from kepler_model.util.prom_types import TIMESTAMP_COL, node_info_column +from kepler_model.util.train_types import SYSTEM_FEATURES, FeatureGroup, FeatureGroups # WARN: unable to find this anymore # from profile_background import profile_path diff --git a/src/kepler_model/train/profiler/node_type_index.py b/src/kepler_model/train/profiler/node_type_index.py index bd4b90da..bf0c97e2 100644 --- a/src/kepler_model/train/profiler/node_type_index.py +++ b/src/kepler_model/train/profiler/node_type_index.py @@ -5,18 +5,16 @@ # 
node_type = index_collection.index_train_machine(machine_id, new_spec) # index_collection.save() -import re import enum +import re import subprocess -import psutil import cpuinfo - +import psutil import pyudev - -from kepler_model.util.saver import save_node_type_index, save_machine_spec from kepler_model.util.loader import load_node_type_index +from kepler_model.util.saver import save_machine_spec, save_node_type_index def rename(name: str) -> str: @@ -66,7 +64,7 @@ def generate_spec(data_path, machine_id): cpu_freq_mhz = round(max(freq.max, freq.current) / 100) * 100 # round to one decimal of GHz spec_values = {"vendor": vendor, "processor": processor, "cores": cores, "chips": chips, "memory": memory_gb, "frequency": cpu_freq_mhz, "threads_per_core": threads_per_core} spec = NodeTypeSpec(**spec_values) - print("Save machine spec ({}): ".format(data_path)) + print(f"Save machine spec ({data_path}): ") print(str(spec)) save_machine_spec(data_path, machine_id, spec) @@ -140,14 +138,14 @@ def cover(self, compare_spec): def __str__(self): out_str = "" for attr in NodeAttribute: - out_str += "{} ({})\n".format(attr, str(self.attrs[attr])) + out_str += f"{attr} ({self.attrs[attr]!s})\n" return out_str def get_json(self): json_obj = dict() json_obj["attrs"] = dict() for attr in NodeAttribute: - json_obj["attrs"]["{}".format(attr)] = self.attrs[attr] + json_obj["attrs"][f"{attr}"] = self.attrs[attr] json_obj["members"] = self.members return json_obj diff --git a/src/kepler_model/train/profiler/profiler.py b/src/kepler_model/train/profiler/profiler.py index a55fa6a6..43b98f0d 100644 --- a/src/kepler_model/train/profiler/profiler.py +++ b/src/kepler_model/train/profiler/profiler.py @@ -13,19 +13,18 @@ ## {component: {node_type: {min_watt: ,max_watt: } }} ############################################################ +import json import os from urllib.request import urlopen -import joblib +import joblib import pandas as pd -import json -from kepler_model.util.train_types import PowerSourceMap, FeatureGroups -from kepler_model.util.prom_types import node_info_column, node_info_query, generate_dataframe_from_response from kepler_model.util.extract_types import component_to_col -from kepler_model.util.saver import save_profile from kepler_model.util.loader import default_node_type - +from kepler_model.util.prom_types import generate_dataframe_from_response, node_info_column, node_info_query +from kepler_model.util.saver import save_profile +from kepler_model.util.train_types import FeatureGroups, PowerSourceMap min_watt_key = "min_watt" max_watt_key = "max_watt" @@ -185,7 +184,7 @@ def get_min_power(self, source, component): return self.profile[source][component][min_watt_key] def print_profile(self): - print("Profile (node type={}): \n Available energy components: {}\n Available maxabs scalers: {}".format(self.node_type, ["{}/{}".format(key, list(self.profile[key].keys())) for key in self.profile.keys()], self.max_scaler.keys())) + print("Profile (node type={}): \n Available energy components: {}\n Available maxabs scalers: {}".format(self.node_type, [f"{key}/{list(self.profile[key].keys())}" for key in self.profile.keys()], self.max_scaler.keys())) def generate_profiles(profile_map): @@ -210,7 +209,7 @@ def load_all_profiles(): response = urlopen(url_path) profile = json.loads(response.read()) except Exception as e: - print("Failed to load profile {}: {}".format(source, e)) + print(f"Failed to load profile {source}: {e}") continue profile_map[source] = profile return generate_profiles(profile_map) diff 
--git a/src/kepler_model/train/prom/prom_query.py b/src/kepler_model/train/prom/prom_query.py index 7bfa2662..a631def1 100644 --- a/src/kepler_model/train/prom/prom_query.py +++ b/src/kepler_model/train/prom/prom_query.py @@ -1,9 +1,16 @@ import datetime -from prometheus_api_client import PrometheusConnect -from kepler_model.util.prom_types import PROM_SERVER, PROM_HEADERS, PROM_SSL_DISABLE, PROM_QUERY_INTERVAL, PROM_QUERY_STEP, metric_prefix -from kepler_model.util.prom_types import generate_dataframe_from_response +from prometheus_api_client import PrometheusConnect +from kepler_model.util.prom_types import ( + PROM_HEADERS, + PROM_QUERY_INTERVAL, + PROM_QUERY_STEP, + PROM_SERVER, + PROM_SSL_DISABLE, + generate_dataframe_from_response, + metric_prefix, +) UTC_OFFSET_TIMEDELTA = datetime.datetime.utcnow() - datetime.datetime.now() diff --git a/src/kepler_model/train/specpower_pipeline.py b/src/kepler_model/train/specpower_pipeline.py index 2a8938e4..d689761a 100644 --- a/src/kepler_model/train/specpower_pipeline.py +++ b/src/kepler_model/train/specpower_pipeline.py @@ -8,21 +8,22 @@ docker run -it -p 8080:80 quay.io/sustainability/kepler_spec_power_db:v0.7 """ -import requests -from io import StringIO +import datetime +import json import os +from io import StringIO + import pandas as pd -import json -import datetime +import requests -from kepler_model.train.profiler.node_type_index import NodeTypeSpec -from kepler_model.train.pipeline import NewPipeline from kepler_model.train.extractor import DefaultExtractor from kepler_model.train.isolator.isolator import MinIdleIsolator -from kepler_model.util.format import time_to_str -from kepler_model.util.prom_types import node_info_column, TIMESTAMP_COL +from kepler_model.train.pipeline import NewPipeline +from kepler_model.train.profiler.node_type_index import NodeTypeSpec from kepler_model.util.extract_types import component_to_col -from kepler_model.util.train_types import FeatureGroup, default_trainer_names, BPF_FEATURES, PowerSourceMap +from kepler_model.util.format import time_to_str +from kepler_model.util.prom_types import TIMESTAMP_COL, node_info_column +from kepler_model.util.train_types import BPF_FEATURES, FeatureGroup, PowerSourceMap, default_trainer_names platform_energy_source = "acpi" acpi_component = PowerSourceMap[platform_energy_source][0] diff --git a/src/kepler_model/train/trainer/ExponentialRegressionTrainer/main.py b/src/kepler_model/train/trainer/ExponentialRegressionTrainer/main.py index 85f5e77d..df74270f 100644 --- a/src/kepler_model/train/trainer/ExponentialRegressionTrainer/main.py +++ b/src/kepler_model/train/trainer/ExponentialRegressionTrainer/main.py @@ -1,7 +1,8 @@ -import numpy as np import math -from kepler_model.train.trainer.curvefit import CurveFitTrainer, CurveFitModel +import numpy as np + +from kepler_model.train.trainer.curvefit import CurveFitModel, CurveFitTrainer def p0_func(x, y): diff --git a/src/kepler_model/train/trainer/GradientBoostingRegressorTrainer/main.py b/src/kepler_model/train/trainer/GradientBoostingRegressorTrainer/main.py index 037dfd57..b64f1e52 100644 --- a/src/kepler_model/train/trainer/GradientBoostingRegressorTrainer/main.py +++ b/src/kepler_model/train/trainer/GradientBoostingRegressorTrainer/main.py @@ -1,4 +1,5 @@ from sklearn.ensemble import GradientBoostingRegressor + from kepler_model.train.trainer.scikit import ScikitTrainer model_class = "scikit" diff --git a/src/kepler_model/train/trainer/KNeighborsRegressorTrainer/main.py 
b/src/kepler_model/train/trainer/KNeighborsRegressorTrainer/main.py index 252e386c..3452f951 100644 --- a/src/kepler_model/train/trainer/KNeighborsRegressorTrainer/main.py +++ b/src/kepler_model/train/trainer/KNeighborsRegressorTrainer/main.py @@ -1,4 +1,5 @@ from sklearn.neighbors import KNeighborsRegressor + from kepler_model.train.trainer.scikit import ScikitTrainer model_class = "scikit" diff --git a/src/kepler_model/train/trainer/LinearRegressionTrainer/main.py b/src/kepler_model/train/trainer/LinearRegressionTrainer/main.py index 6ea1ef55..636dda55 100644 --- a/src/kepler_model/train/trainer/LinearRegressionTrainer/main.py +++ b/src/kepler_model/train/trainer/LinearRegressionTrainer/main.py @@ -1,4 +1,5 @@ from sklearn.linear_model import LinearRegression + from kepler_model.train.trainer.scikit import ScikitTrainer model_class = "scikit" diff --git a/src/kepler_model/train/trainer/LogarithmicRegressionTrainer/main.py b/src/kepler_model/train/trainer/LogarithmicRegressionTrainer/main.py index 34bba2db..b4ee1003 100644 --- a/src/kepler_model/train/trainer/LogarithmicRegressionTrainer/main.py +++ b/src/kepler_model/train/trainer/LogarithmicRegressionTrainer/main.py @@ -1,5 +1,6 @@ import numpy as np -from kepler_model.train.trainer.curvefit import CurveFitTrainer, CurveFitModel + +from kepler_model.train.trainer.curvefit import CurveFitModel, CurveFitTrainer def p0_func(x, y): diff --git a/src/kepler_model/train/trainer/LogisticRegressionTrainer/main.py b/src/kepler_model/train/trainer/LogisticRegressionTrainer/main.py index fd1473e3..acbf3b35 100644 --- a/src/kepler_model/train/trainer/LogisticRegressionTrainer/main.py +++ b/src/kepler_model/train/trainer/LogisticRegressionTrainer/main.py @@ -1,5 +1,6 @@ import numpy as np -from kepler_model.train.trainer.curvefit import CurveFitTrainer, CurveFitModel + +from kepler_model.train.trainer.curvefit import CurveFitModel, CurveFitTrainer def p0_func(x, y): diff --git a/src/kepler_model/train/trainer/SGDRegressorTrainer/main.py b/src/kepler_model/train/trainer/SGDRegressorTrainer/main.py index 7ee8945a..e34d6581 100644 --- a/src/kepler_model/train/trainer/SGDRegressorTrainer/main.py +++ b/src/kepler_model/train/trainer/SGDRegressorTrainer/main.py @@ -1,4 +1,5 @@ from sklearn.linear_model import SGDRegressor + from kepler_model.train.trainer.scikit import ScikitTrainer diff --git a/src/kepler_model/train/trainer/SVRRegressorTrainer/main.py b/src/kepler_model/train/trainer/SVRRegressorTrainer/main.py index bb84009a..9683fbaa 100644 --- a/src/kepler_model/train/trainer/SVRRegressorTrainer/main.py +++ b/src/kepler_model/train/trainer/SVRRegressorTrainer/main.py @@ -1,4 +1,5 @@ from sklearn.svm import SVR + from kepler_model.train.trainer.scikit import ScikitTrainer common_node_type = 1 diff --git a/src/kepler_model/train/trainer/XGBoostTrainer/main.py b/src/kepler_model/train/trainer/XGBoostTrainer/main.py index 71363ec8..5ec25673 100644 --- a/src/kepler_model/train/trainer/XGBoostTrainer/main.py +++ b/src/kepler_model/train/trainer/XGBoostTrainer/main.py @@ -1,16 +1,24 @@ import datetime import json import os -from typing import List, Optional, Tuple, Dict, Any -import pandas as pd -from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score, mean_absolute_percentage_error -from sklearn.model_selection import train_test_split, RepeatedKFold, cross_val_score +from typing import Any + import numpy as np +import pandas as pd import xgboost as xgb +from sklearn.metrics import mean_absolute_error, mean_absolute_percentage_error, 
mean_squared_error, r2_score +from sklearn.model_selection import RepeatedKFold, cross_val_score, train_test_split - -from kepler_model.util.train_types import FeatureGroup, FeatureGroups, EnergyComponentLabelGroups, EnergyComponentLabelGroup, XGBoostMissingModelXOrModelDescException, XGBoostModelFeatureOrLabelIncompatabilityException, XGBoostRegressionTrainType from kepler_model.train.extractor.extractor import DefaultExtractor +from kepler_model.util.train_types import ( + EnergyComponentLabelGroup, + EnergyComponentLabelGroups, + FeatureGroup, + FeatureGroups, + XGBoostMissingModelXOrModelDescException, + XGBoostModelFeatureOrLabelIncompatabilityException, + XGBoostRegressionTrainType, +) # Currently Cgroup Metrics are not exported @@ -77,7 +85,7 @@ def train(self, prom_client=None, refined_results=None) -> None: raise Exception("extractor failed") # Accepts JSON Input with feature and corresponding prediction - def predict(self, features_and_predictions: List[Dict[str, float]]) -> Tuple[List[float], Dict[Any, Any]]: + def predict(self, features_and_predictions: list[dict[str, float]]) -> tuple[list[float], dict[Any, Any]]: # features Convert to List[List[float]] list_of_predictions = [] for prediction in features_and_predictions: @@ -104,11 +112,11 @@ class XGBoostRegressionModelGenerationPipeline: """ - feature_names: List[str] - label_names: List[str] + feature_names: list[str] + label_names: list[str] model_name: str - def __init__(self, feature_names_in_order: List[str], label_names_in_order: List[str], save_location: str, model_name: str) -> None: + def __init__(self, feature_names_in_order: list[str], label_names_in_order: list[str], save_location: str, model_name: str) -> None: # model data will be generated consistently using the list of feature names and labels (Order does not matter) self.feature_names = feature_names_in_order.copy() @@ -141,7 +149,7 @@ def model_json_data_exists(self) -> bool: filename_path = self._generate_model_data_filepath() return os.path.exists(os.path.join(filename_path, self.model_desc)) - def retrieve_all_model_data(self) -> Tuple[Optional[xgb.XGBRegressor], Optional[Dict[Any, Any]]]: + def retrieve_all_model_data(self) -> tuple[xgb.XGBRegressor | None, dict[Any, Any] | None]: # Note that when generating base model, it does not need to contain default hyperparameters if it will just be # used for prediction # Returns model and model_desc @@ -151,14 +159,14 @@ def retrieve_all_model_data(self) -> Tuple[Optional[xgb.XGBRegressor], Optional[ raise XGBoostMissingModelXOrModelDescException(missing_model=self.model_exists(), missing_model_desc=self.model_json_data_exists()) if self.model_exists() and self.model_json_data_exists(): new_model.load_model(os.path.join(filename_path, self.model_filename)) - with open(os.path.join(filename_path, self.model_desc), "r") as f: + with open(os.path.join(filename_path, self.model_desc)) as f: json_data = json.load(f) if json_data["feature_names"] != self.feature_names or json_data["label_names"] != self.label_names: raise XGBoostModelFeatureOrLabelIncompatabilityException(json_data["feature_names"], json_data["label_names"], self.feature_names, self.label_names) return new_model, json_data return None, None - def _save_model(self, model: xgb.XGBRegressor, model_desc: Dict[Any, Any]) -> None: + def _save_model(self, model: xgb.XGBRegressor, model_desc: dict[Any, Any]) -> None: filename_path = self._generate_model_data_filepath() if not self._model_data_filepath_exists(): os.makedirs(filename_path) @@ -302,7 +310,7 @@ 
def __perform_kfold_train(self, all_model_data_exists: bool, ready_model_data: p # Receives list of features and returns a list of predictions in order # Return None if no available model - def predict(self, input_values: List[List[float]]) -> Tuple[Optional[List[float]], Optional[Dict[Any, Any]]]: + def predict(self, input_values: list[list[float]]) -> tuple[list[float] | None, dict[Any, Any] | None]: retrieved_model, retrieved_model_desc = self.retrieve_all_model_data() predicted_results = [] if retrieved_model is not None: diff --git a/src/kepler_model/train/trainer/__init__.py b/src/kepler_model/train/trainer/__init__.py index 610c9a83..8c4363be 100644 --- a/src/kepler_model/train/trainer/__init__.py +++ b/src/kepler_model/train/trainer/__init__.py @@ -1,17 +1,30 @@ +import os import shutil - from abc import ABCMeta, abstractmethod -import os - - -from kepler_model.util import assure_path, ModelOutputType, FeatureGroups, FeatureGroup, save_json, save_metadata, load_metadata, save_scaler, save_weight - +from kepler_model.util import ( + FeatureGroup, + FeatureGroups, + ModelOutputType, + assure_path, + load_metadata, + save_json, + save_metadata, + save_scaler, + save_weight, +) +from kepler_model.util.config import model_toppath +from kepler_model.util.extract_types import component_to_col, get_unit_vals, ratio_to_col +from kepler_model.util.loader import ( + CHECKPOINT_FOLDERNAME, + get_archived_file, + get_model_group_path, + get_model_name, + get_save_path, + load_scaler, +) from kepler_model.util.prom_types import node_info_column from kepler_model.util.train_types import main_feature -from kepler_model.util.extract_types import component_to_col, get_unit_vals, ratio_to_col -from kepler_model.util.loader import get_model_group_path, get_save_path, get_model_name, get_archived_file, CHECKPOINT_FOLDERNAME, load_scaler -from kepler_model.util.config import model_toppath def get_assured_checkpoint_path(group_path, assure=True): @@ -59,7 +72,7 @@ def _model_filename(self, node_type): return model_name, model_file def _checkpoint_filename(self, component, node_type): - return "{}_{}_{}".format(self.trainer_name, component, node_type) + return f"{self.trainer_name}_{component}_{node_type}" def _checkpoint_filepath(self, component, node_type): checkpoint_filename = self._checkpoint_filename(component, node_type) @@ -119,11 +132,11 @@ def load_model(self, node_type): model, ok = self.load_local_checkpoint(local_checkpoint) if ok: self.node_models[node_type][component] = model - self.print_log("Continue from last checkpoint ({})".format(component)) + self.print_log(f"Continue from last checkpoint ({component})") else: # init if failed to load any checkpoint self.node_models[node_type][component] = self.init_model() - self.print_log("Newly initialize model ({})".format(component)) + self.print_log(f"Newly initialize model ({component})") if hasattr(self.node_models[node_type][component], "set_feature_index"): feature_index = main_feature(self.feature_group_name, component) self.node_models[node_type][component].set_feature_index(feature_index) @@ -138,7 +151,7 @@ def process(self, data, power_labels, pipeline_lock): node_type_filtered_data = data[data[node_info_column] == node_type] if self.node_scalers[node_type] is None: - self.print_log("fit scaler to latest data {1} for node_type={0}".format(node_type, self.feature_group_name)) + self.print_log(f"fit scaler to latest data {self.feature_group_name} for node_type={node_type}") # no profiled scaler x_values = 
node_type_filtered_data[self.features].values self.node_scalers[node_type] = MaxAbsScaler() @@ -155,14 +168,14 @@ def process(self, data, power_labels, pipeline_lock): self.train(node_type, component, X_train, y_train) self.save_checkpoint(self.node_models[node_type][component], self._checkpoint_filepath(component, node_type)) except Exception as err: - self.print_log("failed to process {}: {}".format(node_type, err)) + self.print_log(f"failed to process {node_type}: {err}") continue if self.should_archive(node_type): pipeline_lock.acquire() try: self.save_model_and_metadata(node_type, X_test_map, y_test_map) except Exception as err: - self.print_log("failed to save model {}: {}".format(node_type, err)) + self.print_log(f"failed to save model {node_type}: {err}") finally: pipeline_lock.release() @@ -255,12 +268,12 @@ def save_model_and_metadata(self, node_type, X_test_map, y_test_map): max_mae = mae if max_mape is None or mape > max_mape: max_mape = mape - mae_map["{}_mae".format(component)] = mae - mape_map["{}_mape".format(component)] = mape + mae_map[f"{component}_mae"] = mae + mape_map[f"{component}_mape"] = mape self.save_metadata(node_type, max_mae, mae_map, mape, mape_map, item) # archive model self.archive_model(node_type) - print("save model to {}".format(save_path)) + print(f"save model to {save_path}") def predict(self, node_type, component, X_values, skip_preprocess=False): save_path = self._get_save_path(node_type) @@ -281,7 +294,7 @@ def predict(self, node_type, component, X_values, skip_preprocess=False): return model.predict(features) def print_log(self, message): - print("{}: {}".format(self.to_string(), message), flush=True) + print(f"{self.to_string()}: {message}", flush=True) def to_string(self): return "{} trainer ({}/{}/{})".format(self.trainer_name, "Abs" if self.node_level else "Dyn", self.feature_group, self.energy_source) diff --git a/src/kepler_model/train/trainer/curvefit.py b/src/kepler_model/train/trainer/curvefit.py index e0258d92..546c40c6 100644 --- a/src/kepler_model/train/trainer/curvefit.py +++ b/src/kepler_model/train/trainer/curvefit.py @@ -1,10 +1,11 @@ -from sklearn.metrics import mean_absolute_error -from sklearn.exceptions import NotFittedError +import os + import numpy as np from scipy.optimize import curve_fit -import os +from sklearn.exceptions import NotFittedError +from sklearn.metrics import mean_absolute_error -from kepler_model.util import save_pkl, load_pkl +from kepler_model.util import load_pkl, save_pkl from kepler_model.util.train_types import main_feature from . import Trainer diff --git a/src/kepler_model/train/trainer/scikit.py b/src/kepler_model/train/trainer/scikit.py index 0dd2923b..15912786 100644 --- a/src/kepler_model/train/trainer/scikit.py +++ b/src/kepler_model/train/trainer/scikit.py @@ -1,8 +1,9 @@ -from sklearn.metrics import mean_absolute_error -import numpy as np import os -from kepler_model.util import save_pkl, load_pkl +import numpy as np +from sklearn.metrics import mean_absolute_error + +from kepler_model.util import load_pkl, save_pkl from . 
import Trainer diff --git a/src/kepler_model/train/trainer/xgboost_interface.py b/src/kepler_model/train/trainer/xgboost_interface.py index f204ddb7..f697d4bb 100644 --- a/src/kepler_model/train/trainer/xgboost_interface.py +++ b/src/kepler_model/train/trainer/xgboost_interface.py @@ -1,11 +1,12 @@ -from sklearn.metrics import mean_absolute_error +import base64 import os -import xgboost as xgb +from abc import abstractmethod + import numpy as np -import base64 +import xgboost as xgb +from sklearn.metrics import mean_absolute_error -from kepler_model.util import save_pkl, load_pkl -from abc import abstractmethod +from kepler_model.util import load_pkl, save_pkl from . import Trainer @@ -102,7 +103,7 @@ def get_weight_dict(self, node_type): if not os.path.exists(filename): self.print_log("cannot get checkpoint file (in json) for xgboost") return - with open(filename, "r") as f: + with open(filename) as f: contents = f.read() weight_dict[component] = {"All_Weights": {"Categorical_Variables": dict(), "Numerical_Variables": {self.features[i]: {"scale": scaler.scale_[i]} for i in range(len(self.features))}, "XGboost_Weights": base64.b64encode(contents.encode("utf-8")).decode("utf-8")}} return weight_dict diff --git a/src/kepler_model/util/__init__.py b/src/kepler_model/util/__init__.py index 288e8d5f..8f8789c1 100644 --- a/src/kepler_model/util/__init__.py +++ b/src/kepler_model/util/__init__.py @@ -1,11 +1,32 @@ # commonly-used definitions -from .loader import load_json, load_csv, load_pkl, load_metadata, load_scaler, load_weight, load_remote_pkl, list_model_names, default_train_output_pipeline, class_to_json, version -from .saver import assure_path, save_csv, save_json, save_pkl, save_metadata, save_scaler, save_weight from .config import getConfig, model_toppath +from .loader import ( + class_to_json, + default_train_output_pipeline, + list_model_names, + load_csv, + load_json, + load_metadata, + load_pkl, + load_remote_pkl, + load_scaler, + load_weight, + version, +) from .prom_types import get_valid_feature_group_from_queries -from .train_types import SYSTEM_FEATURES, COUNTER_FEAUTRES, BPF_FEATURES, IRQ_FEATURES, WORKLOAD_FEATURES -from .train_types import PowerSourceMap, FeatureGroup, FeatureGroups, ModelOutputType, get_feature_group - +from .saver import assure_path, save_csv, save_json, save_metadata, save_pkl, save_scaler, save_weight +from .train_types import ( + BPF_FEATURES, + COUNTER_FEAUTRES, + IRQ_FEATURES, + SYSTEM_FEATURES, + WORKLOAD_FEATURES, + FeatureGroup, + FeatureGroups, + ModelOutputType, + PowerSourceMap, + get_feature_group, +) __all__ = [ "load_json", diff --git a/src/kepler_model/util/config.py b/src/kepler_model/util/config.py index 0971404e..bcb104ef 100644 --- a/src/kepler_model/util/config.py +++ b/src/kepler_model/util/config.py @@ -13,8 +13,9 @@ ################################################# import os -from .loader import get_url, get_pipeline_url, base_model_url, default_pipelines, default_train_output_pipeline -from .train_types import ModelOutputType, is_output_type_supported, FeatureGroup + +from .loader import base_model_url, default_pipelines, default_train_output_pipeline, get_pipeline_url, get_url +from .train_types import FeatureGroup, ModelOutputType, is_output_type_supported # must be writable (for shared volume mount) MNT_PATH = "/mnt" @@ -33,7 +34,7 @@ MODEL_SERVER_SVC = "kepler-model-server.kepler.svc.cluster.local" DEFAULT_MODEL_SERVER_PORT = 8100 -MODEL_SERVER_ENDPOINT = "http://{}:{}".format(MODEL_SERVER_SVC, DEFAULT_MODEL_SERVER_PORT) 
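# [editor's note] The change just below is the str.format() -> f-string rewrite applied throughout this patch. A minimal before/after sketch, using only the names from this hunk (illustrative, not an additional change):
#
#     MODEL_SERVER_ENDPOINT = "http://{}:{}".format(MODEL_SERVER_SVC, DEFAULT_MODEL_SERVER_PORT)
#     MODEL_SERVER_ENDPOINT = f"http://{MODEL_SERVER_SVC}:{DEFAULT_MODEL_SERVER_PORT}"
#
# Both build the same string at definition time; only the notation changes.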
+MODEL_SERVER_ENDPOINT = f"http://{MODEL_SERVER_SVC}:{DEFAULT_MODEL_SERVER_PORT}" MODEL_SERVER_MODEL_REQ_PATH = "/model" MODEL_SERVER_MODEL_LIST_PATH = "/best-models" MODEL_SERVER_ENABLE = False @@ -45,7 +46,7 @@ def getConfig(key: str, default): # check configmap path file = os.path.join(CONFIG_PATH, key) if os.path.exists(file): - with open(file, "r") as f: + with open(file) as f: return f.read().strip() # check env cfg = os.environ.get(key, default) @@ -95,7 +96,7 @@ def _model_server_endpoint(): if MODEL_SERVER_URL == MODEL_SERVER_SVC: MODEL_SERVER_PORT = getConfig("MODEL_SERVER_PORT", DEFAULT_MODEL_SERVER_PORT) MODEL_SERVER_PORT = int(MODEL_SERVER_PORT) - modelServerEndpoint = "http://{}:{}".format(MODEL_SERVER_URL, MODEL_SERVER_PORT) + modelServerEndpoint = f"http://{MODEL_SERVER_URL}:{MODEL_SERVER_PORT}" else: modelServerEndpoint = MODEL_SERVER_URL return modelServerEndpoint @@ -119,7 +120,7 @@ def set_env_from_model_config(): splits = line.split("=") if len(splits) > 1: os.environ[splits[0].strip()] = splits[1].strip() - print("set {} to {}.".format(splits[0], splits[1])) + print(f"set {splits[0]} to {splits[1]}.") def is_estimator_enable(prefix): @@ -151,9 +152,9 @@ def get_init_model_url(energy_source, output_type, model_topurl=model_topurl): modelURL = get_init_url(prefix) print("get init url", modelURL) if modelURL == "" and is_output_type_supported(output_type): - print("init URL is not set, try using default URL".format(output_type)) + print("init URL is not set, try using default URL".format()) return get_url(feature_group=FeatureGroup.BPFOnly, output_type=ModelOutputType[output_type], energy_source=energy_source, model_topurl=model_topurl, pipeline_name=pipeline_name) else: return modelURL - print("no match config for {}, {}".format(output_type, energy_source)) + print(f"no match config for {output_type}, {energy_source}") return "" diff --git a/src/kepler_model/util/extract_types.py b/src/kepler_model/util/extract_types.py index 166aaa38..84376d75 100644 --- a/src/kepler_model/util/extract_types.py +++ b/src/kepler_model/util/extract_types.py @@ -13,10 +13,10 @@ def component_to_col(component, unit_col=None, unit_val=None): - power_colname = "{}_power".format(component) + power_colname = f"{component}_power" if unit_col is None: return power_colname - return "{}_{}_{}".format(unit_col, unit_val, power_colname) + return f"{unit_col}_{unit_val}_{power_colname}" def col_to_component(component_col): @@ -32,7 +32,7 @@ def col_to_unit_val(component_col): def ratio_to_col(unit_val): - return "packge_ratio_{}".format(unit_val) + return f"packge_ratio_{unit_val}" def get_unit_vals(power_columns): diff --git a/src/kepler_model/util/format.py b/src/kepler_model/util/format.py index 788ca334..c83fbb42 100644 --- a/src/kepler_model/util/format.py +++ b/src/kepler_model/util/format.py @@ -12,14 +12,16 @@ def print_bounded_multiline_message(input_lines, maxlength=200): max_line_length = max(len(line) for line in lines) border = '#' * (max_line_length + 4) print(border) - + for line in lines: formatted_line = f"# {line.ljust(max_line_length)} #" print(formatted_line) - + print(border) from datetime import datetime + + def time_to_str(time): if isinstance(time, datetime): return time.strftime("%Y-%m-%d %H:%M:%S") diff --git a/src/kepler_model/util/loader.py b/src/kepler_model/util/loader.py index 65ed58c0..26fe18ad 100644 --- a/src/kepler_model/util/loader.py +++ b/src/kepler_model/util/loader.py @@ -1,14 +1,24 @@ -import os +import codecs import json import logging -import joblib -import 
pandas as pd -from .saver import assure_path, METADATA_FILENAME, SCALER_FILENAME, WEIGHT_FILENAME, TRAIN_ARGS_FILENAME, NODE_TYPE_INDEX_FILENAME, MACHINE_SPEC_PATH, _pipeline_model_metadata_filename -from .train_types import ModelOutputType, FeatureGroup, PowerSourceMap, all_feature_groups +import os from urllib.request import urlopen +import joblib +import pandas as pd import requests -import codecs + +from .saver import ( + MACHINE_SPEC_PATH, + METADATA_FILENAME, + NODE_TYPE_INDEX_FILENAME, + SCALER_FILENAME, + TRAIN_ARGS_FILENAME, + WEIGHT_FILENAME, + _pipeline_model_metadata_filename, + assure_path, +) +from .train_types import FeatureGroup, ModelOutputType, PowerSourceMap, all_feature_groups major_version = "0.7" version = "0.7.11" @@ -24,12 +34,12 @@ # pipeline loader ## default_train_output_pipeline: a default pipeline name which is output from the training pipeline -default_train_output_pipeline = "std_v{}".format(version) +default_train_output_pipeline = f"std_v{version}" default_pipelines = { - "rapl-sysfs": "ec2-{}".format(version), - "acpi": "specpower-{}".format(version) + "rapl-sysfs": f"ec2-{version}", + "acpi": f"specpower-{version}" } -base_model_url = "https://raw.githubusercontent.com/sustainable-computing-io/kepler-model-db/main/models/v{}".format(major_version) +base_model_url = f"https://raw.githubusercontent.com/sustainable-computing-io/kepler-model-db/main/models/v{major_version}" logger = logging.getLogger(__name__) @@ -181,7 +191,7 @@ def is_valid_model(metadata, filters): def get_model_name(trainer_name, node_type): - return "{}_{}".format(trainer_name, node_type) + return f"{trainer_name}_{node_type}" def get_node_type_from_name(model_name): @@ -253,14 +263,14 @@ def download_and_save(url, filepath): try: response = requests.get(url) except Exception as e: - print("Failed to load {} to {}: {}".format(url, filepath, e)) + print(f"Failed to load {url} to {filepath}: {e}") return None if response.status_code != 200: - print("Failed to load {} to {}: {}".format(url, filepath, response.status_code)) + print(f"Failed to load {url} to {filepath}: {response.status_code}") return None with codecs.open(filepath, "wb") as f: f.write(response.content) - print("Successfully load {} to {}".format(url, filepath)) + print(f"Successfully load {url} to {filepath}") return filepath @@ -365,7 +375,7 @@ def class_to_json(class_obj): def get_version_path(output_path, assure=True): - version_path = os.path.join(output_path, "v{}".format(major_version)) + version_path = os.path.join(output_path, f"v{major_version}") if assure: return assure_path(version_path) return version_path @@ -389,7 +399,7 @@ def get_preprocess_folder(pipeline_path, assure=True): def get_general_filename(prefix, energy_source, fg, ot, extractor, isolator=None): fg_suffix = "" if fg is None else "_" + fg.name if ot.name == ModelOutputType.DynPower.name: - return "{}_dyn_{}_{}_{}{}".format(prefix, extractor, isolator, energy_source, fg_suffix) + return f"{prefix}_dyn_{extractor}_{isolator}_{energy_source}{fg_suffix}" if ot.name == ModelOutputType.AbsPower.name: - return "{}_abs_{}_{}{}".format(prefix, extractor, energy_source, fg_suffix) + return f"{prefix}_abs_{extractor}_{energy_source}{fg_suffix}" return None diff --git a/src/kepler_model/util/prom_types.py b/src/kepler_model/util/prom_types.py index 7a4fe6eb..e857d72c 100644 --- a/src/kepler_model/util/prom_types.py +++ b/src/kepler_model/util/prom_types.py @@ -1,7 +1,14 @@ import pandas as pd from .config import getConfig -from .train_types import 
SYSTEM_FEATURES, WORKLOAD_FEATURES, FeatureGroups, FeatureGroup, deep_sort, get_valid_feature_groups +from .train_types import ( + SYSTEM_FEATURES, + WORKLOAD_FEATURES, + FeatureGroup, + FeatureGroups, + deep_sort, + get_valid_feature_groups, +) PROM_SERVER = "http://localhost:9090" PROM_SSL_DISABLE = "True" @@ -53,16 +60,16 @@ def get_energy_unit(component): def feature_to_query(feature): if feature in SYSTEM_FEATURES: - return "{}_{}".format(node_query_prefix, feature) + return f"{node_query_prefix}_{feature}" if feature in FeatureGroups[FeatureGroup.AcceleratorOnly]: - return "{}_{}".format(node_query_prefix, feature) + return f"{node_query_prefix}_{feature}" if FeatureGroup.ThirdParty in FeatureGroups is not None and feature in FeatureGroups[FeatureGroup.ThirdParty]: return feature - return "{}_{}_{}".format(container_query_prefix, feature, container_query_suffix) + return f"{container_query_prefix}_{feature}_{container_query_suffix}" def energy_component_to_query(component): - return "{}_{}_{}".format(node_query_prefix, component, node_query_suffix) + return f"{node_query_prefix}_{component}_{node_query_suffix}" def update_thirdparty_metrics(metrics): diff --git a/src/kepler_model/util/saver.py b/src/kepler_model/util/saver.py index b34f9992..e16c4545 100644 --- a/src/kepler_model/util/saver.py +++ b/src/kepler_model/util/saver.py @@ -1,7 +1,8 @@ import json -import joblib import os +import joblib + METADATA_FILENAME = 'metadata' SCALER_FILENAME = 'scaler' WEIGHT_FILENAME = 'weight' @@ -11,10 +12,10 @@ MACHINE_SPEC_PATH = "machine_spec" def _pipeline_model_metadata_filename(energy_source, model_type): - return "{}_{}_model_metadata".format(energy_source, model_type) + return f"{energy_source}_{model_type}_model_metadata" def _power_curve_filename(energy_source, model_type): - return "{}_{}_power_curve".format(energy_source, model_type) + return f"{energy_source}_{model_type}_power_curve" def assure_path(path): if path == '': @@ -27,7 +28,7 @@ def save_json(path, name, data): if '.json' not in name: name = name + '.json' assure_path(path) - filename = os.path.join(path, name) + filename = os.path.join(path, name) with open(filename, "w") as f: json.dump(data, f) return name @@ -36,7 +37,7 @@ def save_pkl(path, name, data): if '.pkl' not in name: name = name + '.pkl' assure_path(path) - filename = os.path.join(path, name) + filename = os.path.join(path, name) joblib.dump(data, filename) return name @@ -44,7 +45,7 @@ def save_csv(path, name, data): if '.csv' not in name: name = name + '.csv' assure_path(path) - filename = os.path.join(path, name) + filename = os.path.join(path, name) data.to_csv(filename) return name @@ -76,4 +77,4 @@ def save_pipeline_metadata(pipeline_path, pipeline_metadata, energy_source, mode def save_profile(profile_path, source, profile): profile_filename = os.path.join(profile_path, source + ".json") with open(profile_filename, "w") as f: - json.dump(profile, f) \ No newline at end of file + json.dump(profile, f) diff --git a/src/kepler_model/util/train_types.py b/src/kepler_model/util/train_types.py index 8d8c5b34..a7c15eea 100644 --- a/src/kepler_model/util/train_types.py +++ b/src/kepler_model/util/train_types.py @@ -10,7 +10,6 @@ import enum import random -from typing import List SYSTEM_FEATURES = ["node_info", "cpu_scaling_frequency_hertz"] @@ -152,14 +151,14 @@ class XGBoostModelFeatureOrLabelIncompatabilityException(Exception): labels_incompatible: true if expected_labels == actual_labels else false """ - expected_features: List[str] - expected_labels: 
List[str] - actual_features: List[str] - actual_labels: List[str] + expected_features: list[str] + expected_labels: list[str] + actual_features: list[str] + actual_labels: list[str] features_incompatible: bool labels_incompatible: bool - def __init__(self, expected_features: List[str], expected_labels: List[str], received_features: List[str], received_labels: List[str], message="expected features/labels are the not the same as the features/labels of the training data") -> None: + def __init__(self, expected_features: list[str], expected_labels: list[str], received_features: list[str], received_labels: list[str], message="expected features/labels are not the same as the features/labels of the training data") -> None: self.expected_features = expected_features self.expected_labels = expected_labels self.received_features = received_features @@ -199,7 +198,7 @@ def __init__(self, missing_model: bool, missing_model_desc: bool, message="model EnergyComponentLabelGroup.PackageDRAMEnergyComponents: deep_sort(PACKAGE_ENERGY_COMPONENT_LABEL + DRAM_ENERGY_COMPONENT_LABEL), } -all_feature_groups = [fg.name for fg in FeatureGroups.keys()] +all_feature_groups = [fg.name for fg in FeatureGroups] def get_feature_group(features): diff --git a/tests/client_load_tester.py b/tests/client_load_tester.py index 9982d41e..eb84f37b 100644 --- a/tests/client_load_tester.py +++ b/tests/client_load_tester.py @@ -1,7 +1,9 @@ -from estimator_power_request_test import Client -from estimator_model_test import generate_request, model_names -from estimator import SERVE_SOCKET import time + +from estimator import SERVE_SOCKET +from estimator_model_test import generate_request, model_names +from estimator_power_request_test import Client + loads = range(10, 11, 10) duration = 120 @@ -12,7 +14,7 @@ request_json = generate_request(model_name, load) start_time = time.time() client.make_request(request_json) - elapsed_time = time.time() - start_time - output = '{},{},{}'.format(model_name, load, elapsed_time) + elapsed_time = time.time() - start_time + output = f'{model_name},{load},{elapsed_time}' print(output) - time.sleep(1) \ No newline at end of file + time.sleep(1) diff --git a/tests/common_plot.py b/tests/common_plot.py index 5ee37b8e..e7889d71 100644 --- a/tests/common_plot.py +++ b/tests/common_plot.py @@ -2,18 +2,17 @@ # to visualize data import os -import seaborn as sns import matplotlib.pyplot as plt import numpy as np import pandas as pd +import seaborn as sns from sklearn.preprocessing import MaxAbsScaler - -from kepler_model.util import assure_path, FeatureGroups, FeatureGroup, PowerSourceMap -from kepler_model.util.prom_types import TIMESTAMP_COL -from kepler_model.util.extract_types import col_to_component -from kepler_model.train.extractor.preprocess import get_extracted_power_labels from kepler_model.estimate import get_label_power_colname +from kepler_model.train.extractor.preprocess import get_extracted_power_labels +from kepler_model.util import FeatureGroup, FeatureGroups, PowerSourceMap, assure_path +from kepler_model.util.extract_types import col_to_component +from kepler_model.util.prom_types import TIMESTAMP_COL plot_output_path = os.path.join(os.path.dirname(__file__), "data", "plot_output") assure_path(plot_output_path) @@ -32,7 +31,7 @@ def preprocess_data(df): # plot extract result -from extractor_test import test_energy_source, get_extract_results, get_expected_power_columns, test_extractors +from extractor_test import get_expected_power_columns, get_extract_results, test_energy_source, 
test_extractors def plot_extract_result(extractor_name, feature_group, result, energy_source=test_energy_source, label_cols=get_expected_power_columns(), save_path=plot_output_path, features=None, title=None): @@ -48,27 +47,27 @@ def plot_extract_result(extractor_name, feature_group, result, energy_source=tes i = 0 for feature in features: sns.lineplot(data=result, x=TIMESTAMP_COL, y=feature, ax=axes[i]) - axes[i].set_title("{}".format(feature)) + axes[i].set_title(f"{feature}") axes[i].set_ylabel("") i += 1 while i < ncols: - fig.delaxes(axes[i]) + fig.delaxes(axes[i]) i += 1 i = ncols for energy_component in energy_components: component_label_col = get_label_power_colname(energy_component) sns.lineplot(data=extracted_power_labels, x=TIMESTAMP_COL, y=component_label_col, ax=axes[i]) - axes[i].set_title("{}".format(component_label_col)) + axes[i].set_title(f"{component_label_col}") axes[i].set_ylabel("") i += 1 while i < 2*ncols: - fig.delaxes(axes[i]) + fig.delaxes(axes[i]) i += 1 if title is None: - title = "{} on {}".format(extractor_name, feature_group) + title = f"{extractor_name} on {feature_group}" plt.suptitle(title) - - figname = "extract_result_{}_{}".format(extractor_name, feature_group) + + figname = f"extract_result_{extractor_name}_{feature_group}" plt.tight_layout() fig.savefig(_fig_filename(figname, save_path=save_path)) @@ -80,9 +79,9 @@ def plot_power_cols(extractor_name, result, energy_source=test_energy_source, la target_df = result[[TIMESTAMP_COL] + target_cols].groupby([TIMESTAMP_COL]).mean().reset_index().sort_index() df = pd.melt(target_df, id_vars=[TIMESTAMP_COL], var_name="source", value_name="watts") ax = sns.lineplot(data=df, x=TIMESTAMP_COL, y="watts", hue="source") - title = "{} {}".format(extractor_name, energy_source) + title = f"{extractor_name} {energy_source}" ax.set_title(title) - figname = "extract_result_{}_{}".format(extractor_name, energy_source) + figname = f"extract_result_{extractor_name}_{energy_source}" plt.tight_layout() fig.savefig(_fig_filename(figname, save_path=save_path)) diff --git a/tests/estimator_model_request_test.py b/tests/estimator_model_request_test.py index 3054cd1b..80e82329 100644 --- a/tests/estimator_model_request_test.py +++ b/tests/estimator_model_request_test.py @@ -9,28 +9,28 @@ # ######################### # import external modules -import shutil -import requests +import json # import from src import os -import json +import shutil -from kepler_model.util.train_types import FeatureGroups, FeatureGroup, ModelOutputType -from kepler_model.util.loader import get_download_output_path, default_train_output_pipeline, get_url -from kepler_model.util.config import get_init_model_url, set_env_from_model_config, download_path -from kepler_model.estimate.estimator import handle_request, loaded_model, PowerRequest -from kepler_model.estimate.model_server_connector import list_all_models -from kepler_model.estimate.archived_model import get_achived_model, reset_failed_list -from tests.extractor_test import test_energy_source +import requests +from kepler_model.estimate.archived_model import get_achived_model, reset_failed_list +from kepler_model.estimate.estimator import PowerRequest, handle_request, loaded_model +from kepler_model.estimate.model_server_connector import list_all_models +from kepler_model.util.config import download_path, get_init_model_url, set_env_from_model_config +from kepler_model.util.loader import default_train_output_pipeline, get_download_output_path, get_url +from kepler_model.util.train_types import 
FeatureGroup, FeatureGroups, ModelOutputType from tests.estimator_power_request_test import generate_request +from tests.extractor_test import test_energy_source from tests.http_server import http_file_server file_server_port = 8110 # set environment os.environ["MODEL_SERVER_URL"] = "http://localhost:8100" -model_topurl = "http://localhost:{}".format(file_server_port) +model_topurl = f"http://localhost:{file_server_port}" os.environ["MODEL_TOPURL"] = model_topurl os.environ["INITIAL_PIPELINE_URL"] = os.path.join(model_topurl, "std_v0.7.11") @@ -55,7 +55,7 @@ def test_model_request(): request_json = generate_request(train_name=None, n=10, metrics=metrics, output_type=output_type_name) data = json.dumps(request_json) output = handle_request(data) - print("result {}/{} from model server: {}".format(output_type_name, fg_name, output)) + print(f"result {output_type_name}/{fg_name} from model server: {output}") assert len(output["powers"]) > 0, "cannot get power {}\n {}".format(output["msg"], request_json) # test with initial models @@ -76,7 +76,7 @@ def test_model_request(): data = json.dumps(request_json) output = handle_request(data) assert len(output["powers"]) > 0, "cannot get power {}\n {}".format(output["msg"], request_json) - print("result from {}: {}".format(url, output)) + print(f"result from {url}: {output}") output_type_name = "AbsPower" estimator_enable_key = "NODE_COMPONENTS_ESTIMATOR" @@ -99,7 +99,7 @@ def test_model_request(): data = json.dumps(request_json) output = handle_request(data) assert len(output["powers"]) > 0, "cannot get power {}\n {}".format(output["msg"], request_json) - print("result {}/{} from static set: {}".format(output_type_name, FeatureGroup.BPFOnly.name, output)) + print(f"result {output_type_name}/{FeatureGroup.BPFOnly.name} from static set: {output}") del loaded_model[output_type_name][energy_source] # invalid model os.environ[init_url_key] = get_url(energy_source=energy_source, output_type=output_type, feature_group=FeatureGroup.BPFOnly, model_topurl=model_topurl, pipeline_name=default_train_output_pipeline) @@ -108,8 +108,8 @@ def test_model_request(): data = json.dumps(request_json) power_request = json.loads(data, object_hook=lambda d: PowerRequest(**d)) output_path = get_achived_model(power_request) - assert output_path is None, "model should be invalid\n {}".format(output_path) - os.environ["MODEL_CONFIG"] = "{}=true\n{}={}\n".format(estimator_enable_key, init_url_key, get_url(energy_source=energy_source, output_type=output_type, feature_group=FeatureGroup.BPFOnly, model_topurl=model_topurl, pipeline_name=default_train_output_pipeline)) + assert output_path is None, f"model should be invalid\n {output_path}" + os.environ["MODEL_CONFIG"] = f"{estimator_enable_key}=true\n{init_url_key}={get_url(energy_source=energy_source, output_type=output_type, feature_group=FeatureGroup.BPFOnly, model_topurl=model_topurl, pipeline_name=default_train_output_pipeline)}\n" set_env_from_model_config() print("Requesting from ", os.environ[init_url_key]) reset_failed_list() diff --git a/tests/estimator_model_test.py b/tests/estimator_model_test.py index 78f19ce1..d05588a9 100644 --- a/tests/estimator_model_test.py +++ b/tests/estimator_model_test.py @@ -3,16 +3,22 @@ # - model.get_power() import os + import pandas as pd -from kepler_model.estimate import load_model, default_predicted_col_func, compute_error +from kepler_model.estimate import compute_error, default_predicted_col_func, load_model from kepler_model.train.trainer import model_toppath -from 
kepler_model.util.loader import get_model_group_path, default_train_output_pipeline from kepler_model.util import FeatureGroup, ModelOutputType, list_model_names +from kepler_model.util.loader import default_train_output_pipeline, get_model_group_path from kepler_model.util.prom_types import TIMESTAMP_COL - -from tests.isolator_test import test_isolators, get_isolate_results, isolator_output_path -from tests.extractor_test import test_extractors, get_extract_results, test_energy_source, get_expected_power_columns, extractor_output_path +from tests.extractor_test import ( + extractor_output_path, + get_expected_power_columns, + get_extract_results, + test_energy_source, + test_extractors, +) +from tests.isolator_test import get_isolate_results, isolator_output_path, test_isolators # extract_result, power_columns, corr, features = extractor.extract(query_results, energy_components, feature_group, energy_source, node_level) diff --git a/tests/estimator_power_request_test.py b/tests/estimator_power_request_test.py index 05c3fd93..e77d0897 100644 --- a/tests/estimator_power_request_test.py +++ b/tests/estimator_power_request_test.py @@ -1,8 +1,13 @@ -import socket import json +import socket -from kepler_model.util.train_types import WORKLOAD_FEATURES, SYSTEM_FEATURES, ModelOutputType, CATEGORICAL_LABEL_TO_VOCAB from kepler_model.util.config import SERVE_SOCKET +from kepler_model.util.train_types import ( + CATEGORICAL_LABEL_TO_VOCAB, + SYSTEM_FEATURES, + WORKLOAD_FEATURES, + ModelOutputType, +) from tests.extractor_test import test_energy_source trainer_names = ["SGDRegressorTrainer"] diff --git a/tests/extractor_test.py b/tests/extractor_test.py index a31c965d..d803b630 100644 --- a/tests/extractor_test.py +++ b/tests/extractor_test.py @@ -8,18 +8,22 @@ # import external src import os -from kepler_model.train.pipeline import load_class from kepler_model.train import DefaultExtractor, SmoothExtractor +from kepler_model.train.pipeline import load_class +from kepler_model.util import ( + FeatureGroup, + FeatureGroups, + PowerSourceMap, + assure_path, + get_valid_feature_group_from_queries, + load_csv, + save_csv, +) from kepler_model.util.extract_types import component_to_col from kepler_model.util.prom_types import node_info_column from kepler_model.util.train_types import all_feature_groups -from kepler_model.util import FeatureGroups, FeatureGroup, PowerSourceMap -from kepler_model.util import assure_path, get_valid_feature_group_from_queries -from kepler_model.util import save_csv, load_csv - from tests.prom_test import get_query_results - data_path = os.path.join(os.path.dirname(__file__), "data") assure_path(data_path) extractor_output_path = os.path.join(data_path, "extractor_output") @@ -37,7 +41,7 @@ def get_filename(extractor_name, feature_group, node_level): - return "{}_{}_{}".format(extractor_name, feature_group, node_level) + return f"{extractor_name}_{feature_group}_{node_level}" def get_extract_result(extractor_name, feature_group, node_level, save_path=extractor_output_path): @@ -70,15 +74,15 @@ def assert_extract(extracted_data, power_columns, energy_components, num_of_unit extracted_data_column_names = extracted_data.columns # basic assert assert extracted_data is not None, "extracted data is None" - assert len(power_columns) > 0, "no power label column {}".format(extracted_data_column_names) - assert node_info_column in extracted_data_column_names, "no {} in column {}".format(node_info_column, extracted_data_column_names) + assert len(power_columns) > 0, f"no power label 
column {extracted_data_column_names}" + assert node_info_column in extracted_data_column_names, f"no {node_info_column} in column {extracted_data_column_names}" # TODO: if ratio applied, expected_power_column_length = len(energy_components) * num_of_unit expected_power_column_length = len(energy_components) # detail assert - assert len(power_columns) == expected_power_column_length, "unexpected power label columns {}, expected {}".format(power_columns, expected_power_column_length) + assert len(power_columns) == expected_power_column_length, f"unexpected power label columns {power_columns}, expected {expected_power_column_length}" # TODO: if ratio applied, expected_col_size must + 1 for power_ratio expected_col_size = expected_power_column_length + len(FeatureGroups[FeatureGroup[feature_group]]) + num_of_unit # power ratio - assert len(extracted_data_column_names) == expected_col_size, "unexpected column length: expected {}, got {}({}) ".format(expected_col_size, extracted_data_column_names, len(extracted_data_column_names)) + assert len(extracted_data_column_names) == expected_col_size, f"unexpected column length: expected {expected_col_size}, got {extracted_data_column_names}({len(extracted_data_column_names)}) " def process(query_results, feature_group, save_path=extractor_output_path, customize_extractors=test_customize_extractors, energy_source=test_energy_source, num_of_unit=2): diff --git a/tests/http_server.py b/tests/http_server.py index 4178190d..6bcff83c 100644 --- a/tests/http_server.py +++ b/tests/http_server.py @@ -1,7 +1,7 @@ -import os +import atexit import http.server +import os import socketserver -import atexit import threading from kepler_model.util.config import model_toppath @@ -31,7 +31,7 @@ def http_file_server(file_server_port): server_thread.daemon = True server_thread.start() except Exception as err: - print("File server is running: {}".format(err)) + print(f"File server is running: {err}") def run(): diff --git a/tests/isolator_test.py b/tests/isolator_test.py index 863e1d28..d649034d 100644 --- a/tests/isolator_test.py +++ b/tests/isolator_test.py @@ -1,19 +1,27 @@ import os -import numpy as np +import numpy as np -from kepler_model.util import assure_path, save_csv, load_csv, FeatureGroups, FeatureGroup -from kepler_model.util.train_types import all_feature_groups +from kepler_model.train import ( + DefaultProfiler, + MinIdleIsolator, + NoneIsolator, + ProfileBackgroundIsolator, + TrainIsolator, + generate_profiles, +) +from kepler_model.train.extractor.preprocess import find_correlations +from kepler_model.util import FeatureGroup, FeatureGroups, assure_path, load_csv, save_csv from kepler_model.util.extract_types import container_level_index, node_level_index from kepler_model.util.prom_types import prom_responses_to_results - -from kepler_model.train import MinIdleIsolator, ProfileBackgroundIsolator, TrainIsolator, NoneIsolator -from kepler_model.train import generate_profiles -from kepler_model.train.extractor.preprocess import find_correlations - -from kepler_model.train import DefaultProfiler - -from tests.extractor_test import test_energy_source, get_extract_results, get_expected_power_columns, test_extractors, extractor_output_path +from kepler_model.util.train_types import all_feature_groups +from tests.extractor_test import ( + extractor_output_path, + get_expected_power_columns, + get_extract_results, + test_energy_source, + test_extractors, +) from tests.prom_test import get_prom_response isolator_output_path = 
os.path.join(os.path.dirname(__file__), "data", "isolator_output") @@ -26,7 +34,7 @@ test_isolators = [MinIdleIsolator(), NoneIsolator()] def get_filename(isolator_name, extractor_name, feature_group): - return "{}_{}_{}_{}".format(isolator_name, extractor_name, feature_group, False) + return f"{isolator_name}_{extractor_name}_{feature_group}_{False}" def get_isolate_result(isolator_name, extractor_name, feature_group, save_path=isolator_output_path): filename = get_filename(isolator_name, extractor_name, feature_group) @@ -35,7 +43,7 @@ def get_isolate_result(isolator_name, extractor_name, feature_group, save_path=i def get_isolate_results(isolator_name, extractor_name, save_path=isolator_output_path): all_results = dict() for feature_group in all_feature_groups: - result = get_isolate_result(isolator_name, extractor_name, feature_group, save_path=save_path) + result = get_isolate_result(isolator_name, extractor_name, feature_group, save_path=save_path) if result is not None: all_results[feature_group] = result return all_results @@ -49,8 +57,8 @@ def assert_isolate(extractor_result, isolated_data): assert isolated_data is not None, "isolated data is None" value_df = isolated_data.reset_index().drop(columns=container_level_index) negative_df = value_df[(value_df<0).all(1)] - assert len(negative_df) == 0, "all data must be non-negative \n {}".format(negative_df) - assert len(extractor_result.columns) == len(isolated_data_column_names), "unexpected column length: expected {}, got {}({}) ".format(len(extractor_result.columns), isolated_data_column_names, len(isolated_data_column_names)) + assert len(negative_df) == 0, f"all data must be non-negative \n {negative_df}" + assert len(extractor_result.columns) == len(isolated_data_column_names), f"unexpected column length: expected {len(extractor_result.columns)}, got {isolated_data_column_names}({len(isolated_data_column_names)}) " def find_correlation_of_isolated_data(isolated_data, workload_features, energy_source=test_energy_source, power_columns=get_expected_power_columns()): feature_power_data = isolated_data.groupby(node_level_index).sum() @@ -76,7 +84,7 @@ def process(test_isolators=test_isolators, customize_isolators=[], extract_path= extractor_name = extractor.__class__.__name__ extractor_results = get_extract_results(extractor_name, node_level=False, save_path=extract_path) for feature_group, extract_result in extractor_results.items(): - print("{} isolate {}_{}".format(isolator_name, extractor_name, feature_group)) + print(f"{isolator_name} isolate {extractor_name}_{feature_group}") isolated_data = test_instance.isolate(extract_result,label_cols=get_expected_power_columns(), energy_source=test_energy_source) workload_features = FeatureGroups[FeatureGroup[feature_group]] corr = find_correlation_of_isolated_data(isolated_data, workload_features) diff --git a/tests/minimal_trainer.py b/tests/minimal_trainer.py index fb34372a..db64454f 100644 --- a/tests/minimal_trainer.py +++ b/tests/minimal_trainer.py @@ -1,4 +1,5 @@ from pipeline_test import process + from kepler_model.util import FeatureGroup trainer_names = ["GradientBoostingRegressorTrainer", "SGDRegressorTrainer", "XgboostFitTrainer"] diff --git a/tests/model_server_test.py b/tests/model_server_test.py index d52f1b52..e1d54b1b 100644 --- a/tests/model_server_test.py +++ b/tests/model_server_test.py @@ -1,13 +1,13 @@ -import requests +import codecs +import json import os import shutil -import json -import codecs +import requests -from kepler_model.util.train_types import 
FeatureGroup, FeatureGroups, ModelOutputType from kepler_model.server.model_server import MODEL_SERVER_PORT from kepler_model.util.config import download_path +from kepler_model.util.train_types import FeatureGroup, FeatureGroups, ModelOutputType TMP_FILE = "tmp.zip" @@ -18,7 +18,7 @@ def get_model_request_json(metrics, output_type, node_type, weight, trainer_name def make_request(metrics, output_type, node_type=-1, weight=False, trainer_name="", energy_source="rapl-sysfs"): model_request = get_model_request_json(metrics, output_type, node_type, weight, trainer_name, energy_source) - response = requests.post("http://localhost:{}/model".format(MODEL_SERVER_PORT), json=model_request) + response = requests.post(f"http://localhost:{MODEL_SERVER_PORT}/model", json=model_request) assert response.status_code == 200, response.text if weight: weight_dict = json.loads(response.text) @@ -26,7 +26,7 @@ def make_request(metrics, output_type, node_type=-1, weight=False, trainer_name= for weight_values in weight_dict.values(): weight_length = len(weight_values["All_Weights"]["Numerical_Variables"]) expected_length = len(metrics) - assert weight_length <= expected_length, "weight metrics should covered by the requested {} > {}".format(weight_length, expected_length) + assert weight_length <= expected_length, f"weight metrics should be covered by the requested {weight_length} > {expected_length}" else: output_path = os.path.join(download_path, output_type.name) if os.path.exists(output_path): @@ -38,7 +38,7 @@ def get_models(): - response = requests.get("http://localhost:{}/best-models".format(MODEL_SERVER_PORT)) + response = requests.get(f"http://localhost:{MODEL_SERVER_PORT}/best-models") assert response.status_code == 200, response.text response = json.loads(response.text) return response diff --git a/tests/model_tester.py b/tests/model_tester.py index ee330b4b..be0b0dd4 100644 --- a/tests/model_tester.py +++ b/tests/model_tester.py @@ -1,21 +1,19 @@ # deprecated # WARN: is this test still needed -import pandas as pd import os -from sklearn.metrics import mean_squared_error, mean_absolute_error from prom_test import get_query_results +from sklearn.metrics import mean_absolute_error, mean_squared_error +from kepler_model.estimate.model.model import load_model from kepler_model.train import DefaultExtractor -from kepler_model.train.profile import profile_process, get_min_max_watt -from kepler_model.util.train_types import ModelOutputType, PowerSourceMap from kepler_model.train.isolator.train_isolator import get_background_containers, isolate_container -from kepler_model.estimate.model.model import load_model +from kepler_model.train.profile import get_min_max_watt, profile_process from kepler_model.util.loader import default_node_type - -from kepler_model.util.prom_types import prom_responses_to_results, TIMESTAMP_COL +from kepler_model.util.prom_types import TIMESTAMP_COL, prom_responses_to_results +from kepler_model.util.train_types import ModelOutputType, PowerSourceMap # model_tester.py # to get the test result across different train/test data set @@ -68,7 +66,7 @@ def process(train_dataset_name, test_dataset_name, target_path): extracted_data, power_columns, _, _ = extractor.extract(test_data, energy_components, feature_group, energy_source, node_level=False) feature_columns = [col for col in extracted_data.columns if col not in power_columns] if not model.feature_check(feature_columns): - print("model {} ({}/{}/{})is not 
valid to test".format(model.name, energy_source, output_type.name, feature_group)) + print(f"model {model.name} ({energy_source}/{output_type.name}/{feature_group})is not valid to test") continue if output_type == ModelOutputType.AbsPower: data_with_prediction = extracted_data.copy() diff --git a/tests/offline_trainer_test.py b/tests/offline_trainer_test.py index c658b065..604b8976 100644 --- a/tests/offline_trainer_test.py +++ b/tests/offline_trainer_test.py @@ -8,19 +8,18 @@ # test offline trainer # -import requests +import codecs import os import shutil -import codecs - -from kepler_model.train.offline_trainer import TrainAttribute, TrainRequest, serve_port +import requests +from kepler_model.train.offline_trainer import TrainAttribute, TrainRequest, serve_port +from kepler_model.util.loader import class_to_json, list_all_abs_models, list_all_dyn_models +from kepler_model.util.prom_types import get_valid_feature_group_from_queries, prom_responses_to_results +from tests.extractor_test import test_energy_source from tests.model_server_test import TMP_FILE from tests.prom_test import get_prom_response -from tests.extractor_test import test_energy_source -from kepler_model.util.loader import list_all_abs_models, list_all_dyn_models, class_to_json -from kepler_model.util.prom_types import get_valid_feature_group_from_queries, prom_responses_to_results offline_trainer_output_path = os.path.join(os.path.dirname(__file__), "data", "offline_trainer_output") @@ -57,7 +56,7 @@ def make_request(pipeline_name, idle_prom_response, isolator, isolator_args, pro train_request.trainer = class_to_json(trainer) request = class_to_json(train_request) # send request - response = requests.post("http://localhost:{}/train".format(serve_port), json=request) + response = requests.post(f"http://localhost:{serve_port}/train", json=request) assert response.status_code == 200, response.text with codecs.open(TMP_FILE, "wb") as f: f.write(response.content) @@ -67,13 +66,13 @@ def make_request(pipeline_name, idle_prom_response, isolator, isolator_args, pro def get_pipeline_name(dataset_name, isolator): - return "{}_{}".format(dataset_name, isolator) + return f"{dataset_name}_{isolator}" def _assert_offline_trainer(model_list_map): for model_path, models in model_list_map.items(): - assert len(models) > 0, "No trained model in {}".format(model_path) - print("Trained model in {}: {}".format(model_path, models)) + assert len(models) > 0, f"No trained model in {model_path}" + print(f"Trained model in {model_path}: {models}") def assert_offline_trainer_output(target_path, energy_source, valid_fgs, pipeline_name): diff --git a/tests/pipeline_test.py b/tests/pipeline_test.py index b4db65b5..6338537b 100644 --- a/tests/pipeline_test.py +++ b/tests/pipeline_test.py @@ -1,11 +1,10 @@ from kepler_model.train import NewPipeline, NodeTypeSpec -from kepler_model.util import get_valid_feature_group_from_queries, PowerSourceMap -from kepler_model.util.loader import default_train_output_pipeline, default_node_type - -from tests.prom_test import get_query_results, prom_output_path, prom_output_filename -from tests.extractor_test import test_extractors, test_energy_source +from kepler_model.util import PowerSourceMap, get_valid_feature_group_from_queries +from kepler_model.util.loader import default_node_type, default_train_output_pipeline +from tests.extractor_test import test_energy_source, test_extractors from tests.isolator_test import test_isolators -from tests.trainer_test import test_trainer_names, assert_train +from 
tests.prom_test import get_query_results, prom_output_filename, prom_output_path +from tests.trainer_test import assert_train, test_trainer_names # fake spec value spec_values = {"processor": "test", "cores": 1, "chips": 1, "memory": -1, "frequency": -1} @@ -16,7 +15,7 @@ def assert_pipeline(pipeline, query_results, feature_group, energy_source, energy_components): success, abs_data, dyn_data = pipeline.process(query_results, energy_components, energy_source, feature_group=feature_group.name, replace_node_type=default_node_type) - assert success, "failed to process pipeline {}".format(pipeline.name) + assert success, f"failed to process pipeline {pipeline.name}" for trainer in pipeline.trainers: if trainer.feature_group == feature_group and trainer.energy_source == energy_source: if trainer.node_level: diff --git a/tests/prom_test.py b/tests/prom_test.py index 2ad62d88..c2489ab5 100644 --- a/tests/prom_test.py +++ b/tests/prom_test.py @@ -13,7 +13,7 @@ import os from kepler_model.train.prom import PrometheusClient -from kepler_model.util import save_json, load_json +from kepler_model.util import load_json, save_json from kepler_model.util.prom_types import prom_responses_to_results prom_output_path = os.path.join(os.path.dirname(__file__), "data", "prom_output") diff --git a/tests/query_test.py b/tests/query_test.py index 54ef054e..68ae6498 100644 --- a/tests/query_test.py +++ b/tests/query_test.py @@ -1,13 +1,10 @@ import os # WARN: check if this test is still needed - -from kepler_model.train.prom.prom_query import PrometheusClient, PROM_QUERY_INTERVAL, POD_STAT_QUERY, NODE_STAT_QUERY +from kepler_model.train.prom.prom_query import NODE_STAT_QUERY, POD_STAT_QUERY, PROM_QUERY_INTERVAL, PrometheusClient from kepler_model.util.config import getConfig - from kepler_model.util.train_types import FeatureGroups - SAMPLING_INTERVAL = PROM_QUERY_INTERVAL SAMPLING_INTERVAL = getConfig("SAMPLING_INTERVAL", SAMPLING_INTERVAL) SAMPLING_INTERVAL = int(SAMPLING_INTERVAL) @@ -19,11 +16,11 @@ prom_output_path = os.path.join(os.path.dirname(__file__), "query_data") # save query data in csv for query, result in results.items(): - result.to_csv("{}/{}.csv".format(prom_output_path, query)) + result.to_csv(f"{prom_output_path}/{query}.csv") # print data get by feature list for query_metric in [POD_STAT_QUERY, NODE_STAT_QUERY]: for fg, features in FeatureGroups.items(): data = prom_client.get_data(query_metric, features) - print("Query: {} Type: {} Features: {}".format(query_metric, fg.name, features)) + print(f"Query: {query_metric} Type: {fg.name} Features: {features}") print(None if data is None else data.head()) diff --git a/tests/trainer_test.py b/tests/trainer_test.py index 4b3698cd..836ddc64 100644 --- a/tests/trainer_test.py +++ b/tests/trainer_test.py @@ -1,15 +1,20 @@ # trainer_test.py +import threading + +import pandas as pd import sklearn from kepler_model.train import load_class from kepler_model.util.loader import default_train_output_pipeline from kepler_model.util.train_types import PowerSourceMap, default_trainer_names - -from tests.isolator_test import test_isolators, get_isolate_results -from tests.extractor_test import test_extractors, get_extract_results, test_energy_source, get_expected_power_columns, node_info_column - -import pandas as pd -import threading +from tests.extractor_test import ( + get_expected_power_columns, + get_extract_results, + node_info_column, + test_energy_source, + test_extractors, +) +from tests.isolator_test import get_isolate_results, test_isolators 
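# [editor's note] Like the other test modules in this patch, the import block above is regrouped into stdlib / third-party / first-party sections and alphabetized within each group. A minimal sketch of the resulting convention, using only names from this hunk (assuming an isort-style sorter; the patch does not say which tool produced it):
#
#     import threading
#
#     import pandas as pd
#     import sklearn
#
#     from kepler_model.train import load_class
#     from tests.isolator_test import get_isolate_results, test_isolators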
test_trainer_names = default_trainer_names pipeline_lock = threading.Lock() @@ -25,7 +30,7 @@ def assert_train(trainer, data, energy_components): for component in energy_components: try: output = trainer.predict(node_type_str, component, X_values) - assert len(output) == len(X_values), "length of predicted values != features ({}!={})".format(len(output), len(X_values)) + assert len(output) == len(X_values), f"length of predicted values != features ({len(output)}!={len(X_values)})" except sklearn.exceptions.NotFittedError: pass diff --git a/tests/weight_model_request_test.py b/tests/weight_model_request_test.py index db81298f..7765e19f 100644 --- a/tests/weight_model_request_test.py +++ b/tests/weight_model_request_test.py @@ -6,20 +6,19 @@ # ######################### -import os import json -import time +import os import sys +import time import requests -from kepler_model.util.train_types import FeatureGroups, FeatureGroup, ModelOutputType -from kepler_model.util.loader import get_download_output_path from kepler_model.estimate.model_server_connector import list_all_models -from kepler_model.util.config import get_model_server_req_endpoint, download_path - -from tests.extractor_test import test_energy_source +from kepler_model.util.config import download_path, get_model_server_req_endpoint +from kepler_model.util.loader import get_download_output_path +from kepler_model.util.train_types import FeatureGroup, FeatureGroups, ModelOutputType from tests.estimator_power_request_test import generate_request +from tests.extractor_test import test_energy_source os.environ["MODEL_SERVER_URL"] = "http://localhost:8100" @@ -52,8 +51,8 @@ try: response = requests.post(get_model_server_req_endpoint(), json=request_json) except Exception as err: - print("cannot get response from model server: {}".format(err)) + print(f"cannot get response from model server: {err}") sys.exit(1) - assert response.status_code == 200, "response {} not OK".format(request_json) + assert response.status_code == 200, f"response {request_json} not OK" loaded_weight = json.loads(response.content) print(loaded_weight) diff --git a/tests/xgboost_test.py b/tests/xgboost_test.py index a1712d89..d0d51ff0 100644 --- a/tests/xgboost_test.py +++ b/tests/xgboost_test.py @@ -1,10 +1,10 @@ -import os import json +import os from kepler_model.train import DefaultExtractor -from kepler_model.util.train_types import FeatureGroup, XGBoostRegressionTrainType from kepler_model.train.profiler.profiler import response_to_result from kepler_model.train.trainer.XGBoostTrainer.main import XGBoostRegressionStandalonePipeline +from kepler_model.util.train_types import FeatureGroup, XGBoostRegressionTrainType energy_components = ["package", "core", "uncore", "dram"] feature_group = FeatureGroup.BPFIRQ.name
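
Taken together, the changes in this patch are mechanical modernizations: import regrouping, str.format() to f-strings, open(path, "r") to open(path), and typing.List/Dict/Tuple/Optional to builtin generics with `X | None` unions (PEP 585/604). A small self-contained sketch of the resulting style follows; the helper names are hypothetical, not part of the patch, and the union syntax assumes Python 3.10+:

import json
from typing import Any


def load_model_desc(path: str) -> dict[str, Any] | None:
    # open() defaults to mode "r", so the explicit mode argument is dropped
    try:
        with open(path) as f:
            return json.load(f)
    except FileNotFoundError:
        return None


def predict_batch(rows: list[list[float]]) -> tuple[list[float] | None, dict[Any, Any] | None]:
    # mirrors the modernized predict() signature style used in XGBoostTrainer
    if not rows:
        return None, None
    return [0.0 for _ in rows], {}


print(f"loaded: {load_model_desc('missing.json')}")  # f-string instead of .format()

If a single tool produced these edits, ruff's import-sorting (I) and pyupgrade (UP) rule groups would generate very similar output, e.g. `ruff check --select I,UP --fix`; that tooling choice is an assumption, not something the patch states.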