From ebd76f8415f0deb0032a4fbbc4babc0a1ea52d45 Mon Sep 17 00:00:00 2001 From: Pedram Bakh <56321501+PedramBakh@users.noreply.github.com> Date: Mon, 5 Dec 2022 16:35:38 +0100 Subject: [PATCH] Update workflow python version --- .github/workflows/build.yml | 8 +- .github/workflows/publish-pypi.yml | 41 ++++ .gitignore | 180 +++++++++++++++-- README.md | 6 +- carbontracker/__version__.py | 6 - carbontracker/components/component.py | 54 ++--- carbontracker/components/cpu/intel.py | 27 ++- carbontracker/components/gpu/nvidia.py | 8 +- carbontracker/constants.py | 17 +- carbontracker/data/carbon-intensities.csv | 188 ++++++++++++++++++ carbontracker/emissions/conversion/co2eq.csv | 3 +- carbontracker/emissions/conversion/co2eq.py | 5 +- carbontracker/emissions/intensity/fetcher.py | 6 +- .../intensity/fetchers/carbonintensitygb.py | 8 +- .../emissions/intensity/fetchers/co2signal.py | 21 +- .../intensity/fetchers/energidataservice.py | 24 +-- .../emissions/intensity/intensity.py | 103 +++++++--- carbontracker/exceptions.py | 26 ++- carbontracker/loggerutil.py | 44 ++-- carbontracker/parser.py | 46 ++--- carbontracker/tracker.py | 138 ++++++------- pyproject.toml | 26 +++ requirements.txt | 1 - scripts/create_carbon_intensity_csv.py | 45 +++++ setup.py | 32 --- 25 files changed, 753 insertions(+), 310 deletions(-) create mode 100644 .github/workflows/publish-pypi.yml delete mode 100644 carbontracker/__version__.py create mode 100644 carbontracker/data/carbon-intensities.csv create mode 100644 pyproject.toml delete mode 100644 requirements.txt create mode 100644 scripts/create_carbon_intensity_csv.py delete mode 100644 setup.py diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index b92534a..78ad073 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -12,15 +12,15 @@ on: jobs: build: - runs-on: ubuntu-latest + runs-on: ubuntu-20.04 strategy: matrix: - python-version: [3.6, 3.7, 3.8] + python-version: [3.7, 3.8, 3.9, '3.10'] steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v1 + uses: actions/setup-python@v3 with: python-version: ${{ matrix.python-version }} - name: Install dependencies diff --git a/.github/workflows/publish-pypi.yml b/.github/workflows/publish-pypi.yml new file mode 100644 index 0000000..a88c659 --- /dev/null +++ b/.github/workflows/publish-pypi.yml @@ -0,0 +1,41 @@ +name: Publish Python 🐍 distributions 📦 to PyPI and TestPyPI + +on: + release: + types: [published] + +jobs: + build-n-publish: + name: Build and publish Python 🐍 distributions 📦 to PyPI and TestPyPI + runs-on: ubuntu-20.04 + steps: + - uses: actions/checkout@master + - name: Set up Python 3.10 + uses: actions/setup-python@v3 + with: + python-version: "3.10" + - name: Install pypa/build + run: >- + python -m + pip install + build + --user + - name: Build a binary wheel and a source tarball + run: >- + python -m + build + --sdist + --wheel + --outdir dist/ + . + - name: Publish distribution 📦 to Test PyPI + if: startsWith(github.ref, 'refs/tags/test') + uses: pypa/gh-action-pypi-publish@release/v1 + with: + password: ${{ secrets.TEST_PYPI_API_TOKEN }} + repository_url: https://test.pypi.org/legacy/ + - name: Publish distribution 📦 to PyPI + if: startsWith(github.ref, 'refs/tags') && !startsWith(github.ref, 'refs/tags/test') + uses: pypa/gh-action-pypi-publish@release/v1 + with: + password: ${{ secrets.PYPI_API_TOKEN }} diff --git a/.gitignore b/.gitignore index 2262a44..14fc257 100644 --- a/.gitignore +++ b/.gitignore @@ -1,19 +1,167 @@ -*__pycache__ -__pycache__ -.DS_Store -data/ -dist/ -logs/ -bin -develop-eggs -downloads -eggs -parts -*.egg-info -lib -lib64 +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python build/ -*.pyo -*.pyc +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# poetry +# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control +#poetry.lock + +# pdm +# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. +#pdm.lock +# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it +# in version control. +# https://pdm.fming.dev/#use-with-ide +.pdm.toml + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# PyCharm +# JetBrains specific template is maintained in a separate JetBrains.gitignore that can +# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore +# and can be added to the global gitignore or merged into this file. For a more nuclear +# option (not recommended) you can uncomment the following to ignore the entire idea folder. +#.idea/ + +# VS Code .vscode/ .vscode + +# macOS +.DS_Store diff --git a/README.md b/README.md index 3b0a048..438446a 100644 --- a/README.md +++ b/README.md @@ -1,8 +1,8 @@ # **carbontracker** [![pypi](https://img.shields.io/pypi/v/carbontracker?label=pypi)](https://pypi.org/project/carbontracker/) -[![Python 3.6](https://img.shields.io/pypi/pyversions/django?color=blue&logo=python)](https://www.python.org/downloads/) +[![Python](https://img.shields.io/badge/python-%3E%3D3.7-blue)](https://www.python.org/downloads/) [![build](https://github.com/lfwa/carbontracker/workflows/build/badge.svg)](https://github.com/lfwa/carbontracker/actions) -[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://github.com/lfwa/carbontracker/blob/master/LICENSE) +[![License](https://img.shields.io/github/license/lfwa/carbontracker)](https://github.com/lfwa/carbontracker/blob/master/LICENSE) ## About **carbontracker** is a tool for tracking and predicting the energy consumption and carbon footprint of training deep learning models as described in [Anthony et al. (2020)](https://arxiv.org/abs/2007.03051). @@ -49,6 +49,8 @@ pip install carbontracker If True, only devices (under the chosen components) running processes associated with the main process are measured. If False, all available devices are measured (see Section 'Notes' for jobs running on SLURM or in containers). Note that this requires your devices to have active processes before instantiating the `CarbonTracker` class. - `log_dir` (default=None): Path to the desired directory to write log files. If None, then no logging will be done. +- `log_file_prefix` (default=""): + Prefix to add to the log file name. - `verbose` (default=1): Sets the level of verbosity. - `decimal_precision` (default=6): diff --git a/carbontracker/__version__.py b/carbontracker/__version__.py deleted file mode 100644 index 7475eaa..0000000 --- a/carbontracker/__version__.py +++ /dev/null @@ -1,6 +0,0 @@ -__title__ = "carbontracker" -__description__ = "Tracking and predicting the carbon footprint of training deep learning models." -__url__ = "https://github.com/lfwa/carbontracker" -__version__ = "1.1.6" -__author__ = "Lasse F. Wolff Anthony & Benjamin Kanding" -__license__ = "MIT" diff --git a/carbontracker/components/component.py b/carbontracker/components/component.py index 775a36b..e76c26f 100644 --- a/carbontracker/components/component.py +++ b/carbontracker/components/component.py @@ -4,15 +4,10 @@ from carbontracker.components.gpu import nvidia from carbontracker.components.cpu import intel -components = [{ - "name": "gpu", - "error": exceptions.GPUError("No GPU(s) available."), - "handlers": [nvidia.NvidiaGPU] -}, { - "name": "cpu", - "error": exceptions.CPUError("No CPU(s) available."), - "handlers": [intel.IntelCPU] -}] +components = [ + {"name": "gpu", "error": exceptions.GPUError("No GPU(s) available."), "handlers": [nvidia.NvidiaGPU]}, + {"name": "cpu", "error": exceptions.CPUError("No CPU(s) available."), "handlers": [intel.IntelCPU]}, +] def component_names(): @@ -35,10 +30,8 @@ class Component: def __init__(self, name, pids, devices_by_pid): self.name = name if name not in component_names(): - raise exceptions.ComponentNameError( - f"No component found with name '{self.name}'.") - self._handler = self._determine_handler(pids=pids, - devices_by_pid=devices_by_pid) + raise exceptions.ComponentNameError(f"No component found with name '{self.name}'.") + self._handler = self._determine_handler(pids=pids, devices_by_pid=devices_by_pid) self.power_usages = [] self.cur_epoch = -1 # Sentry @@ -75,20 +68,35 @@ def collect_power_usage(self, epoch): if diff != 0: for _ in range(diff): # Copy previous measurement lists. - latest_measurements = self.power_usages[ - -1] if self.power_usages else [] + latest_measurements = self.power_usages[-1] if self.power_usages else [] self.power_usages.append(latest_measurements) self.power_usages.append([]) - - self.power_usages[-1].append(self.handler.power_usage()) + try: + self.power_usages[-1].append(self.handler.power_usage()) + except exceptions.IntelRaplPermissionError: + # Only raise error if no measurements have been collected. + if not self.power_usages[-1]: + print( + "No sudo access to read Intel's RAPL measurements from the energy_uj file." + "\nSee issue: https://github.com/lfwa/carbontracker/issues/40" + ) + # Append zero measurement to avoid further errors. + self.power_usages.append([0]) + except exceptions.GPUPowerUsageRetrievalError: + if not self.power_usages[-1]: + print( + "GPU model does not support retrieval of power usages in NVML." + "\nSee issue: https://github.com/lfwa/carbontracker/issues/36" + ) + # Append zero measurement to avoid further errors. + self.power_usages.append([0]) def energy_usage(self, epoch_times): """Returns energy (kWh) used by component per epoch.""" energy_usages = [] # We have to compute each epoch in a for loop since numpy cannot # handle lists of uneven length. - for idx, (power, time) in enumerate(zip(self.power_usages, - epoch_times)): + for idx, (power, time) in enumerate(zip(self.power_usages, epoch_times)): # If no power measurement exists, try to use measurements from # later epochs. while not power and idx != len(self.power_usages) - 1: @@ -124,12 +132,8 @@ def shutdown(self): def create_components(components, pids, devices_by_pid): components = components.strip().replace(" ", "").lower() if components == "all": - return [ - Component(name=comp_name, pids=pids, devices_by_pid=devices_by_pid) - for comp_name in component_names() - ] + return [Component(name=comp_name, pids=pids, devices_by_pid=devices_by_pid) for comp_name in component_names()] else: return [ - Component(name=comp_name, pids=pids, devices_by_pid=devices_by_pid) - for comp_name in components.split(",") + Component(name=comp_name, pids=pids, devices_by_pid=devices_by_pid) for comp_name in components.split(",") ] diff --git a/carbontracker/components/cpu/intel.py b/carbontracker/components/cpu/intel.py index e445602..bd4faae 100644 --- a/carbontracker/components/cpu/intel.py +++ b/carbontracker/components/cpu/intel.py @@ -2,6 +2,7 @@ import re import time +from carbontracker import exceptions from carbontracker.components.handler import Handler # RAPL Literature: @@ -30,8 +31,7 @@ def power_usage(self): while attempts > 0: attempts -= 1 power_usages = [ - self._compute_power(before, after) - for before, after in zip(before_measures, after_measures) + self._compute_power(before, after) for before, after in zip(before_measures, after_measures) ] if all(power >= 0 for power in power_usages): return power_usages @@ -45,26 +45,26 @@ def _compute_power(self, before, after): return watt def _read_energy(self, path): - with open(os.path.join(path, "energy_uj"), 'r') as f: + with open(os.path.join(path, "energy_uj"), "r") as f: return int(f.read()) def _get_measurements(self): measurements = [] for package in self._rapl_devices: try: - power_usage = self._read_energy(os.path.join( - RAPL_DIR, package)) + power_usage = self._read_energy(os.path.join(RAPL_DIR, package)) measurements.append(power_usage) + # If there is no sudo access, we cannot read the energy_uj file. + # Permission denied error is raised. + except PermissionError: + raise exceptions.IntelRaplPermissionError() + except FileNotFoundError: # check cpu/gpu/dram - parts = [ - f for f in os.listdir(os.path.join(RAPL_DIR, package)) - if re.match(self.parts_pattern, f) - ] + parts = [f for f in os.listdir(os.path.join(RAPL_DIR, package)) if re.match(self.parts_pattern, f)] total_power_usage = 0 for part in parts: - total_power_usage += self._read_energy( - os.path.join(RAPL_DIR, package, part)) + total_power_usage += self._read_energy(os.path.join(RAPL_DIR, package, part)) measurements.append(total_power_usage) @@ -76,7 +76,7 @@ def _convert_rapl_name(self, name, pattern): def init(self): # Get amount of intel-rapl folders - packages = list(filter(lambda x: ':' in x, os.listdir(RAPL_DIR))) + packages = list(filter(lambda x: ":" in x, os.listdir(RAPL_DIR))) self.device_count = len(packages) self._devices = [] self._rapl_devices = [] @@ -89,8 +89,7 @@ def init(self): name = f.read().strip() if name != "psys": self._rapl_devices.append(package) - self._devices.append( - self._convert_rapl_name(package, devices_pattern)) + self._devices.append(self._convert_rapl_name(package, devices_pattern)) def shutdown(self): pass diff --git a/carbontracker/components/gpu/nvidia.py b/carbontracker/components/gpu/nvidia.py index 1d386db..19527ee 100644 --- a/carbontracker/components/gpu/nvidia.py +++ b/carbontracker/components/gpu/nvidia.py @@ -10,6 +10,7 @@ import pynvml import os +from carbontracker import exceptions from carbontracker.components.handler import Handler @@ -51,8 +52,7 @@ def power_usage(self): power_usage = pynvml.nvmlDeviceGetPowerUsage(handle) / 1000 gpu_power_usages.append(power_usage) except pynvml.NVMLError: - pass - + raise exceptions.GPUPowerUsageRetrievalError() return gpu_power_usages def init(self): @@ -108,8 +108,8 @@ def _get_handles_by_pid(self): handle = pynvml.nvmlDeviceGetHandleByIndex(index) gpu_pids = [ p.pid - for p in pynvml.nvmlDeviceGetComputeRunningProcesses(handle) + - pynvml.nvmlDeviceGetGraphicsRunningProcesses(handle) + for p in pynvml.nvmlDeviceGetComputeRunningProcesses(handle) + + pynvml.nvmlDeviceGetGraphicsRunningProcesses(handle) ] if set(gpu_pids).intersection(self.pids): diff --git a/carbontracker/constants.py b/carbontracker/constants.py index 0e7dbb0..01caafa 100644 --- a/carbontracker/constants.py +++ b/carbontracker/constants.py @@ -1,7 +1,14 @@ # Power Usage Effectiveness Coefficient. -# Rhonda Ascierto. 2020. Uptime Institute Annual Data Center Survey. -PUE = 1.59 +# Rhonda Ascierto. 2020-2021. Uptime Institute Annual Data Center Survey. +# https://www.missioncriticalmagazine.com/ext/resources/whitepapers/2020/2020AnnualSurvey_EndUser_v4s.pdf +PUE_2020 = 1.59 -# Average carbon intensity for EU-28 in 2017. -# https://www.eea.europa.eu/data-and-maps/data/co2-intensity-of-electricity-generation -EU_28_2017_CARBON_INTENSITY = 294.2060978 +# https://uptimeinstitute.com/2021-data-center-industry-survey-results +PUE_2021 = 1.57 + +# https://uptimeinstitute.com/uptime_assets/6768eca6a75d792c8eeede827d76de0d0380dee6b5ced20fde45787dd3688bfe-2022-data-center-industry-survey-en.pdf +PUE_2022 = 1.55 + +# World-wide average carbon intensity of electricity production in 2019. +# https://www.iea.org/reports/global-energy-co2-status-report-2019/emissions +WORLD_2019_CARBON_INTENSITY = 475 diff --git a/carbontracker/data/carbon-intensities.csv b/carbontracker/data/carbon-intensities.csv new file mode 100644 index 0000000..a6ebe2f --- /dev/null +++ b/carbontracker/data/carbon-intensities.csv @@ -0,0 +1,188 @@ +alpha-2,Entity,Code,Year,Carbon intensity of electricity (gCO2/kWh) +AE,United Arab Emirates,ARE,2020,427.86282 +AF,Afghanistan,AFG,2020,115.38463 +AG,Antigua and Barbuda,ATG,2020,687.5 +AL,Albania,ALB,2020,24.482107 +AM,Armenia,ARM,2021,206.94258 +AO,Angola,AGO,2020,168.86728 +AR,Argentina,ARG,2021,347.29196 +AS,American Samoa,ASM,2020,733.3333 +AT,Austria,AUT,2021,81.25418 +AU,Australia,AUS,2021,486.25497 +AW,Aruba,ABW,2020,579.5454 +AZ,Azerbaijan,AZE,2021,481.51907 +BA,Bosnia and Herzegovina,BIH,2021,478.45804 +BB,Barbados,BRB,2020,670.103 +BD,Bangladesh,BGD,2021,446.66843 +BE,Belgium,BEL,2021,139.7274 +BF,Burkina Faso,BFA,2020,631.25 +BG,Bulgaria,BGR,2021,419.39273 +BH,Bahrain,BHR,2020,489.95312 +BI,Burundi,BDI,2021,312.5 +BJ,Benin,BEN,2020,652.17395 +BR,Brazil,BRA,2021,141.77426 +BS,Bahamas,BHS,2020,698.49243 +BT,Bhutan,BTN,2020,23.463686 +BW,Botswana,BWA,2020,800.0 +BY,Belarus,BLR,2021,443.62558 +BZ,Belize,BLZ,2020,474.57626 +CA,Canada,CAN,2021,118.99668 +CF,Central African Republic,CAF,2020,0.0 +CG,Congo,COG,2020,364.14563 +CH,Switzerland,CHE,2021,57.772644 +CK,Cook Islands,COK,2020,500.0 +CL,Chile,CHL,2021,374.46323 +CM,Cameroon,CMR,2020,243.7071 +CN,China,CHN,2021,541.3317 +CO,Colombia,COL,2020,192.62947 +CR,Costa Rica,CRI,2021,30.903326 +CU,Cuba,CUB,2020,575.9049 +CY,Cyprus,CYP,2021,587.49805 +CZ,Czechia,CZE,2021,412.24548 +DE,Germany,DEU,2021,352.42252 +DJ,Djibouti,DJI,2020,800.0 +DK,Denmark,DNK,2021,149.74605 +DM,Dominica,DMA,2020,500.0 +DO,Dominican Republic,DOM,2020,605.7243 +DZ,Algeria,DZA,2020,449.74878 +EC,Ecuador,ECU,2021,137.54825 +EE,Estonia,EST,2021,739.8695 +EG,Egypt,EGY,2021,389.0191 +EH,Western Sahara,ESH,2009,666.6666 +ER,Eritrea,ERI,2020,659.0909 +ES,Spain,ESP,2021,169.03445 +ET,Ethiopia,ETH,2020,25.441698 +FI,Finland,FIN,2021,68.833595 +FJ,Fiji,FJI,2020,292.92926 +FR,France,FRA,2021,58.4792 +GA,Gabon,GAB,2020,294.91525 +GB,United Kingdom,GBR,2021,264.5091 +GD,Grenada,GRD,2020,700.0 +GE,Georgia,GEO,2021,111.374405 +GF,French Guiana,GUF,2020,350.51547 +GH,Ghana,GHA,2020,355.04724 +GL,Greenland,GRL,2020,118.644066 +GM,Gambia,GMB,2020,689.65515 +GN,Guinea,GIN,2020,175.0 +GP,Guadeloupe,GLP,2020,588.6076 +GQ,Equatorial Guinea,GNQ,2020,628.31854 +GR,Greece,GRC,2021,430.25867 +GT,Guatemala,GTM,2020,332.0158 +GU,Guam,GUM,2020,670.58826 +GW,Guinea-Bissau,GNB,2020,750.0 +GY,Guyana,GUY,2020,636.3636 +HK,Hong Kong,HKG,2020,644.89954 +HN,Honduras,HND,2020,358.64594 +HR,Croatia,HRV,2021,131.26709 +HT,Haiti,HTI,2020,606.383 +HU,Hungary,HUN,2021,195.50255 +ID,Indonesia,IDN,2020,624.6789 +IE,Ireland,IRL,2021,381.08688 +IL,Israel,ISR,2020,527.77277 +IN,India,IND,2021,626.0071 +IQ,Iraq,IRQ,2020,419.7325 +IS,Iceland,ISL,2020,28.75471 +IT,Italy,ITA,2021,222.8387 +JM,Jamaica,JAM,2020,532.3383 +JO,Jordan,JOR,2020,432.20337 +JP,Japan,JPN,2021,416.4962 +KE,Kenya,KEN,2021,112.37928 +KG,Kyrgyzstan,KGZ,2020,91.52752 +KH,Cambodia,KHM,2020,423.52942 +KI,Kiribati,KIR,2020,666.6667 +KM,Comoros,COM,2020,692.3078 +KN,Saint Kitts and Nevis,KNA,2020,666.6667 +KW,Kuwait,KWT,2020,438.1219 +KY,Cayman Islands,CYM,2020,681.1594 +KZ,Kazakhstan,KAZ,2021,654.9424 +LB,Lebanon,LBN,2020,544.89166 +LC,Saint Lucia,LCA,2020,696.9697 +LK,Sri Lanka,LKA,2020,439.22418 +LR,Liberia,LBR,2020,292.13483 +LS,Lesotho,LSO,2020,20.0 +LT,Lithuania,LTU,2021,209.03082 +LU,Luxembourg,LUX,2021,0.0 +LV,Latvia,LVA,2021,171.61119 +LY,Libya,LBY,2020,496.51044 +MA,Morocco,MAR,2020,571.06445 +ME,Montenegro,MNE,2021,350.6849 +MG,Madagascar,MDG,2020,452.8302 +MK,North Macedonia,MKD,2021,349.11407 +ML,Mali,MLI,2020,465.625 +MM,Myanmar,MMR,2020,311.10156 +MN,Mongolia,MNG,2021,725.97406 +MO,Macao,MAC,2020,482.14288 +MQ,Martinique,MTQ,2020,653.5948 +MR,Mauritania,MRT,2020,522.7273 +MS,Montserrat,MSR,2020,1000.0 +MT,Malta,MLT,2021,406.50406 +MU,Mauritius,MUS,2020,613.1387 +MV,Maldives,MDV,2020,701.7544 +MW,Malawi,MWI,2020,113.20756 +MX,Mexico,MEX,2021,373.80746 +MY,Malaysia,MYS,2021,541.00055 +MZ,Mozambique,MOZ,2020,129.77527 +NA,Namibia,NAM,2020,56.603775 +NC,New Caledonia,NCL,2020,640.0 +NE,Niger,NER,2020,675.00006 +NG,Nigeria,NGA,2020,395.2415 +NI,Nicaragua,NIC,2020,343.34766 +NL,Netherlands,NLD,2021,328.90408 +NO,Norway,NOR,2021,25.080723 +NP,Nepal,NPL,2020,22.653723 +NR,Nauru,NRU,2020,750.0 +NZ,New Zealand,NZL,2021,135.98497 +OM,Oman,OMN,2020,440.53098 +PA,Panama,PAN,2020,183.39417 +PE,Peru,PER,2021,233.97925 +PF,French Polynesia,PYF,2020,469.69696 +PG,Papua New Guinea,PNG,2020,563.6793 +PH,Philippines,PHL,2021,544.3023 +PK,Pakistan,PAK,2021,295.77448 +PL,Poland,POL,2021,727.7765 +PM,Saint Pierre and Miquelon,SPM,2020,800.0 +PR,Puerto Rico,PRI,2020,664.7727 +PT,Portugal,PRT,2021,181.10257 +PY,Paraguay,PRY,2020,23.915686 +QA,Qatar,QAT,2020,442.76172 +RO,Romania,ROU,2021,252.84236 +RS,Serbia,SRB,2021,545.552 +RW,Rwanda,RWA,2020,289.15662 +SA,Saudi Arabia,SAU,2021,568.50006 +SB,Solomon Islands,SLB,2020,700.0 +SC,Seychelles,SYC,2020,698.1133 +SD,Sudan,SDN,2020,259.31232 +SE,Sweden,SWE,2021,11.770537 +SG,Singapore,SGP,2021,463.89664 +SI,Slovenia,SVN,2021,250.87906 +SK,Slovakia,SVK,2021,100.85782 +SL,Sierra Leone,SLE,2020,47.61905 +SN,Senegal,SEN,2021,534.4203 +SO,Somalia,SOM,2020,648.6486 +SR,Suriname,SUR,2020,298.7013 +SS,South Sudan,SSD,2020,698.1133 +ST,Sao Tome and Principe,STP,2020,600.0 +SV,El Salvador,SLV,2021,245.827 +SZ,Eswatini,SWZ,2020,203.12498 +TC,Turks and Caicos Islands,TCA,2020,720.00006 +TD,Chad,TCD,2020,678.5714 +TG,Togo,TGO,2020,576.92316 +TH,Thailand,THA,2021,503.15494 +TJ,Tajikistan,TJK,2021,83.28969 +TM,Turkmenistan,TKM,2020,412.32855 +TN,Tunisia,TUN,2021,470.38147 +TO,Tonga,TON,2020,666.6667 +TR,Turkey,TUR,2021,429.6388 +TT,Trinidad and Tobago,TTO,2020,497.9688 +UA,Ukraine,UKR,2021,240.50241 +UG,Uganda,UGA,2020,77.0878 +US,United States,USA,2021,357.2021 +UY,Uruguay,URY,2021,152.38716 +UZ,Uzbekistan,UZB,2020,426.88644 +VC,Saint Vincent and the Grenadines,VCT,2020,533.3333 +VU,Vanuatu,VUT,2020,571.4286 +WS,Samoa,WSM,2020,500.0 +YE,Yemen,YEM,2020,538.46155 +ZA,South Africa,ZAF,2021,664.73755 +ZM,Zambia,ZMB,2020,120.77597 +ZW,Zimbabwe,ZWE,2020,279.0279 diff --git a/carbontracker/emissions/conversion/co2eq.csv b/carbontracker/emissions/conversion/co2eq.csv index bdba583..9b5d4f0 100644 --- a/carbontracker/emissions/conversion/co2eq.csv +++ b/carbontracker/emissions/conversion/co2eq.csv @@ -1,2 +1,3 @@ name,gCO2eq_per_unit,unit,lowerbound,upperbound,source -eu_new_car_2018,120.4,km travelled by car,0,inf,https://www.eea.europa.eu/data-and-maps/indicators/average-co2-emissions-from-motor-vehicles/assessment-1 \ No newline at end of file +eu_new_car_2018,120.4,km travelled by car,0,inf,https://www.eea.europa.eu/data-and-maps/indicators/average-co2-emissions-from-motor-vehicles/assessment-1 +eu_new_car_2020,107.5,km travelled by car,0,inf,https://www.eea.europa.eu/ims/co2-performance-of-new-passenger \ No newline at end of file diff --git a/carbontracker/emissions/conversion/co2eq.py b/carbontracker/emissions/conversion/co2eq.py index 31dbf65..eadb60b 100644 --- a/carbontracker/emissions/conversion/co2eq.py +++ b/carbontracker/emissions/conversion/co2eq.py @@ -9,9 +9,8 @@ def convert(g_co2eq): """Converts gCO2eq to all units in range specified by CONVERSION_FILE.""" conversions = [] - df = CONVERSION_DF - converters = df.loc[(df["lowerbound"] <= g_co2eq) - & (df["upperbound"] >= g_co2eq)] + df = CONVERSION_DF.iloc[-1:] # Use latest conversion factors + converters = df.loc[(df["lowerbound"] <= g_co2eq) & (df["upperbound"] >= g_co2eq)] for _, converter in converters.iterrows(): units = g_co2eq / converter["gCO2eq_per_unit"] conversions.append((units, converter["unit"])) diff --git a/carbontracker/emissions/intensity/fetcher.py b/carbontracker/emissions/intensity/fetcher.py index 444ffcd..a0e085b 100644 --- a/carbontracker/emissions/intensity/fetcher.py +++ b/carbontracker/emissions/intensity/fetcher.py @@ -4,6 +4,8 @@ Information about the geocoder object g_location available here: https://geocoder.readthedocs.io """ + + class IntensityFetcher: __metaclass__ = ABCMeta @@ -15,7 +17,7 @@ def suitable(self, g_location): @abstractmethod def carbon_intensity(self, g_location, time_dur=None): """ - Returns the carbon intensity by location and duration (s). - If the API supports predicted intensities time_dur can be used. + Returns the carbon intensity by location and duration (s). + If the API supports predicted intensities time_dur can be used. """ raise NotImplementedError diff --git a/carbontracker/emissions/intensity/fetchers/carbonintensitygb.py b/carbontracker/emissions/intensity/fetchers/carbonintensitygb.py index 5cb2c35..d667135 100644 --- a/carbontracker/emissions/intensity/fetchers/carbonintensitygb.py +++ b/carbontracker/emissions/intensity/fetchers/carbonintensitygb.py @@ -22,8 +22,7 @@ def carbon_intensity(self, g_location, time_dur=None): try: postcode = g_location.postal - ci = self._carbon_intensity_gb_regional(postcode, - time_dur=time_dur) + ci = self._carbon_intensity_gb_regional(postcode, time_dur=time_dur) except: ci = self._carbon_intensity_gb_national(time_dur=time_dur) @@ -32,7 +31,7 @@ def carbon_intensity(self, g_location, time_dur=None): return carbon_intensity def _carbon_intensity_gb_regional(self, postcode, time_dur=None): - """"Retrieves forecasted carbon intensity (gCO2eq/kWh) in GB by + """ "Retrieves forecasted carbon intensity (gCO2eq/kWh) in GB by postcode.""" url = f"{API_URL}/regional" @@ -58,8 +57,7 @@ def _carbon_intensity_gb_regional(self, postcode, time_dur=None): return carbon_intensity def _carbon_intensity_gb_national(self, time_dur=None): - """Retrieves forecasted national carbon intensity (gCO2eq/kWh) in GB. - """ + """Retrieves forecasted national carbon intensity (gCO2eq/kWh) in GB.""" url = f"{API_URL}/intensity" if time_dur is not None: diff --git a/carbontracker/emissions/intensity/fetchers/co2signal.py b/carbontracker/emissions/intensity/fetchers/co2signal.py index 7385837..47c4789 100644 --- a/carbontracker/emissions/intensity/fetchers/co2signal.py +++ b/carbontracker/emissions/intensity/fetchers/co2signal.py @@ -16,20 +16,15 @@ def carbon_intensity(self, g_location, time_dur=None): carbon_intensity = intensity.CarbonIntensity(g_location=g_location) try: - ci = self._carbon_intensity_by_location(lon=g_location.lng, - lat=g_location.lat) + ci = self._carbon_intensity_by_location(lon=g_location.lng, lat=g_location.lat) except: - ci = self._carbon_intensity_by_location( - country_code=g_location.country) + ci = self._carbon_intensity_by_location(country_code=g_location.country) carbon_intensity.carbon_intensity = ci return carbon_intensity - def _carbon_intensity_by_location(self, - lon=None, - lat=None, - country_code=None): + def _carbon_intensity_by_location(self, lon=None, lat=None, country_code=None): """Retrieves carbon intensity (gCO2eq/kWh) by location. Note: @@ -48,11 +43,11 @@ def _carbon_intensity_by_location(self, expected unit. """ if country_code is not None: - params = (("countryCode", country_code), ) - assert (lon is None and lat is None) + params = (("countryCode", country_code),) + assert lon is None and lat is None elif lon is not None and lat is not None: params = (("lon", lon), ("lat", lat)) - assert (country_code is None) + assert country_code is None headers = {"auth-token": AUTH_TOKEN} @@ -63,8 +58,6 @@ def _carbon_intensity_by_location(self, unit = response["units"]["carbonIntensity"] expected_unit = "gCO2eq/kWh" if unit != expected_unit: - raise exceptions.UnitError( - expected_unit, unit, - "Carbon intensity query returned the wrong unit.") + raise exceptions.UnitError(expected_unit, unit, "Carbon intensity query returned the wrong unit.") return carbon_intensity diff --git a/carbontracker/emissions/intensity/fetchers/energidataservice.py b/carbontracker/emissions/intensity/fetchers/energidataservice.py index 8f0ef6f..755f4d7 100644 --- a/carbontracker/emissions/intensity/fetchers/energidataservice.py +++ b/carbontracker/emissions/intensity/fetchers/energidataservice.py @@ -2,6 +2,7 @@ import requests import numpy as np +import time from carbontracker import exceptions from carbontracker.emissions.intensity.fetcher import IntensityFetcher @@ -14,7 +15,6 @@ def suitable(self, g_location): def carbon_intensity(self, g_location, time_dur=None): carbon_intensity = intensity.CarbonIntensity(g_location=g_location) - if time_dur is None: ci = self._emission_current() else: @@ -27,10 +27,7 @@ def carbon_intensity(self, g_location, time_dur=None): def _emission_current(self): def url_creator(area): - return ("https://api.energidataservice.dk/datastore_search_sql?" - """sql=SELECT co2."CO2Emission" from "co2emis" as co2 """ - f"""WHERE co2."PriceArea" = '{area}' ORDER BY """ - """co2."Minutes5UTC" DESC LIMIT 1""") + return 'https://api.energidataservice.dk/dataset/CO2emis?filter={"PriceArea":"' + area + '"}' areas = ["DK1", "DK2"] carbon_intensities = [] @@ -40,22 +37,16 @@ def url_creator(area): response = requests.get(url) if not response.ok: raise exceptions.CarbonIntensityFetcherError(response.json()) - carbon_intensities.append( - response.json()["result"]["records"][0]["CO2Emission"]) - + carbon_intensities.append(response.json()["records"][0]["CO2Emission"]) return np.mean(carbon_intensities) def _emission_prognosis(self, time_dur): from_str, to_str = self._interval(time_dur=time_dur) - url = ("https://api.energidataservice.dk/datastore_search_sql?" - """sql=SELECT co2."CO2Emission" from "co2emisprog" as co2 """ - f"""WHERE co2."Minutes5UTC" > timestamp'{from_str}' AND """ - f"""co2."Minutes5UTC" < timestamp'{to_str}' """ - """ORDER BY co2."Minutes5UTC" DESC""") + url = "https://api.energidataservice.dk/dataset/CO2Emis?start={" + from_str + "&end={" + to_str + "}&limit=4" response = requests.get(url) if not response.ok: raise exceptions.CarbonIntensityFetcherError(response.json()) - data = response.json()["result"]["records"] + data = response.json()["records"] carbon_intensities = [record["CO2Emission"] for record in data] return np.mean(carbon_intensities) @@ -69,7 +60,6 @@ def _interval(self, time_dur): def _nearest_5_min(self, time): date_format = "%Y-%m-%d %H:%M" nearest_5_min = time - datetime.timedelta( - minutes=time.minute % 5, - seconds=time.second, - microseconds=time.microsecond) + minutes=time.minute % 5, seconds=time.second, microseconds=time.microsecond + ) return nearest_5_min.strftime(date_format) diff --git a/carbontracker/emissions/intensity/intensity.py b/carbontracker/emissions/intensity/intensity.py index 30fdfd3..62d9bd7 100644 --- a/carbontracker/emissions/intensity/intensity.py +++ b/carbontracker/emissions/intensity/intensity.py @@ -1,8 +1,9 @@ import traceback import geocoder - +import pkg_resources import numpy as np +import pandas as pd from carbontracker import loggerutil from carbontracker import exceptions @@ -12,15 +13,61 @@ from carbontracker.emissions.intensity.fetchers import energidataservice +def get_default_intensity(): + """Retrieve static default carbon intensity value based on location.""" + try: + g_location = geocoder.ip("me") + if not g_location.ok: + raise exceptions.IPLocationError( + "Failed to retrieve location based on IP." + ) + address = g_location.address + country = g_location.country + except Exception as err: + address = "Unknown" + country = "Unknown" + + try: + carbon_intensities_df = pd.read_csv( + pkg_resources.resource_filename( + "carbontracker", "data/carbon-intensities.csv" + ) + ) + intensity_row = carbon_intensities_df[ + carbon_intensities_df["alpha-2"] == country + ].iloc[0] + intensity = intensity_row["Carbon intensity of electricity (gCO2/kWh)"] + year = intensity_row["Year"] + description = f"Defaulted to average carbon intensity for {country} in {year} of {intensity:.2f} gCO2/kWh." + except Exception as err: + intensity = constants.WORLD_2019_CARBON_INTENSITY + description = f"Defaulted to average carbon intensity for world in 2019 of {intensity:.2f} gCO2/kWh." + + description = ( + f"Live carbon intensity could not be fetched at detected location: {address}. " + + description + ) + default_intensity = { + "carbon_intensity": intensity, + "description": description, + } + return default_intensity + + +default_intensity = get_default_intensity() + + class CarbonIntensity: - def __init__(self, - carbon_intensity=None, - g_location=None, - address="UNDETECTED", - message=None, - success=False, - is_prediction=False, - default=False): + def __init__( + self, + carbon_intensity=None, + g_location=None, + address="UNDETECTED", + message=None, + success=False, + is_prediction=False, + default=False, + ): self.carbon_intensity = carbon_intensity self.g_location = g_location self.address = address @@ -28,10 +75,17 @@ def __init__(self, self.success = success self.is_prediction = is_prediction if default: - self._set_as_default() + self.set_as_default() + + def set_as_default(self): + self.set_default_intensity() + self.set_default_message() - def _set_as_default(self): - self.carbon_intensity = constants.EU_28_2017_CARBON_INTENSITY + def set_default_intensity(self): + self.carbon_intensity = default_intensity["carbon_intensity"] + + def set_default_message(self): + self.message = default_intensity["description"] def carbon_intensity(logger, time_dur=None): @@ -39,7 +93,7 @@ def carbon_intensity(logger, time_dur=None): fetchers = [ energidataservice.EnergiDataService(), carbonintensitygb.CarbonIntensityGB(), - co2signal.CO2Signal() + co2signal.CO2Signal(), ] carbon_intensity = CarbonIntensity(default=True) @@ -48,7 +102,8 @@ def carbon_intensity(logger, time_dur=None): g_location = geocoder.ip("me") if not g_location.ok: raise exceptions.IPLocationError( - "Failed to retrieve location based on IP.") + "Failed to retrieve location based on IP." + ) carbon_intensity.address = g_location.address except: err_str = traceback.format_exc() @@ -78,20 +133,18 @@ def set_carbon_intensity_message(ci, time_dur): ci.message = ( "Carbon intensity for the next " f"{loggerutil.convert_to_timestring(time_dur)} is " - f"predicted to be {ci.carbon_intensity:.2f} gCO2/kWh") + f"predicted to be {ci.carbon_intensity:.2f} gCO2/kWh" + ) else: - ci.message = ("Failed to predict carbon intensity for the next " - f"{loggerutil.convert_to_timestring(time_dur)}, " - f"fallback on average measured intensity") - else: - if ci.success: ci.message = ( - f"Current carbon intensity is {ci.carbon_intensity:.2f} gCO2/kWh" + "Failed to predict carbon intensity for the next " + f"{loggerutil.convert_to_timestring(time_dur)}, " + f"fallback on average measured intensity" ) + else: + if ci.success: + ci.message = f"Current carbon intensity is {ci.carbon_intensity:.2f} gCO2/kWh" else: - ci.message = ( - f"Live carbon intensity could not be fetched at detected location: {ci.address}. " - "Defaulted to average carbon intensity for EU-28 in 2017 of " - f"{constants.EU_28_2017_CARBON_INTENSITY:.2f} gCO2/kWh.") + ci.set_default_message() return ci.message += f" at detected location: {ci.address}." diff --git a/carbontracker/exceptions.py b/carbontracker/exceptions.py index 633649a..ce724c4 100644 --- a/carbontracker/exceptions.py +++ b/carbontracker/exceptions.py @@ -1,21 +1,37 @@ class NoComponentsAvailableError(Exception): def __init__( - self, - msg=("No components were available. CarbonTracker supports Intel " - "CPUs with the RAPL interface and NVIDIA GPUs."), - *args, - **kwargs): + self, + msg=( + "No components were available. CarbonTracker supports Intel " + "CPUs with the RAPL interface and NVIDIA GPUs." + ), + *args, + **kwargs + ): super().__init__(msg, *args, **kwargs) class UnitError(Exception): """Raised when the expected unit does not match the received unit.""" + def __init__(self, expected_unit, received_unit, message): self.expected_unit = expected_unit self.received_unit = received_unit self.message = message +class IntelRaplPermissionError(Exception): + """Raised when an Intel RAPL permission error occurs.""" + + pass + + +class GPUPowerUsageRetrievalError(Exception): + """Raised when a GPU power usage retrieval error occurs.""" + + pass + + class CarbonIntensityFetcherError(Exception): pass diff --git a/carbontracker/loggerutil.py b/carbontracker/loggerutil.py index 89a1c51..4940dfd 100644 --- a/carbontracker/loggerutil.py +++ b/carbontracker/loggerutil.py @@ -3,6 +3,7 @@ import sys import pathlib import datetime +import importlib_metadata as metadata from carbontracker import constants @@ -42,15 +43,22 @@ def formatTime(self, record, datefmt=None): class Logger: - def __init__(self, log_dir=None, verbose=0): - self.logger, self.logger_output, self.logger_err = self._setup( - log_dir=log_dir) + def __init__(self, log_dir=None, verbose=0, log_prefix=""): + self.logger, self.logger_output, self.logger_err = self._setup(log_dir=log_dir, log_prefix=log_prefix) self._log_initial_info() self.verbose = verbose self.msg_prepend = "CarbonTracker: " - def _setup(self, log_dir=None): - logger = logging.getLogger("carbontracker") + def _setup(self, log_dir=None, log_prefix=""): + # Set up logging such that we don't get duplicate messages. + # Get unique logger name. This is needed because we might have multiple instances of carbontracker running. + # Add log_prefix if provided. + if log_prefix: + log_prefix += "_" + + logger_name = f"{log_prefix}{os.getpid()}" + logger = logging.getLogger(logger_name) + logger_err = logging.getLogger("carbontracker.err") logger_output = logging.getLogger("carbontracker.output") # Disable output logging from propagating to parent loggers. @@ -70,8 +78,7 @@ def _setup(self, log_dir=None): # Add error logging to console. ce = logging.StreamHandler(stream=sys.stdout) - ce_formatter = logging.Formatter( - "CarbonTracker: {levelname} - {message}", style="{") + ce_formatter = logging.Formatter("CarbonTracker: {levelname} - {message}", style="{") ce.setLevel(logging.INFO) ce.setFormatter(ce_formatter) logger_err.addHandler(ce) @@ -88,24 +95,20 @@ def _setup(self, log_dir=None): f_formatter = TrackerFormatter(fmt="%(asctime)s - %(message)s") # Add output logging to file. - fh = logging.FileHandler( - f"{log_dir}/{date}_carbontracker_output.log") + fh = logging.FileHandler(f"{log_dir}/{logger_name}_{date}_carbontracker_output.log") fh.setLevel(logging.INFO) fh.setFormatter(f_formatter) logger_output.addHandler(fh) # Add standard logging to file. - f = logging.FileHandler(f"{log_dir}/{date}_carbontracker.log") + f = logging.FileHandler(f"{log_dir}/{logger_name}_{date}_carbontracker.log") f.setLevel(logging.DEBUG) f.setFormatter(f_formatter) logger.addHandler(f) # Add error logging to file. - err_formatter = logging.Formatter( - "{asctime} - {threadName} - {levelname} - {message}", - style="{") - f_err = logging.FileHandler( - f"{log_dir}/{date}_carbontracker_err.log", delay=True) + err_formatter = logging.Formatter("{asctime} - {threadName} - {levelname} - {message}", style="{") + f_err = logging.FileHandler(f"{log_dir}/{logger_name}_{date}_carbontracker_err.log", delay=True) f_err.setLevel(logging.DEBUG) f_err.setFormatter(err_formatter) logger_err.addHandler(f_err) @@ -113,15 +116,12 @@ def _setup(self, log_dir=None): return logger, logger_output, logger_err def _log_initial_info(self): - here = os.path.abspath(os.path.dirname(__file__)) - about = {} - with open(os.path.join(here, "__version__.py")) as f: - exec(f.read(), about) - self.info(f"{about['__title__']} version {about['__version__']}") + self.info(f"{__package__} version {metadata.version(__package__)}") self.info( "Only predicted and actual consumptions are multiplied by a PUE " - f"coefficient of {constants.PUE} (Rhonda Ascierto, 2019, Uptime " - "Institute Global Data Center Survey).") + f"coefficient of {constants.PUE_2022} (Rhonda Ascierto, 2022, Uptime " + "Institute Global Data Center Survey)." + ) def output(self, msg, verbose_level=0): if self.verbose >= verbose_level: diff --git a/carbontracker/parser.py b/carbontracker/parser.py index 55fdb8d..b1050a7 100644 --- a/carbontracker/parser.py +++ b/carbontracker/parser.py @@ -25,7 +25,7 @@ def parse_all_logs(log_dir): "components": parse_logs(log_dir, std, out), "early_stop": early_stop, "actual": actual, - "pred": pred + "pred": pred, } logs.append(entry) @@ -46,10 +46,8 @@ def parse_logs(log_dir, std_log_file=None, output_log_file=None): components = {} for comp, devices in devices.items(): - power_usages = np.array( - avg_power_usages[comp]) if len(avg_power_usages) != 0 else None - durations = np.array( - epoch_durations) if len(epoch_durations) != 0 else None + power_usages = np.array(avg_power_usages[comp]) if len(avg_power_usages) != 0 else None + durations = np.array(epoch_durations) if len(epoch_durations) != 0 else None if power_usages is None or durations is None: energy_usages = None else: @@ -58,7 +56,7 @@ def parse_logs(log_dir, std_log_file=None, output_log_file=None): "avg_power_usages (W)": power_usages, "avg_energy_usages (J)": energy_usages, "epoch_durations (s)": durations, - "devices": devices + "devices": devices, } components[comp] = measurements @@ -97,7 +95,7 @@ def extract_measurements(match): "duration (s)": duration, "energy (kWh)": energy, "co2eq (g)": co2eq, - "equivalents": equivalents + "equivalents": equivalents, } return measurements @@ -108,8 +106,7 @@ def get_time(time_str): if not match: return None match = match.groups() - duration = float(match[0]) * 60 * 60 + float(match[1]) * 60 + float( - match[2]) + duration = float(match[0]) * 60 * 60 + float(match[1]) * 60 + float(match[2]) return duration @@ -117,8 +114,7 @@ def print_aggregate(log_dir): """Prints the aggregate consumption in all log files in log_dir.""" energy, co2eq, equivalents = aggregate_consumption(log_dir) - equivalents_p = " or ".join( - [f"{v:.3f} {k}" for k, v in equivalents.items()]) + equivalents_p = " or ".join([f"{v:.3f} {k}" for k, v in equivalents.items()]) printable = f"The training of models in this work is estimated to use {energy:.3f} kWh of electricity contributing to {co2eq / 1000:.3f} kg of CO2eq. " if equivalents_p: @@ -201,9 +197,9 @@ def parse_equivalents(lines): def get_all_logs(log_dir): """Get all output and standard logs in log_dir.""" files = [ - os.path.join(log_dir, f) for f in os.listdir(log_dir) - if os.path.isfile(os.path.join(log_dir, f)) - and os.path.getsize(os.path.join(log_dir, f)) > 0 + os.path.join(log_dir, f) + for f in os.listdir(log_dir) + if os.path.isfile(os.path.join(log_dir, f)) and os.path.getsize(os.path.join(log_dir, f)) > 0 ] output_re = re.compile(r".*carbontracker_output.log") std_re = re.compile(r".*carbontracker.log") @@ -211,19 +207,20 @@ def get_all_logs(log_dir): std_logs = sorted(list(filter(std_re.match, files))) if len(output_logs) != len(std_logs): # Try to remove the files with no matching output/std logs - op_fn = [f.split('_')[0] for f in output_logs] - std_fn = [f.split('_')[0] for f in std_logs] + op_fn = [f.split("_")[0] for f in output_logs] + std_fn = [f.split("_")[0] for f in std_logs] if len(std_logs) > len(output_logs): missing_logs = list(set(std_fn) - set(op_fn)) - [std_logs.remove(f+'_carbontracker.log') for f in missing_logs] + [std_logs.remove(f + "_carbontracker.log") for f in missing_logs] else: missing_logs = list(set(op_fn) - set(std_fn)) - [output_logs.remove(f+'carbontracker_output.log') for f in missing_logs] + [output_logs.remove(f + "carbontracker_output.log") for f in missing_logs] ### Even after removel if then there is a mismatch, then throw the error if len(output_logs) != len(std_logs): raise exceptions.MismatchedLogFilesError( f"Found {len(output_logs)} output logs and {len(std_logs)} " - "standard logs. Expected equal number of logs.") + "standard logs. Expected equal number of logs." + ) return output_logs, std_logs @@ -239,7 +236,7 @@ def get_devices(std_log_data): devices = {} for comp, device_str in device_matches: - dev = device_str.split(',') + dev = device_str.split(",") devices[comp.lower()] = dev return devices @@ -249,9 +246,7 @@ def get_epoch_durations(std_log_data): """Retrieve epoch durations (s).""" duration_re = re.compile(r"Duration: (\d+):(\d{2}):(\d\d?(?:.\d{2})?)") matches = re.findall(duration_re, std_log_data) - epoch_durations = [ - float(h) * 60 * 60 + float(m) * 60 + float(s) for h, m, s in matches - ] + epoch_durations = [float(h) * 60 * 60 + float(m) * 60 + float(s) for h, m, s in matches] return epoch_durations @@ -280,10 +275,7 @@ def get_avg_power_usages(std_log_data): def get_most_recent_logs(log_dir): """Retrieve the file names of the most recent standard and output logs.""" # Get all files in log_dir. - files = [ - os.path.join(log_dir, f) for f in os.listdir(log_dir) - if os.path.isfile(os.path.join(log_dir, f)) - ] + files = [os.path.join(log_dir, f) for f in os.listdir(log_dir) if os.path.isfile(os.path.join(log_dir, f))] # Find output and standard logs and sort by modified date. output_re = re.compile(r".*carbontracker_output.log") std_re = re.compile(r".*carbontracker.log") diff --git a/carbontracker/tracker.py b/carbontracker/tracker.py index cf1a392..8b4a69b 100644 --- a/carbontracker/tracker.py +++ b/carbontracker/tracker.py @@ -20,6 +20,7 @@ class CarbonIntensityThread(Thread): """Sleeper thread to update Carbon Intensity every 15 minutes.""" + def __init__(self, logger, stop_event, update_interval=900): super(CarbonIntensityThread, self).__init__() self.name = "CarbonIntensityThread" @@ -42,16 +43,12 @@ def run(self): def _fetch_carbon_intensity(self): ci = intensity.carbon_intensity(self.logger) - if ci.success and isinstance( - ci.carbon_intensity, - (int, float)) and not np.isnan(ci.carbon_intensity): + if ci.success and isinstance(ci.carbon_intensity, (int, float)) and not np.isnan(ci.carbon_intensity): self.carbon_intensities.append(ci) def predict_carbon_intensity(self, pred_time_dur): ci = intensity.carbon_intensity(self.logger, time_dur=pred_time_dur) - weighted_intensities = [ - ci.carbon_intensity for ci in self.carbon_intensities - ] + [ci.carbon_intensity] + weighted_intensities = [ci.carbon_intensity for ci in self.carbon_intensities] + [ci.carbon_intensity] # Account for measured intensities by taking weighted average. weight = math.floor(pred_time_dur / self.update_interval) @@ -73,22 +70,22 @@ def average_carbon_intensity(self): self.carbon_intensities.append(ci) # Ensure that we have some carbon intensities. - assert (self.carbon_intensities) + assert self.carbon_intensities location = self.carbon_intensities[-1].address intensities = [ci.carbon_intensity for ci in self.carbon_intensities] avg_intensity = np.mean(intensities) msg = ( f"Average carbon intensity during training was {avg_intensity:.2f}" - f" gCO2/kWh at detected location: {location}.") - avg_ci = intensity.CarbonIntensity(carbon_intensity=avg_intensity, - message=msg, - success=True) + f" gCO2/kWh at detected location: {location}." + ) + avg_ci = intensity.CarbonIntensity(carbon_intensity=avg_intensity, message=msg, success=True) self.logger.info( "Carbon intensities (gCO2/kWh) fetched every " f"{self.update_interval} s at detected location {location}: " - f"{intensities}") + f"{intensities}" + ) self.logger.info(avg_ci.message) self.logger.output(avg_ci.message, verbose_level=2) @@ -97,12 +94,8 @@ def average_carbon_intensity(self): class CarbonTrackerThread(Thread): """Thread to fetch consumptions""" - def __init__(self, - components, - logger, - ignore_errors, - delete, - update_interval=10): + + def __init__(self, components, logger, ignore_errors, delete, update_interval=10): super(CarbonTrackerThread, self).__init__() self.name = "CarbonTrackerThread" self.delete = delete @@ -172,8 +165,7 @@ def _log_components_info(self): def _log_epoch_measurements(self): self.logger.info(f"Epoch {self.epoch_counter}:") duration = self.epoch_times[-1] - self.logger.info( - f"Duration: {loggerutil.convert_to_timestring(duration, True)}") + self.logger.info(f"Duration: {loggerutil.convert_to_timestring(duration, True)}") for comp in self.components: if comp.power_usages and comp.power_usages[-1]: power_avg = np.mean(comp.power_usages[-1], axis=0) @@ -182,17 +174,12 @@ def _log_epoch_measurements(self): # previous measurement. # TODO: Use semaphores to wait for measurement to finish. if np.isnan(power_avg).all(): - power_avg = np.mean( - comp.power_usages[-2], - axis=0) if len(comp.power_usages) >= 2 else None + power_avg = np.mean(comp.power_usages[-2], axis=0) if len(comp.power_usages) >= 2 else None else: - self.logger.err_warn( - "Epoch duration is too short for a measurement to be " - "collected.") + self.logger.err_warn("Epoch duration is too short for a measurement to be " "collected.") power_avg = None - self.logger.info( - f"Average power usage (W) for {comp.name}: {power_avg}") + self.logger.info(f"Average power usage (W) for {comp.name}: {power_avg}") def _components_remove_unavailable(self): self.components = [cmp for cmp in self.components if cmp.available()] @@ -219,13 +206,12 @@ def total_energy_per_epoch(self): for comp in self.components: energy_usage = comp.energy_usage(self.epoch_times) total_energy += energy_usage - return total_energy * constants.PUE + return total_energy * constants.PUE_2022 def _handle_error(self, error): err_str = traceback.format_exc() if self.ignore_errors: - err_str = (f"Ignored error: {err_str}Continued training without " - "monitoring...") + err_str = f"Ignored error: {err_str}Continued training without " "monitoring..." self.logger.err_critical(err_str) self.logger.output(err_str) @@ -238,29 +224,30 @@ def _handle_error(self, error): class CarbonTracker: - def __init__(self, - epochs, - epochs_before_pred=1, - monitor_epochs=1, - update_interval=10, - interpretable=True, - stop_and_confirm=False, - ignore_errors=False, - components="all", - devices_by_pid=False, - log_dir=None, - verbose=1, - decimal_precision=6): + def __init__( + self, + epochs, + epochs_before_pred=1, + monitor_epochs=1, + update_interval=10, + interpretable=True, + stop_and_confirm=False, + ignore_errors=False, + components="all", + devices_by_pid=False, + log_dir=None, + log_file_prefix="", + verbose=1, + decimal_precision=6, + ): self.epochs = epochs - self.epochs_before_pred = (epochs if epochs_before_pred < 0 else - epochs_before_pred) - self.monitor_epochs = (epochs - if monitor_epochs < 0 else monitor_epochs) - if (self.monitor_epochs == 0 - or self.monitor_epochs < self.epochs_before_pred): + self.epochs_before_pred = epochs if epochs_before_pred < 0 else epochs_before_pred + self.monitor_epochs = epochs if monitor_epochs < 0 else monitor_epochs + if self.monitor_epochs == 0 or self.monitor_epochs < self.epochs_before_pred: raise ValueError( "Argument monitor_epochs expected a value in " - f"{{-1, >0, >=epochs_before_pred}}, got {monitor_epochs}.") + f"{{-1, >0, >=epochs_before_pred}}, got {monitor_epochs}." + ) self.interpretable = interpretable self.stop_and_confirm = stop_and_confirm self.ignore_errors = ignore_errors @@ -270,19 +257,16 @@ def __init__(self, try: pids = self._get_pids() - self.logger = loggerutil.Logger(log_dir=log_dir, verbose=verbose) + self.logger = loggerutil.Logger(log_dir=log_dir, verbose=verbose, log_prefix=log_file_prefix) self.tracker = CarbonTrackerThread( delete=self._delete, - components=component.create_components( - components=components, - pids=pids, - devices_by_pid=devices_by_pid), + components=component.create_components(components=components, pids=pids, devices_by_pid=devices_by_pid), logger=self.logger, ignore_errors=ignore_errors, - update_interval=update_interval) + update_interval=update_interval, + ) self.intensity_stopper = Event() - self.intensity_updater = CarbonIntensityThread( - self.logger, self.intensity_stopper) + self.intensity_updater = CarbonIntensityThread(self.logger, self.intensity_stopper) except Exception as e: self._handle_error(e) @@ -321,9 +305,7 @@ def stop(self): stopping, where not all monitor_epochs have been run.""" if self.deleted: return - self.logger.info( - f"Training was interrupted before all {self.monitor_epochs} epochs" - " were monitored.") + self.logger.info(f"Training was interrupted before all {self.monitor_epochs} epochs" " were monitored.") # Decrement epoch_counter with 1 since measurements for ultimate epoch # was interrupted and is not accounted for. self.epoch_counter -= 1 @@ -337,16 +319,14 @@ def set_api_keys(self, api_dict): if name.lower() == "co2signal": co2signal.AUTH_TOKEN = key else: - raise exceptions.FetcherNameError( - f"Invalid API name '{name}' given.") + raise exceptions.FetcherNameError(f"Invalid API name '{name}' given.") except Exception as e: self._handle_error(e) def _handle_error(self, error): err_str = traceback.format_exc() if self.ignore_errors: - err_str = (f"Ignored error: {err_str}Continued training without " - "monitoring...") + err_str = f"Ignored error: {err_str}Continued training without " "monitoring..." self.logger.err_critical(err_str) self.logger.output(err_str) @@ -359,10 +339,12 @@ def _handle_error(self, error): def _output_energy(self, description, time, energy, co2eq, conversions): precision = self.decimal_precision - output = (f"\n{description}\n" - f"\tTime:\t{loggerutil.convert_to_timestring(time)}\n" - f"\tEnergy:\t{energy:.{precision}f} kWh\n" - f"\tCO2eq:\t{co2eq:.{precision}f} g") + output = ( + f"\n{description}\n" + f"\tTime:\t{loggerutil.convert_to_timestring(time)}\n" + f"\tEnergy:\t{energy:.{precision}f} kWh\n" + f"\tCO2eq:\t{co2eq:.{precision}f} g" + ) if conversions: conv_str = "\n\tThis is equivalent to:" @@ -381,26 +363,23 @@ def _output_actual(self): _co2eq = self._co2eq(energy) conversions = co2eq.convert(_co2eq) if self.interpretable else None - self._output_energy( - f"Actual consumption for {self.epoch_counter} epoch(s):", time, - energy, _co2eq, conversions) + self._output_energy(f"Actual consumption for {self.epoch_counter} epoch(s):", time, energy, _co2eq, conversions) def _output_pred(self): """Output predicted usage for full training epochs.""" epoch_energy_usages = self.tracker.total_energy_per_epoch() epoch_times = self.tracker.epoch_times - pred_energy = predictor.predict_energy(self.epochs, - epoch_energy_usages) + pred_energy = predictor.predict_energy(self.epochs, epoch_energy_usages) pred_time = predictor.predict_time(self.epochs, epoch_times) pred_co2eq = self._co2eq(pred_energy, pred_time) conversions = co2eq.convert(pred_co2eq) if self.interpretable else None self._output_energy( - f"Predicted consumption for {self.epochs} epoch(s):", pred_time, - pred_energy, pred_co2eq, conversions) + f"Predicted consumption for {self.epochs} epoch(s):", pred_time, pred_energy, pred_co2eq, conversions + ) def _co2eq(self, energy_usage, pred_time_dur=None): - """"Returns the CO2eq (g) of the energy usage (kWh).""" + """ "Returns the CO2eq (g) of the energy usage (kWh).""" if pred_time_dur: ci = self.intensity_updater.predict_carbon_intensity(pred_time_dur) else: @@ -438,6 +417,5 @@ def _delete(self): def _get_pids(self): """Get current process id and all children process ids.""" process = psutil.Process() - pids = [process.pid - ] + [child.pid for child in process.children(recursive=True)] + pids = [process.pid] + [child.pid for child in process.children(recursive=True)] return pids diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..b116e99 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,26 @@ +[build-system] +requires = ["setuptools>=61.0", "setuptools_scm[toml]>=6.2"] +build-backend = "setuptools.build_meta" + +[project] +name = "carbontracker" +description = "Tracking and predicting the carbon footprint of training deep learning models." +readme = "README.md" +requires-python = ">=3.7" +license = { file = "LICENSE" } +authors = [ + { name = "Lasse F. Wolff Anthony", email = "lfwa@proton.me" }, + { name = "Benjamin Kanding" }, +] +classifiers = [ + "License :: OSI Approved :: MIT License", + "Programming Language :: Python :: 3", +] +dependencies = ["requests", "numpy", "pandas", "geocoder", "pynvml", "psutil", "importlib-metadata"] +dynamic = ["version"] + +[project.urls] +homepage = "https://github.com/lfwa/carbontracker" +repository = "https://github.com/lfwa/carbontracker" + +[tool.setuptools_scm] diff --git a/requirements.txt b/requirements.txt deleted file mode 100644 index d6e1198..0000000 --- a/requirements.txt +++ /dev/null @@ -1 +0,0 @@ --e . diff --git a/scripts/create_carbon_intensity_csv.py b/scripts/create_carbon_intensity_csv.py new file mode 100644 index 0000000..488b99c --- /dev/null +++ b/scripts/create_carbon_intensity_csv.py @@ -0,0 +1,45 @@ +"""Script to create a short csv file with carbon intensity data. + +Uses csv from https://ourworldindata.org/grapher/carbon-intensity-electricity. +""" +import argparse + +import pycountry +import pandas as pd + + +def country_to_alpha2(name): + try: + return pycountry.countries.get(name=name).alpha_2 + except Exception as err: + return None + + +def main(args): + intensity_df = pd.read_csv(args.input_csv) + intensity_df.sort_values(by="Year", inplace=True, ascending=False) + + # Add ISO 3166-1 alpha-2 country codes. + intensity_df["alpha-2"] = intensity_df["Entity"].apply(country_to_alpha2) + + # Drop rows with None or NaN. + intensity_df.dropna(subset=["alpha-2"], inplace=True) + + # Only keep most recent year from every country. + # Note that no country shares alpha-2 code, so should not lose any info. + intensity_df = intensity_df.groupby("alpha-2").first() + + # Save to csv. + intensity_df.to_csv(args.output_csv) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument( + "--input_csv", help="Path to input csv.", required=True + ) + parser.add_argument( + "--output_csv", help="Path to output csv.", required=True + ) + args = parser.parse_args() + main(args) diff --git a/setup.py b/setup.py deleted file mode 100644 index a8b6867..0000000 --- a/setup.py +++ /dev/null @@ -1,32 +0,0 @@ -import os - -from setuptools import setup, find_packages - -here = os.path.abspath(os.path.dirname(__file__)) - -with open("README.md") as f: - readme = f.read() - -about = {} -with open(os.path.join(here, "carbontracker", "__version__.py")) as f: - exec(f.read(), about) - -setup(name=about["__title__"], - version=about["__version__"], - description=about["__description__"], - long_description=readme, - long_description_content_type='text/markdown', - author=about["__author__"], - url=about["__url__"], - license=about["__license__"], - packages=find_packages(exclude=('tests', 'docs')), - include_package_data=True, - install_requires=[ - "geocoder", - "numpy", - "pandas", - "requests", - "pynvml", - "psutil", - ], - python_requires=">=3.6")