diff --git a/.gitignore b/.gitignore index 9954c405..c2058d0a 100644 --- a/.gitignore +++ b/.gitignore @@ -119,6 +119,5 @@ docs/generated/ #beobench beobench_results* notebooks/archive -*beo.yaml -*beo.yml +.beobench.yml perf_tests* \ No newline at end of file diff --git a/CITATION.cff b/CITATION.cff new file mode 100644 index 00000000..6b0cdf09 --- /dev/null +++ b/CITATION.cff @@ -0,0 +1,22 @@ +# This CITATION.cff file was generated with cffinit. +# Visit https://bit.ly/cffinit to generate yours today! + +cff-version: 1.2.0 +title: >- + Beobench: A Toolkit for Unified Access to Building + Simulations for Reinforcement Learning +message: >- + If you use this software, please cite it using the + metadata from this file. +type: software +version: 0.5.0 +url: https://github.com/rdnfn/beobench +authors: + - given-names: Arduin + family-names: Findeis + - given-names: Fiodar + family-names: Kazhamiaka + - given-names: Scott + family-names: Jeen + - given-names: Srinivasan + family-names: Keshav diff --git a/CONTRIBUTING.rst b/CONTRIBUTING.rst index 123a555b..6d948477 100644 --- a/CONTRIBUTING.rst +++ b/CONTRIBUTING.rst @@ -42,6 +42,13 @@ beobench could always use more documentation, whether as part of the official beobench docs, in docstrings, or even on the web in blog posts, articles, and such. +To update the API docs, use the following command inside the ``/docs`` directory: + +.. code-block:: + + sphinx-apidoc -f -o . .. + + Submit Feedback ~~~~~~~~~~~~~~~ diff --git a/HISTORY.rst b/HISTORY.rst index ceb4dbe8..4b3300f0 100644 --- a/HISTORY.rst +++ b/HISTORY.rst @@ -2,6 +2,29 @@ History ======= +0.5.0 (2022-05-26) +------------------ + +* Features: + + * Mean and cumulative metrics can now be logged by WandbLogger wrapper. + * Support for automatically running multiple samples/trials of same experiment via ``num_samples`` config parameter. + * Configs named `.beobench.yml` will be automatically parsed when Beobench is run in directory containing such a config. 
This allows users to set e.g. wandb API keys without referring to the config in every Beobench command call. + * Configs from experiments now specify the Beobench version used. When trying to rerun an experiment this version will be checked, and an error thrown if there is a mismatch between installed and requested version. + * Add improved high-level API for getting started. This uses the CLI arguments ``--method``, ``--gym`` and ``--env``. Example usage: ``beobench run --method ppo --gym sinergym --env Eplus-5Zone-hot-continuous-v1``. + +* Improvements + + * Add ``CITATION.cff`` file to make citing software easier. + * By default, docker builds of experiment images are now skipped if an image with tag corresponding to installed Beobench version already exists. + * Remove outdated guides from docs and add yaml configuration description. + * Add support for logging multidimensional actions to wandb. + * Add support for logging summary metrics on every env reset to wandb. + +* Fixes + + * Updated BOPTEST integration to work with current version of Beobench. + 0.4.4 (2022-05-09) ------------------ diff --git a/PYPI_README.rst b/PYPI_README.rst index 923dd5b0..84da9b99 100644 --- a/PYPI_README.rst +++ b/PYPI_README.rst @@ -1,3 +1,5 @@ -A toolbox for benchmarking reinforcement learning (RL) algorithms on building energy optimisation (BEO) problems. Beobench tries to make working on RL for BEO easier: it provides simple access to existing libraries defining BEO problems (such as `BOPTEST `_) and provides a large set of pre-configured RL algorithms. Beobench is *not* a gym library itself - instead it leverages the brilliant work done by many existing gym-type projects and makes their work more easily accessible. +A toolkit providing easy and unified access to building control environments for reinforcement learning (RL). Compared to other domains, `RL environments for building control `_ tend to be more difficult to install and handle. 
Most environments require the user to either manually install a building simulator (e.g. `EnergyPlus `_) or to manually manage Docker containers. This can be tedious. + +Beobench was created to make building control environments easier to use and experiments more reproducible. Beobench uses Docker to manage all environment dependencies in the background so that the user doesn't have to. A standardised API allows the user to easily configure experiments and evaluate new RL agents on building control environments. For more information go to the `documentation `_ and the `GitHub code repository `_. \ No newline at end of file diff --git a/README.rst b/README.rst index db03371e..b929d580 100644 --- a/README.rst +++ b/README.rst @@ -24,7 +24,7 @@ :target: https://opensource.org/licenses/MIT :alt: License -A toolkit providing easy and unified access to building control environments for reinforcement learning (RL). Compared to other domains, `RL environments for building control `_ tend to be more difficult to install and handle. Most environments require the user to either manually install a building simulator (e.g. `EnergyPlus `_) or to manually manage Docker containers. This is tedious. +A toolkit providing easy and unified access to building control environments for reinforcement learning (RL). Compared to other domains, `RL environments for building control `_ tend to be more difficult to install and handle. Most environments require the user to either manually install a building simulator (e.g. `EnergyPlus `_) or to manually manage Docker containers. This can be tedious. Beobench was created to make building control environments easier to use and experiments more reproducible. Beobench uses Docker to manage all environment dependencies in the background so that the user doesn't have to. A standardised API, illustrated in the figure below, allows the user to easily configure experiments and evaluate new RL agents on building control environments. 
@@ -66,7 +66,7 @@ Installation ------------ 1. `Install docker `_ on your machine (if on Linux, check the `additional installation steps `_) -2. Install *beobench* using: +2. Install Beobench using: .. code-block:: console @@ -94,9 +94,23 @@ Experiment configuration To get started with our first experiment, we set up an *experiment configuration*. Experiment configurations -can be given as a yaml file or a Python dictionary. Such a configuration +can be given as a yaml file or a Python dictionary. The configuration fully defines an experiment, configuring everything -from the RL agent to the environment and its wrappers. +from the RL agent to the environment and its wrappers. The figure below illustrates the config structure. + +.. raw:: html + +

+ +.. image:: https://github.com/rdnfn/beobench/raw/2cf961a8135b25c9a66e70d67eea9890ce0b878a/docs/_static/beobench_config_v1.png + :align: center + :width: 350 px + :alt: Beobench + +.. raw:: html + +

+ Let's look at a concrete example. Consider this ``config.yaml`` file: @@ -174,7 +188,7 @@ Execution .. end-qs-sec3 -Given the configuration and agent script above, we can run the experiment using either via the command line: +Given the configuration and agent script above, we can run the experiment either via the command line: .. code-block:: console diff --git a/beobench/__init__.py b/beobench/__init__.py index 4998ceca..07ea095f 100644 --- a/beobench/__init__.py +++ b/beobench/__init__.py @@ -2,7 +2,7 @@ __author__ = """Beobench authors""" __email__ = "-" -__version__ = "0.4.4" +__version__ = "0.5.0" from beobench.utils import restart from beobench.experiment.scheduler import run diff --git a/beobench/beobench_contrib b/beobench/beobench_contrib index 533f3b02..172d5622 160000 --- a/beobench/beobench_contrib +++ b/beobench/beobench_contrib @@ -1 +1 @@ -Subproject commit 533f3b02bd6ca4e612d60bf898ed45d02d40d49c +Subproject commit 172d5622e17db61cfc3d7c9a9fe5002120b57de6 diff --git a/beobench/cli.py b/beobench/cli.py index 77506738..44218686 100644 --- a/beobench/cli.py +++ b/beobench/cli.py @@ -25,6 +25,11 @@ def cli(): default=None, help="Name of RL method to use in experiment.", ) +@click.option( + "--gym", + default=None, + help="Name of gym framework to use in experiment.", +) @click.option( "--env", default=None, @@ -82,9 +87,15 @@ def cli(): default=None, help="For developer use only: location of custom beobench package version.", ) +@click.option( + "--force-build", + is_flag=True, + help="whether to force a re-build, even if image already exists.", +) def run( config: str, method: str, + gym: str, env: str, local_dir: str, wandb_project: str, @@ -96,6 +107,7 @@ def run( no_additional_container: bool, use_no_cache: bool, dev_path: str, + force_build: bool, ) -> None: """Run beobench experiment from command line. 
@@ -110,6 +122,7 @@ def run( beobench.experiment.scheduler.run( config=list(config), method=method, + gym=gym, env=env, local_dir=local_dir, wandb_project=wandb_project, @@ -121,6 +134,7 @@ def run( no_additional_container=no_additional_container, use_no_cache=use_no_cache, dev_path=dev_path, + force_build=force_build, ) diff --git a/beobench/constants.py b/beobench/constants.py index 2fa32b28..4d53ffa0 100644 --- a/beobench/constants.py +++ b/beobench/constants.py @@ -2,6 +2,8 @@ import pathlib +USER_CONFIG_PATH = pathlib.Path("./.beobench.yml") + # available gym-framework integrations AVAILABLE_INTEGRATIONS = [ "boptest", diff --git a/beobench/data/agents/random_action.py b/beobench/data/agents/random_action.py index 6e2befd0..7dea0e7c 100644 --- a/beobench/data/agents/random_action.py +++ b/beobench/data/agents/random_action.py @@ -28,10 +28,24 @@ except KeyError: horizon = 1000 +try: + imitate_rllib_env_checks = config["agent"]["config"]["imitate_rllib_env_checks"] +except KeyError: + imitate_rllib_env_checks = False + print("Random agent: starting test.") env = create_env() + +if imitate_rllib_env_checks: + # RLlib appears to reset and take single action in env + # this may be to check compliance of env with space etc. 
+ env.reset() + action = env.action_space.sample() + _, _, _, _ = env.step(action) + + observation = env.reset() num_steps_per_ep = 0 diff --git a/beobench/data/configs/rewex01.yaml b/beobench/data/configs/archive/rewex01.yaml similarity index 100% rename from beobench/data/configs/rewex01.yaml rename to beobench/data/configs/archive/rewex01.yaml diff --git a/beobench/data/configs/rewex01_grid.yaml b/beobench/data/configs/archive/rewex01_grid.yaml similarity index 100% rename from beobench/data/configs/rewex01_grid.yaml rename to beobench/data/configs/archive/rewex01_grid.yaml diff --git a/beobench/data/configs/rewex01_grid02.yaml b/beobench/data/configs/archive/rewex01_grid02.yaml similarity index 100% rename from beobench/data/configs/rewex01_grid02.yaml rename to beobench/data/configs/archive/rewex01_grid02.yaml diff --git a/beobench/data/configs/rewex02.yaml b/beobench/data/configs/archive/rewex02.yaml similarity index 100% rename from beobench/data/configs/rewex02.yaml rename to beobench/data/configs/archive/rewex02.yaml diff --git a/beobench/data/configs/baselines/boptest_arroyo2022_dqn.yaml b/beobench/data/configs/baselines/boptest_arroyo2022_dqn.yaml new file mode 100644 index 00000000..092c3a28 --- /dev/null +++ b/beobench/data/configs/baselines/boptest_arroyo2022_dqn.yaml @@ -0,0 +1,72 @@ +# A first attempt at reproduction of experiments in the following paper by Arroyo et al. +# https://lirias.kuleuven.be/retrieve/658452 +# +# Some of the descriptions of RLlib config values are taken from +# https://docs.ray.io/en/latest/rllib/rllib-training.html +# other from +# https://github.com/ibpsa/project1-boptest-gym/blob/master/boptestGymEnv.py + +env: + gym: boptest + config: + name: bestest_hydronic_heat_pump + # whether to normalise the observations and actions + normalize: True + discretize: True + gym_kwargs: + actions: ["oveHeaPumY_u"] + # Dictionary mapping observation keys to a tuple with the lower + # and upper bound of each observation. 
Observation keys must + # belong either to the set of measurements or to the set of + # forecasting variables of the BOPTEST test case. Contrary to + # the actions, the expected minimum and maximum values of the + # measurement and forecasting variables are not provided from + # the BOPTEST framework, although they are still relevant here + # e.g. for normalization or discretization. Therefore, these + # bounds need to be provided by the user. + # If `time` is included as an observation, the time in seconds + # will be passed to the agent. This is the remainder time from + # the beginning of the episode and for periods of the length + # specified in the upper bound of the time feature. + observations: + reaTZon_y: [280.0, 310.0] + # Set to True if desired to use a random start time for each episode + random_start_time: True + # Maximum duration of each episode in seconds + max_episode_length: 31536000 # one year in seconds + # Desired simulation period to initialize each episode + warmup_period: 10 + # Sampling time in seconds + step_period: 900 # = 15min +agent: + origin: rllib + config: + run_or_experiment: DQN + config: + lr: 0.0001 + gamma: 0.99 + # Number of steps after which the episode is forced to terminate. Defaults + # to `env.spec.max_episode_steps` (if present) for Gym envs. + horizon: 24 # one week 672 = 96 * 7 # other previous values: 96 # 10000 # + # Calculate rewards but don't reset the environment when the horizon is + # hit. This allows value estimation and RNN state to span across logical + # episodes denoted by horizon. This only has an effect if horizon != inf. + soft_horizon: True + num_workers: 1 # this is required, otherwise effectively assuming simulator. + # Training batch size, if applicable. Should be >= rollout_fragment_length. + # Samples batches will be concatenated together to a batch of this size, + # which is then passed to SGD. 
+ train_batch_size: 24 + stop: + timesteps_total: 105120 # = 3 years # 35040 # = 365 * 96 (full year) +wrappers: + - origin: general + class: WandbLogger + config: + log_freq: 1 + summary_metric_keys: + - env.returns.reward +general: + wandb_project: boptest_arroyo2022_baseline + wandb_group: random_action + num_samples: 1 \ No newline at end of file diff --git a/beobench/data/configs/baselines/boptest_arroyo2022_random_agent.yaml b/beobench/data/configs/baselines/boptest_arroyo2022_random_agent.yaml new file mode 100644 index 00000000..da0b4891 --- /dev/null +++ b/beobench/data/configs/baselines/boptest_arroyo2022_random_agent.yaml @@ -0,0 +1,59 @@ +# A first attempt at reproduction of experiments in the following paper by Arroyo et al. +# https://lirias.kuleuven.be/retrieve/658452 +# +# Some of the descriptions of RLlib config values are taken from +# https://docs.ray.io/en/latest/rllib/rllib-training.html +# other from +# https://github.com/ibpsa/project1-boptest-gym/blob/master/boptestGymEnv.py + +env: + gym: boptest + name: bestest_hydronic_heat_pump + config: + boptest_testcase: bestest_hydronic_heat_pump + # whether to normalise the observations and actions + normalize: True + gym_kwargs: + actions: ["oveHeaPumY_u"] + # Dictionary mapping observation keys to a tuple with the lower + # and upper bound of each observation. Observation keys must + # belong either to the set of measurements or to the set of + # forecasting variables of the BOPTEST test case. Contrary to + # the actions, the expected minimum and maximum values of the + # measurement and forecasting variables are not provided from + # the BOPTEST framework, although they are still relevant here + # e.g. for normalization or discretization. Therefore, these + # bounds need to be provided by the user. + # If `time` is included as an observation, the time in seconds + # will be passed to the agent. 
This is the remainder time from + # the beginning of the episode and for periods of the length + # specified in the upper bound of the time feature. + observations: + reaTZon_y: [280.0, 310.0] + # Set to True if desired to use a random start time for each episode + random_start_time: True + # Maximum duration of each episode in seconds + max_episode_length: 31536000 # one year in seconds + # Desired simulation period to initialize each episode + warmup_period: 10 + # Sampling time in seconds + step_period: 900 # = 15min +agent: + origin: random_action + config: + config: + horizon: 96 + stop: + timesteps_total: 10000 + imitate_rllib_env_checks: True +wrappers: + - origin: general + class: WandbLogger + config: + log_freq: 1 + summary_metric_keys: + - env.returns.reward +general: + wandb_project: boptest_arroyo2022_baseline + wandb_group: random_action + num_samples: 1 \ No newline at end of file diff --git a/beobench/data/configs/baselines/sinergym_figure4_dqn.yaml b/beobench/data/configs/baselines/sinergym_figure4_dqn.yaml new file mode 100644 index 00000000..882f4a19 --- /dev/null +++ b/beobench/data/configs/baselines/sinergym_figure4_dqn.yaml @@ -0,0 +1,51 @@ +# Reproduction of experiment shown in Figure 4 of Sinergym paper. +# The paper can be found at: https://dl.acm.org/doi/abs/10.1145/3486611.3488729 +# +# Some of the descriptions of RLlib config values are taken from +# https://docs.ray.io/en/latest/rllib/rllib-training.html +# +# From environment output we can deduce that the step size is 15 minutes +# (In january there are 2976 steps / 31 days -> 96 steps per day +# -> step size of 15 min) + +env: + gym: sinergym + name: Eplus-5Zone-hot-discrete-v1 + config: + name: Eplus-5Zone-hot-discrete-v1 + # whether to normalise the observations + normalize: True +agent: + origin: rllib + config: + run_or_experiment: DQN + config: + lr: 0.0001 + gamma: 0.99 + # Number of steps after which the episode is forced to terminate. 
Defaults + # to `env.spec.max_episode_steps` (if present) for Gym envs. + horizon: 24 # one week 672 = 96 * 7 # other previous values: 96 # 10000 # + # Calculate rewards but don't reset the environment when the horizon is + # hit. This allows value estimation and RNN state to span across logical + # episodes denoted by horizon. This only has an effect if horizon != inf. + soft_horizon: True + num_workers: 1 # this is required, otherwise effectively assuming simulator. + # Training batch size, if applicable. Should be >= rollout_fragment_length. + # Samples batches will be concatenated together to a batch of this size, + # which is then passed to SGD. + train_batch_size: 24 + stop: + timesteps_total: 105120 # = 3 years # 35040 # = 365 * 96 (full year) +wrappers: + - origin: general + class: WandbLogger + config: + log_freq: 1 + summary_metric_keys: + - env.returns.reward + - env.returns.info.total_power + - env.returns.info.comfort_penalty +general: + wandb_project: sinergym_fig4_baseline + num_samples: 5 + diff --git a/beobench/data/configs/baselines/sinergym_figure4_random_agent.yaml b/beobench/data/configs/baselines/sinergym_figure4_random_agent.yaml new file mode 100644 index 00000000..4e3cdd99 --- /dev/null +++ b/beobench/data/configs/baselines/sinergym_figure4_random_agent.yaml @@ -0,0 +1,39 @@ +# Reproduction of experiment shown in Figure 4 of Sinergym paper. 
+# The paper can be found at: https://dl.acm.org/doi/abs/10.1145/3486611.3488729 +# +# Some of the descriptions of RLlib config values are taken from +# https://docs.ray.io/en/latest/rllib/rllib-training.html +# +# From environment output we can deduce that the step size is 15 minutes +# (In january there are 2976 steps / 31 days -> 96 steps per day +# -> step size of 15 min) + +env: + gym: sinergym + name: Eplus-5Zone-hot-discrete-v1 + config: + name: Eplus-5Zone-hot-discrete-v1 + # whether to normalise the observations + normalize: True +agent: + origin: random_action + config: + config: + horizon: 96 + stop: + timesteps_total: 105120 # 35040 + imitate_rllib_env_checks: True +wrappers: + - origin: general + class: WandbLogger + config: + log_freq: 1 + summary_metric_keys: + - env.returns.reward + - env.returns.info.total_power + - env.returns.info.comfort_penalty +general: + wandb_project: sinergym_fig4_baseline + wandb_group: random_action + num_samples: 5 + diff --git a/beobench/data/configs/default.yaml b/beobench/data/configs/default.yaml index 851c1dba..30aab120 100644 --- a/beobench/data/configs/default.yaml +++ b/beobench/data/configs/default.yaml @@ -1,48 +1,112 @@ -# agent config +# Default Beobench configuration. +# +# Some of the descriptions of RLlib config +# values are taken from +# https://docs.ray.io/en/latest/rllib/rllib-training.html. +# +# Agent config agent: - origin: rllib # either path to agent script or name of agent library (rllib) - config: # given to ray.tune.run() as arguments (since rllib set before) + # Either path to agent script or name of built-in agent + # (e.g. `rllib`) + origin: rllib + # Config used by agent. In this case given to + # ray.tune.run() as arguments. + # (since rllib set before) + config: + # RLlib algorithm name run_or_experiment: PPO + # Stopping condition stop: timesteps_total: 400000 + # Config only used by ray.RLlib and not other ray modules. 
config: + # Learning rate lr: 0.005 + # Neural network model for PPO model: fcnet_activation: relu fcnet_hiddens: [256,256,256,256] post_fcnet_activation: tanh batch_mode: complete_episodes + # Discount factor gamma: 0.999 + # Number of steps after which the episode is forced to + # terminate. Defaults to `env.spec.max_episode_steps` + # (if present) for Gym envs. horizon: 1000 + # Calculate rewards but don't reset the environment when + # the horizon is hit. This allows value estimation and + # RNN state to span across logical episodes denoted by + # horizon. This only has an effect if horizon != inf. + soft_horizon: True + # Number of episodes over which RLlib internal metrics + # are smoothed metrics_smoothing_episodes: 5 + # Deep learning framework framework: torch - num_workers: 1 # this is required for energym to work (can fail silently otherwise) -# environment config + # Number of parallel workers interacting with + # environment for most gym frameworks this has to be + # set to 1 as often the background building simulations + # are not setup for parallel usage + # (can fail silently otherwise). + num_workers: 1 +# Environment config env: - name: MixedUseFanFCU-v0 + # Gym framework of environment gym: energym - config: - days: 365 - energym_environment: MixedUseFanFCU-v0 - gym_kwargs: - max_episode_length: 35040 - normalize: true - step_period: 15 - weather: GRC_A_Athens -# general config + # Environment name + name: MixedUseFanFCU-v0 + # Configuration passed to integration's `create_env()` + # function. Specific to whatever gym framework you use + # (in this case Energym). + config: null +# Wrappers (None added by default) wrappers: [] +# General Beobench config general: + # Directory to write experiment files to. This argument + # is equivalent to the `local_dir` argument in + # `tune.run()`. local_dir: ./beobench_results + # Name of wandb project. wandb_project: null + # Name of wandb entity. wandb_entity: null + # Name of wandb run group. 
wandb_group: null + # Wandb API key wandb_api_key: null + # Name of MLflow experiment mlflow_name: null + # Whether to use GPU from the host system. Requires that + # GPU is available. use_gpu: False + # Size of the shared memory available to the experiment + # container. docker_shm_size: 4gb + # Whether to force a re-build, even if image already + # exists. + force_build: False + # Whether to use cache IF building experiment container. + # This will not do anything if force_build is disabled, + # and image already exists. use_no_cache: False + # File or github path to beobench package. For + # developement purpose only. This will install a custom + # beobench version inside the experiment container. + # By default the latest PyPI version is installed. dev_path: null + # List of docker flags to be added to docker run command + # of Beobench experiment container. docker_flags: null + # Extra dependencies to install with beobench. Used + # during pip installation in experiment image, + # as in using the command: + # `pip install beobench[]` beobench_extras: "extended" - # Whether to log all episode data to wandb after running experiment - log_full_episode_data: False + # Number of experiment samples to run. This defaults + # to a single sample, i.e. just running the + # experiment once. 
+ num_samples: 1 + # Beobench version + version: 0.5.0 diff --git a/beobench/data/configs/gym_boptest.yaml b/beobench/data/configs/gym_boptest.yaml new file mode 100644 index 00000000..65d57bc3 --- /dev/null +++ b/beobench/data/configs/gym_boptest.yaml @@ -0,0 +1,36 @@ +# BOPTEST default config +# Some of the descriptions taken from +# https://github.com/ibpsa/project1-boptest-gym/blob/master/boptestGymEnv.py + +env: + gym: boptest + config: + name: bestest_hydronic_heat_pump + # whether to normalise the observations and actions + normalize: True + discretize: True + gym_kwargs: + actions: ["oveHeaPumY_u"] + # Dictionary mapping observation keys to a tuple with the lower + # and upper bound of each observation. Observation keys must + # belong either to the set of measurements or to the set of + # forecasting variables of the BOPTEST test case. Contrary to + # the actions, the expected minimum and maximum values of the + # measurement and forecasting variables are not provided from + # the BOPTEST framework, although they are still relevant here + # e.g. for normalization or discretization. Therefore, these + # bounds need to be provided by the user. + # If `time` is included as an observation, the time in seconds + # will be passed to the agent. This is the remainder time from + # the beginning of the episode and for periods of the length + # specified in the upper bound of the time feature. 
+ observations: + reaTZon_y: [280.0, 310.0] + # Set to True if desired to use a random start time for each episode + random_start_time: True + # Maximum duration of each episode in seconds + max_episode_length: 31536000 # one year in seconds + # Desired simulation period to initialize each episode + warmup_period: 10 + # Sampling time in seconds + step_period: 900 # = 15min \ No newline at end of file diff --git a/beobench/data/configs/gym_energym.yaml b/beobench/data/configs/gym_energym.yaml new file mode 100644 index 00000000..3f14d494 --- /dev/null +++ b/beobench/data/configs/gym_energym.yaml @@ -0,0 +1,20 @@ +# Energym default config + +env: + gym: energym + config: + # Number of simulation days + days: 365 + # Name of energym environment + name: Apartments2Thermal-v0 + gym_kwargs: + # Maximum number of timesteps in one episode + max_episode_length: 35040 # corresponds to a year of 15 min steps + # Whether state and action spaces are normalized + normalize: true + # Number of real-world minutes between timesteps in building simulation + step_period: 15 + # Whether to allow a complete environment reset + ignore_reset: True + # Weather file to use for the scenario (possible files depend on env chosen) + weather: ESP_CT_Barcelona \ No newline at end of file diff --git a/beobench/data/configs/gym_sinergym.yaml b/beobench/data/configs/gym_sinergym.yaml new file mode 100644 index 00000000..ef6a51c9 --- /dev/null +++ b/beobench/data/configs/gym_sinergym.yaml @@ -0,0 +1,9 @@ +# Sinergym default config + +env: + gym: sinergym + config: + # Sinergym env name + name: Eplus-5Zone-hot-continuous-v1 + # whether to normalise the observations + normalize: True \ No newline at end of file diff --git a/beobench/data/configs/ppo.yaml b/beobench/data/configs/method_ppo.yaml similarity index 94% rename from beobench/data/configs/ppo.yaml rename to beobench/data/configs/method_ppo.yaml index 1808820c..c133e571 100644 --- a/beobench/data/configs/ppo.yaml +++ 
b/beobench/data/configs/method_ppo.yaml @@ -8,8 +8,10 @@ agent: config: # given to ray.tune.run() as arguments (since rllib set before) run_or_experiment: PPO log_to_file: True + stop: + timesteps_total: 35040 # 1 year w 15 min timesteps config: - lr: 0.0005 + lr: 0.005 model: fcnet_activation: relu fcnet_hiddens: [256,256,256,256] diff --git a/beobench/data/configs/random_action.yaml b/beobench/data/configs/method_random_action.yaml similarity index 78% rename from beobench/data/configs/random_action.yaml rename to beobench/data/configs/method_random_action.yaml index 7a2887ff..c6c87b93 100644 --- a/beobench/data/configs/random_action.yaml +++ b/beobench/data/configs/method_random_action.yaml @@ -3,6 +3,8 @@ agent: origin: random_action config: + stop: + timesteps_total: 35040 config: horizon: 480 general: diff --git a/beobench/data/configs/sinergym.yaml b/beobench/data/configs/sinergym.yaml deleted file mode 100644 index f83e6c50..00000000 --- a/beobench/data/configs/sinergym.yaml +++ /dev/null @@ -1,6 +0,0 @@ -env: - name: Eplus-5Zone-hot-continuous-v1 - gym: sinergym - config: - name: Eplus-5Zone-hot-continuous-v1 - normalize: True \ No newline at end of file diff --git a/beobench/data/configs/simple.yaml b/beobench/data/configs/test_energym.yaml similarity index 91% rename from beobench/data/configs/simple.yaml rename to beobench/data/configs/test_energym.yaml index 9c92814b..df266bc2 100644 --- a/beobench/data/configs/simple.yaml +++ b/beobench/data/configs/test_energym.yaml @@ -11,13 +11,12 @@ agent: # environment config env: - name: Apartments2Thermal-v0 gym: energym config: # Number of simulation days days: 365 # Name of energym environment - energym_environment: Apartments2Thermal-v0 + name: Apartments2Thermal-v0 gym_kwargs: # Maximum number of timesteps in one episode max_episode_length: 35040 # corresponds to a year of 15 min steps diff --git a/beobench/experiment/config_parser.py b/beobench/experiment/config_parser.py index fef44a24..92672c11 100644 
--- a/beobench/experiment/config_parser.py +++ b/beobench/experiment/config_parser.py @@ -6,9 +6,14 @@ import yaml import ast import sys +import random +import os +import beobench import beobench.utils +from beobench.constants import USER_CONFIG_PATH + # To enable compatiblity with Python<=3.8 (e.g. for sinergym dockerfile) if sys.version_info[1] >= 9: import importlib.resources @@ -116,7 +121,7 @@ def get_standard_config(name: str) -> dict: def get_default() -> dict: - """Get default beobench config + """Get default beobench config. Returns: dict: default beobench config dict @@ -125,23 +130,97 @@ def get_default() -> dict: return get_standard_config("default") -def get_agent(name: str) -> pathlib.Path: +def get_user() -> dict: + """Get user beobench config. + + Returns: + dict: default beobench config dict + """ + + if os.path.isfile(USER_CONFIG_PATH): + print(f"Beobench: recognised user config at '{USER_CONFIG_PATH}'.") + user_config = parse(USER_CONFIG_PATH) + else: + user_config = {} + + return user_config + + +def add_default_and_user_configs(config: dict) -> dict: + """Add default and user configs to existing beobench config. - """Get standard beobench agent from beobench.data.agents. + Args: + config (dict): beobench config Returns: - Path: path/traversible to agent. + dict: merged dict of given config, and user and default configs. 
""" - agents_path = importlib.resources.files("beobench.data.agents") - agent_path = agents_path.joinpath(f"{name}.py") + default_config = get_default() + user_config = get_user() + user_default_config = beobench.utils.merge_dicts( + a=default_config, b=user_config, let_b_overrule_a=True + ) + config = beobench.utils.merge_dicts( + a=user_default_config, b=config, let_b_overrule_a=True + ) - return agent_path + return config def get_autogen_config() -> dict: - run_id = uuid.uuid4().hex + """Get automatically generated parts of a Beobench configuration.""" + + config = { + "autogen": { + "run_id": uuid.uuid4().hex, + "random_seed": random.randint(1, 10000000), + }, + } + + return config - config = {"autogen": {"run_id": run_id}} + +def check_config(config: dict) -> None: + """Check if config is valid. + + Args: + config (dict): Beobench config. + """ + requested_version = config["general"]["version"] + if requested_version != beobench.__version__: + raise ValueError( + f"Beobench config requests version {requested_version}" + f" that does not match installed version {beobench.__version__}. " + "Change the installed Beobench version to the requested version " + f"{requested_version} or remove general.version parameter from config " + "to prevent this error." + ) + + +def get_high_level_config(method: str, gym: str, env: str) -> dict: + """Get config from agent, gym and env params. + + Args: + method (str): name of method + gym (str): name of gym + env (str): name of environment. + + Returns: + dict: Beobench configuration. 
+ """ + + config = {} + if method is not None: + config = beobench.utils.merge_dicts( + config, + get_standard_config(f"method_{method}"), + ) + if gym is not None and env is not None: + config = beobench.utils.merge_dicts( + config, + get_standard_config(f"gym_{gym}"), + ) + config["env"]["name"] = env return config diff --git a/beobench/experiment/containers.py b/beobench/experiment/containers.py index 7d891ec0..88d86640 100644 --- a/beobench/experiment/containers.py +++ b/beobench/experiment/containers.py @@ -3,7 +3,9 @@ import contextlib import subprocess import os +import docker +import beobench from beobench.constants import AVAILABLE_INTEGRATIONS # To enable compatiblity with Python<=3.6 (e.g. for sinergym dockerfile) @@ -16,12 +18,22 @@ importlib.resources = importlib_resources +def check_image_exists(image: str): + client = docker.from_env() + + try: + client.images.get(image) + return True + except docker.errors.ImageNotFound: + return False + + def build_experiment_container( build_context: str, use_no_cache: bool = False, - version: str = "latest", beobench_package: str = "beobench", beobench_extras: str = "extended", + force_build: bool = False, ) -> None: """Build experiment container from beobench/integrations/boptest/Dockerfile. @@ -31,11 +43,14 @@ def build_experiment_container( of existing beobench integration (e.g. `boptest`). See the official docs https://docs.docker.com/engine/reference/commandline/build/ for more info. use_no_cache (bool, optional): wether to use cache in build. Defaults to False. - version (str, optional): version to add to container tag. Defaults to "latest". beobench_extras (str, optional): which beobench extra dependencies to install As in `pip install beobench[extras]`. Defaults to "extended". + force_build (bool, optional): whether to force a re-build, even if + image already exists. 
""" + version = beobench.__version__ + # Flags are shared between gym image build and gym_and_beobench image build flags = [] @@ -59,19 +74,27 @@ def build_experiment_container( ) package_build_context = True - print( - ( - f"Recognised integration named {gym_name}: using build" - f" context {build_context}" - ) - ) + print(f"Beobench: recognised integration named {gym_name}.") else: # get alphanumeric name from context context_name = "".join(e for e in build_context if e.isalnum()) image_name = f"beobench_custom_{context_name}" package_build_context = False + # Create tags of different image stages stage0_image_tag = f"{image_name}_base:{version}" + stage1_image_tag = f"{image_name}_intermediate:{version}" + stage2_image_tag = f"{image_name}_complete:{version}" + + # skip build if image already exists. + if not force_build and check_image_exists(stage2_image_tag): + print(f"Beobench: existing image found ({stage2_image_tag}). Skipping build.") + return stage2_image_tag + + print( + f"Beobench: image not found ({stage2_image_tag}) or forced", + "rebuild. 
Building image.", + ) print(f"Building experiment base image `{stage0_image_tag}`...") @@ -99,7 +122,6 @@ def build_experiment_container( # Part 2: build stage 1 (intermediate) experiment image # This includes installation of beobench in experiment image - stage1_image_tag = f"{image_name}_intermediate:{version}" stage1_dockerfile = str( importlib.resources.files("beobench.data.dockerfiles").joinpath( "Dockerfile.experiment" @@ -148,7 +170,6 @@ def build_experiment_container( build_context = beobench_package # need to add std-in-dockerfile via -f flag and not context directly stage2_docker_flags = ["-f", "-"] - stage2_image_tag = f"{image_name}_complete:{version}" stage2_build_args = [ *build_commands, diff --git a/beobench/experiment/scheduler.py b/beobench/experiment/scheduler.py index 1612a73a..c198b0d9 100644 --- a/beobench/experiment/scheduler.py +++ b/beobench/experiment/scheduler.py @@ -7,6 +7,7 @@ import subprocess import pathlib import yaml +import copy import contextlib from typing import Union @@ -28,6 +29,7 @@ def run( config: Union[str, dict, pathlib.Path, list] = None, method: str = None, + gym: str = None, env: str = None, local_dir: str = None, wandb_project: str = None, @@ -42,6 +44,8 @@ def run( no_additional_container: bool = False, docker_flags: list[str] = None, beobench_extras: str = None, + force_build: str = False, + num_samples: int = None, ) -> None: """Run experiment. @@ -71,12 +75,13 @@ def run( docker_shm_size(str, optional): size of the shared memory available to the container. Defaults to None." use_no_cache (bool, optional): whether to use cache to build experiment - container. Defaults to False. + container. Defaults to False. This will not do anything if force_build is + disabled, and image already exists. dev_path (str, optional): file or github path to beobench package. For developement purpose only. This will install a custom beobench version inside the experiment container. By default the latest PyPI version is installed. 
- no_additional_container (bool, optional): wether not to start another container + no_additional_container (bool, optional): whether not to start another container to run experiments in. Defaults to False, which means that another container is started to run experiments in. docker_flags (list[str], optional): list of docker flags to be added to @@ -84,6 +89,10 @@ def run( beobench_extras (str, optional): extra dependencies to install with beobench. Used during pip installation in experiment image, as in using the command: `pip install beobench[]` + force_build (bool, optional): whether to force a re-build, even if + image already exists. + num_samples (int, optional): number of experiment samples to run. This defaults + to a single sample, i.e. just running the experiment once. """ print("Beobench: starting experiment run ...") # parsing relevant kwargs and adding them to config @@ -100,183 +109,201 @@ def run( dev_path=dev_path, docker_flags=docker_flags, beobench_extras=beobench_extras, + force_build=force_build, + num_samples=num_samples, ) # parse combined config config = beobench.experiment.config_parser.parse([config, kwarg_config]) - # adding any defaults that haven't been set by user given config - default_config = beobench.experiment.config_parser.get_default() - config = beobench.utils.merge_dicts( - a=default_config, b=config, let_b_overrule_a=True + high_level_config = beobench.experiment.config_parser.get_high_level_config( + method=method, gym=gym, env=env ) - - autogen_config = beobench.experiment.config_parser.get_autogen_config() config = beobench.utils.merge_dicts( - a=autogen_config, b=config, let_b_overrule_a=True + config, high_level_config, let_b_overrule_a=True ) - # select agent script - if config["agent"]["origin"] in AVAILABLE_AGENTS: - agent_file = beobench.experiment.config_parser.get_agent( - config["agent"]["origin"] - ) - package_agent = True - else: - agent_file = pathlib.Path(config["agent"]["origin"]) - package_agent = False - - # 
TODO add parsing of high level API arguments env and agent - if env or method: - raise ValueError( + # adding any defaults that haven't been set by given config + # The configs can be conflicting: + # config overrules user_config which overrules default_config. + config = beobench.experiment.config_parser.add_default_and_user_configs(config) + beobench.experiment.config_parser.check_config(config) + + # running experiment num_samples times + num_samples = config["general"]["num_samples"] + for i in range(1, num_samples + 1): + # TODO: enable checking whether something is run in container + # and do not print the statement below if inside experiment container. + print( ( - "This functionality has been deprecated. Directly configure" - " the environment or method in the config argument." + f"Beobench: running experiment in container with environment " + f"{config['env']['name']}" + f" and agent from {config['agent']['origin']}. Sample {i} of" + f" {num_samples}." ) ) - print( - ( - f"Beobench: running experiment with environment {config['env']['name']}" - f" and agent from {config['agent']['origin']}." 
+ autogen_config = beobench.experiment.config_parser.get_autogen_config() + config = beobench.utils.merge_dicts( + a=config, b=autogen_config, let_b_overrule_a=True ) - ) - if no_additional_container: - # Execute experiment - # (this is usually reached from inside an experiment container) + if no_additional_container: + # Execute experiment + # (this is usually reached from inside an experiment container) - container_ro_dir_abs = CONTAINER_RO_DIR.absolute() - args = ["python", str(container_ro_dir_abs / agent_file.name)] - subprocess.check_call(args) + container_ro_dir_abs = CONTAINER_RO_DIR.absolute() + args = [ + "python", + str(container_ro_dir_abs / _get_agent_file(config)[0].name), + ] + subprocess.check_call(args) - else: - # build and run experiments in docker container - - ### part 1: build docker images - # Ensure local_dir exists, and create otherwise - local_dir_path = pathlib.Path(config["general"]["local_dir"]) - local_dir_path.mkdir(parents=True, exist_ok=True) - local_dir_path_abs = local_dir_path.absolute() - container_data_dir_abs = CONTAINER_DATA_DIR.absolute() - - if ( - config["general"]["beobench_extras"] == "extended" - and config["agent"]["origin"] == "rllib" - ): - beobench_extras = "extended,rllib" else: - beobench_extras = config["general"]["beobench_extras"] + # First build container image and then execute experiment inside container + # But only run one experiment per container. 
+ config["general"]["num_samples"] = 1 + _build_and_run_in_container(config) - image_tag = beobench.experiment.containers.build_experiment_container( - build_context=config["env"]["gym"], - use_no_cache=config["general"]["use_no_cache"], - beobench_extras=beobench_extras, - beobench_package=config["general"]["dev_path"], - ) - ### part 2: create args and run command in docker container - if config["general"]["docker_flags"] is None: - docker_flags = [] - else: - docker_flags = config["general"]["docker_flags"] +def _build_and_run_in_container(config: dict) -> None: + """Build container image and run experiment in docker container. - # if no wandb API key is given try to get it from env - if config["general"]["wandb_api_key"] is None: - # this will return "" if env var not set - wandb_api_key = os.getenv("WANDB_API_KEY", "") - else: - wandb_api_key = config["general"]["wandb_api_key"] - - # We don't want the key to be logged in wandb - del config["general"]["wandb_api_key"] - - # Save config to local dir and add mount flag for config - config_path = local_dir_path / "tmp" / "config.yaml" - config_path.parent.mkdir(parents=True, exist_ok=True) - config_path_abs = config_path.absolute() - config_container_path_abs = (CONTAINER_RO_DIR / "config.yaml").absolute() - with open(config_path, "w", encoding="utf-8") as conf_file: - yaml.dump(config, conf_file) - docker_flags += [ - "-v", - f"{config_path_abs}:{config_container_path_abs}:ro", - ] + Args: + config (dict): Beobench configuration. + """ - # setup container name with unique identifier - unique_id = uuid.uuid4().hex[:6] - container_name = f"auto_beobench_experiment_{unique_id}" + # We need to deepcopy the config as some sensitive data (API keys) is deleted at + # some point below. This can otherwise cause problems when running multiple + # samples of the same experiment. 
+ config = copy.deepcopy(config) + + ### part 1: build docker images + # Ensure local_dir exists, and create otherwise + local_dir_path = pathlib.Path(config["general"]["local_dir"]) + local_dir_path.mkdir(parents=True, exist_ok=True) + local_dir_path_abs = local_dir_path.absolute() + container_data_dir_abs = CONTAINER_DATA_DIR.absolute() + + if ( + config["general"]["beobench_extras"] == "extended" + and config["agent"]["origin"] == "rllib" + ): + beobench_extras = "extended,rllib" + else: + beobench_extras = config["general"]["beobench_extras"] - # enable docker-from-docker access only for built-in boptest integration. - if config["env"]["gym"] == "boptest": + image_tag = beobench.experiment.containers.build_experiment_container( + build_context=config["env"]["gym"], + use_no_cache=config["general"]["use_no_cache"], + beobench_extras=beobench_extras, + beobench_package=config["general"]["dev_path"], + force_build=config["general"]["force_build"], + ) - # Create docker network (only useful if starting other containers) - beobench.experiment.containers.create_docker_network("beobench-net") + ### part 2: create args and run command in docker container + if config["general"]["docker_flags"] is None: + docker_flags = [] + else: + docker_flags = config["general"]["docker_flags"] - docker_flags += [ - # enable access to docker-from-docker - "-v", - "/var/run/docker.sock:/var/run/docker.sock", - # network allows access to BOPTEST API in other containers - "--network", - "beobench-net", - ] + # if no wandb API key is given try to get it from env + if config["general"]["wandb_api_key"] is None: + # this will return "" if env var not set + wandb_api_key = os.getenv("WANDB_API_KEY", "") + else: + wandb_api_key = config["general"]["wandb_api_key"] - # enabling GPU access in docker container - if config["general"]["use_gpu"]: - docker_flags += [ - # add all available GPUs - "--gpus=all", - ] + # We don't want the key to be logged in wandb + del 
config["general"]["wandb_api_key"] - # define flags for beobench scheduler call inside experiment container - beobench_flags = [] - beobench_flags.append(f'--config="{config}"') - beobench_flag_str = " ".join(beobench_flags) - - with contextlib.ExitStack() as stack: - # if using package agent, get (potentially temp.) agent file path - if package_agent: - agent_file = stack.enter_context( - importlib.resources.as_file(agent_file) - ) - # load agent file - ag_file_abs = agent_file.absolute() - ag_file_on_docker_abs = (CONTAINER_RO_DIR / agent_file.name).absolute() - docker_flags += [ - "-v", - f"{ag_file_abs}:{ag_file_on_docker_abs}:ro", - ] + # Save config to local dir and add mount flag for config + config_path = local_dir_path / "tmp" / "config.yaml" + config_path.parent.mkdir(parents=True, exist_ok=True) + config_path_abs = config_path.absolute() + config_container_path_abs = (CONTAINER_RO_DIR / "config.yaml").absolute() + with open(config_path, "w", encoding="utf-8") as conf_file: + yaml.dump(config, conf_file) + docker_flags += [ + "-v", + f"{config_path_abs}:{config_container_path_abs}:ro", + ] - args = [ - "docker", - "run", - # mount experiment data dir - "-v", - f"{local_dir_path_abs}:{container_data_dir_abs}", - # automatically remove container when stopped/exited - "--rm", - # add more memory - f"--shm-size={config['general']['docker_shm_size']}", - "--name", - container_name, - *docker_flags, - image_tag, - "/bin/bash", - "-c", - ( - f"export WANDB_API_KEY={wandb_api_key} && " - f"beobench run {beobench_flag_str} " - "--no-additional-container && bash" - ), - ] + # setup container name with unique identifier + unique_id = uuid.uuid4().hex[:6] + container_name = f"auto_beobench_experiment_{unique_id}" - arg_str = " ".join(args) - if wandb_api_key: - arg_str = arg_str.replace(wandb_api_key, "") - print(f"Executing docker command: {arg_str}") + # enable docker-from-docker access only for built-in boptest integration. 
+ if config["env"]["gym"] == "boptest": - subprocess.check_call(args) + # Create docker network (only useful if starting other containers) + beobench.experiment.containers.create_docker_network("beobench-net") + + docker_flags += [ + # enable access to docker-from-docker + "-v", + "/var/run/docker.sock:/var/run/docker.sock", + # network allows access to BOPTEST API in other containers + "--network", + "beobench-net", + ] + + # enabling GPU access in docker container + if config["general"]["use_gpu"]: + docker_flags += [ + # add all available GPUs + "--gpus=all", + ] + + # define flags for beobench scheduler call inside experiment container + beobench_flags = [] + beobench_flags.append(f'--config="{config}"') + beobench_flag_str = " ".join(beobench_flags) + + with contextlib.ExitStack() as stack: + # get agent file path + agent_file, uses_importlib = _get_agent_file(config) + # if using package built-in agent, then make sure agent file path exists + # by entering context (because it's potentially temp.). 
+ if uses_importlib: + agent_file = stack.enter_context(importlib.resources.as_file(agent_file)) + # load agent file + ag_file_abs = agent_file.absolute() + ag_file_on_docker_abs = (CONTAINER_RO_DIR / agent_file.name).absolute() + docker_flags += [ + "-v", + f"{ag_file_abs}:{ag_file_on_docker_abs}:ro", + ] + + args = [ + "docker", + "run", + # mount experiment data dir + "-v", + f"{local_dir_path_abs}:{container_data_dir_abs}", + # automatically remove container when stopped/exited + "--rm", + # add more memory + f"--shm-size={config['general']['docker_shm_size']}", + "--name", + container_name, + *docker_flags, + image_tag, + "/bin/bash", + "-c", + ( + f"export WANDB_API_KEY={wandb_api_key} && " + f"beobench run {beobench_flag_str} " + "--no-additional-container && bash" + ), + ] + + arg_str = " ".join(args) + if wandb_api_key: + arg_str = arg_str.replace(wandb_api_key, "") + print(f"Executing docker command: {arg_str}") + + subprocess.check_call(args) def _create_config_from_kwargs(**kwargs) -> dict: @@ -290,3 +317,25 @@ def _create_config_from_kwargs(**kwargs) -> dict: if value: config["general"][key] = value return config + + +def _get_agent_file(config: dict) -> tuple: + """Get agent file path based on config. + + Args: + config (dict): Beobench config. + + Returns: + tuple: path or traversible of agent file, and whether agent is accessed via + importlib. 
+ """ + + if config["agent"]["origin"] in AVAILABLE_AGENTS: + agents_path = importlib.resources.files("beobench.data.agents") + agent_file = agents_path.joinpath(f"{config['agent']['origin']}.py") + uses_importlib = True + else: + agent_file = pathlib.Path(config["agent"]["origin"]) + uses_importlib = False + + return agent_file, uses_importlib diff --git a/beobench/wrappers/general.py b/beobench/wrappers/general.py index a52ec021..6517cdc1 100644 --- a/beobench/wrappers/general.py +++ b/beobench/wrappers/general.py @@ -4,6 +4,7 @@ import gym.spaces import warnings import wandb +import numpy as np from beobench.experiment.provider import config @@ -86,15 +87,29 @@ def reset(self, **kwargs): class WandbLogger(gym.Wrapper): """Wrapper to log all env data for every xth step.""" - def __init__(self, env: gym.Env, log_freq: int = 1): + def __init__( + self, + env: gym.Env, + log_freq: int = 1, + summary_metric_keys: list = None, + restart_sum_metrics_at_reset: bool = False, + ): """Wrapper to log all env data for every xth step. Args: env (gym.Env): environment to wrap. log_freq (int, optional): how often to log the step() method. E.g. for 2 every second step is logged. Defaults to 1. + summary_metric_keys (list, optional): list of keys of logged metrics for + summary metrics such as cummulative sum and mean are computed. This + defaults to ["env.returns.reward"]. WARNING: summary metrics only + work for log_feq == 1, otherwise the cummulative metrics will not be + correct. 
""" + if summary_metric_keys is None: + summary_metric_keys = ["env.returns.reward"] + super().__init__(env) wandb.init( id=config["autogen"]["run_id"], @@ -104,6 +119,10 @@ def __init__(self, env: gym.Env, log_freq: int = 1): ) self.log_freq = log_freq self.total_env_steps = 0 + self.restart_sum_metrics_at_reset = restart_sum_metrics_at_reset + self.cum_metrics = {key: 0 for key in summary_metric_keys} + self.num_env_resets = 0 + self.last_log_dict = {} def step(self, action): obs, reward, done, info = self.env.step(action) @@ -111,16 +130,51 @@ def step(self, action): self.total_env_steps += 1 if self.total_env_steps % self.log_freq == 0: + # TODO: enable flat dict for dict action and observation spaces + # This would allow for summary values of invididual actions/observations + if isinstance(action, (list, np.ndarray)): + for i, act in enumerate(action): + new_action = {f"{i}": act} + action = new_action + log_dict = { - "env": { - "action": action, - "obs": obs, - "reward": reward, - "done": done, - "info": info, - "step": self.total_env_steps, - } + "env.inputs.action": action, + "env.returns.obs": obs, + "env.returns.reward": reward, + "env.returns.done": done, + "env.total_steps": self.total_env_steps, + **{f"env.returns.info.{key}": value for key, value in info.items()}, } + log_dict = self._create_summary_metrics(log_dict) wandb.log(log_dict) + self.last_log_dict = log_dict return obs, reward, done, info + + def reset(self): + + if self.restart_sum_metrics_at_reset: + wandb.log({"reset": self.last_log_dict, "reset.num": self.num_env_resets}) + self.cum_metrics = {key: 0 for key in self.cum_metrics.keys()} + self.num_env_resets += 1 + + return self.env.reset() + + def _create_summary_metrics(self, log_dict: dict): + """Create summary of variables in log dict. 
+ + In particular, the + + Args: + log_dict (dict): _description_ + + Returns: + _type_: _description_ + """ + + for key in self.cum_metrics.keys(): + self.cum_metrics[key] += log_dict[key] + log_dict[key + "_cum"] = self.cum_metrics[key] + log_dict[key + "_mean"] = self.cum_metrics[key] / self.total_env_steps + + return log_dict diff --git a/docs/_static/beobench_config_v1.png b/docs/_static/beobench_config_v1.png new file mode 100644 index 00000000..e789f321 Binary files /dev/null and b/docs/_static/beobench_config_v1.png differ diff --git a/docs/_static/wandb_demo.png b/docs/_static/wandb_demo.png deleted file mode 100644 index 394d6555..00000000 Binary files a/docs/_static/wandb_demo.png and /dev/null differ diff --git a/docs/advanced_usage.rst b/docs/advanced_usage.rst deleted file mode 100644 index d5ced58c..00000000 --- a/docs/advanced_usage.rst +++ /dev/null @@ -1,13 +0,0 @@ -============== -Advanced usage -============== - -Welcome to beobench's usage guides! - -.. toctree:: - :maxdepth: 2 - - guides/visualisation - guides/intro_experiment - guides/add_env - guides/dev_env diff --git a/docs/conf.py b/docs/conf.py index 3cd6d35c..eff6e60e 100755 --- a/docs/conf.py +++ b/docs/conf.py @@ -38,6 +38,8 @@ "sphinx.ext.viewcode", "sphinx.ext.autosummary", "sphinx_tabs.tabs", + "sphinx.ext.napoleon", + "sphinx_autodoc_typehints", # "myst_parser", # "m2r2", ] diff --git a/docs/experiments.rst b/docs/experiments.rst deleted file mode 100644 index 5066450b..00000000 --- a/docs/experiments.rst +++ /dev/null @@ -1,15 +0,0 @@ -=========== -Experiments -=========== - -How to run experiments - -Create dev container with vscode - -Then outside of dev container, start dev container:: - - docker start - -And then:: - - docker exec nohup python -m beobench.experiment.scheduler --use-wandb --wandb-api-key= & \ No newline at end of file diff --git a/docs/getting_started.rst b/docs/getting_started.rst index fa868961..95413fed 100644 --- a/docs/getting_started.rst +++ 
b/docs/getting_started.rst @@ -5,7 +5,7 @@ Getting started =============== .. tip:: - This section is (almost) identical to the quickstart section in the repository README. If you already read that one, you may want to skip to the :doc:`advanced usage guides`. + This section is (almost) identical to the quickstart section in the repository README. If you already read that one, you may want to skip to the :doc:`advanced usage guides`. .. Note that below we import multiple separate parts from the main repo readme. This separation is done to allow us to add rst elements inbetween that can't be parsed by GitHubs rst parser. diff --git a/docs/guides/configuration.rst b/docs/guides/configuration.rst new file mode 100644 index 00000000..a033198c --- /dev/null +++ b/docs/guides/configuration.rst @@ -0,0 +1,32 @@ +YAML configuration +================== + +Beobench uses `YAML files `_ to define experiment configurations. The file below is the default configuration for every Beobench experiment. The comments above each setting describe its functionality. + +.. literalinclude:: ../../beobench/data/configs/default.yaml + :language: yaml + + +Gym-specific configurations +--------------------------- + +Each gym framework integration has its own configuration setup. See below for the default environment configurations of the three supported frameworks. + +BOPTEST +^^^^^^^ + +.. literalinclude:: ../../beobench/data/configs/gym_boptest.yaml + :language: yaml + + +Energym +^^^^^^^ + +.. literalinclude:: ../../beobench/data/configs/gym_energym.yaml + :language: YAML + +Sinergym +^^^^^^^^ + +.. literalinclude:: ../../beobench/data/configs/gym_sinergym.yaml + :language: yaml \ No newline at end of file diff --git a/docs/guides/index.rst b/docs/guides/index.rst new file mode 100644 index 00000000..a3e5cc40 --- /dev/null +++ b/docs/guides/index.rst @@ -0,0 +1,14 @@ +====== +Guides +====== + +Welcome to beobench's usage guides! + +.. 
toctree:: + :maxdepth: 2 + + configuration + add_env + installation_linux + tips + dev_env \ No newline at end of file diff --git a/docs/guides/intro_experiment.rst b/docs/guides/intro_experiment.rst deleted file mode 100644 index 3ad6049c..00000000 --- a/docs/guides/intro_experiment.rst +++ /dev/null @@ -1,45 +0,0 @@ -Creating new experiments ------------------------- - -Overview -^^^^^^^^ - -The diagram below gives an overview of how beobench experiments work. The ``beobench.experiment.scheduler.run()`` function builds and starts an *experiment container*. Within this container all experiments are being run using the *Ray RLlib* and *Ray Tune* libraries. Results are saved to a local folder (by default ``./beobench_results``), and optionally to Weights and Biases (wandb) as well. - -.. image:: ../_static/experiment_run_flow.png - :width: 450 px - :alt: experiment run diagram - :align: center - -Experiment configuration -^^^^^^^^^^^^^^^^^^^^^^^^^ - -Beobench experiments are configured either using a *Python dictionary* or an equivalent *yaml file*. For example, the following ``config.yaml`` file configures an experiment that evaluates an RLlib-based *proximal policy optimisation* (PPO) agent on the ``MixedUseFanFCU-v0`` environment of Energym: - - -.. literalinclude:: ../../beobench/data/configs/default.yaml - :language: yaml - - - -Given this configuration file ``config.yaml``, we can run the experiment using the following commands: - -.. include:: ../snippets/run_standard_experiment.rst - -Running experiment in background -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -Once you're up and running with Beobench, you will likely eventually want to start running experiments in the background. One way of doing this on Linux is using ``nohup``: - -.. code-block:: console - - nohup beobench run -c config.yaml & - -This will save the console output of your experiment to `nohup.out` in your current directory. 
- -If you have setup the Beobench development environment, you may want to start your experiments inside the devcontainer. You can do this using: - -.. code-block:: console - - nohup docker exec beobench run -c config.yaml & - diff --git a/docs/guides/quickstart.rst b/docs/guides/quickstart.rst deleted file mode 100644 index 040935a9..00000000 --- a/docs/guides/quickstart.rst +++ /dev/null @@ -1,4 +0,0 @@ - -.. include:: ../../README.rst - :start-after: start-quickstart - :end-before: end-quickstart \ No newline at end of file diff --git a/docs/guides/tips.rst b/docs/guides/tips.rst new file mode 100644 index 00000000..24857adb --- /dev/null +++ b/docs/guides/tips.rst @@ -0,0 +1,20 @@ +Miscellaneous tips +------------------ + +Running experiment in background +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Once you're up and running with Beobench, you will likely eventually want to start running experiments in the background. One way of doing this on Linux is using ``nohup``: + +.. code-block:: console + + nohup beobench run -c config.yaml & + +This will save the console output of your experiment to `nohup.out` in your current directory. + +If you have setup the Beobench development environment, you may want to start your experiments inside the devcontainer. You can do this using: + +.. code-block:: console + + nohup docker exec beobench run -c config.yaml & + diff --git a/docs/guides/visualisation.rst b/docs/guides/visualisation.rst deleted file mode 100644 index 29259f6d..00000000 --- a/docs/guides/visualisation.rst +++ /dev/null @@ -1,56 +0,0 @@ - -Visualisation -------------- - -Running a basic experiment -^^^^^^^^^^^^^^^^^^^^^^^^^^ - -Let's run your first experiment with beobench. After installing beobench using the :ref:`installation guide `, you can simply use one of the following two commands to run a first experiment: - -.. tabs:: - - .. code-tab:: console Console - - beobench run - - .. 
code-tab:: python - - import beobench - - beobench.run() - -This will run the default experiment `defined here `_: applying the reinforcement learning method `Proximal Policy Optimisation (PPO) `_ to control a residential `HVAC system `_ (defined in the `BOPTEST bestest_hydronic testcase `_). The results of this experiment will be logged in a newly created ``./beobench_results`` directory. - -Visualize experiment results -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -Although the ``./beobench_results`` folder already contains a lot of information, it can be more helpful to log our experiment data to a proper experiment tracking service. Beobench provides an integration with the `Weights and Biases (wandb) `_ cloud-based tracking service for this purpose. To use wandb we add the following arguments to the previous command: - -.. tabs:: - - .. code-tab:: console Console - - beobench run \ - --wandb-project= \ - --wandb-entity= \ - --wandb-api-key= - - .. code-tab:: python - - import beobench - - beobench.run( - wandb_project=, - wandb_entity=, - wandb_api_key= - ) - -where you replace ````, ```` and ```` with your own wandb information. See `this guide `_ to get started with wandb. With this setup you will now be able to see the experiment progress on your wandb dashboard, similar to the one shown below. You can `find a live dashboard of another run of this experiment here `_. - -.. image:: ../_static/wandb_demo.png - :width: 550 px - :alt: wandb website - :align: center - :target: `example wandb`_ - -.. _example wandb: https://wandb.ai/beobench/doc-test/runs/66299_00000 \ No newline at end of file diff --git a/docs/index.rst b/docs/index.rst index 15e9e5d8..503a1b72 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -1,5 +1,5 @@ ************* -Beobench Docs +Beobench docs ************* .. 
include:: ../README.rst @@ -16,8 +16,7 @@ Contents Home getting_started envs - advanced_usage - api + guides/index contributing credits history diff --git a/requirements/doc_requirements.txt b/requirements/doc_requirements.txt index 4013f01d..c443cb46 100644 --- a/requirements/doc_requirements.txt +++ b/requirements/doc_requirements.txt @@ -14,4 +14,7 @@ docutils==0.16 # Because of the following bug # https://github.com/readthedocs/readthedocs.org/issues/9038 -Jinja2<3.1 \ No newline at end of file +Jinja2<3.1 + +# For allowing type hints +sphinx-autodoc-typehints \ No newline at end of file diff --git a/setup.cfg b/setup.cfg index 1c061b63..e3abeb36 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 0.4.4 +current_version = 0.5.0 commit = True tag = True @@ -11,6 +11,14 @@ replace = version = "{new_version}" search = __version__ = "{current_version}" replace = __version__ = "{new_version}" +[bumpversion:file:CITATION.cff] +search = version: {current_version} +replace = version: {new_version} + +[bumpversion:file:beobench/data/configs/default.yaml] +search = version: {current_version} +replace = version: {new_version} + [bdist_wheel] universal = 1 diff --git a/setup.py b/setup.py index bdf90d5a..aca89ad4 100644 --- a/setup.py +++ b/setup.py @@ -10,7 +10,7 @@ with open("HISTORY.rst", encoding="UTF-8") as history_file: history = history_file.read() -version = "0.4.4" # pylint: disable=invalid-name +version = "0.5.0" # pylint: disable=invalid-name requirements = [ "docker", @@ -36,7 +36,7 @@ author="Beobench authors", python_requires=">=3.6", classifiers=[ - "Development Status :: 2 - Pre-Alpha", + "Development Status :: 3 - Alpha", "Intended Audience :: Developers", "License :: OSI Approved :: MIT License", "Natural Language :: English", @@ -44,8 +44,9 @@ "Programming Language :: Python :: 3.6", "Programming Language :: Python :: 3.7", "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", ], - 
description="Beobench is a toolbox for benchmarking reinforcement learning (RL) algorithms on building energy optimisation (BEO) problems.", # pylint: disable=line-too-long + description="Beobench is a toolkit providing easy and unified access to building control environments for reinforcement learning (RL).", # pylint: disable=line-too-long install_requires=requirements, extras_require={ "extended": extended_requirements, diff --git a/tests/conftest.py b/tests/conftest.py index 3c20e9be..e3f5d591 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -9,4 +9,4 @@ @pytest.fixture def run_config(): - return beobench.experiment.config_parser.get_standard_config("simple") + return beobench.experiment.config_parser.get_standard_config("test_energym") diff --git a/tox.ini b/tox.ini index a7466bac..b5ffcccc 100644 --- a/tox.ini +++ b/tox.ini @@ -1,11 +1,12 @@ [tox] -envlist = py39, py38, py37, flake8 +envlist = py39, py38, py37, py36, flake8 [travis] python = 3.9: py39 3.8: py38 3.7: py37 + 3.6: py36 [testenv:flake8] basepython = python