diff --git a/.gitignore b/.gitignore index 9954c405..c2058d0a 100644 --- a/.gitignore +++ b/.gitignore @@ -119,6 +119,5 @@ docs/generated/ #beobench beobench_results* notebooks/archive -*beo.yaml -*beo.yml +.beobench.yml perf_tests* \ No newline at end of file diff --git a/CITATION.cff b/CITATION.cff new file mode 100644 index 00000000..6b0cdf09 --- /dev/null +++ b/CITATION.cff @@ -0,0 +1,22 @@ +# This CITATION.cff file was generated with cffinit. +# Visit https://bit.ly/cffinit to generate yours today! + +cff-version: 1.2.0 +title: >- + Beobench: A Toolkit for Unified Access to Building + Simulations for Reinforcement Learning +message: >- + If you use this software, please cite it using the + metadata from this file. +type: software +version: 0.5.0 +url: https://github.com/rdnfn/beobench +authors: + - given-names: Arduin + family-names: Findeis + - given-names: Fiodar + family-names: Kazhamiaka + - given-names: Scott + family-names: Jeen + - given-names: Srinivasan + family-names: Keshav diff --git a/CONTRIBUTING.rst b/CONTRIBUTING.rst index 123a555b..6d948477 100644 --- a/CONTRIBUTING.rst +++ b/CONTRIBUTING.rst @@ -42,6 +42,13 @@ beobench could always use more documentation, whether as part of the official beobench docs, in docstrings, or even on the web in blog posts, articles, and such. +To update the API docs, use the following command inside the ``/docs`` directory: + +.. code-block:: + + sphinx-apidoc -f -o . .. + + Submit Feedback ~~~~~~~~~~~~~~~ diff --git a/HISTORY.rst b/HISTORY.rst index ceb4dbe8..4b3300f0 100644 --- a/HISTORY.rst +++ b/HISTORY.rst @@ -2,6 +2,29 @@ History ======= +0.5.0 (2022-05-26) +------------------ + +* Features: + + * Mean and cumulative metrics can now be logged by WandbLogger wrapper. + * Support for automatically running multiple samples/trials of same experiment via ``num_samples`` config parameter. + * Configs named `.beobench.yml` will be automatically parsed when Beobench is run in directory containing such a config. 
This allows users to set e.g. wandb API keys without referring to the config in every Beobench command call. + * Configs from experiments now specify the Beobench version used. When trying to rerun an experiment this version will be checked, and an error thrown if there is a mismatch between installed and requested version. + * Add improved high-level API for getting started. This uses the CLI arguments ``--method``, ``--gym`` and ``--env``. Example usage: ``beobench run --method ppo --gym sinergym --env Eplus-5Zone-hot-continuous-v1``. + +* Improvements + + * Add ``CITATION.cff`` file to make citing software easier. + * By default, docker builds of experiment images are now skipped if an image with tag corresponding to installed Beobench version already exists. + * Remove outdated guides from docs and add yaml configuration description. + * Add support for logging multidimensional actions to wandb. + * Add support for logging summary metrics on every env reset to wandb. + +* Fixes + + * Updated BOPTEST integration to work with current version of Beobench. + 0.4.4 (2022-05-09) ------------------ diff --git a/PYPI_README.rst b/PYPI_README.rst index 923dd5b0..84da9b99 100644 --- a/PYPI_README.rst +++ b/PYPI_README.rst @@ -1,3 +1,5 @@ -A toolbox for benchmarking reinforcement learning (RL) algorithms on building energy optimisation (BEO) problems. Beobench tries to make working on RL for BEO easier: it provides simple access to existing libraries defining BEO problems (such as `BOPTEST `_) and provides a large set of pre-configured RL algorithms. Beobench is *not* a gym library itself - instead it leverages the brilliant work done by many existing gym-type projects and makes their work more easily accessible. +A toolkit providing easy and unified access to building control environments for reinforcement learning (RL). Compared to other domains, `RL environments for building control `_ tend to be more difficult to install and handle. 
Most environments require the user to either manually install a building simulator (e.g. `EnergyPlus `_) or to manually manage Docker containers. This can be tedious. + +Beobench was created to make building control environments easier to use and experiments more reproducible. Beobench uses Docker to manage all environment dependencies in the background so that the user doesn't have to. A standardised API allows the user to easily configure experiments and evaluate new RL agents on building control environments. For more information go to the `documentation `_ and the `GitHub code repository `_. \ No newline at end of file diff --git a/README.rst b/README.rst index db03371e..b929d580 100644 --- a/README.rst +++ b/README.rst @@ -24,7 +24,7 @@ :target: https://opensource.org/licenses/MIT :alt: License -A toolkit providing easy and unified access to building control environments for reinforcement learning (RL). Compared to other domains, `RL environments for building control `_ tend to be more difficult to install and handle. Most environments require the user to either manually install a building simulator (e.g. `EnergyPlus `_) or to manually manage Docker containers. This is tedious. +A toolkit providing easy and unified access to building control environments for reinforcement learning (RL). Compared to other domains, `RL environments for building control `_ tend to be more difficult to install and handle. Most environments require the user to either manually install a building simulator (e.g. `EnergyPlus `_) or to manually manage Docker containers. This can be tedious. Beobench was created to make building control environments easier to use and experiments more reproducible. Beobench uses Docker to manage all environment dependencies in the background so that the user doesn't have to. A standardised API, illustrated in the figure below, allows the user to easily configure experiments and evaluate new RL agents on building control environments. 
@@ -66,7 +66,7 @@ Installation ------------ 1. `Install docker `_ on your machine (if on Linux, check the `additional installation steps `_) -2. Install *beobench* using: +2. Install Beobench using: .. code-block:: console @@ -94,9 +94,23 @@ Experiment configuration To get started with our first experiment, we set up an *experiment configuration*. Experiment configurations -can be given as a yaml file or a Python dictionary. Such a configuration +can be given as a yaml file or a Python dictionary. The configuration fully defines an experiment, configuring everything -from the RL agent to the environment and its wrappers. +from the RL agent to the environment and its wrappers. The figure below illustrates the config structure. + +.. raw:: html + +

+ +.. image:: https://github.com/rdnfn/beobench/raw/2cf961a8135b25c9a66e70d67eea9890ce0b878a/docs/_static/beobench_config_v1.png + :align: center + :width: 350 px + :alt: Beobench + +.. raw:: html + +

+ Let's look at a concrete example. Consider this ``config.yaml`` file: @@ -174,7 +188,7 @@ Execution .. end-qs-sec3 -Given the configuration and agent script above, we can run the experiment using either via the command line: +Given the configuration and agent script above, we can run the experiment either via the command line: .. code-block:: console diff --git a/beobench/__init__.py b/beobench/__init__.py index 4998ceca..07ea095f 100644 --- a/beobench/__init__.py +++ b/beobench/__init__.py @@ -2,7 +2,7 @@ __author__ = """Beobench authors""" __email__ = "-" -__version__ = "0.4.4" +__version__ = "0.5.0" from beobench.utils import restart from beobench.experiment.scheduler import run diff --git a/beobench/beobench_contrib b/beobench/beobench_contrib index 533f3b02..172d5622 160000 --- a/beobench/beobench_contrib +++ b/beobench/beobench_contrib @@ -1 +1 @@ -Subproject commit 533f3b02bd6ca4e612d60bf898ed45d02d40d49c +Subproject commit 172d5622e17db61cfc3d7c9a9fe5002120b57de6 diff --git a/beobench/cli.py b/beobench/cli.py index 77506738..44218686 100644 --- a/beobench/cli.py +++ b/beobench/cli.py @@ -25,6 +25,11 @@ def cli(): default=None, help="Name of RL method to use in experiment.", ) +@click.option( + "--gym", + default=None, + help="Name of gym framework to use in experiment.", +) @click.option( "--env", default=None, @@ -82,9 +87,15 @@ def cli(): default=None, help="For developer use only: location of custom beobench package version.", ) +@click.option( + "--force-build", + is_flag=True, + help="whether to force a re-build, even if image already exists.", +) def run( config: str, method: str, + gym: str, env: str, local_dir: str, wandb_project: str, @@ -96,6 +107,7 @@ def run( no_additional_container: bool, use_no_cache: bool, dev_path: str, + force_build: bool, ) -> None: """Run beobench experiment from command line. 
@@ -110,6 +122,7 @@ def run( beobench.experiment.scheduler.run( config=list(config), method=method, + gym=gym, env=env, local_dir=local_dir, wandb_project=wandb_project, @@ -121,6 +134,7 @@ def run( no_additional_container=no_additional_container, use_no_cache=use_no_cache, dev_path=dev_path, + force_build=force_build, ) diff --git a/beobench/constants.py b/beobench/constants.py index 2fa32b28..4d53ffa0 100644 --- a/beobench/constants.py +++ b/beobench/constants.py @@ -2,6 +2,8 @@ import pathlib +USER_CONFIG_PATH = pathlib.Path("./.beobench.yml") + # available gym-framework integrations AVAILABLE_INTEGRATIONS = [ "boptest", diff --git a/beobench/data/agents/random_action.py b/beobench/data/agents/random_action.py index 6e2befd0..7dea0e7c 100644 --- a/beobench/data/agents/random_action.py +++ b/beobench/data/agents/random_action.py @@ -28,10 +28,24 @@ except KeyError: horizon = 1000 +try: + imitate_rllib_env_checks = config["agent"]["config"]["imitate_rllib_env_checks"] +except KeyError: + imitate_rllib_env_checks = False + print("Random agent: starting test.") env = create_env() + +if imitate_rllib_env_checks: + # RLlib appears to reset and take single action in env + # this may be to check compliance of env with space etc. 
+ env.reset() + action = env.action_space.sample() + _, _, _, _ = env.step(action) + + observation = env.reset() num_steps_per_ep = 0 diff --git a/beobench/data/configs/rewex01.yaml b/beobench/data/configs/archive/rewex01.yaml similarity index 100% rename from beobench/data/configs/rewex01.yaml rename to beobench/data/configs/archive/rewex01.yaml diff --git a/beobench/data/configs/rewex01_grid.yaml b/beobench/data/configs/archive/rewex01_grid.yaml similarity index 100% rename from beobench/data/configs/rewex01_grid.yaml rename to beobench/data/configs/archive/rewex01_grid.yaml diff --git a/beobench/data/configs/rewex01_grid02.yaml b/beobench/data/configs/archive/rewex01_grid02.yaml similarity index 100% rename from beobench/data/configs/rewex01_grid02.yaml rename to beobench/data/configs/archive/rewex01_grid02.yaml diff --git a/beobench/data/configs/rewex02.yaml b/beobench/data/configs/archive/rewex02.yaml similarity index 100% rename from beobench/data/configs/rewex02.yaml rename to beobench/data/configs/archive/rewex02.yaml diff --git a/beobench/data/configs/baselines/boptest_arroyo2022_dqn.yaml b/beobench/data/configs/baselines/boptest_arroyo2022_dqn.yaml new file mode 100644 index 00000000..092c3a28 --- /dev/null +++ b/beobench/data/configs/baselines/boptest_arroyo2022_dqn.yaml @@ -0,0 +1,72 @@ +# A first attempt at reproduction of experiments in the following paper by Arroyo et al. +# https://lirias.kuleuven.be/retrieve/658452 +# +# Some of the descriptions of RLlib config values are taken from +# https://docs.ray.io/en/latest/rllib/rllib-training.html +# other from +# https://github.com/ibpsa/project1-boptest-gym/blob/master/boptestGymEnv.py + +env: + gym: boptest + config: + name: bestest_hydronic_heat_pump + # whether to normalise the observations and actions + normalize: True + discretize: True + gym_kwargs: + actions: ["oveHeaPumY_u"] + # Dictionary mapping observation keys to a tuple with the lower + # and upper bound of each observation. 
Observation keys must + # belong either to the set of measurements or to the set of + # forecasting variables of the BOPTEST test case. Contrary to + # the actions, the expected minimum and maximum values of the + # measurement and forecasting variables are not provided from + # the BOPTEST framework, although they are still relevant here + # e.g. for normalization or discretization. Therefore, these + # bounds need to be provided by the user. + # If `time` is included as an observation, the time in seconds + # will be passed to the agent. This is the remainder time from + # the beginning of the episode and for periods of the length + # specified in the upper bound of the time feature. + observations: + reaTZon_y: [280.0, 310.0] + # Set to True if desired to use a random start time for each episode + random_start_time: True + # Maximum duration of each episode in seconds + max_episode_length: 31536000 # one year in seconds + # Desired simulation period to initialize each episode + warmup_period: 10 + # Sampling time in seconds + step_period: 900 # = 15min +agent: + origin: rllib + config: + run_or_experiment: DQN + config: + lr: 0.0001 + gamma: 0.99 + # Number of steps after which the episode is forced to terminate. Defaults + # to `env.spec.max_episode_steps` (if present) for Gym envs. + horizon: 24 # one week 672 = 96 * 7 # other previous values: 96 # 10000 # + # Calculate rewards but don't reset the environment when the horizon is + # hit. This allows value estimation and RNN state to span across logical + # episodes denoted by horizon. This only has an effect if horizon != inf. + soft_horizon: True + num_workers: 1 # this is required, otherwise effectively assuming simulator. + # Training batch size, if applicable. Should be >= rollout_fragment_length. + # Samples batches will be concatenated together to a batch of this size, + # which is then passed to SGD. 
+ train_batch_size: 24 + stop: + timesteps_total: 105120 # = 3 years # 35040 # = 365 * 96 (full year) +wrappers: + - origin: general + class: WandbLogger + config: + log_freq: 1 + summary_metric_keys: + - env.returns.reward +general: + wandb_project: boptest_arroyo2022_baseline + wandb_group: random_action + num_samples: 1 \ No newline at end of file diff --git a/beobench/data/configs/baselines/boptest_arroyo2022_random_agent.yaml b/beobench/data/configs/baselines/boptest_arroyo2022_random_agent.yaml new file mode 100644 index 00000000..da0b4891 --- /dev/null +++ b/beobench/data/configs/baselines/boptest_arroyo2022_random_agent.yaml @@ -0,0 +1,59 @@ +# A first attempt at reproduction of experiments in the following paper by Arroyo et al. +# https://lirias.kuleuven.be/retrieve/658452 +# +# Some of the descriptions of RLlib config values are taken from +# https://docs.ray.io/en/latest/rllib/rllib-training.html +# other from +# https://github.com/ibpsa/project1-boptest-gym/blob/master/boptestGymEnv.py + +env: + gym: boptest + name: bestest_hydronic_heat_pump + config: + boptest_testcase: bestest_hydronic_heat_pump + # whether to normalise the observations and actions + normalize: True + gym_kwargs: + actions: ["oveHeaPumY_u"] + # Dictionary mapping observation keys to a tuple with the lower + # and upper bound of each observation. Observation keys must + # belong either to the set of measurements or to the set of + # forecasting variables of the BOPTEST test case. Contrary to + # the actions, the expected minimum and maximum values of the + # measurement and forecasting variables are not provided from + # the BOPTEST framework, although they are still relevant here + # e.g. for normalization or discretization. Therefore, these + # bounds need to be provided by the user. + # If `time` is included as an observation, the time in seconds + # will be passed to the agent. 
This is the remainder time from + # the beginning of the episode and for periods of the length + # specified in the upper bound of the time feature. + observations: + reaTZon_y: [280.0, 310.0] + # Set to True if desired to use a random start time for each episode + random_start_time: True + # Maximum duration of each episode in seconds + max_episode_length: 31536000 # one year in seconds + # Desired simulation period to initialize each episode + warmup_period: 10 + # Sampling time in seconds + step_period: 900 # = 15min +agent: + origin: random_action + config: + config: + horizon: 96 + stop: + timesteps_total: 10000 + imitate_rllib_env_checks: True +wrappers: + - origin: general + class: WandbLogger + config: + log_freq: 1 + summary_metric_keys: + - env.returns.reward +general: + wandb_project: boptest_arroyo2022_baseline + wandb_group: random_action + num_samples: 1 \ No newline at end of file diff --git a/beobench/data/configs/baselines/sinergym_figure4_dqn.yaml b/beobench/data/configs/baselines/sinergym_figure4_dqn.yaml new file mode 100644 index 00000000..882f4a19 --- /dev/null +++ b/beobench/data/configs/baselines/sinergym_figure4_dqn.yaml @@ -0,0 +1,51 @@ +# Reproduction of experiment shown in Figure 4 of Sinergym paper. +# The paper can be found at: https://dl.acm.org/doi/abs/10.1145/3486611.3488729 +# +# Some of the descriptions of RLlib config values are taken from +# https://docs.ray.io/en/latest/rllib/rllib-training.html +# +# From environment output we can deduce that the step size is 15 minutes +# (In january there are 2976 steps / 31 days -> 96 steps per day +# -> step size of 15 min) + +env: + gym: sinergym + name: Eplus-5Zone-hot-discrete-v1 + config: + name: Eplus-5Zone-hot-discrete-v1 + # whether to normalise the observations + normalize: True +agent: + origin: rllib + config: + run_or_experiment: DQN + config: + lr: 0.0001 + gamma: 0.99 + # Number of steps after which the episode is forced to terminate. 
Defaults + # to `env.spec.max_episode_steps` (if present) for Gym envs. + horizon: 24 # one week 672 = 96 * 7 # other previous values: 96 # 10000 # + # Calculate rewards but don't reset the environment when the horizon is + # hit. This allows value estimation and RNN state to span across logical + # episodes denoted by horizon. This only has an effect if horizon != inf. + soft_horizon: True + num_workers: 1 # this is required, otherwise effectively assuming simulator. + # Training batch size, if applicable. Should be >= rollout_fragment_length. + # Samples batches will be concatenated together to a batch of this size, + # which is then passed to SGD. + train_batch_size: 24 + stop: + timesteps_total: 105120 # = 3 years # 35040 # = 365 * 96 (full year) +wrappers: + - origin: general + class: WandbLogger + config: + log_freq: 1 + summary_metric_keys: + - env.returns.reward + - env.returns.info.total_power + - env.returns.info.comfort_penalty +general: + wandb_project: sinergym_fig4_baseline + num_samples: 5 + diff --git a/beobench/data/configs/baselines/sinergym_figure4_random_agent.yaml b/beobench/data/configs/baselines/sinergym_figure4_random_agent.yaml new file mode 100644 index 00000000..4e3cdd99 --- /dev/null +++ b/beobench/data/configs/baselines/sinergym_figure4_random_agent.yaml @@ -0,0 +1,39 @@ +# Reproduction of experiment shown in Figure 4 of Sinergym paper. 
+# The paper can be found at: https://dl.acm.org/doi/abs/10.1145/3486611.3488729 +# +# Some of the descriptions of RLlib config values are taken from +# https://docs.ray.io/en/latest/rllib/rllib-training.html +# +# From environment output we can deduce that the step size is 15 minutes +# (In january there are 2976 steps / 31 days -> 96 steps per day +# -> step size of 15 min) + +env: + gym: sinergym + name: Eplus-5Zone-hot-discrete-v1 + config: + name: Eplus-5Zone-hot-discrete-v1 + # whether to normalise the observations + normalize: True +agent: + origin: random_action + config: + config: + horizon: 96 + stop: + timesteps_total: 105120 # 35040 + imitate_rllib_env_checks: True +wrappers: + - origin: general + class: WandbLogger + config: + log_freq: 1 + summary_metric_keys: + - env.returns.reward + - env.returns.info.total_power + - env.returns.info.comfort_penalty +general: + wandb_project: sinergym_fig4_baseline + wandb_group: random_action + num_samples: 5 + diff --git a/beobench/data/configs/default.yaml b/beobench/data/configs/default.yaml index 851c1dba..30aab120 100644 --- a/beobench/data/configs/default.yaml +++ b/beobench/data/configs/default.yaml @@ -1,48 +1,112 @@ -# agent config +# Default Beobench configuration. +# +# Some of the descriptions of RLlib config +# values are taken from +# https://docs.ray.io/en/latest/rllib/rllib-training.html. +# +# Agent config agent: - origin: rllib # either path to agent script or name of agent library (rllib) - config: # given to ray.tune.run() as arguments (since rllib set before) + # Either path to agent script or name of built-in agent + # (e.g. `rllib`) + origin: rllib + # Config used by agent. In this case given to + # ray.tune.run() as arguments. + # (since rllib set before) + config: + # RLlib algorithm name run_or_experiment: PPO + # Stopping condition stop: timesteps_total: 400000 + # Config only used by ray.RLlib and not other ray modules. 
config: + # Learning rate lr: 0.005 + # Neural network model for PPO model: fcnet_activation: relu fcnet_hiddens: [256,256,256,256] post_fcnet_activation: tanh batch_mode: complete_episodes + # Discount factor gamma: 0.999 + # Number of steps after which the episode is forced to + # terminate. Defaults to `env.spec.max_episode_steps` + # (if present) for Gym envs. horizon: 1000 + # Calculate rewards but don't reset the environment when + # the horizon is hit. This allows value estimation and + # RNN state to span across logical episodes denoted by + # horizon. This only has an effect if horizon != inf. + soft_horizon: True + # Number of episodes over which RLlib internal metrics + # are smoothed metrics_smoothing_episodes: 5 + # Deep learning framework framework: torch - num_workers: 1 # this is required for energym to work (can fail silently otherwise) -# environment config + # Number of parallel workers interacting with + # environment for most gym frameworks this has to be + # set to 1 as often the background building simulations + # are not setup for parallel usage + # (can fail silently otherwise). + num_workers: 1 +# Environment config env: - name: MixedUseFanFCU-v0 + # Gym framework of environment gym: energym - config: - days: 365 - energym_environment: MixedUseFanFCU-v0 - gym_kwargs: - max_episode_length: 35040 - normalize: true - step_period: 15 - weather: GRC_A_Athens -# general config + # Environment name + name: MixedUseFanFCU-v0 + # Configuration passed to integration's `create_env()` + # function. Specific to whatever gym framework you use + # (in this case Energym). + config: null +# Wrappers (None added by default) wrappers: [] +# General Beobench config general: + # Directory to write experiment files to. This argument + # is equivalent to the `local_dir` argument in + # `tune.run()`. local_dir: ./beobench_results + # Name of wandb project. wandb_project: null + # Name of wandb entity. wandb_entity: null + # Name of wandb run group. 
wandb_group: null + # Wandb API key wandb_api_key: null + # Name of MLflow experiment mlflow_name: null + # Whether to use GPU from the host system. Requires that + # GPU is available. use_gpu: False + # Size of the shared memory available to the experiment + # container. docker_shm_size: 4gb + # Whether to force a re-build, even if image already + # exists. + force_build: False + # Whether to use cache IF building experiment container. + # This will not do anything if force_build is disabled, + # and image already exists. use_no_cache: False + # File or github path to beobench package. For + # developement purpose only. This will install a custom + # beobench version inside the experiment container. + # By default the latest PyPI version is installed. dev_path: null + # List of docker flags to be added to docker run command + # of Beobench experiment container. docker_flags: null + # Extra dependencies to install with beobench. Used + # during pip installation in experiment image, + # as in using the command: + # `pip install beobench[]` beobench_extras: "extended" - # Whether to log all episode data to wandb after running experiment - log_full_episode_data: False + # Number of experiment samples to run. This defaults + # to a single sample, i.e. just running the + # experiment once. 
+ num_samples: 1 + # Beobench version + version: 0.5.0 diff --git a/beobench/data/configs/gym_boptest.yaml b/beobench/data/configs/gym_boptest.yaml new file mode 100644 index 00000000..65d57bc3 --- /dev/null +++ b/beobench/data/configs/gym_boptest.yaml @@ -0,0 +1,36 @@ +# BOPTEST default config +# Some of the descriptions taken from +# https://github.com/ibpsa/project1-boptest-gym/blob/master/boptestGymEnv.py + +env: + gym: boptest + config: + name: bestest_hydronic_heat_pump + # whether to normalise the observations and actions + normalize: True + discretize: True + gym_kwargs: + actions: ["oveHeaPumY_u"] + # Dictionary mapping observation keys to a tuple with the lower + # and upper bound of each observation. Observation keys must + # belong either to the set of measurements or to the set of + # forecasting variables of the BOPTEST test case. Contrary to + # the actions, the expected minimum and maximum values of the + # measurement and forecasting variables are not provided from + # the BOPTEST framework, although they are still relevant here + # e.g. for normalization or discretization. Therefore, these + # bounds need to be provided by the user. + # If `time` is included as an observation, the time in seconds + # will be passed to the agent. This is the remainder time from + # the beginning of the episode and for periods of the length + # specified in the upper bound of the time feature. 
+ observations: + reaTZon_y: [280.0, 310.0] + # Set to True if desired to use a random start time for each episode + random_start_time: True + # Maximum duration of each episode in seconds + max_episode_length: 31536000 # one year in seconds + # Desired simulation period to initialize each episode + warmup_period: 10 + # Sampling time in seconds + step_period: 900 # = 15min \ No newline at end of file diff --git a/beobench/data/configs/gym_energym.yaml b/beobench/data/configs/gym_energym.yaml new file mode 100644 index 00000000..3f14d494 --- /dev/null +++ b/beobench/data/configs/gym_energym.yaml @@ -0,0 +1,20 @@ +# Energym default config + +env: + gym: energym + config: + # Number of simulation days + days: 365 + # Name of energym environment + name: Apartments2Thermal-v0 + gym_kwargs: + # Maximum number of timesteps in one episode + max_episode_length: 35040 # corresponds to a year of 15 min steps + # Whether state and action spaces are normalized + normalize: true + # Number of real-world minutes between timesteps in building simulation + step_period: 15 + # Whether to allow a complete environment reset + ignore_reset: True + # Weather file to use for the scenario (possible files depend on env chosen) + weather: ESP_CT_Barcelona \ No newline at end of file diff --git a/beobench/data/configs/gym_sinergym.yaml b/beobench/data/configs/gym_sinergym.yaml new file mode 100644 index 00000000..ef6a51c9 --- /dev/null +++ b/beobench/data/configs/gym_sinergym.yaml @@ -0,0 +1,9 @@ +# Sinergym default config + +env: + gym: sinergym + config: + # Sinergym env name + name: Eplus-5Zone-hot-continuous-v1 + # whether to normalise the observations + normalize: True \ No newline at end of file diff --git a/beobench/data/configs/ppo.yaml b/beobench/data/configs/method_ppo.yaml similarity index 94% rename from beobench/data/configs/ppo.yaml rename to beobench/data/configs/method_ppo.yaml index 1808820c..c133e571 100644 --- a/beobench/data/configs/ppo.yaml +++ 
b/beobench/data/configs/method_ppo.yaml @@ -8,8 +8,10 @@ agent: config: # given to ray.tune.run() as arguments (since rllib set before) run_or_experiment: PPO log_to_file: True + stop: + timesteps_total: 35040 # 1 year w 15 min timesteps config: - lr: 0.0005 + lr: 0.005 model: fcnet_activation: relu fcnet_hiddens: [256,256,256,256] diff --git a/beobench/data/configs/random_action.yaml b/beobench/data/configs/method_random_action.yaml similarity index 78% rename from beobench/data/configs/random_action.yaml rename to beobench/data/configs/method_random_action.yaml index 7a2887ff..c6c87b93 100644 --- a/beobench/data/configs/random_action.yaml +++ b/beobench/data/configs/method_random_action.yaml @@ -3,6 +3,8 @@ agent: origin: random_action config: + stop: + timesteps_total: 35040 config: horizon: 480 general: diff --git a/beobench/data/configs/sinergym.yaml b/beobench/data/configs/sinergym.yaml deleted file mode 100644 index f83e6c50..00000000 --- a/beobench/data/configs/sinergym.yaml +++ /dev/null @@ -1,6 +0,0 @@ -env: - name: Eplus-5Zone-hot-continuous-v1 - gym: sinergym - config: - name: Eplus-5Zone-hot-continuous-v1 - normalize: True \ No newline at end of file diff --git a/beobench/data/configs/simple.yaml b/beobench/data/configs/test_energym.yaml similarity index 91% rename from beobench/data/configs/simple.yaml rename to beobench/data/configs/test_energym.yaml index 9c92814b..df266bc2 100644 --- a/beobench/data/configs/simple.yaml +++ b/beobench/data/configs/test_energym.yaml @@ -11,13 +11,12 @@ agent: # environment config env: - name: Apartments2Thermal-v0 gym: energym config: # Number of simulation days days: 365 # Name of energym environment - energym_environment: Apartments2Thermal-v0 + name: Apartments2Thermal-v0 gym_kwargs: # Maximum number of timesteps in one episode max_episode_length: 35040 # corresponds to a year of 15 min steps diff --git a/beobench/experiment/config_parser.py b/beobench/experiment/config_parser.py index fef44a24..92672c11 100644 
--- a/beobench/experiment/config_parser.py +++ b/beobench/experiment/config_parser.py @@ -6,9 +6,14 @@ import yaml import ast import sys +import random +import os +import beobench import beobench.utils +from beobench.constants import USER_CONFIG_PATH + # To enable compatiblity with Python<=3.8 (e.g. for sinergym dockerfile) if sys.version_info[1] >= 9: import importlib.resources @@ -116,7 +121,7 @@ def get_standard_config(name: str) -> dict: def get_default() -> dict: - """Get default beobench config + """Get default beobench config. Returns: dict: default beobench config dict @@ -125,23 +130,97 @@ def get_default() -> dict: return get_standard_config("default") -def get_agent(name: str) -> pathlib.Path: +def get_user() -> dict: + """Get user beobench config. + + Returns: + dict: default beobench config dict + """ + + if os.path.isfile(USER_CONFIG_PATH): + print(f"Beobench: recognised user config at '{USER_CONFIG_PATH}'.") + user_config = parse(USER_CONFIG_PATH) + else: + user_config = {} + + return user_config + + +def add_default_and_user_configs(config: dict) -> dict: + """Add default and user configs to existing beobench config. - """Get standard beobench agent from beobench.data.agents. + Args: + config (dict): beobench config Returns: - Path: path/traversible to agent. + dict: merged dict of given config, and user and default configs. 
""" - agents_path = importlib.resources.files("beobench.data.agents") - agent_path = agents_path.joinpath(f"{name}.py") + default_config = get_default() + user_config = get_user() + user_default_config = beobench.utils.merge_dicts( + a=default_config, b=user_config, let_b_overrule_a=True + ) + config = beobench.utils.merge_dicts( + a=user_default_config, b=config, let_b_overrule_a=True + ) - return agent_path + return config def get_autogen_config() -> dict: - run_id = uuid.uuid4().hex + """Get automatically generated parts of a Beobench configuration.""" + + config = { + "autogen": { + "run_id": uuid.uuid4().hex, + "random_seed": random.randint(1, 10000000), + }, + } + + return config - config = {"autogen": {"run_id": run_id}} + +def check_config(config: dict) -> None: + """Check if config is valid. + + Args: + config (dict): Beobench config. + """ + requested_version = config["general"]["version"] + if requested_version != beobench.__version__: + raise ValueError( + f"Beobench config requests version {requested_version}" + f" that does not match installed version {beobench.__version__}. " + "Change the installed Beobench version to the requested version " + f"{requested_version} or remove general.version parameter from config " + "to prevent this error." + ) + + +def get_high_level_config(method: str, gym: str, env: str) -> dict: + """Get config from agent, gym and env params. + + Args: + method (str): name of method + gym (str): name of gym + env (str): name of environment. + + Returns: + dict: Beobench configuration. 
+ """ + + config = {} + if method is not None: + config = beobench.utils.merge_dicts( + config, + get_standard_config(f"method_{method}"), + ) + if gym is not None and env is not None: + config = beobench.utils.merge_dicts( + config, + get_standard_config(f"gym_{gym}"), + ) + config["env"]["name"] = env return config diff --git a/beobench/experiment/containers.py b/beobench/experiment/containers.py index 7d891ec0..88d86640 100644 --- a/beobench/experiment/containers.py +++ b/beobench/experiment/containers.py @@ -3,7 +3,9 @@ import contextlib import subprocess import os +import docker +import beobench from beobench.constants import AVAILABLE_INTEGRATIONS # To enable compatiblity with Python<=3.6 (e.g. for sinergym dockerfile) @@ -16,12 +18,22 @@ importlib.resources = importlib_resources +def check_image_exists(image: str): + client = docker.from_env() + + try: + client.images.get(image) + return True + except docker.errors.ImageNotFound: + return False + + def build_experiment_container( build_context: str, use_no_cache: bool = False, - version: str = "latest", beobench_package: str = "beobench", beobench_extras: str = "extended", + force_build: bool = False, ) -> None: """Build experiment container from beobench/integrations/boptest/Dockerfile. @@ -31,11 +43,14 @@ def build_experiment_container( of existing beobench integration (e.g. `boptest`). See the official docs https://docs.docker.com/engine/reference/commandline/build/ for more info. use_no_cache (bool, optional): wether to use cache in build. Defaults to False. - version (str, optional): version to add to container tag. Defaults to "latest". beobench_extras (str, optional): which beobench extra dependencies to install As in `pip install beobench[extras]`. Defaults to "extended". + force_build (bool, optional): whether to force a re-build, even if + image already exists. 
""" + version = beobench.__version__ + # Flags are shared between gym image build and gym_and_beobench image build flags = [] @@ -59,19 +74,27 @@ def build_experiment_container( ) package_build_context = True - print( - ( - f"Recognised integration named {gym_name}: using build" - f" context {build_context}" - ) - ) + print(f"Beobench: recognised integration named {gym_name}.") else: # get alphanumeric name from context context_name = "".join(e for e in build_context if e.isalnum()) image_name = f"beobench_custom_{context_name}" package_build_context = False + # Create tags of different image stages stage0_image_tag = f"{image_name}_base:{version}" + stage1_image_tag = f"{image_name}_intermediate:{version}" + stage2_image_tag = f"{image_name}_complete:{version}" + + # skip build if image already exists. + if not force_build and check_image_exists(stage2_image_tag): + print(f"Beobench: existing image found ({stage2_image_tag}). Skipping build.") + return stage2_image_tag + + print( + f"Beobench: image not found ({stage2_image_tag}) or forced", + "rebuild. 
Building image.", + ) print(f"Building experiment base image `{stage0_image_tag}`...") @@ -99,7 +122,6 @@ def build_experiment_container( # Part 2: build stage 1 (intermediate) experiment image # This includes installation of beobench in experiment image - stage1_image_tag = f"{image_name}_intermediate:{version}" stage1_dockerfile = str( importlib.resources.files("beobench.data.dockerfiles").joinpath( "Dockerfile.experiment" @@ -148,7 +170,6 @@ def build_experiment_container( build_context = beobench_package # need to add std-in-dockerfile via -f flag and not context directly stage2_docker_flags = ["-f", "-"] - stage2_image_tag = f"{image_name}_complete:{version}" stage2_build_args = [ *build_commands, diff --git a/beobench/experiment/scheduler.py b/beobench/experiment/scheduler.py index 1612a73a..c198b0d9 100644 --- a/beobench/experiment/scheduler.py +++ b/beobench/experiment/scheduler.py @@ -7,6 +7,7 @@ import subprocess import pathlib import yaml +import copy import contextlib from typing import Union @@ -28,6 +29,7 @@ def run( config: Union[str, dict, pathlib.Path, list] = None, method: str = None, + gym: str = None, env: str = None, local_dir: str = None, wandb_project: str = None, @@ -42,6 +44,8 @@ def run( no_additional_container: bool = False, docker_flags: list[str] = None, beobench_extras: str = None, + force_build: str = False, + num_samples: int = None, ) -> None: """Run experiment. @@ -71,12 +75,13 @@ def run( docker_shm_size(str, optional): size of the shared memory available to the container. Defaults to None." use_no_cache (bool, optional): whether to use cache to build experiment - container. Defaults to False. + container. Defaults to False. This will not do anything if force_build is + disabled, and image already exists. dev_path (str, optional): file or github path to beobench package. For developement purpose only. This will install a custom beobench version inside the experiment container. By default the latest PyPI version is installed. 
- no_additional_container (bool, optional): wether not to start another container + no_additional_container (bool, optional): whether not to start another container to run experiments in. Defaults to False, which means that another container is started to run experiments in. docker_flags (list[str], optional): list of docker flags to be added to @@ -84,6 +89,10 @@ def run( beobench_extras (str, optional): extra dependencies to install with beobench. Used during pip installation in experiment image, as in using the command: `pip install beobench[]` + force_build (bool, optional): whether to force a re-build, even if + image already exists. + num_samples (int, optional): number of experiment samples to run. This defaults + to a single sample, i.e. just running the experiment once. """ print("Beobench: starting experiment run ...") # parsing relevant kwargs and adding them to config @@ -100,183 +109,201 @@ def run( dev_path=dev_path, docker_flags=docker_flags, beobench_extras=beobench_extras, + force_build=force_build, + num_samples=num_samples, ) # parse combined config config = beobench.experiment.config_parser.parse([config, kwarg_config]) - # adding any defaults that haven't been set by user given config - default_config = beobench.experiment.config_parser.get_default() - config = beobench.utils.merge_dicts( - a=default_config, b=config, let_b_overrule_a=True + high_level_config = beobench.experiment.config_parser.get_high_level_config( + method=method, gym=gym, env=env ) - - autogen_config = beobench.experiment.config_parser.get_autogen_config() config = beobench.utils.merge_dicts( - a=autogen_config, b=config, let_b_overrule_a=True + config, high_level_config, let_b_overrule_a=True ) - # select agent script - if config["agent"]["origin"] in AVAILABLE_AGENTS: - agent_file = beobench.experiment.config_parser.get_agent( - config["agent"]["origin"] - ) - package_agent = True - else: - agent_file = pathlib.Path(config["agent"]["origin"]) - package_agent = False - - # 
TODO add parsing of high level API arguments env and agent - if env or method: - raise ValueError( + # adding any defaults that haven't been set by given config + # The configs can be conflicting: + # config overrules user_config which overrules default_config. + config = beobench.experiment.config_parser.add_default_and_user_configs(config) + beobench.experiment.config_parser.check_config(config) + + # running experiment num_samples times + num_samples = config["general"]["num_samples"] + for i in range(1, num_samples + 1): + # TODO: enable checking whether something is run in container + # and do not print the statement below if inside experiment container. + print( ( - "This functionality has been deprecated. Directly configure" - " the environment or method in the config argument." + f"Beobench: running experiment in container with environment " + f"{config['env']['name']}" + f" and agent from {config['agent']['origin']}. Sample {i} of" + f" {num_samples}." ) ) - print( - ( - f"Beobench: running experiment with environment {config['env']['name']}" - f" and agent from {config['agent']['origin']}." 
+ autogen_config = beobench.experiment.config_parser.get_autogen_config() + config = beobench.utils.merge_dicts( + a=config, b=autogen_config, let_b_overrule_a=True ) - ) - if no_additional_container: - # Execute experiment - # (this is usually reached from inside an experiment container) + if no_additional_container: + # Execute experiment + # (this is usually reached from inside an experiment container) - container_ro_dir_abs = CONTAINER_RO_DIR.absolute() - args = ["python", str(container_ro_dir_abs / agent_file.name)] - subprocess.check_call(args) + container_ro_dir_abs = CONTAINER_RO_DIR.absolute() + args = [ + "python", + str(container_ro_dir_abs / _get_agent_file(config)[0].name), + ] + subprocess.check_call(args) - else: - # build and run experiments in docker container - - ### part 1: build docker images - # Ensure local_dir exists, and create otherwise - local_dir_path = pathlib.Path(config["general"]["local_dir"]) - local_dir_path.mkdir(parents=True, exist_ok=True) - local_dir_path_abs = local_dir_path.absolute() - container_data_dir_abs = CONTAINER_DATA_DIR.absolute() - - if ( - config["general"]["beobench_extras"] == "extended" - and config["agent"]["origin"] == "rllib" - ): - beobench_extras = "extended,rllib" else: - beobench_extras = config["general"]["beobench_extras"] + # First build container image and then execute experiment inside container + # But only run one experiment per container. 
+ config["general"]["num_samples"] = 1 + _build_and_run_in_container(config) - image_tag = beobench.experiment.containers.build_experiment_container( - build_context=config["env"]["gym"], - use_no_cache=config["general"]["use_no_cache"], - beobench_extras=beobench_extras, - beobench_package=config["general"]["dev_path"], - ) - ### part 2: create args and run command in docker container - if config["general"]["docker_flags"] is None: - docker_flags = [] - else: - docker_flags = config["general"]["docker_flags"] +def _build_and_run_in_container(config: dict) -> None: + """Build container image and run experiment in docker container. - # if no wandb API key is given try to get it from env - if config["general"]["wandb_api_key"] is None: - # this will return "" if env var not set - wandb_api_key = os.getenv("WANDB_API_KEY", "") - else: - wandb_api_key = config["general"]["wandb_api_key"] - - # We don't want the key to be logged in wandb - del config["general"]["wandb_api_key"] - - # Save config to local dir and add mount flag for config - config_path = local_dir_path / "tmp" / "config.yaml" - config_path.parent.mkdir(parents=True, exist_ok=True) - config_path_abs = config_path.absolute() - config_container_path_abs = (CONTAINER_RO_DIR / "config.yaml").absolute() - with open(config_path, "w", encoding="utf-8") as conf_file: - yaml.dump(config, conf_file) - docker_flags += [ - "-v", - f"{config_path_abs}:{config_container_path_abs}:ro", - ] + Args: + config (dict): Beobench configuration. + """ - # setup container name with unique identifier - unique_id = uuid.uuid4().hex[:6] - container_name = f"auto_beobench_experiment_{unique_id}" + # We need to deepcopy the config as some sensitive data (API keys) is deleted at + # some point below. This can otherwise cause problems when running multiple + # samples of the same experiment. 
+ config = copy.deepcopy(config) + + ### part 1: build docker images + # Ensure local_dir exists, and create otherwise + local_dir_path = pathlib.Path(config["general"]["local_dir"]) + local_dir_path.mkdir(parents=True, exist_ok=True) + local_dir_path_abs = local_dir_path.absolute() + container_data_dir_abs = CONTAINER_DATA_DIR.absolute() + + if ( + config["general"]["beobench_extras"] == "extended" + and config["agent"]["origin"] == "rllib" + ): + beobench_extras = "extended,rllib" + else: + beobench_extras = config["general"]["beobench_extras"] - # enable docker-from-docker access only for built-in boptest integration. - if config["env"]["gym"] == "boptest": + image_tag = beobench.experiment.containers.build_experiment_container( + build_context=config["env"]["gym"], + use_no_cache=config["general"]["use_no_cache"], + beobench_extras=beobench_extras, + beobench_package=config["general"]["dev_path"], + force_build=config["general"]["force_build"], + ) - # Create docker network (only useful if starting other containers) - beobench.experiment.containers.create_docker_network("beobench-net") + ### part 2: create args and run command in docker container + if config["general"]["docker_flags"] is None: + docker_flags = [] + else: + docker_flags = config["general"]["docker_flags"] - docker_flags += [ - # enable access to docker-from-docker - "-v", - "/var/run/docker.sock:/var/run/docker.sock", - # network allows access to BOPTEST API in other containers - "--network", - "beobench-net", - ] + # if no wandb API key is given try to get it from env + if config["general"]["wandb_api_key"] is None: + # this will return "" if env var not set + wandb_api_key = os.getenv("WANDB_API_KEY", "") + else: + wandb_api_key = config["general"]["wandb_api_key"] - # enabling GPU access in docker container - if config["general"]["use_gpu"]: - docker_flags += [ - # add all available GPUs - "--gpus=all", - ] + # We don't want the key to be logged in wandb + del 
config["general"]["wandb_api_key"] - # define flags for beobench scheduler call inside experiment container - beobench_flags = [] - beobench_flags.append(f'--config="{config}"') - beobench_flag_str = " ".join(beobench_flags) - - with contextlib.ExitStack() as stack: - # if using package agent, get (potentially temp.) agent file path - if package_agent: - agent_file = stack.enter_context( - importlib.resources.as_file(agent_file) - ) - # load agent file - ag_file_abs = agent_file.absolute() - ag_file_on_docker_abs = (CONTAINER_RO_DIR / agent_file.name).absolute() - docker_flags += [ - "-v", - f"{ag_file_abs}:{ag_file_on_docker_abs}:ro", - ] + # Save config to local dir and add mount flag for config + config_path = local_dir_path / "tmp" / "config.yaml" + config_path.parent.mkdir(parents=True, exist_ok=True) + config_path_abs = config_path.absolute() + config_container_path_abs = (CONTAINER_RO_DIR / "config.yaml").absolute() + with open(config_path, "w", encoding="utf-8") as conf_file: + yaml.dump(config, conf_file) + docker_flags += [ + "-v", + f"{config_path_abs}:{config_container_path_abs}:ro", + ] - args = [ - "docker", - "run", - # mount experiment data dir - "-v", - f"{local_dir_path_abs}:{container_data_dir_abs}", - # automatically remove container when stopped/exited - "--rm", - # add more memory - f"--shm-size={config['general']['docker_shm_size']}", - "--name", - container_name, - *docker_flags, - image_tag, - "/bin/bash", - "-c", - ( - f"export WANDB_API_KEY={wandb_api_key} && " - f"beobench run {beobench_flag_str} " - "--no-additional-container && bash" - ), - ] + # setup container name with unique identifier + unique_id = uuid.uuid4().hex[:6] + container_name = f"auto_beobench_experiment_{unique_id}" - arg_str = " ".join(args) - if wandb_api_key: - arg_str = arg_str.replace(wandb_api_key, "") - print(f"Executing docker command: {arg_str}") + # enable docker-from-docker access only for built-in boptest integration. 
+ if config["env"]["gym"] == "boptest": - subprocess.check_call(args) + # Create docker network (only useful if starting other containers) + beobench.experiment.containers.create_docker_network("beobench-net") + + docker_flags += [ + # enable access to docker-from-docker + "-v", + "/var/run/docker.sock:/var/run/docker.sock", + # network allows access to BOPTEST API in other containers + "--network", + "beobench-net", + ] + + # enabling GPU access in docker container + if config["general"]["use_gpu"]: + docker_flags += [ + # add all available GPUs + "--gpus=all", + ] + + # define flags for beobench scheduler call inside experiment container + beobench_flags = [] + beobench_flags.append(f'--config="{config}"') + beobench_flag_str = " ".join(beobench_flags) + + with contextlib.ExitStack() as stack: + # get agent file path + agent_file, uses_importlib = _get_agent_file(config) + # if using package built-in agent, then make sure agent file path exists + # by entering context (because it's potentially temp.). 
+ if uses_importlib: + agent_file = stack.enter_context(importlib.resources.as_file(agent_file)) + # load agent file + ag_file_abs = agent_file.absolute() + ag_file_on_docker_abs = (CONTAINER_RO_DIR / agent_file.name).absolute() + docker_flags += [ + "-v", + f"{ag_file_abs}:{ag_file_on_docker_abs}:ro", + ] + + args = [ + "docker", + "run", + # mount experiment data dir + "-v", + f"{local_dir_path_abs}:{container_data_dir_abs}", + # automatically remove container when stopped/exited + "--rm", + # add more memory + f"--shm-size={config['general']['docker_shm_size']}", + "--name", + container_name, + *docker_flags, + image_tag, + "/bin/bash", + "-c", + ( + f"export WANDB_API_KEY={wandb_api_key} && " + f"beobench run {beobench_flag_str} " + "--no-additional-container && bash" + ), + ] + + arg_str = " ".join(args) + if wandb_api_key: + arg_str = arg_str.replace(wandb_api_key, "") + print(f"Executing docker command: {arg_str}") + + subprocess.check_call(args) def _create_config_from_kwargs(**kwargs) -> dict: @@ -290,3 +317,25 @@ def _create_config_from_kwargs(**kwargs) -> dict: if value: config["general"][key] = value return config + + +def _get_agent_file(config: dict) -> tuple: + """Get agent file path based on config. + + Args: + config (dict): Beobench config. + + Returns: + tuple: path or traversible of agent file, and whether agent is accessed via + importlib. 
+ """ + + if config["agent"]["origin"] in AVAILABLE_AGENTS: + agents_path = importlib.resources.files("beobench.data.agents") + agent_file = agents_path.joinpath(f"{config['agent']['origin']}.py") + uses_importlib = True + else: + agent_file = pathlib.Path(config["agent"]["origin"]) + uses_importlib = False + + return agent_file, uses_importlib diff --git a/beobench/wrappers/general.py b/beobench/wrappers/general.py index a52ec021..6517cdc1 100644 --- a/beobench/wrappers/general.py +++ b/beobench/wrappers/general.py @@ -4,6 +4,7 @@ import gym.spaces import warnings import wandb +import numpy as np from beobench.experiment.provider import config @@ -86,15 +87,29 @@ def reset(self, **kwargs): class WandbLogger(gym.Wrapper): """Wrapper to log all env data for every xth step.""" - def __init__(self, env: gym.Env, log_freq: int = 1): + def __init__( + self, + env: gym.Env, + log_freq: int = 1, + summary_metric_keys: list = None, + restart_sum_metrics_at_reset: bool = False, + ): """Wrapper to log all env data for every xth step. Args: env (gym.Env): environment to wrap. log_freq (int, optional): how often to log the step() method. E.g. for 2 every second step is logged. Defaults to 1. + summary_metric_keys (list, optional): list of keys of logged metrics for + summary metrics such as cummulative sum and mean are computed. This + defaults to ["env.returns.reward"]. WARNING: summary metrics only + work for log_feq == 1, otherwise the cummulative metrics will not be + correct. 
""" + if summary_metric_keys is None: + summary_metric_keys = ["env.returns.reward"] + super().__init__(env) wandb.init( id=config["autogen"]["run_id"], @@ -104,6 +119,10 @@ def __init__(self, env: gym.Env, log_freq: int = 1): ) self.log_freq = log_freq self.total_env_steps = 0 + self.restart_sum_metrics_at_reset = restart_sum_metrics_at_reset + self.cum_metrics = {key: 0 for key in summary_metric_keys} + self.num_env_resets = 0 + self.last_log_dict = {} def step(self, action): obs, reward, done, info = self.env.step(action) @@ -111,16 +130,51 @@ def step(self, action): self.total_env_steps += 1 if self.total_env_steps % self.log_freq == 0: + # TODO: enable flat dict for dict action and observation spaces + # This would allow for summary values of invididual actions/observations + if isinstance(action, (list, np.ndarray)): + for i, act in enumerate(action): + new_action = {f"{i}": act} + action = new_action + log_dict = { - "env": { - "action": action, - "obs": obs, - "reward": reward, - "done": done, - "info": info, - "step": self.total_env_steps, - } + "env.inputs.action": action, + "env.returns.obs": obs, + "env.returns.reward": reward, + "env.returns.done": done, + "env.total_steps": self.total_env_steps, + **{f"env.returns.info.{key}": value for key, value in info.items()}, } + log_dict = self._create_summary_metrics(log_dict) wandb.log(log_dict) + self.last_log_dict = log_dict return obs, reward, done, info + + def reset(self): + + if self.restart_sum_metrics_at_reset: + wandb.log({"reset": self.last_log_dict, "reset.num": self.num_env_resets}) + self.cum_metrics = {key: 0 for key in self.cum_metrics.keys()} + self.num_env_resets += 1 + + return self.env.reset() + + def _create_summary_metrics(self, log_dict: dict): + """Create summary of variables in log dict. 
+ + In particular, the + + Args: + log_dict (dict): _description_ + + Returns: + _type_: _description_ + """ + + for key in self.cum_metrics.keys(): + self.cum_metrics[key] += log_dict[key] + log_dict[key + "_cum"] = self.cum_metrics[key] + log_dict[key + "_mean"] = self.cum_metrics[key] / self.total_env_steps + + return log_dict diff --git a/docs/_static/beobench_config_v1.png b/docs/_static/beobench_config_v1.png new file mode 100644 index 00000000..e789f321 Binary files /dev/null and b/docs/_static/beobench_config_v1.png differ diff --git a/docs/_static/wandb_demo.png b/docs/_static/wandb_demo.png deleted file mode 100644 index 394d6555..00000000 Binary files a/docs/_static/wandb_demo.png and /dev/null differ diff --git a/docs/advanced_usage.rst b/docs/advanced_usage.rst deleted file mode 100644 index d5ced58c..00000000 --- a/docs/advanced_usage.rst +++ /dev/null @@ -1,13 +0,0 @@ -============== -Advanced usage -============== - -Welcome to beobench's usage guides! - -.. toctree:: - :maxdepth: 2 - - guides/visualisation - guides/intro_experiment - guides/add_env - guides/dev_env diff --git a/docs/conf.py b/docs/conf.py index 3cd6d35c..eff6e60e 100755 --- a/docs/conf.py +++ b/docs/conf.py @@ -38,6 +38,8 @@ "sphinx.ext.viewcode", "sphinx.ext.autosummary", "sphinx_tabs.tabs", + "sphinx.ext.napoleon", + "sphinx_autodoc_typehints", # "myst_parser", # "m2r2", ] diff --git a/docs/experiments.rst b/docs/experiments.rst deleted file mode 100644 index 5066450b..00000000 --- a/docs/experiments.rst +++ /dev/null @@ -1,15 +0,0 @@ -=========== -Experiments -=========== - -How to run experiments - -Create dev container with vscode - -Then outside of dev container, start dev container:: - - docker start - -And then:: - - docker exec nohup python -m beobench.experiment.scheduler --use-wandb --wandb-api-key= & \ No newline at end of file diff --git a/docs/getting_started.rst b/docs/getting_started.rst index fa868961..95413fed 100644 --- a/docs/getting_started.rst +++ 
b/docs/getting_started.rst @@ -5,7 +5,7 @@ Getting started =============== .. tip:: - This section is (almost) identical to the quickstart section in the repository README. If you already read that one, you may want to skip to the :doc:`advanced usage guides`. + This section is (almost) identical to the quickstart section in the repository README. If you already read that one, you may want to skip to the :doc:`advanced usage guides`. .. Note that below we import multiple separate parts from the main repo readme. This separation is done to allow us to add rst elements inbetween that can't be parsed by GitHubs rst parser. diff --git a/docs/guides/configuration.rst b/docs/guides/configuration.rst new file mode 100644 index 00000000..a033198c --- /dev/null +++ b/docs/guides/configuration.rst @@ -0,0 +1,32 @@ +YAML configuration +================== + +Beobench uses `YAML files `_ to define experiment configurations. The file below is the default configuration for every Beobench experiment. The comments above each setting describe its functionality. + +.. literalinclude:: ../../beobench/data/configs/default.yaml + :language: yaml + + +Gym-specific configurations +--------------------------- + +Each gym framework integration has its own configuration setup. See below for the default environment configurations of the three supported frameworks. + +BOPTEST +^^^^^^^ + +.. literalinclude:: ../../beobench/data/configs/gym_boptest.yaml + :language: yaml + + +Energym +^^^^^^^ + +.. literalinclude:: ../../beobench/data/configs/gym_energym.yaml + :language: YAML + +Sinergym +^^^^^^^^ + +.. literalinclude:: ../../beobench/data/configs/gym_sinergym.yaml + :language: yaml \ No newline at end of file diff --git a/docs/guides/index.rst b/docs/guides/index.rst new file mode 100644 index 00000000..a3e5cc40 --- /dev/null +++ b/docs/guides/index.rst @@ -0,0 +1,14 @@ +====== +Guides +====== + +Welcome to beobench's usage guides! + +.. 
toctree:: + :maxdepth: 2 + + configuration + add_env + installation_linux + tips + dev_env \ No newline at end of file diff --git a/docs/guides/intro_experiment.rst b/docs/guides/intro_experiment.rst deleted file mode 100644 index 3ad6049c..00000000 --- a/docs/guides/intro_experiment.rst +++ /dev/null @@ -1,45 +0,0 @@ -Creating new experiments ------------------------- - -Overview -^^^^^^^^ - -The diagram below gives an overview of how beobench experiments work. The ``beobench.experiment.scheduler.run()`` function builds and starts an *experiment container*. Within this container all experiments are being run using the *Ray RLlib* and *Ray Tune* libraries. Results are saved to a local folder (by default ``./beobench_results``), and optionally to Weights and Biases (wandb) as well. - -.. image:: ../_static/experiment_run_flow.png - :width: 450 px - :alt: experiment run diagram - :align: center - -Experiment configuration -^^^^^^^^^^^^^^^^^^^^^^^^^ - -Beobench experiments are configured either using a *Python dictionary* or an equivalent *yaml file*. For example, the following ``config.yaml`` file configures an experiment that evaluates an RLlib-based *proximal policy optimisation* (PPO) agent on the ``MixedUseFanFCU-v0`` environment of Energym: - - -.. literalinclude:: ../../beobench/data/configs/default.yaml - :language: yaml - - - -Given this configuration file ``config.yaml``, we can run the experiment using the following commands: - -.. include:: ../snippets/run_standard_experiment.rst - -Running experiment in background -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -Once you're up and running with Beobench, you will likely eventually want to start running experiments in the background. One way of doing this on Linux is using ``nohup``: - -.. code-block:: console - - nohup beobench run -c config.yaml & - -This will save the console output of your experiment to `nohup.out` in your current directory. 
- -If you have setup the Beobench development environment, you may want to start your experiments inside the devcontainer. You can do this using: - -.. code-block:: console - - nohup docker exec beobench run -c config.yaml & - diff --git a/docs/guides/quickstart.rst b/docs/guides/quickstart.rst deleted file mode 100644 index 040935a9..00000000 --- a/docs/guides/quickstart.rst +++ /dev/null @@ -1,4 +0,0 @@ - -.. include:: ../../README.rst - :start-after: start-quickstart - :end-before: end-quickstart \ No newline at end of file diff --git a/docs/guides/tips.rst b/docs/guides/tips.rst new file mode 100644 index 00000000..24857adb --- /dev/null +++ b/docs/guides/tips.rst @@ -0,0 +1,20 @@ +Miscellaneous tips +------------------ + +Running experiment in background +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Once you're up and running with Beobench, you will likely eventually want to start running experiments in the background. One way of doing this on Linux is using ``nohup``: + +.. code-block:: console + + nohup beobench run -c config.yaml & + +This will save the console output of your experiment to `nohup.out` in your current directory. + +If you have setup the Beobench development environment, you may want to start your experiments inside the devcontainer. You can do this using: + +.. code-block:: console + + nohup docker exec beobench run -c config.yaml & + diff --git a/docs/guides/visualisation.rst b/docs/guides/visualisation.rst deleted file mode 100644 index 29259f6d..00000000 --- a/docs/guides/visualisation.rst +++ /dev/null @@ -1,56 +0,0 @@ - -Visualisation -------------- - -Running a basic experiment -^^^^^^^^^^^^^^^^^^^^^^^^^^ - -Let's run your first experiment with beobench. After installing beobench using the :ref:`installation guide `, you can simply use one of the following two commands to run a first experiment: - -.. tabs:: - - .. code-tab:: console Console - - beobench run - - .. 
code-tab:: python - - import beobench - - beobench.run() - -This will run the default experiment `defined here `_: applying the reinforcement learning method `Proximal Policy Optimisation (PPO) `_ to control a residential `HVAC system `_ (defined in the `BOPTEST bestest_hydronic testcase `_). The results of this experiment will be logged in a newly created ``./beobench_results`` directory. - -Visualize experiment results -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -Although the ``./beobench_results`` folder already contains a lot of information, it can be more helpful to log our experiment data to a proper experiment tracking service. Beobench provides an integration with the `Weights and Biases (wandb) `_ cloud-based tracking service for this purpose. To use wandb we add the following arguments to the previous command: - -.. tabs:: - - .. code-tab:: console Console - - beobench run \ - --wandb-project= \ - --wandb-entity= \ - --wandb-api-key= - - .. code-tab:: python - - import beobench - - beobench.run( - wandb_project=, - wandb_entity=, - wandb_api_key= - ) - -where you replace ````, ```` and ```` with your own wandb information. See `this guide `_ to get started with wandb. With this setup you will now be able to see the experiment progress on your wandb dashboard, similar to the one shown below. You can `find a live dashboard of another run of this experiment here `_. - -.. image:: ../_static/wandb_demo.png - :width: 550 px - :alt: wandb website - :align: center - :target: `example wandb`_ - -.. _example wandb: https://wandb.ai/beobench/doc-test/runs/66299_00000 \ No newline at end of file diff --git a/docs/index.rst b/docs/index.rst index 15e9e5d8..503a1b72 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -1,5 +1,5 @@ ************* -Beobench Docs +Beobench docs ************* .. 
include:: ../README.rst @@ -16,8 +16,7 @@ Contents Home getting_started envs - advanced_usage - api + guides/index contributing credits history diff --git a/requirements/doc_requirements.txt b/requirements/doc_requirements.txt index 4013f01d..c443cb46 100644 --- a/requirements/doc_requirements.txt +++ b/requirements/doc_requirements.txt @@ -14,4 +14,7 @@ docutils==0.16 # Because of the following bug # https://github.com/readthedocs/readthedocs.org/issues/9038 -Jinja2<3.1 \ No newline at end of file +Jinja2<3.1 + +# For allowing type hints +sphinx-autodoc-typehints \ No newline at end of file diff --git a/setup.cfg b/setup.cfg index 1c061b63..e3abeb36 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 0.4.4 +current_version = 0.5.0 commit = True tag = True @@ -11,6 +11,14 @@ replace = version = "{new_version}" search = __version__ = "{current_version}" replace = __version__ = "{new_version}" +[bumpversion:file:CITATION.cff] +search = version: {current_version} +replace = version: {new_version} + +[bumpversion:file:beobench/data/configs/default.yaml] +search = version: {current_version} +replace = version: {new_version} + [bdist_wheel] universal = 1 diff --git a/setup.py b/setup.py index bdf90d5a..aca89ad4 100644 --- a/setup.py +++ b/setup.py @@ -10,7 +10,7 @@ with open("HISTORY.rst", encoding="UTF-8") as history_file: history = history_file.read() -version = "0.4.4" # pylint: disable=invalid-name +version = "0.5.0" # pylint: disable=invalid-name requirements = [ "docker", @@ -36,7 +36,7 @@ author="Beobench authors", python_requires=">=3.6", classifiers=[ - "Development Status :: 2 - Pre-Alpha", + "Development Status :: 3 - Alpha", "Intended Audience :: Developers", "License :: OSI Approved :: MIT License", "Natural Language :: English", @@ -44,8 +44,9 @@ "Programming Language :: Python :: 3.6", "Programming Language :: Python :: 3.7", "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", ], - 
description="Beobench is a toolbox for benchmarking reinforcement learning (RL) algorithms on building energy optimisation (BEO) problems.", # pylint: disable=line-too-long + description="Beobench is a toolkit providing easy and unified access to building control environments for reinforcement learning (RL).", # pylint: disable=line-too-long install_requires=requirements, extras_require={ "extended": extended_requirements, diff --git a/tests/conftest.py b/tests/conftest.py index 3c20e9be..e3f5d591 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -9,4 +9,4 @@ @pytest.fixture def run_config(): - return beobench.experiment.config_parser.get_standard_config("simple") + return beobench.experiment.config_parser.get_standard_config("test_energym") diff --git a/tox.ini b/tox.ini index a7466bac..b5ffcccc 100644 --- a/tox.ini +++ b/tox.ini @@ -1,11 +1,12 @@ [tox] -envlist = py39, py38, py37, flake8 +envlist = py39, py38, py37, py36, flake8 [travis] python = 3.9: py39 3.8: py38 3.7: py37 + 3.6: py36 [testenv:flake8] basepython = python