Merge pull request #16 from rte-france/bd-dev
update to version 0.4.1
BDonnot authored Jun 16, 2020
2 parents 657ebc7 + 6cd8fa7 commit 4678182
Showing 19 changed files with 271 additions and 138 deletions.
11 changes: 11 additions & 0 deletions CHANGELOG.rst
@@ -4,6 +4,17 @@ Change Log
--------
- stack multiple states in `utils/DeepQAgent`

[0.4.1] - 2020-06-16
-----------------------
- [FIXED] `Issue 14 <https://github.com/rte-france/l2rpn-baselines/issues/14>`_ clearer interface and removal
of the "nb_env" argument from some baseline constructors. A helper function
`make_multi_env` has also been created to ease the creation of the appropriate multi environment (see the sketch after this list).
- [FIXED] `Issue 13 <https://github.com/rte-france/l2rpn-baselines/issues/13>`_ the names have been properly updated
- [FIXED] `Issue 12 <https://github.com/rte-france/l2rpn-baselines/issues/12>`_ the appropriate documentation for the
SAC baseline and all the others of this kind
- [FIXED] `Issue 9 <https://github.com/rte-france/l2rpn-baselines/issues/9>`_ no more hard coded global variables for
most of the baselines.
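
A minimal usage sketch of the new helper (this sketch is not part of the commit; the parameter names
`env_init` and `nb_env` are assumptions based on the changelog wording):

.. code-block:: python

    import grid2op
    from l2rpn_baselines.utils import make_multi_env

    # start from a regular single-process environment
    env = grid2op.make("l2rpn_case14_sandbox")
    # build the appropriate multi environment from it
    # (parameter names are assumed, not taken from this diff)
    multi_env = make_multi_env(env_init=env, nb_env=4)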

[0.4.0] - 2020-06-xx
--------------------
- [ADDED] convenience way to modify the architecture of the neural networks
1 change: 1 addition & 0 deletions docs/DeepQSimple.rst
@@ -6,6 +6,7 @@ Description
This file serves as a concrete example of how to implement a baseline, even more concretely than the "do nothing"
baseline. Don't expect to obtain a state-of-the-art method with this simple approach, however.

An example of how to train this model is available in the train function: :ref:`Example-deepqsimple`.

Exported class
--------------
2 changes: 2 additions & 0 deletions docs/DuelQLeapNet.rst
@@ -13,6 +13,8 @@ powerlines based on the injection and the topology.

In this baseline, we use this very same architecture to model the Q function. The D3QN RL method is used.
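
As a rough illustration only (a schematic sketch, not the exact network shipped in this repository), the
"leap" idea can be written with Keras as a residual block whose inner encoding is multiplied element-wise
by the tau vector (here assumed to be the topology / line-status part of the observation):

.. code-block:: python

    import tensorflow as tf
    from tensorflow.keras.layers import Input, Dense, multiply, add

    obs_dim, tau_dim, n_actions = 400, 20, 157   # illustrative sizes only

    x = Input(shape=(obs_dim,), name="x")        # injections and other continuous features
    tau = Input(shape=(tau_dim,), name="tau")    # topology / line-status vector
    e = Dense(tau_dim, activation="relu")(x)     # encode x into the tau space
    e = multiply([e, tau])                       # modulate the encoding with tau
    d = Dense(obs_dim)(e)                        # decode back to the input space
    h = add([x, d])                              # the "leap": a tau-driven residual jump
    q_values = Dense(n_actions)(h)               # Q-value head (the dueling part is omitted)

    model = tf.keras.Model(inputs=[x, tau], outputs=q_values)

Since the D3QN method is used, the actual baseline additionally applies the dueling (advantage / value) decomposition on top of such a block.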

An example of how to train this model is available in the train function: :ref:`Example-leapnet`.

Exported class
--------------
You can use this class with:
1 change: 1 addition & 0 deletions docs/DuelQSimple.rst
@@ -8,6 +8,7 @@ Description
This file serves as a concrete example of how to implement a baseline, even more concretely than the "do nothing"
baseline. Don't expect to obtain a state-of-the-art method with this simple approach, however.

An example of how to train this model is available in the train function: :ref:`Example-duelqsimple`.

Exported class
--------------
1 change: 1 addition & 0 deletions docs/SAC.rst
@@ -9,6 +9,7 @@ Description
-----------
This module provides an implementation of the SAC (Soft Actor-Critic) algorithm.
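
For reference, the soft Bellman target that motivates keeping two Q networks (as done in `SAC_NN`) can be
sketched as follows; this is a generic illustration of the algorithm, not this module's exact code:

.. code-block:: python

    import numpy as np

    def sac_q_target(rew, done, q1_next, q2_next, logp_next, gamma=0.99, alpha=0.2):
        """Schematic soft Bellman target used to train both Q networks (batched numpy arrays)."""
        # take the pessimistic estimate of the two target Q networks and add the entropy bonus
        soft_value = np.minimum(q1_next, q2_next) - alpha * logp_next
        return rew + gamma * (1.0 - done) * soft_value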

An example of how to train this model is available in the train function: :ref:`Example-sac`.

Exported class
--------------
2 changes: 1 addition & 1 deletion docs/conf.py
@@ -22,7 +22,7 @@
author = 'Benjamin DONNOT'

# The full version, including alpha/beta/rc tags
release = '0.4.0'
release = '0.4.1'
version = '0.4'

# -- General configuration ---------------------------------------------------
17 changes: 7 additions & 10 deletions l2rpn_baselines/DeepQSimple/train.py
@@ -26,7 +26,6 @@ def train(env,
save_path=None,
load_path=None,
logs_dir=None,
nb_env=1,
training_param=None,
filter_action_fun=None,
kwargs_converters={},
@@ -56,10 +55,6 @@ def train(env,
logs_dir: ``str``
Where to store the tensorboard generated logs during the training. ``None`` if you don't want to log them.
nb_env: ``int``
Number of environments used in parallel. Note that if nb_env > 1, some functions might not be usable. Also,
if nb_env > 1 make sure that the `env` argument is a grid2op MultiEnvMultiProcess.
training_param: :class:`l2rpn_baselines.utils.TrainingParam`
The parameters describing the way you will train your model.
@@ -84,17 +79,21 @@ def train(env,
baseline: :class:`DeepQSimple`
The trained baseline.
.. _Example-deepqsimple:
Examples
---------
Here is an example on how to train a DeepSimple baseline.
Here is an example on how to train a DeepQSimple baseline.
First define a python script, for example
.. code-block:: python
import grid2op
from grid2op.Reward import L2RPNReward
from l2rpn_baselines.utils import TrainingParam
from l2rpn_baselines.utils import TrainingParam, NNParam
from l2rpn_baselines.DeepQSimple import train
# define the environment
@@ -111,7 +110,7 @@ def train(env,
"time_before_cooldown_sub", "rho", "timestep_overflow", "line_status"]
# neural network architecture
observation_size = DeepQ_NNParam.get_obs_size(env, li_attr_obs_X)
observation_size = NNParam.get_obs_size(env, li_attr_obs_X)
sizes = [800, 800, 800, 494, 494, 494] # sizes of each hidden layers
kwargs_archi = {'observation_size': observation_size,
'sizes': sizes,
@@ -134,7 +133,6 @@ def train(env,
save_path="/WHERE/I/SAVED/THE/MODEL",
load_path=None,
logs_dir="/WHERE/I/SAVED/THE/LOGS",
nb_env=1,
training_param=tp,
kwargs_converters=kwargs_converters,
kwargs_archi=kwargs_archi)
@@ -177,7 +175,6 @@ def train(env,
nn_archi=nn_archi,
name=name,
istraining=True,
nb_env=nb_env,
verbose=verbose,
**kwargs_converters
)
33 changes: 17 additions & 16 deletions l2rpn_baselines/DuelQLeapNet/train.py
@@ -26,7 +26,6 @@ def train(env,
save_path=None,
load_path=None,
logs_dir=None,
nb_env=1,
training_param=None,
filter_action_fun=None,
verbose=True,
@@ -56,10 +55,6 @@ def train(env,
logs_dir: ``str``
Where to store the tensorboard generated logs during the training. ``None`` if you don't want to log them.
nb_env: ``int``
Number of environments used in parallel. Note that if nb_env > 1, some functions might not be usable. Also,
if nb_env > 1 make sure that the `env` argument is a grid2op MultiEnvMultiProcess.
training_param: :class:`l2rpn_baselines.utils.TrainingParam`
The parameters describing the way you will train your model.
@@ -84,9 +79,12 @@ def train(env,
baseline: :class:`DuelQLeapNet`
The trained baseline.
.. _Example-leapnet:
Examples
---------
Here is an example on how to train a DeepSimple baseline.
Here is an example on how to train a DuelQLeapNet baseline.
First define a python script, for example
@@ -95,7 +93,7 @@ def train(env,
import grid2op
from grid2op.Reward import L2RPNReward
from l2rpn_baselines.utils import TrainingParam
from l2rpn_baselines.DuelQLeapNet import train
from l2rpn_baselines.DuelQLeapNet import train, LeapNet_NNParam
# define the environment
env = grid2op.make("l2rpn_case14_sandbox",
@@ -141,14 +139,15 @@ def train(env,
}
# define the name of the model
nm_ = "AnneOnymous"
save_path = "/WHERE/I/SAVED/THE/MODEL"
logs_dir = "/WHERE/I/SAVED/THE/LOGS"
try:
train(env,
name=nm_,
iterations=10000,
save_path="/WHERE/I/SAVED/THE/MODEL",
save_path=save_path,
load_path=None,
logs_dir="/WHERE/I/SAVED/THE/LOGS",
nb_env=1,
logs_dir=logs_dir,
training_param=tp,
kwargs_converters=kwargs_converters,
kwargs_archi=kwargs_archi)
@@ -191,8 +190,8 @@ def train(env,
nn_archi=nn_archi,
name=name,
istraining=True,
nb_env=nb_env,
filter_action_fun=filter_action_fun,
verbose=verbose,
**kwargs_converters
)

@@ -343,7 +342,7 @@ def __call__(self, action, env, has_error, is_done, is_illegal, is_ambiguous):
# limit the number of time steps played per scenarios
tp.step_increase_nb_iter = 100 # None to deactivate it
tp.min_iter = 10
tp.update_nb_iter(100) # once 100 scenarios are solved, increase of "step_increase_nb_iter"
tp.update_nb_iter = 100 # once 100 scenarios are solved, increase of "step_increase_nb_iter"

# oversampling hard scenarios
tp.oversampling_rate = 3 # None to deactivate it
@@ -374,9 +373,10 @@ def __call__(self, action, env, has_error, is_done, is_illegal, is_ambiguous):
# nn architecture
li_attr_obs_X = ["day_of_week", "hour_of_day", "minute_of_hour", "prod_p", "prod_v", "load_p", "load_q",
"actual_dispatch", "target_dispatch", "topo_vect", "time_before_cooldown_line",
"time_before_cooldown_sub", "timestep_overflow", "line_status", "rho"]
li_attr_obs_Tau = ["rho", "line_status"]
sizes = [800, 800, 800, 494, 494, 494]
"time_before_cooldown_sub", "timestep_overflow", "line_status", "rho", "line_status"]
# li_attr_obs_Tau = ["rho", "line_status"]
li_attr_obs_Tau = []
sizes = [512, 512, 256, 256]

x_dim = LeapNet_NNParam.get_obs_size(env_init, li_attr_obs_X)
tau_dims = [LeapNet_NNParam.get_obs_size(env_init, [el]) for el in li_attr_obs_Tau]
@@ -402,7 +402,8 @@ def __call__(self, action, env, has_error, is_done, is_illegal, is_ambiguous):
nb_env=args.nb_env,
training_param=tp,
kwargs_converters=kwargs_converters,
kwargs_archi=kwargs_archi)
kwargs_archi=kwargs_archi,
verbose=True)
finally:
env.close()
if args.nb_env > 1:
17 changes: 7 additions & 10 deletions l2rpn_baselines/DuelQSimple/train.py
@@ -26,7 +26,6 @@ def train(env,
save_path=None,
load_path=None,
logs_dir=None,
nb_env=1,
training_param=None,
filter_action_fun=None,
verbose=True,
@@ -57,10 +56,6 @@ def train(env,
logs_dir: ``str``
Where to store the tensorboard generated logs during the training. ``None`` if you don't want to log them.
nb_env: ``int``
Number of environments used in parallel. Note that if nb_env > 1, some functions might not be usable. Also,
if nb_env > 1 make sure that the `env` argument is a grid2op MultiEnvMultiProcess.
verbose: ``bool``
If you want something to be printed on the terminal (a better logging strategy will be put in place at some point)
@@ -85,17 +80,21 @@ def train(env,
baseline: :class:`DeepQSimple`
The trained baseline.
.. _Example-duelqsimple:
Examples
---------
Here is an example on how to train a DeepSimple baseline.
Here is an example on how to train a DuelQSimple baseline.
First define a python script, for example
.. code-block:: python
import grid2op
from grid2op.Reward import L2RPNReward
from l2rpn_baselines.utils import TrainingParam
from l2rpn_baselines.utils import TrainingParam, NNParam
from l2rpn_baselines.DuelQSimple import train
# define the environment
@@ -112,7 +111,7 @@ def train(env,
"time_before_cooldown_sub", "rho", "timestep_overflow", "line_status"]
# neural network architecture
observation_size = DeepQ_NNParam.get_obs_size(env, li_attr_obs_X)
observation_size = NNParam.get_obs_size(env, li_attr_obs_X)
sizes = [800, 800, 800, 494, 494, 494] # sizes of each hidden layers
kwargs_archi = {'observation_size': observation_size,
'sizes': sizes,
@@ -135,7 +134,6 @@ def train(env,
save_path="/WHERE/I/SAVED/THE/MODEL",
load_path=None,
logs_dir="/WHERE/I/SAVED/THE/LOGS",
nb_env=1,
training_param=tp,
kwargs_converters=kwargs_converters,
kwargs_archi=kwargs_archi)
@@ -178,7 +176,6 @@ def train(env,
nn_archi=nn_archi,
name=name,
istraining=True,
nb_env=nb_env,
verbose=verbose,
**kwargs_converters
)
9 changes: 6 additions & 3 deletions l2rpn_baselines/SAC/SAC_NN.py
@@ -39,12 +39,14 @@ class SAC_NN(BaseDeepQ):
"""
def __init__(self,
nn_params,
training_param=None):
training_param=None,
verbose=False):
if training_param is None:
training_param = TrainingParam()
BaseDeepQ.__init__(self,
nn_params,
training_param)
training_param,
verbose=verbose)

# TODO add as meta param the number of "Q" you want to use (here 2)
# TODO add as meta param size and types of the networks
@@ -248,7 +250,8 @@ def load_network(self, path, name=None, ext="h5"):
self.model_Q = load_model('{}.{}'.format(path_modelQ, ext))
self.model_Q2 = load_model('{}.{}'.format(path_modelQ2, ext))
self.model_policy = load_model('{}.{}'.format(path_policy, ext))
print("Succesfully loaded network.")
if self.verbose:
print("Succesfully loaded network.")

def target_train(self):
"""
16 changes: 6 additions & 10 deletions l2rpn_baselines/SAC/train.py
@@ -26,7 +26,6 @@ def train(env,
save_path=None,
load_path=None,
logs_dir=None,
nb_env=1,
training_param=None,
filter_action_fun=None,
verbose=True,
@@ -56,10 +55,6 @@ def train(env,
logs_dir: ``str``
Where to store the tensorboard generated logs during the training. ``None`` if you don't want to log them.
nb_env: ``int``
Number of environments used in parallel. Note that if nb_env > 1, some functions might not be usable. Also,
if nb_env > 1 make sure that the `env` argument is a grid2op MultiEnvMultiProcess.
verbose: ``bool``
If you want something to be printed on the terminal (a better logging strategy will be put in place at some point)
@@ -84,17 +79,20 @@ def train(env,
baseline: :class:`DeepQSimple`
The trained baseline.
.. _Example-sac:
Examples
---------
Here is an example on how to train a DeepSimple baseline.
Here is an example on how to train a SAC baseline.
First define a python script, for example
.. code-block:: python
import grid2op
from grid2op.Reward import L2RPNReward
from l2rpn_baselines.utils import TrainingParam
from l2rpn_baselines.utils import TrainingParam, NNParam
from l2rpn_baselines.SAC import train
# define the environment
@@ -111,7 +109,7 @@ def train(env,
"time_before_cooldown_sub", "rho", "timestep_overflow", "line_status"]
# neural network architecture
observation_size = DeepQ_NNParam.get_obs_size(env, li_attr_obs_X)
observation_size = NNParam.get_obs_size(env, li_attr_obs_X)
sizes_q = [800, 800, 800, 494, 494, 494] # sizes of each hidden layers
sizes_v = [800, 800] # sizes of each hidden layers
sizes_pol = [800, 800, 800, 494, 494, 494] # sizes of each hidden layers
@@ -141,7 +139,6 @@ def train(env,
save_path="/WHERE/I/SAVED/THE/MODEL",
load_path=None,
logs_dir="/WHERE/I/SAVED/THE/LOGS",
nb_env=1,
training_param=tp,
kwargs_converters=kwargs_converters,
kwargs_archi=kwargs_archi)
@@ -184,7 +181,6 @@ def train(env,
nn_archi=nn_archi,
name=name,
istraining=True,
nb_env=nb_env,
verbose=verbose,
**kwargs_converters
)