diff --git a/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11/data/trajectory_data_0900.pkl b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11/data/trajectory_data_0900.pkl new file mode 100644 index 0000000..9932909 Binary files /dev/null and b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11/data/trajectory_data_0900.pkl differ diff --git a/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11/data/trajectory_data_0925.pkl b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11/data/trajectory_data_0925.pkl new file mode 100644 index 0000000..f80c94e Binary files /dev/null and b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11/data/trajectory_data_0925.pkl differ diff --git a/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11/plot_data_0.pkl b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11/plot_data_0.pkl new file mode 100644 index 0000000..f908598 Binary files /dev/null and b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11/plot_data_0.pkl differ diff --git a/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11/plot_data_0_convergence.pdf b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11/plot_data_0_convergence.pdf new file mode 100644 index 0000000..b5f2979 Binary files /dev/null and b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11/plot_data_0_convergence.pdf differ diff --git a/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11/plot_data_0_episodes.pdf b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11/plot_data_0_episodes.pdf new file mode 100644 index 0000000..a1262d8 Binary files /dev/null and b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11/plot_data_0_episodes.pdf differ diff --git a/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11/plot_data_0_rewards.pdf b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11/plot_data_0_rewards.pdf new file mode 100644 index 0000000..50c875c Binary files /dev/null and b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11/plot_data_0_rewards.pdf differ diff --git a/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11/q_main_model_1/checkpoint b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11/q_main_model_1/checkpoint new file mode 100644 index 0000000..28b60a1 --- /dev/null +++ b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11/q_main_model_1/checkpoint @@ -0,0 +1,8 @@ +model_checkpoint_path: "ckpt-37" +all_model_checkpoint_paths: "ckpt-35" +all_model_checkpoint_paths: "ckpt-36" +all_model_checkpoint_paths: "ckpt-37" +all_model_checkpoint_timestamps: 1595297759.3219218 +all_model_checkpoint_timestamps: 1595297787.4387484 +all_model_checkpoint_timestamps: 1595297814.0655754 +last_preserved_timestamp: 1595296945.9954271 diff --git a/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11/q_main_model_1/ckpt-35.data-00000-of-00002 b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11/q_main_model_1/ckpt-35.data-00000-of-00002 new file mode 100644 index 0000000..edc613b Binary files /dev/null and b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11/q_main_model_1/ckpt-35.data-00000-of-00002 differ diff --git 
a/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11/q_main_model_1/ckpt-35.data-00001-of-00002 b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11/q_main_model_1/ckpt-35.data-00001-of-00002 new file mode 100644 index 0000000..581e123 Binary files /dev/null and b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11/q_main_model_1/ckpt-35.data-00001-of-00002 differ diff --git a/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11/q_main_model_1/ckpt-35.index b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11/q_main_model_1/ckpt-35.index new file mode 100644 index 0000000..1910e90 Binary files /dev/null and b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11/q_main_model_1/ckpt-35.index differ diff --git a/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11/q_main_model_1/ckpt-36.data-00000-of-00002 b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11/q_main_model_1/ckpt-36.data-00000-of-00002 new file mode 100644 index 0000000..feeebea Binary files /dev/null and b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11/q_main_model_1/ckpt-36.data-00000-of-00002 differ diff --git a/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11/q_main_model_1/ckpt-36.data-00001-of-00002 b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11/q_main_model_1/ckpt-36.data-00001-of-00002 new file mode 100644 index 0000000..d602bc7 Binary files /dev/null and b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11/q_main_model_1/ckpt-36.data-00001-of-00002 differ diff --git a/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11/q_main_model_1/ckpt-36.index b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11/q_main_model_1/ckpt-36.index new file mode 100644 index 0000000..eac3eb5 Binary files /dev/null and b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11/q_main_model_1/ckpt-36.index differ diff --git a/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11/q_main_model_1/ckpt-37.data-00000-of-00002 b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11/q_main_model_1/ckpt-37.data-00000-of-00002 new file mode 100644 index 0000000..c2a87e9 Binary files /dev/null and b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11/q_main_model_1/ckpt-37.data-00000-of-00002 differ diff --git a/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11/q_main_model_1/ckpt-37.data-00001-of-00002 b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11/q_main_model_1/ckpt-37.data-00001-of-00002 new file mode 100644 index 0000000..689ec66 Binary files /dev/null and b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11/q_main_model_1/ckpt-37.data-00001-of-00002 differ diff --git a/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11/q_main_model_1/ckpt-37.index b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11/q_main_model_1/ckpt-37.index new file mode 100644 index 0000000..7553c2d Binary files /dev/null and b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11/q_main_model_1/ckpt-37.index differ diff --git a/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11/q_target_model_1/checkpoint 
b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11/q_target_model_1/checkpoint new file mode 100644 index 0000000..2835012 --- /dev/null +++ b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11/q_target_model_1/checkpoint @@ -0,0 +1,8 @@ +model_checkpoint_path: "ckpt-37" +all_model_checkpoint_paths: "ckpt-35" +all_model_checkpoint_paths: "ckpt-36" +all_model_checkpoint_paths: "ckpt-37" +all_model_checkpoint_timestamps: 1595297759.3306406 +all_model_checkpoint_timestamps: 1595297787.447458 +all_model_checkpoint_timestamps: 1595297814.07366 +last_preserved_timestamp: 1595296946.180695 diff --git a/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11/q_target_model_1/ckpt-35.data-00000-of-00002 b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11/q_target_model_1/ckpt-35.data-00000-of-00002 new file mode 100644 index 0000000..4051249 Binary files /dev/null and b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11/q_target_model_1/ckpt-35.data-00000-of-00002 differ diff --git a/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11/q_target_model_1/ckpt-35.data-00001-of-00002 b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11/q_target_model_1/ckpt-35.data-00001-of-00002 new file mode 100644 index 0000000..dfca244 Binary files /dev/null and b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11/q_target_model_1/ckpt-35.data-00001-of-00002 differ diff --git a/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11/q_target_model_1/ckpt-35.index b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11/q_target_model_1/ckpt-35.index new file mode 100644 index 0000000..0999353 Binary files /dev/null and b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11/q_target_model_1/ckpt-35.index differ diff --git a/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11/q_target_model_1/ckpt-36.data-00000-of-00002 b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11/q_target_model_1/ckpt-36.data-00000-of-00002 new file mode 100644 index 0000000..f620916 Binary files /dev/null and b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11/q_target_model_1/ckpt-36.data-00000-of-00002 differ diff --git a/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11/q_target_model_1/ckpt-36.data-00001-of-00002 b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11/q_target_model_1/ckpt-36.data-00001-of-00002 new file mode 100644 index 0000000..8b23df5 Binary files /dev/null and b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11/q_target_model_1/ckpt-36.data-00001-of-00002 differ diff --git a/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11/q_target_model_1/ckpt-36.index b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11/q_target_model_1/ckpt-36.index new file mode 100644 index 0000000..70b3aa3 Binary files /dev/null and b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11/q_target_model_1/ckpt-36.index differ diff --git a/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11/q_target_model_1/ckpt-37.data-00000-of-00002 b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11/q_target_model_1/ckpt-37.data-00000-of-00002 new file mode 100644 index 0000000..ca62281 Binary files 
/dev/null and b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11/q_target_model_1/ckpt-37.data-00000-of-00002 differ diff --git a/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11/q_target_model_1/ckpt-37.data-00001-of-00002 b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11/q_target_model_1/ckpt-37.data-00001-of-00002 new file mode 100644 index 0000000..43ce204 Binary files /dev/null and b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11/q_target_model_1/ckpt-37.data-00001-of-00002 differ diff --git a/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11/q_target_model_1/ckpt-37.index b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11/q_target_model_1/ckpt-37.index new file mode 100644 index 0000000..3ff5a58 Binary files /dev/null and b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11/q_target_model_1/ckpt-37.index differ diff --git a/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/buffer_data_verification.pkl b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/buffer_data_verification.pkl new file mode 100644 index 0000000..3fa7b10 Binary files /dev/null and b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/buffer_data_verification.pkl differ diff --git a/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/buffer_data_0025.pkl b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/buffer_data_0025.pkl new file mode 100644 index 0000000..86248f7 Binary files /dev/null and b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/buffer_data_0025.pkl differ diff --git a/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/buffer_data_0050.pkl b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/buffer_data_0050.pkl new file mode 100644 index 0000000..a94011c Binary files /dev/null and b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/buffer_data_0050.pkl differ diff --git a/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/buffer_data_0075.pkl b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/buffer_data_0075.pkl new file mode 100644 index 0000000..5113f87 Binary files /dev/null and b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/buffer_data_0075.pkl differ diff --git a/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/buffer_data_0100.pkl b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/buffer_data_0100.pkl new file mode 100644 index 0000000..b834eed Binary files /dev/null and b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/buffer_data_0100.pkl differ diff --git a/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/buffer_data_0125.pkl b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/buffer_data_0125.pkl new file mode 100644 index 0000000..310285c Binary files /dev/null and b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/buffer_data_0125.pkl differ diff --git a/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/buffer_data_0150.pkl 
b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/buffer_data_0150.pkl new file mode 100644 index 0000000..7b6d04b Binary files /dev/null and b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/buffer_data_0150.pkl differ diff --git a/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/buffer_data_0175.pkl b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/buffer_data_0175.pkl new file mode 100644 index 0000000..11a455d Binary files /dev/null and b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/buffer_data_0175.pkl differ diff --git a/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/buffer_data_0200.pkl b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/buffer_data_0200.pkl new file mode 100644 index 0000000..6c8b448 Binary files /dev/null and b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/buffer_data_0200.pkl differ diff --git a/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/buffer_data_0225.pkl b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/buffer_data_0225.pkl new file mode 100644 index 0000000..45abbdf Binary files /dev/null and b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/buffer_data_0225.pkl differ diff --git a/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/buffer_data_0250.pkl b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/buffer_data_0250.pkl new file mode 100644 index 0000000..20854aa Binary files /dev/null and b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/buffer_data_0250.pkl differ diff --git a/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/buffer_data_0275.pkl b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/buffer_data_0275.pkl new file mode 100644 index 0000000..143ab03 Binary files /dev/null and b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/buffer_data_0275.pkl differ diff --git a/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/buffer_data_0300.pkl b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/buffer_data_0300.pkl new file mode 100644 index 0000000..a621e69 Binary files /dev/null and b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/buffer_data_0300.pkl differ diff --git a/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/buffer_data_0325.pkl b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/buffer_data_0325.pkl new file mode 100644 index 0000000..52231b1 Binary files /dev/null and b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/buffer_data_0325.pkl differ diff --git a/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/buffer_data_0350.pkl b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/buffer_data_0350.pkl new file mode 100644 index 0000000..2548ff7 Binary files /dev/null and b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/buffer_data_0350.pkl differ diff --git 
a/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/buffer_data_0375.pkl b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/buffer_data_0375.pkl new file mode 100644 index 0000000..3eaa525 Binary files /dev/null and b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/buffer_data_0375.pkl differ diff --git a/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/buffer_data_0400.pkl b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/buffer_data_0400.pkl new file mode 100644 index 0000000..f1a56bc Binary files /dev/null and b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/buffer_data_0400.pkl differ diff --git a/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/buffer_data_0425.pkl b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/buffer_data_0425.pkl new file mode 100644 index 0000000..cc6452d Binary files /dev/null and b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/buffer_data_0425.pkl differ diff --git a/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/buffer_data_0450.pkl b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/buffer_data_0450.pkl new file mode 100644 index 0000000..5d043b1 Binary files /dev/null and b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/buffer_data_0450.pkl differ diff --git a/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/buffer_data_0475.pkl b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/buffer_data_0475.pkl new file mode 100644 index 0000000..d39062d Binary files /dev/null and b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/buffer_data_0475.pkl differ diff --git a/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/buffer_data_0500.pkl b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/buffer_data_0500.pkl new file mode 100644 index 0000000..139fbfd Binary files /dev/null and b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/buffer_data_0500.pkl differ diff --git a/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/buffer_data_0525.pkl b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/buffer_data_0525.pkl new file mode 100644 index 0000000..f251a78 Binary files /dev/null and b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/buffer_data_0525.pkl differ diff --git a/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/buffer_data_0550.pkl b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/buffer_data_0550.pkl new file mode 100644 index 0000000..acb20ba Binary files /dev/null and b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/buffer_data_0550.pkl differ diff --git a/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/buffer_data_0575.pkl b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/buffer_data_0575.pkl new file mode 100644 index 0000000..425fed4 Binary files /dev/null and 
b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/buffer_data_0575.pkl differ diff --git a/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/buffer_data_0600.pkl b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/buffer_data_0600.pkl new file mode 100644 index 0000000..8eaea81 Binary files /dev/null and b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/buffer_data_0600.pkl differ diff --git a/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/buffer_data_0625.pkl b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/buffer_data_0625.pkl new file mode 100644 index 0000000..42b8493 Binary files /dev/null and b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/buffer_data_0625.pkl differ diff --git a/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/buffer_data_0650.pkl b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/buffer_data_0650.pkl new file mode 100644 index 0000000..81d7f18 Binary files /dev/null and b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/buffer_data_0650.pkl differ diff --git a/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/buffer_data_0675.pkl b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/buffer_data_0675.pkl new file mode 100644 index 0000000..8bf1f5d Binary files /dev/null and b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/buffer_data_0675.pkl differ diff --git a/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/buffer_data_0700.pkl b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/buffer_data_0700.pkl new file mode 100644 index 0000000..a1b5f82 Binary files /dev/null and b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/buffer_data_0700.pkl differ diff --git a/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/buffer_data_0725.pkl b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/buffer_data_0725.pkl new file mode 100644 index 0000000..224b1a5 Binary files /dev/null and b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/buffer_data_0725.pkl differ diff --git a/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/buffer_data_0750.pkl b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/buffer_data_0750.pkl new file mode 100644 index 0000000..71916e9 Binary files /dev/null and b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/buffer_data_0750.pkl differ diff --git a/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/buffer_data_0775.pkl b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/buffer_data_0775.pkl new file mode 100644 index 0000000..9c3f585 Binary files /dev/null and b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/buffer_data_0775.pkl differ diff --git a/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/buffer_data_0800.pkl 
b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/buffer_data_0800.pkl new file mode 100644 index 0000000..f448667 Binary files /dev/null and b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/buffer_data_0800.pkl differ diff --git a/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/buffer_data_0825.pkl b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/buffer_data_0825.pkl new file mode 100644 index 0000000..c77c158 Binary files /dev/null and b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/buffer_data_0825.pkl differ diff --git a/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/buffer_data_0850.pkl b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/buffer_data_0850.pkl new file mode 100644 index 0000000..1dd77dd Binary files /dev/null and b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/buffer_data_0850.pkl differ diff --git a/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/buffer_data_0875.pkl b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/buffer_data_0875.pkl new file mode 100644 index 0000000..0daadee Binary files /dev/null and b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/buffer_data_0875.pkl differ diff --git a/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/buffer_data_0900.pkl b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/buffer_data_0900.pkl new file mode 100644 index 0000000..37d57d5 Binary files /dev/null and b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/buffer_data_0900.pkl differ diff --git a/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/buffer_data_0925.pkl b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/buffer_data_0925.pkl new file mode 100644 index 0000000..72f3378 Binary files /dev/null and b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/buffer_data_0925.pkl differ diff --git a/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/buffer_data_0950.pkl b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/buffer_data_0950.pkl new file mode 100644 index 0000000..270bc16 Binary files /dev/null and b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/buffer_data_0950.pkl differ diff --git a/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/buffer_data_0975.pkl b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/buffer_data_0975.pkl new file mode 100644 index 0000000..15470b7 Binary files /dev/null and b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/buffer_data_0975.pkl differ diff --git a/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/buffer_data_1000.pkl b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/buffer_data_1000.pkl new file mode 100644 index 0000000..676e49b Binary files /dev/null and b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/buffer_data_1000.pkl differ diff --git 
a/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/buffer_data_1025.pkl b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/buffer_data_1025.pkl new file mode 100644 index 0000000..95ce54c Binary files /dev/null and b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/buffer_data_1025.pkl differ diff --git a/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/buffer_data_1050.pkl b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/buffer_data_1050.pkl new file mode 100644 index 0000000..a003f4f Binary files /dev/null and b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/buffer_data_1050.pkl differ diff --git a/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/buffer_data_1075.pkl b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/buffer_data_1075.pkl new file mode 100644 index 0000000..bcc2466 Binary files /dev/null and b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/buffer_data_1075.pkl differ diff --git a/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/buffer_data_1100.pkl b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/buffer_data_1100.pkl new file mode 100644 index 0000000..69c84de Binary files /dev/null and b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/buffer_data_1100.pkl differ diff --git a/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/buffer_data_1125.pkl b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/buffer_data_1125.pkl new file mode 100644 index 0000000..9769d5f Binary files /dev/null and b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/buffer_data_1125.pkl differ diff --git a/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/buffer_data_1150.pkl b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/buffer_data_1150.pkl new file mode 100644 index 0000000..295a0d7 Binary files /dev/null and b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/buffer_data_1150.pkl differ diff --git a/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/buffer_data_1175.pkl b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/buffer_data_1175.pkl new file mode 100644 index 0000000..4f97fde Binary files /dev/null and b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/buffer_data_1175.pkl differ diff --git a/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/buffer_data_1200.pkl b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/buffer_data_1200.pkl new file mode 100644 index 0000000..ae35fa1 Binary files /dev/null and b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/buffer_data_1200.pkl differ diff --git a/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/trajectory_data_0025.pkl b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/trajectory_data_0025.pkl new file mode 100644 index 0000000..71e29fd Binary files /dev/null and 
b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/trajectory_data_0025.pkl differ diff --git a/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/trajectory_data_0050.pkl b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/trajectory_data_0050.pkl new file mode 100644 index 0000000..ef2be6e Binary files /dev/null and b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/trajectory_data_0050.pkl differ diff --git a/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/trajectory_data_0075.pkl b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/trajectory_data_0075.pkl new file mode 100644 index 0000000..184fabb Binary files /dev/null and b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/trajectory_data_0075.pkl differ diff --git a/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/trajectory_data_0100.pkl b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/trajectory_data_0100.pkl new file mode 100644 index 0000000..1310495 Binary files /dev/null and b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/trajectory_data_0100.pkl differ diff --git a/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/trajectory_data_0125.pkl b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/trajectory_data_0125.pkl new file mode 100644 index 0000000..1ba4730 Binary files /dev/null and b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/trajectory_data_0125.pkl differ diff --git a/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/trajectory_data_0150.pkl b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/trajectory_data_0150.pkl new file mode 100644 index 0000000..4f53e50 Binary files /dev/null and b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/trajectory_data_0150.pkl differ diff --git a/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/trajectory_data_0175.pkl b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/trajectory_data_0175.pkl new file mode 100644 index 0000000..e0eb9b9 Binary files /dev/null and b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/trajectory_data_0175.pkl differ diff --git a/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/trajectory_data_0200.pkl b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/trajectory_data_0200.pkl new file mode 100644 index 0000000..d943501 Binary files /dev/null and b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/trajectory_data_0200.pkl differ diff --git a/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/trajectory_data_0225.pkl b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/trajectory_data_0225.pkl new file mode 100644 index 0000000..8e04b0b Binary files /dev/null and b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/trajectory_data_0225.pkl differ diff --git 
a/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/trajectory_data_0250.pkl b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/trajectory_data_0250.pkl new file mode 100644 index 0000000..75e3960 Binary files /dev/null and b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/trajectory_data_0250.pkl differ diff --git a/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/trajectory_data_0275.pkl b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/trajectory_data_0275.pkl new file mode 100644 index 0000000..4ba4d59 Binary files /dev/null and b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/trajectory_data_0275.pkl differ diff --git a/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/trajectory_data_0300.pkl b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/trajectory_data_0300.pkl new file mode 100644 index 0000000..dc7f768 Binary files /dev/null and b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/trajectory_data_0300.pkl differ diff --git a/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/trajectory_data_0325.pkl b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/trajectory_data_0325.pkl new file mode 100644 index 0000000..e142414 Binary files /dev/null and b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/trajectory_data_0325.pkl differ diff --git a/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/trajectory_data_0350.pkl b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/trajectory_data_0350.pkl new file mode 100644 index 0000000..48752b6 Binary files /dev/null and b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/trajectory_data_0350.pkl differ diff --git a/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/trajectory_data_0375.pkl b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/trajectory_data_0375.pkl new file mode 100644 index 0000000..b42c2ef Binary files /dev/null and b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/trajectory_data_0375.pkl differ diff --git a/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/trajectory_data_0400.pkl b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/trajectory_data_0400.pkl new file mode 100644 index 0000000..7d9c997 Binary files /dev/null and b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/trajectory_data_0400.pkl differ diff --git a/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/trajectory_data_0425.pkl b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/trajectory_data_0425.pkl new file mode 100644 index 0000000..585dfad Binary files /dev/null and b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/trajectory_data_0425.pkl differ diff --git a/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/trajectory_data_0450.pkl b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/trajectory_data_0450.pkl new file 
mode 100644 index 0000000..6db5723 Binary files /dev/null and b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/trajectory_data_0450.pkl differ diff --git a/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/trajectory_data_0475.pkl b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/trajectory_data_0475.pkl new file mode 100644 index 0000000..22bcdc1 Binary files /dev/null and b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/trajectory_data_0475.pkl differ diff --git a/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/trajectory_data_0500.pkl b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/trajectory_data_0500.pkl new file mode 100644 index 0000000..801dbbf Binary files /dev/null and b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/trajectory_data_0500.pkl differ diff --git a/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/trajectory_data_0525.pkl b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/trajectory_data_0525.pkl new file mode 100644 index 0000000..12149d4 Binary files /dev/null and b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/trajectory_data_0525.pkl differ diff --git a/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/trajectory_data_0550.pkl b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/trajectory_data_0550.pkl new file mode 100644 index 0000000..b8d0c24 Binary files /dev/null and b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/trajectory_data_0550.pkl differ diff --git a/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/trajectory_data_0575.pkl b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/trajectory_data_0575.pkl new file mode 100644 index 0000000..269abb9 Binary files /dev/null and b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/trajectory_data_0575.pkl differ diff --git a/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/trajectory_data_0600.pkl b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/trajectory_data_0600.pkl new file mode 100644 index 0000000..8743f5a Binary files /dev/null and b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/trajectory_data_0600.pkl differ diff --git a/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/trajectory_data_0625.pkl b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/trajectory_data_0625.pkl new file mode 100644 index 0000000..4ed8207 Binary files /dev/null and b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/trajectory_data_0625.pkl differ diff --git a/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/trajectory_data_0650.pkl b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/trajectory_data_0650.pkl new file mode 100644 index 0000000..168e35b Binary files /dev/null and b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/trajectory_data_0650.pkl differ diff --git 
a/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/trajectory_data_0675.pkl b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/trajectory_data_0675.pkl new file mode 100644 index 0000000..833a23a Binary files /dev/null and b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/trajectory_data_0675.pkl differ diff --git a/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/trajectory_data_0700.pkl b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/trajectory_data_0700.pkl new file mode 100644 index 0000000..edef520 Binary files /dev/null and b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/trajectory_data_0700.pkl differ diff --git a/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/trajectory_data_0725.pkl b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/trajectory_data_0725.pkl new file mode 100644 index 0000000..1c169ea Binary files /dev/null and b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/trajectory_data_0725.pkl differ diff --git a/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/trajectory_data_0750.pkl b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/trajectory_data_0750.pkl new file mode 100644 index 0000000..87e5f5c Binary files /dev/null and b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/trajectory_data_0750.pkl differ diff --git a/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/trajectory_data_0775.pkl b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/trajectory_data_0775.pkl new file mode 100644 index 0000000..c86b167 Binary files /dev/null and b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/trajectory_data_0775.pkl differ diff --git a/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/trajectory_data_0800.pkl b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/trajectory_data_0800.pkl new file mode 100644 index 0000000..01296c4 Binary files /dev/null and b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/trajectory_data_0800.pkl differ diff --git a/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/trajectory_data_0825.pkl b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/trajectory_data_0825.pkl new file mode 100644 index 0000000..aeec3a1 Binary files /dev/null and b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/trajectory_data_0825.pkl differ diff --git a/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/trajectory_data_0850.pkl b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/trajectory_data_0850.pkl new file mode 100644 index 0000000..802fb8d Binary files /dev/null and b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/trajectory_data_0850.pkl differ diff --git a/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/trajectory_data_0875.pkl b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/trajectory_data_0875.pkl new file 
mode 100644 index 0000000..669f1fa Binary files /dev/null and b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/trajectory_data_0875.pkl differ diff --git a/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/trajectory_data_0900.pkl b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/trajectory_data_0900.pkl new file mode 100644 index 0000000..4f25cfb Binary files /dev/null and b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/trajectory_data_0900.pkl differ diff --git a/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/trajectory_data_0925.pkl b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/trajectory_data_0925.pkl new file mode 100644 index 0000000..0a5746a Binary files /dev/null and b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/trajectory_data_0925.pkl differ diff --git a/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/trajectory_data_0950.pkl b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/trajectory_data_0950.pkl new file mode 100644 index 0000000..9840781 Binary files /dev/null and b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/trajectory_data_0950.pkl differ diff --git a/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/trajectory_data_0975.pkl b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/trajectory_data_0975.pkl new file mode 100644 index 0000000..b8c14c7 Binary files /dev/null and b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/trajectory_data_0975.pkl differ diff --git a/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/trajectory_data_1000.pkl b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/trajectory_data_1000.pkl new file mode 100644 index 0000000..e8ab2e0 Binary files /dev/null and b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/trajectory_data_1000.pkl differ diff --git a/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/trajectory_data_1025.pkl b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/trajectory_data_1025.pkl new file mode 100644 index 0000000..d106ec0 Binary files /dev/null and b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/trajectory_data_1025.pkl differ diff --git a/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/trajectory_data_1050.pkl b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/trajectory_data_1050.pkl new file mode 100644 index 0000000..a28c323 Binary files /dev/null and b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/trajectory_data_1050.pkl differ diff --git a/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/trajectory_data_1075.pkl b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/trajectory_data_1075.pkl new file mode 100644 index 0000000..9bd5549 Binary files /dev/null and b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/trajectory_data_1075.pkl differ diff --git 
a/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/trajectory_data_1100.pkl b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/trajectory_data_1100.pkl new file mode 100644 index 0000000..c962975 Binary files /dev/null and b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/trajectory_data_1100.pkl differ diff --git a/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/trajectory_data_1125.pkl b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/trajectory_data_1125.pkl new file mode 100644 index 0000000..ee97784 Binary files /dev/null and b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/trajectory_data_1125.pkl differ diff --git a/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/trajectory_data_1150.pkl b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/trajectory_data_1150.pkl new file mode 100644 index 0000000..ff591b6 Binary files /dev/null and b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/trajectory_data_1150.pkl differ diff --git a/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/trajectory_data_1175.pkl b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/trajectory_data_1175.pkl new file mode 100644 index 0000000..fb38132 Binary files /dev/null and b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/trajectory_data_1175.pkl differ diff --git a/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/trajectory_data_1200.pkl b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/trajectory_data_1200.pkl new file mode 100644 index 0000000..3620c4b Binary files /dev/null and b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/data/trajectory_data_1200.pkl differ diff --git a/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/plot_data_0.pkl b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/plot_data_0.pkl new file mode 100644 index 0000000..b815209 Binary files /dev/null and b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/plot_data_0.pkl differ diff --git a/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/plot_data_0_convergence.pdf b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/plot_data_0_convergence.pdf new file mode 100644 index 0000000..2d7673a Binary files /dev/null and b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/plot_data_0_convergence.pdf differ diff --git a/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/plot_data_0_episodes.pdf b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/plot_data_0_episodes.pdf new file mode 100644 index 0000000..fbb5306 Binary files /dev/null and b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/plot_data_0_episodes.pdf differ diff --git a/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/plot_data_0_rewards.pdf b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/plot_data_0_rewards.pdf new file mode 100644 index 0000000..4de9f43 Binary files /dev/null and 
b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/plot_data_0_rewards.pdf differ diff --git a/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/q_main_model_1/checkpoint b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/q_main_model_1/checkpoint new file mode 100644 index 0000000..e3d516b --- /dev/null +++ b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/q_main_model_1/checkpoint @@ -0,0 +1,8 @@ +model_checkpoint_path: "ckpt-48" +all_model_checkpoint_paths: "ckpt-46" +all_model_checkpoint_paths: "ckpt-47" +all_model_checkpoint_paths: "ckpt-48" +all_model_checkpoint_timestamps: 1595303226.1714215 +all_model_checkpoint_timestamps: 1595303251.4241862 +all_model_checkpoint_timestamps: 1595303277.4032362 +last_preserved_timestamp: 1595301951.7302732 diff --git a/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/q_main_model_1/ckpt-46.data-00000-of-00002 b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/q_main_model_1/ckpt-46.data-00000-of-00002 new file mode 100644 index 0000000..7374779 Binary files /dev/null and b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/q_main_model_1/ckpt-46.data-00000-of-00002 differ diff --git a/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/q_main_model_1/ckpt-46.data-00001-of-00002 b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/q_main_model_1/ckpt-46.data-00001-of-00002 new file mode 100644 index 0000000..560ef50 Binary files /dev/null and b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/q_main_model_1/ckpt-46.data-00001-of-00002 differ diff --git a/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/q_main_model_1/ckpt-46.index b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/q_main_model_1/ckpt-46.index new file mode 100644 index 0000000..f0999b5 Binary files /dev/null and b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/q_main_model_1/ckpt-46.index differ diff --git a/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/q_main_model_1/ckpt-47.data-00000-of-00002 b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/q_main_model_1/ckpt-47.data-00000-of-00002 new file mode 100644 index 0000000..19ea3d8 Binary files /dev/null and b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/q_main_model_1/ckpt-47.data-00000-of-00002 differ diff --git a/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/q_main_model_1/ckpt-47.data-00001-of-00002 b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/q_main_model_1/ckpt-47.data-00001-of-00002 new file mode 100644 index 0000000..b258ac8 Binary files /dev/null and b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/q_main_model_1/ckpt-47.data-00001-of-00002 differ diff --git a/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/q_main_model_1/ckpt-47.index b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/q_main_model_1/ckpt-47.index new file mode 100644 index 0000000..58f3955 Binary files /dev/null and b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/q_main_model_1/ckpt-47.index differ diff --git 
a/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/q_main_model_1/ckpt-48.data-00000-of-00002 b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/q_main_model_1/ckpt-48.data-00000-of-00002 new file mode 100644 index 0000000..b359470 Binary files /dev/null and b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/q_main_model_1/ckpt-48.data-00000-of-00002 differ diff --git a/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/q_main_model_1/ckpt-48.data-00001-of-00002 b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/q_main_model_1/ckpt-48.data-00001-of-00002 new file mode 100644 index 0000000..43e1bd7 Binary files /dev/null and b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/q_main_model_1/ckpt-48.data-00001-of-00002 differ diff --git a/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/q_main_model_1/ckpt-48.index b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/q_main_model_1/ckpt-48.index new file mode 100644 index 0000000..d1ca574 Binary files /dev/null and b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/q_main_model_1/ckpt-48.index differ diff --git a/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/q_target_model_1/checkpoint b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/q_target_model_1/checkpoint new file mode 100644 index 0000000..aee6852 --- /dev/null +++ b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/q_target_model_1/checkpoint @@ -0,0 +1,8 @@ +model_checkpoint_path: "ckpt-48" +all_model_checkpoint_paths: "ckpt-46" +all_model_checkpoint_paths: "ckpt-47" +all_model_checkpoint_paths: "ckpt-48" +all_model_checkpoint_timestamps: 1595303226.1793263 +all_model_checkpoint_timestamps: 1595303251.432288 +all_model_checkpoint_timestamps: 1595303277.4116447 +last_preserved_timestamp: 1595301951.8949115 diff --git a/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/q_target_model_1/ckpt-46.data-00000-of-00002 b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/q_target_model_1/ckpt-46.data-00000-of-00002 new file mode 100644 index 0000000..038d671 Binary files /dev/null and b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/q_target_model_1/ckpt-46.data-00000-of-00002 differ diff --git a/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/q_target_model_1/ckpt-46.data-00001-of-00002 b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/q_target_model_1/ckpt-46.data-00001-of-00002 new file mode 100644 index 0000000..8d4fb09 Binary files /dev/null and b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/q_target_model_1/ckpt-46.data-00001-of-00002 differ diff --git a/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/q_target_model_1/ckpt-46.index b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/q_target_model_1/ckpt-46.index new file mode 100644 index 0000000..4648435 Binary files /dev/null and b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/q_target_model_1/ckpt-46.index differ diff --git a/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/q_target_model_1/ckpt-47.data-00000-of-00002 
b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/q_target_model_1/ckpt-47.data-00000-of-00002 new file mode 100644 index 0000000..48ea330 Binary files /dev/null and b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/q_target_model_1/ckpt-47.data-00000-of-00002 differ diff --git a/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/q_target_model_1/ckpt-47.data-00001-of-00002 b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/q_target_model_1/ckpt-47.data-00001-of-00002 new file mode 100644 index 0000000..9e47742 Binary files /dev/null and b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/q_target_model_1/ckpt-47.data-00001-of-00002 differ diff --git a/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/q_target_model_1/ckpt-47.index b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/q_target_model_1/ckpt-47.index new file mode 100644 index 0000000..6c033a4 Binary files /dev/null and b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/q_target_model_1/ckpt-47.index differ diff --git a/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/q_target_model_1/ckpt-48.data-00000-of-00002 b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/q_target_model_1/ckpt-48.data-00000-of-00002 new file mode 100644 index 0000000..5248ee6 Binary files /dev/null and b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/q_target_model_1/ckpt-48.data-00000-of-00002 differ diff --git a/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/q_target_model_1/ckpt-48.data-00001-of-00002 b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/q_target_model_1/ckpt-48.data-00001-of-00002 new file mode 100644 index 0000000..f7cf68d Binary files /dev/null and b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/q_target_model_1/ckpt-48.data-00001-of-00002 differ diff --git a/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/q_target_model_1/ckpt-48.index b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/q_target_model_1/ckpt-48.index new file mode 100644 index 0000000..638d942 Binary files /dev/null and b/Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis/q_target_model_1/ckpt-48.index differ diff --git a/Data_Experiments/2020_07_20_NAF@FERMI/basic_test_script.py b/Data_Experiments/2020_07_20_NAF@FERMI/basic_test_script.py new file mode 100644 index 0000000..11f425d --- /dev/null +++ b/Data_Experiments/2020_07_20_NAF@FERMI/basic_test_script.py @@ -0,0 +1,197 @@ +import pickle +import random +import random +import sys + +import matplotlib.pyplot as plt +import numpy as np +import tensorflow as tf + +from local_fel_simulated_env import FelLocalEnv +from pernaf.pernaf.naf import NAF +from pernaf.pernaf.utils.statistic import Statistic + +# from pendulum import PendulumEnv as simpleEnv +# set random seed +from simulated_tango import SimTangoConnection + +random_seed = 123 +# set random seed +tf.set_random_seed(random_seed) +np.random.seed(random_seed) +random.seed(random_seed) + +tango = SimTangoConnection() +env = FelLocalEnv(tango=tango) + +directory = "checkpoints/test_implementation/" + + +def plot_results(env, label): + # plotting + print('now plotting') + rewards = env.rewards + 
initial_states = env.initial_conditions + + iterations = [] + final_rews = [] + # starts = [] + sum_rews = [] + mean_rews = [] + # init_states = pd.read_pickle('/Users/shirlaen/PycharmProjects/DeepLearning/spinningup/Environments/initData') + + for i in range(len(rewards)): + if (len(rewards[i]) > 0): + final_rews.append(rewards[i][len(rewards[i]) - 1]) + # starts.append(-np.sqrt(np.mean(np.square(initial_states[i])))) + iterations.append(len(rewards[i])) + sum_rews.append(np.sum(rewards[i])) + mean_rews.append(np.mean(rewards[i])) + plot_suffix = "" # f', number of iterations: {env.TOTAL_COUNTER}, Linac4 time: {env.TOTAL_COUNTER / 600:.1f} h' + + fig, axs = plt.subplots(2, 1, constrained_layout=True) + + ax = axs[0] + ax.plot(iterations) + ax.set_title('Iterations' + plot_suffix) + fig.suptitle(label, fontsize=12) + + ax = axs[1] + ax.plot(final_rews, 'r--') + + ax.set_title('Final reward per episode') # + plot_suffix) + ax.set_xlabel('Episodes (1)') + + # ax1 = plt.twinx(ax) + # color = 'lime' + # ax1.set_ylabel('starts', color=color) # we already handled the x-label with ax1 + # ax1.tick_params(axis='y', labelcolor=color) + # ax1.plot(starts, color=color) + plt.savefig(label + '.pdf') + # fig.tight_layout() + plt.show() + + fig, axs = plt.subplots(1, 1) + axs.plot(sum_rews) + ax1 = plt.twinx(axs) + ax1.plot(mean_rews, c='lime') + plt.title(label) + plt.show() + + +def plot_convergence(agent, label): + losses, vs = agent.losses, agent.vs + fig, ax = plt.subplots() + ax.set_title(label) + ax.set_xlabel('# steps') + + color = 'tab:blue' + ax.semilogy(losses, color=color) + ax.tick_params(axis='y', labelcolor=color) + ax.set_ylabel('td_loss', color=color) + # ax.set_ylim(0, 1) + + ax1 = plt.twinx(ax) + # ax1.set_ylim(-2, 1) + color = 'lime' + + ax1.set_ylabel('V', color=color) # we already handled the x-label with ax1 + ax1.tick_params(axis='y', labelcolor=color) + ax1.plot(vs, color=color) + plt.savefig(label + 'convergence' + '.pdf') + plt.show() + + +if __name__ == '__main__': + try: + random_seed = int(sys.argv[2]) + except: + random_seed = 25 + try: + file_name = sys.argv[1] + '_' + str(random_seed) + except: + file_name = 'Data/17062020_' + str(random_seed) + '_' + + try: + root_dir = sys.argv[3] + except: + root_dir = "PAPER/tests/" + + try: + index = int(sys.argv[4]) + parameter_list = [ + dict(noise_function=lambda nr: max(0, 1 * (1 / (nr / 10 + 1))) if nr < 40 else 0), + dict(noise_function=lambda nr: max(0, 0.5 * (1 / (nr / 10 + 1))) if nr < 40 else 0), + dict(noise_function=lambda nr: max(0, (1 / (nr / 10 + 1))) ), + dict(noise_function=lambda nr: max(0, 0.5 * (1 / (nr / 10 + 1)))), + + ] + noise_info = parameter_list[index] + print(noise_info) + except: + noise_info = dict(noise_function=lambda nr: max(0, .1 * (1 / (nr / 10 + 1))) if nr < 5 else 0) + + batch_info = lambda nr: 15 + update_repeat = 20 + prio_info = dict(alpha=.1, beta_start=.9, beta_decay=lambda nr: max(1e-12, 1-nr/10)) + decay_info = lambda nr: max(0, 1-nr/10) + + + tf.set_random_seed(random_seed) + np.random.seed(random_seed) + directory = root_dir + file_name + '/' + + discount = 0.999 + batch_size = 10 + learning_rate = 1e-3 + max_steps = 10000 + + max_episodes = 10 + tau = 1 - 0.999 + + is_train = True + + is_continued = not (is_train) + + nafnet_kwargs = dict(hidden_sizes=[32, 32], activation=tf.nn.tanh + , weight_init=tf.random_uniform_initializer(-0.05, 0.05), + batch_info=batch_info, decay_info=decay_info) + # filename = 'Scan_data.obj' + + # filename = 'Scan_data.obj' + # filehandler = open(filename, 
'rb') + # scan_data = pickle.load(filehandler) + + with tf.Session() as sess: + # statistics and running the agent + stat = Statistic(sess=sess, env_name=env.__name__, model_dir=directory, + max_update_per_step=update_repeat, is_continued=is_continued, save_frequency=100) + # init the agent + agent = NAF(sess=sess, env=env, stat=stat, discount=discount, batch_size=batch_size, + learning_rate=learning_rate, max_steps=max_steps, update_repeat=update_repeat, + max_episodes=max_episodes, tau=tau, pretune=None, prio_info=prio_info, + noise_info=noise_info, **nafnet_kwargs) + # run the agent + agent.run(is_train) + + # plot the results + label = file_name + plot_convergence(agent=agent, label=label) + plot_results(env, label) + + if is_train: + file_name += '_training' + + out_put_writer = open(root_dir + file_name + '.pkl', 'wb') + out_rewards = env.rewards + out_inits = env.initial_conditions + out_losses, out_vs = agent.losses, agent.vs + out_buffer = agent.replay_buffer + + pickle.dump(out_rewards, out_put_writer, -1) + pickle.dump(out_inits, out_put_writer, -1) + + pickle.dump(out_losses, out_put_writer, -1) + pickle.dump(out_vs, out_put_writer, -1) + pickle.dump(out_buffer, out_put_writer, -1) + out_put_writer.close() diff --git a/Data_Experiments/2020_07_20_NAF@FERMI/laser_trajectory_control_env.py b/Data_Experiments/2020_07_20_NAF@FERMI/laser_trajectory_control_env.py new file mode 100644 index 0000000..6e33378 --- /dev/null +++ b/Data_Experiments/2020_07_20_NAF@FERMI/laser_trajectory_control_env.py @@ -0,0 +1,208 @@ +import numpy as np +import gym + +# from tango_connection import TangoConnection + +class LaserTrajectoryControlEnv(gym.Env): + + def __init__(self, tango, **kwargs): + self.init_rewards = [] + self.done = False + self.current_length = 0 + self.__name__ = 'LaserTrajectoryControlEnv' + + self.curr_episode = -1 + self.TOTAL_COUNTER = -1 + self.rewards = [] + self.initial_conditions = [] + + # + self.tango = tango + + # some information from tango + self.system = self.tango.system + + self.state_size = self.tango.state_size + self.action_size = self.tango.action_size + + self.init_state = self.tango.init_state + self.init_intensity = self.tango.init_intensity + + # scaling factor definition + if 'half_range' in kwargs: + self.half_range = kwargs.get('half_range') + else: + self.half_range = 3000 + if self.system == 'eos': + self.half_range = 30000 # 30000 + + self.state_range = self.get_range() + self.state_scale = 2 * self.half_range + + # target intensity + if 'target_intensity' in kwargs: + self.target_intensity = kwargs.get('target_intensity') + else: + self.target_intensity = self.init_intensity + + # state, intensity and reward definition + self.init_state_norm = self.scale(self.init_state) + self.init_intensity_norm = self.get_intensity() + self.state = self.init_state_norm.copy() + self.intensity = self.init_intensity_norm.copy() + self.reward = self.get_reward() + + ## max action allowed + if 'max_action' in kwargs: + max_action = kwargs.get('max_action') + else: + max_action = 500 + if self.system == 'eos': + max_action = 5000 # 2500 # 5000 + + self.max_action = max_action/self.state_scale + + # observation space definition + self.observation_space = gym.spaces.Box(low=0.0 + self.max_action, + high=1.0 - self.max_action, + shape=(self.state_size,), + dtype=np.float64) + + # action spacec definition + self.action_space = gym.spaces.Box(low=-self.max_action, + high=self.max_action, + shape=(self.action_size,), + dtype=np.float64) + + def get_range(self): + # define the 
available state space + state_range = np.c_[self.init_state - self.half_range, self.init_state + self.half_range] + return state_range + + def scale(self, state): + # scales the state from state_range values to [0, 1] + state_scaled = (state - self.state_range[:, 0]) / self.state_scale + return state_scaled + + def descale(self, state): + # descales the state from [0, 1] to state_range values + state_descaled = state * self.state_scale + self.state_range[:, 0] + return state_descaled + + def set_state(self, state): + # writes descaled state + state_descaled = self.descale(state) + self.tango.set_state(state_descaled) + + def get_state(self): + # read scaled state + state = self.tango.get_state() + state_scaled = self.scale(state) + return state_scaled + + def norm_intensity(self, intensity): + # normalize the intensity with respect to target_intensity + intensity_norm = intensity/self.target_intensity + return intensity_norm + + def get_intensity(self): + # read normalized intensity + intensity = self.tango.get_intensity() + intensity_norm = self.norm_intensity(intensity) + return intensity_norm + + def step(self, action): + # step method + self.current_length += 1 + state, reward = self.take_action(action) + + intensity = self.get_intensity() + if intensity > 0.95: + self.done = True + + elif self.current_length > 1000: + self.done = True + + print('step', self.current_length,'state ', state, 'a ', action, 'r ', reward) + self.rewards[self.curr_episode].append(reward) + + return state, reward, self.done, {} + + def take_action(self, action): + # take action method + new_state = self.state + action + + # state must remain in [0, 1] + if any(new_state < 0.0) or any(new_state > 1.0): + new_state = np.clip(new_state, 0.0, 1.0) + # print('WARNING: state boundaries!') + + # set new state to the machine + self.set_state(new_state) + state = self.get_state() + self.state = state + + # get new intensity from the machine + intensity = self.get_intensity() + self.intensity = intensity + + # reward calculation + reward = self.get_reward() + self.reward = reward + + return state, reward + + def get_reward(self): + # You can change reward function, but it should depend on intensity + # e.g. 
next line + # reward = -(1 - self.intensity / self.target_intensity) + reward = -(1 - self.intensity / 1.0) + + # reward = self.intensity + return reward + + def reset(self): + # reset method + self.done = False + self.current_length = 0 + + self.curr_episode += 1 + self.rewards.append([]) + + bad_init = True + while bad_init: + new_state = self.observation_space.sample() + + self.set_state(new_state) + state = self.get_state() + self.state = state + + intensity = self.get_intensity() + self.intensity = intensity + self.init_rewards.append(-(1 - self.intensity / 1.0)) + + bad_init = False + + return state + + def seed(self, seed=None): + # seed method + np.random.seed(seed) + + def render(self, mode='human'): + # render method + print('ERROR\nnot yet implemented!') + pass + + +if __name__ == '__main__': + + # fel + system = 'eos' + path = '/home/niky/FERMI/2020_07_20/configuration/' + conf_file = 'conf_'+system+'.json' + + filename = path+conf_file + tng = TangoConnection(conf_file=filename) + env = LaserTrajectoryControlEnv(tng) + diff --git a/Data_Experiments/2020_07_20_NAF@FERMI/naf2.py b/Data_Experiments/2020_07_20_NAF@FERMI/naf2.py new file mode 100644 index 0000000..d586b9f --- /dev/null +++ b/Data_Experiments/2020_07_20_NAF@FERMI/naf2.py @@ -0,0 +1,1007 @@ +import os +import pickle +import shutil +import time + +import tensorflow as tf +from tensorflow import keras + +tf.keras.backend.set_floatx('float64') +import numpy as np +from tqdm import tqdm + + +class ReplayBuffer: + """ + A simple FIFO experience replay buffer for NAF_debug agents. + """ + + def __init__(self, obs_dim, act_dim, size): + self.obs1_buf = np.zeros([size, obs_dim], dtype=np.float64) + self.obs2_buf = np.zeros([size, obs_dim], dtype=np.float64) + self.acts_buf = np.zeros([size, act_dim], dtype=np.float64) + self.rews_buf = np.zeros(size, dtype=np.float64) + self.done_buf = np.zeros(size, dtype=np.float64) + self.ptr, self.size, self.max_size = 0, 0, size + + def store(self, obs, act, rew, next_obs, done): + self.obs1_buf[self.ptr] = obs + self.obs2_buf[self.ptr] = next_obs + self.acts_buf[self.ptr] = act + self.rews_buf[self.ptr] = rew + self.done_buf[self.ptr] = done + self.ptr = (self.ptr + 1) % self.max_size + self.size = min(self.size + 1, self.max_size) + + def sample_batch(self, batch_size=32): + if self.size < batch_size: + idxs = np.arange(self.size) + else: + idxs = np.random.randint(0, self.size, size=batch_size) + + return dict(obs1=self.obs1_buf[idxs], + obs2=self.obs2_buf[idxs], + acts=self.acts_buf[idxs], + rews=self.rews_buf[idxs], + done=self.done_buf[idxs]) + + def save_to_pkl(self, name, directory): + idxs = np.arange(self.size) + buffer_data = dict(obs1=self.obs1_buf[idxs], + obs2=self.obs2_buf[idxs], + acts=self.acts_buf[idxs], + rews=self.rews_buf[idxs], + done=self.done_buf[idxs]) + f = open(directory + name, "wb") + pickle.dump(buffer_data, f) + f.close() + + def read_from_pkl(self, name, directory): + with open(directory + name, 'rb') as f: + buffer_data = pickle.load(f) + obs1s, obs2s, acts, rews, dones = [buffer_data[key] for key in buffer_data] + for i in range(len(obs1s)): + self.store(obs1s[i], acts[i], rews[i], obs2s[i], dones[i]) + # print(self.size) + + +# class ReplayBufferPER(PrioritizedReplayBuffer): +# """ +# A simple FIFO experience replay buffer for NAF_debug agents. 
+# """ +# +# def __init__(self, obs_dim, act_dim, size, prio_info): +# self.alpha = prio_info.get('alpha') +# self.beta = prio_info.get('beta') +# super(ReplayBufferPER, self).__init__(size, self.alpha) +# self.ptr, self.size, self.max_size = 0, 0, size +# +# def store(self, obs, act, rew, next_obs, done): +# super(ReplayBufferPER, self).add(obs, act, rew, next_obs, done, 1) +# self.ptr = (self.ptr + 1) % self.max_size +# self.size = min(self.size + 1, self.max_size) +# +# def sample_normal(self, batch_size): +# if self.size < batch_size: +# batch_size = self.size +# obs1, acts, rews, obs2, done, gammas, weights, idxs = super(ReplayBufferPER, self).sample_normal_rand( +# batch_size) +# return dict(obs1=obs1, +# obs2=obs2, +# acts=acts, +# rews=rews, +# done=done), [weights, idxs] +# +# def sample_batch(self, batch_size=32, **kwargs): +# if 'beta' in kwargs: +# self.beta = kwargs.get('beta') +# if self.size < batch_size: +# batch_size = self.size +# obs1, acts, rews, obs2, done, gammas, weights, idxs = super(ReplayBufferPER, self).sample_normal_rand( +# batch_size) +# else: +# obs1, acts, rews, obs2, done, gammas, weights, idxs = super(ReplayBufferPER, self).sample(batch_size, +# self.beta) +# return dict(obs1=obs1, +# obs2=obs2, +# acts=acts, +# rews=rews, +# done=done), [weights, idxs] + + +def basic_loss_function(y_true, y_pred): + return tf.math.reduce_mean(y_true - y_pred) + + +# obs_dim = 2 +# act_dim = 2 +# action = tf.Variable(np.ones(act_dim), dtype=float) +hidden_sizes = (100, 100) + + +class QModel: + + def __init__(self, obs_box=2, act_box=2, **kwargs): + if 'directory' in kwargs: + self.directory = kwargs.get('directory') + + if 'save_frequency' in kwargs: + self.save_frequency = kwargs.get('save_frequency') + else: + self.save_frequency = 500 + + if 'hidden_sizes' in kwargs: + self.hidden_sizes = kwargs.get('hidden_sizes') + else: + self.hidden_sizes = (100, 100) + + if 'early_stopping' in kwargs: + self.callback = tf.keras.callbacks.EarlyStopping(monitor='mae', + patience=kwargs.get('early_stopping')) + else: + self.callback = tf.keras.callbacks.EarlyStopping(monitor='mae', patience=2) + + if 'name' in kwargs: + self.__name__ = kwargs.get('name') + print(self.__name__) + + if 'clipped_double_q' in kwargs: + self.clipped_double_q = kwargs.get('clipped_double_q') + else: + self.clipped_double_q = False + # print(self.__name__ ) + + self.init = True + + self.act_box = act_box + self.obs_box = obs_box + self.act_dim = act_box.shape[0] + self.obs_dim = obs_box.shape[0] + + # create a shared network for the variables + inputs_state = keras.Input(shape=(self.obs_dim,), name="state_input") + inputs_action = keras.Input(shape=(self.act_dim,), name="action_input") + + # h = inputs[:, 0:obs_dim] + h = self.normalize(inputs_state, box=self.obs_box) + for hidden_dim in hidden_sizes: + h = self.fc(h, hidden_dim) + V = self.fc(h, 1, name='V') + + l = self.fc(h, (self.act_dim * (self.act_dim + 1) / 2)) + mu = self.fc(h, self.act_dim, name='mu') + + # action = inputs[:, obs_dim:] + action = self.normalize(inputs_action, box=self.act_box) + # rescale action to tanh + + pivot = 0 + rows = [] + for idx in range(self.act_dim): + count = self.act_dim - idx + diag_elem = tf.exp(tf.slice(l, (0, pivot), (-1, 1))) + non_diag_elems = tf.slice(l, (0, pivot + 1), (-1, count - 1)) + row = tf.pad(tensor=tf.concat((diag_elem, non_diag_elems), 1), paddings=((0, 0), (idx, 0))) + rows.append(row) + pivot += count + L = tf.transpose(a=tf.stack(rows, axis=1), perm=(0, 2, 1)) + P = tf.matmul(L, tf.transpose(a=L, 
perm=(0, 2, 1))) + tmp = tf.expand_dims(action - mu, -1) + A = -tf.multiply(tf.matmul(tf.transpose(a=tmp, perm=[0, 2, 1]), + tf.matmul(P, tmp)), tf.constant(0.5, dtype=tf.float64)) + A = tf.reshape(A, [-1, 1]) + Q = tf.add(A, V) + + if 'learning_rate' in kwargs: + self.learning_rate = kwargs.get('learning_rate') + del kwargs['learning_rate'] + else: + self.learning_rate = 1e-3 + # print('learning rate', self.learning_rate ) + if 'directory' in kwargs: + self.directory = kwargs.get('directory') + else: + self.directory = None + + initial_learning_rate = 0.005 + lr_schedule = keras.optimizers.schedules.ExponentialDecay( + initial_learning_rate, decay_steps=1000, decay_rate=0.99, staircase=True + ) + lr_schedule = self.learning_rate + self.optimizer = keras.optimizers.Adam(learning_rate=lr_schedule) + + self.q_model = self.CustomModel(inputs=[inputs_state, inputs_action], outputs=Q, mother_class=self) + # self.q_model.compile(keras.optimizers.Adam(learning_rate=self.learning_rate), loss=MSE) + # optimizer = keras.optimizers.Adam(learning_rate=self.learning_rate) + self.q_model.compile(optimizer=self.optimizer, loss="mse", metrics=["mae"]) + + # Action output + # self.model_get_action = keras.Model(inputs=self.q_model.layers[0].input, + # outputs=self.q_model.get_layer(name='mu').output) + self.model_get_action = keras.Model(inputs=[inputs_state, inputs_action], + outputs=self.q_model.get_layer(name='mu').output) + + # Value output + self.model_value_estimate = keras.Model(inputs=[inputs_state, inputs_action], + outputs=self.q_model.get_layer(name='V').output) + + self.storage_management() + # self.q_model.summary() + + def storage_management(self): + checkpoint_dir = self.directory + self.__name__ + "/" + self.ckpt = tf.train.Checkpoint(step=tf.Variable(1), model=self.q_model) + self.manager = tf.train.CheckpointManager(self.ckpt, checkpoint_dir, max_to_keep=3) + self.ckpt.restore(self.manager.latest_checkpoint) + if self.manager.latest_checkpoint: + print("Restored from {}".format(self.manager.latest_checkpoint)) + else: + print("Initializing from scratch.") + + def normalize(self, input, box): + low = tf.convert_to_tensor(box.low, dtype=tf.float64) + high = tf.convert_to_tensor(box.high, dtype=tf.float64) + return tf.math.scalar_mul(tf.convert_to_tensor(2, dtype=tf.float64), + tf.math.add(tf.convert_to_tensor(-0.5, dtype=tf.float64), + tf.multiply(tf.math.add(input, -low), 1 / (high - low)))) + + def de_normalize(self, input, box): + low = tf.convert_to_tensor(box.low, dtype=tf.float64) + high = tf.convert_to_tensor(box.high, dtype=tf.float64) + input = tf.convert_to_tensor(input, dtype=tf.float64) + return tf.math.add( + tf.multiply(tf.math.add(tf.math.scalar_mul(tf.convert_to_tensor(0.5, dtype=tf.float64), input), + tf.convert_to_tensor(0.5, dtype=tf.float64)), + (high - low)), low) + + def fc(self, x, hidden_size, name=None): + layer = keras.layers.Dense(hidden_size, activation=tf.nn.tanh, + kernel_initializer=tf.compat.v1.random_uniform_initializer(-0.01, 0.01), + kernel_regularizer=None, + bias_initializer=tf.compat.v1.constant_initializer(0.0), name=name) + return layer(x) + + def get_action(self, state): + state = np.array([state], dtype='float64') + actions = tf.zeros(shape=(tf.shape(state)[0], self.act_dim), dtype=tf.float64) + return self.de_normalize(self.model_get_action.predict([state, actions]), self.act_box) + + def get_value_estimate(self, state): + actions = tf.zeros(shape=(tf.shape(state)[0], self.act_dim), dtype=tf.float64) + return 
self.model_value_estimate.predict([state, actions]) + + def set_polyak_weights(self, weights, polyak=0.999, **kwargs): + if 'name' in kwargs: + print(10 * ' updating:', kwargs.get('name')) + weights_old = self.get_weights() + weights_new = [polyak * weights_old[i] + (1 - polyak) * weights[i] for i in range(len(weights))] + self.q_model.set_weights(weights=weights_new) + + def get_weights(self): + return self.q_model.get_weights() + + def save_model(self, directory): + try: + self.q_model.save(filepath=directory, overwrite=True) + except: + print('Saving failed') + + # def train_model(self, batch_s, batch_a, batch_y, **kwargs): + # # batch_x = np.concatenate((batch_s, batch_a), axis=1) + # n_split = int(5 * len(batch_s) / 5) + # batch_s_train, batch_a_train, batch_y_train = batch_s[:n_split], batch_a[:n_split], batch_y[:n_split] + # batch_s_val, batch_a_val, batch_y_val = batch_s[n_split:], batch_a[n_split:], batch_y[n_split:] + # x_batch_train = tf.keras.layers.concatenate([batch_s_train, batch_a_train], + # axis=1, dtype=tf.float64) + # y_batch_train = tf.convert_to_tensor(batch_y_train, dtype=tf.float64) + # + # train_dataset = tf.data.Dataset.from_tensor_slices((x_batch_train, y_batch_train)) + # train_dataset = train_dataset.repeat(50).shuffle(buffer_size=1024, reshuffle_each_iteration=True).batch(15) + # + # # x_batch_train = tf.keras.layers.concatenate([batch_s_val, batch_a_val], + # # axis=1, dtype=tf.float64) + # # y_batch_train = tf.convert_to_tensor(batch_y_val, dtype=tf.float64) + # + # # val_dataset = tf.data.Dataset.from_tensor_slices((x_batch_train, y_batch_train)) + # # val_dataset = val_dataset.repeat(50).shuffle(buffer_size=1024, reshuffle_each_iteration=True).batch(10) + # + # # if x_batch_train.shape[0]<50 else 25 + # epochs = 6 + # self.callback = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=2) + # batch_size = 10 # x_batch_train.shape[0] + # hist = self.q_model.fit(x_batch_train, y_batch_train, + # validation_split=0.1, + # steps_per_epoch=2, + # verbose=0, + # batch_size=batch_size, + # callbacks=[self.callback], + # shuffle=True, + # epochs=epochs, + # # validation_data=val_dataset, + # # validation_steps=3, + # **kwargs) + # return_value = hist.history['loss'] + # return return_value + + # def train_model(self, batch_s, batch_a, y_batch_train, **kwargs): + # # x_batch_train = np.concatenate((batch_s, batch_a), axis=1) + # x_batch_train = tf.keras.layers.concatenate([batch_s, batch_a], + # axis=1, dtype=tf.float64) + # y_batch_train = tf.convert_to_tensor(y_batch_train, dtype=tf.float64) + # + # return self.train_step(x_batch_train, y_batch_train, **kwargs) + + # @tf.function(experimental_relax_shapes=True) + + class CustomCallback(keras.callbacks.Callback): + + def __init__(self, patience=0): + # super(self.CustomCallback, self).__init__() + super().__init__() + self.patience = patience + # best_weights to store the weights at which the minimum loss occurs. + self.best_weights = None + + def on_train_begin(self, logs=None): + # The number of epoch it has waited when loss is no longer minimum. + self.wait = 0 + # The epoch the training stops at. + self.stopped_epoch = 0 + # Initialize the best as infinity. + self.best = np.Inf + + def on_epoch_end(self, epoch, logs=None): + current = logs.get("loss") + # if np.less(current, self.best): + # self.best = current + # self.wait = 0 + # # Record the best weights if current results is better (less). 
+ # self.best_weights = self.model.get_weights() + # else: + # self.wait += 1 + # if self.wait >= self.patience: + # self.stopped_epoch = epoch + # self.model.stop_training = True + # # print("Restoring model weights from the end of the best epoch.") + # self.model.set_weights(self.best_weights) + self.q_target.set_polyak_weights(self.model.get_weights(), + polyak=0.9995) + # print('updating...', self.model.__name__) + + # def on_train_end(self, logs=None): + # if self.stopped_epoch > 0: + # print("Epoch %05d: early stopping" % (self.stopped_epoch + 1)) + # self.q_target.set_polyak_weights(self.model.get_weights(), + # polyak=0.999) + # print('end of training') + + # def on_train_batch_end(self, batch, logs=None): + # keys = list(logs.keys()) + # + # # self.q_target.set_polyak_weights(self.model.get_weights(), + # # polyak=0.999) + # # print('updated', self.q_target.__name__) + # print("...Training: end of batch {}; got log keys: {}".format(batch, keys)) + # # print(self.model.y_target) + + # def get_estimate(self, o2, d, r): + # self.discount = 0.999 + # v_1 = self.q_target_first.get_value_estimate(o2) + # v_2 = self.q_target_second.get_value_estimate(o2) + # v = tf.where(v_1 < v_2, v_1, v_2) + # return self.discount * tf.squeeze(v) * (1 - d) + r + + def train_model(self, **kwargs): + if 'polyak' in kwargs: + self.polyak = kwargs.get('polyak') + del kwargs['polyak'] + else: + self.polyak = 0.999 + if 'batch_size' in kwargs: + self.batch_size = kwargs.get('batch_size') + else: + self.batch_size = 100 + if 'epochs' not in kwargs: + # self.epochs = kwargs.get('epochs') + kwargs['epochs'] = 2 + if 'steps_per_epoch' not in kwargs: + # self.steps_per_epoch = kwargs.get('steps_per_epoch') + kwargs['steps_per_epoch'] = 10 + # else: + # self.steps_per_epoch = 10 + if 'discount' in kwargs: + self.polyak = kwargs.get('discount') + del kwargs['discount'] + else: + self.polyak = 0.999 + batch = self.replay_buffer.sample_batch(batch_size=1000000) + # batch, prios = self.replay_buffer.sample_batch(batch_size=batch_size) + # nr = self.replay_buffer.size + # + # beta = lambda nr: max(1e-16, 1 - nr / 1000) + # decay_function = lambda nr: max(0, 1 - nr / 1000) + # beta_decay = beta(nr) + # print(beta_decay) + # batch, priority_info = self.replay_buffer.sample_batch(batch_size=30, beta=beta_decay) + # sample_weights = priority_info[0].astype('float64') + # batch['sample_weights'] = sample_weights + # + batch['obs1'] = batch['obs1'].astype('float64') + batch['obs2'] = batch['obs2'].astype('float64') + batch['acts'] = batch['acts'].astype('float64') + batch['rews'] = batch['rews'].astype('float64') + batch['done'] = np.where(batch['done'], 1, 0).astype('float64') + + # batch['y_target'] = self.get_estimate(o2, d, r) + # batch['x_batch_train'] = x_batch_train + # print(batch) + dataset = tf.data.Dataset.from_tensor_slices(batch).repeat(200).shuffle(buffer_size=10000) + train_dataset = dataset.batch(self.batch_size) + # print([element['obs1'] for element in train_dataset.take(2)]) + + # val_dataset = dataset.take(10) + + # if False: + # # if self.replay_buffer.size % 50 == 0 or self.init: + # epochs = 50 + # dataset = tf.data.Dataset.from_tensor_slices(batch).shuffle(buffer_size=1024) + # train_dataset = dataset.batch(10) + # self.callback = self.CustomCallback(patience=0) + # self.callback.q_target = self.q_target_first + # + # # self.callback.q_model = self.q_model + # early_stop = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=2, verbose=1) + # + # hist = self.q_model.fit(train_dataset, + # # 
validation_split=0.1, + # verbose=1, + # # batch_size=batch_size, + # callbacks=[self.callback, early_stop], + # shuffle=True, + # epochs=epochs, + # # validation_data=val_dataset, + # # validation_steps=2, + # **kwargs) + # self.init = False + # else: + # # epochs = 2 + self.callback = self.CustomCallback(patience=0) + self.callback.q_target = self.q_target_first + # self.save_frequency = 5 + + # checkpoint_callback = [ + # keras.callbacks.ModelCheckpoint( + # # Path where to save the model + # filepath=self.directory+self.__name__ +"/mymodel.tf", + # save_weights_only=True, + # save_freq=self.save_frequency, + # # save_best_only=True, # Only save a model if `val_loss` has improved. + # # monitor="loss", + # verbose=1, + # ) + # ] + # self.callback.q_model = self.q_model + # early_stop = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=2, verbose=0) + # TODO: implement saving + hist = self.q_model.fit(train_dataset, + # sample_weights=sample_weights, + # validation_split=0.1, + # steps_per_epoch=self.steps_per_epoch, + verbose=0, + # batch_size=batch_size, + callbacks=[self.callback], # , checkpoint_callback], + shuffle=True, + # epochs=self.epochs, + # validation_data=val_dataset, + # validation_steps=2, + **kwargs) + # update the targets + # self.q_target_first.set_polyak_weights(self.q_model.get_weights(), + # polyak=0.999) + + # loss = model.train_model(o, a, target_y, sample_weight=sample_weights)[-1] + if int(self.ckpt.step) % self.save_frequency == 0: + save_path = self.manager.save() + print("Saved checkpoint for step {}: {}".format(int(self.ckpt.step), save_path)) + save_path_target = self.q_target_first.manager.save() + print("Saved checkpoint for step {}: {}".format(int(self.ckpt.step), save_path_target)) + self.ckpt.step.assign_add(1) + return_value = hist.history['loss'] + # decay = decay_function(nr) + # update_prios = (return_value[-1] * decay + 1e-16) * np.ones(priority_info[0].shape) + # self.replay_buffer.update_priorities(idxes=priority_info[1], priorities=update_prios) + + return return_value + + def set_models(self, q_target_1, q_target_2=None): + self.q_target_first = q_target_1 + if q_target_2 is not None: + self.q_target_second = q_target_2 + + class CustomModel(keras.Model): + + def __init__(self, *args, **kwargs): + self.mother_class = kwargs.get('mother_class') + self.__name__ = self.mother_class.__name__ + del kwargs['mother_class'] + super().__init__(*args, **kwargs) + if 'discount' in kwargs: + self.discount = tf.constant(kwargs.get('discount'), dtype=tf.float64) + del kwargs['discount'] + else: + self.discount = tf.constant(0.999, dtype=tf.float64) + + def train_step(self, batch): + o = batch['obs1'] + o2 = batch['obs2'] + a = batch['acts'] + r = batch['rews'] + d = batch['done'] + # target_y = self.mother_class.get_estimate(o2, d, r) + # y_target = batch['y_target'] + + # a_zero = tf.multiply(a, tf.constant(0, dtype=tf.float64)) + v_1 = self.mother_class.q_target_first.model_value_estimate([o2, a]) # , training=False) + if self.mother_class.clipped_double_q: + v_2 = self.mother_class.q_target_second.model_value_estimate([o2, a]) # , training=False) + v = tf.squeeze(tf.where(tf.math.less(v_1, v_2), v_1, v_2)) + # print('double', self.mother_class.__name__) + else: + v = tf.squeeze(v_1) + # print('single', self.mother_class.__name__) + y_target = tf.add(tf.multiply(tf.math.scalar_mul(self.discount, v), + tf.add(tf.constant(1, dtype=tf.float64), + tf.math.scalar_mul(-1, d))), r) + + # print('target', tf.reduce_mean(y_target)) + + # Iterate over 
the batches of the dataset. + # if 'sample_weight' in kwargs: + # sample_weight = kwargs.get('sample_weight') + with tf.GradientTape() as tape: + # Run the forward pass of the layer. + # The operations that the layer applies + # to its inputs are going to be recorded + # on the GradientTape. + y_pred = self([o, a], training=True) # Logits for this minibatch + # Compute the loss value for this minibatch. + loss = self.compiled_loss( + y_target, + y_pred, + # sample_weight=batch['sample_weights'] + ) + # Use the gradient tape to automatically retrieve + # the gradients of the trainable variables with respect to the loss. + # Compute gradients + trainable_vars = self.trainable_weights + gradients = tape.gradient(loss, trainable_vars) + # Run one step of gradient descent by updating + # the value of the variables to minimize the loss. + # Update weights + self.optimizer.apply_gradients(zip(gradients, trainable_vars)) + # Update the metrics. + # Metrics are configured in `compile()`. + self.compiled_metrics.update_state(y_target, y_pred) + + # self.mother_class.q_target.set_polyak_weights(self.mother_class.q_model.get_weights(), + # polyak=0.999) + return {m.name: m.result() for m in self.metrics} + + def create_buffers(self, per_flag, prio_info): + if not (per_flag): + self.replay_buffer = ReplayBuffer(obs_dim=self.obs_dim, act_dim=self.act_dim, size=int(1e6)) + + try: + files = [] + directory = self.directory + 'data/' + for f in os.listdir(directory): + if 'buffer_data' in f and 'pkl' in f: + files.append(f) + files.sort() + # print(files) + self.replay_buffer.read_from_pkl(name=files[-1], directory=directory) + print('Buffer data loaded for ' + self.__name__, files[-1]) + except: + print('Buffer data empty for ' + self.__name__, files) + + # else: + # self.replay_buffer = ReplayBufferPER(obs_dim=self.obs_dim, act_dim=self.act_dim, size=int(1e6), + # prio_info=prio_info) + + +class NAF(object): + def __init__(self, env, training_info=dict(), pretune=None, + noise_info=dict(), save_frequency=500, directory=None, is_continued=False, + clipped_double_q=2, q_smoothing=0.01, **nafnet_kwargs): + ''' + :param env: open gym environment to be solved + :param directory: directory were weigths are saved + :param stat: statistic class to handle tensorflow and statitics + :param discount: discount factor + :param batch_size: batch size for the training + :param learning_rate: learning rate + :param max_steps: maximal steps per episode + :param update_repeat: iteration per step of training + :param max_episodes: maximum number of episodes + :param polyak: polyac averaging + :param pretune: list of tuples of state action reward next state done + :param prio_info: parameters to handle the prioritizing of the buffer + :param nafnet_kwargs: keywords to handle the network and training + :param noise_info: dict with noise_function + :param clipped_double_q: use the clipped double q trick with switching all clipped_double_q steps + :param q_smoothing: add small noise on actions to smooth the training + ''' + self.rewards = [] + self.states = [] + self.actions = [] + self.dones = [] + + self.clipped_double_q = clipped_double_q + self.q_smoothing = q_smoothing + self.losses2 = [] + self.vs2 = [] + self.model_switch = 1 + + self.directory = directory + self.save_frequency = save_frequency + + self.losses = [] + self.pretune = pretune + # self.prio_info = prio_info + self.prio_info = dict() + self.per_flag = bool(self.prio_info) + # self.per_flag = False + print('PER is:', self.per_flag) + + self.env = env + + if 
'noise_function' in noise_info: + self.noise_function = noise_info.get('noise_function') + else: + self.noise_function = lambda nr: 1 / (nr + 1) + + self.action_box = env.action_space + self.action_size = self.action_box.shape[0] + self.obs_box = env.observation_space + + self.max_steps = 1000 + # self.update_repeat = update_repeat + + self.idx_episode = None + self.vs = [] + if 'decay_function' in self.prio_info: + self.decay_function = self.prio_info.get('decay_function') + else: + if 'beta' in self.prio_info: + self.decay_function = lambda nr: self.prio_info.get('beta') + else: + self.decay_function = lambda nr: 1. + + if 'beta_decay' in self.prio_info: + self.beta_decay_function = self.prio_info.get('beta_decay') + # elif self.per_flag: + # self.beta_decay_function = lambda nr: max(1e-12, prio_info.get('beta_start') - nr / 100) + else: + self.beta_decay_function = lambda nr: 1 + + self.training_info = training_info + + if 'learning_rate' in training_info: + learning_rate = training_info.get('learning_rate') + del training_info['learning_rate'] + else: + learning_rate = 1e-3 + + if not is_continued: + # try: + # print(self.directory) + # self.q_target_model_1.q_model = tf.keras.models.load_model(filepath=self.directory) + # print('Successfully loaded', 10 * ' -') + # except: + # print('Failed to load', 10 * ' *') + # if not os.path.exists(self.directory): + # os.makedirs(self.directory) + # else: + # if not os.path.exists(self.directory): + # + # else: + shutil.rmtree(self.directory) + os.makedirs(self.directory) + os.makedirs(self.directory + "data/") + # for f in os.listdir(self.directory): + # print(f) + # print('Deleting: ', self.directory + f) + # os.remove(self.directory + f) + # time.sleep(.5) + else: + if not os.path.exists(self.directory): + print('Creating directory: ', self.directory) + os.makedirs(self.directory) + if not os.path.exists(self.directory + "data/"): + print('Creating directory: ', self.directory + "data/") + os.makedirs(self.directory + "data/") + + self.q_main_model_1 = QModel(obs_box=self.obs_box, act_box=self.action_box, learning_rate=learning_rate, + name='q_main_model_1', + directory=self.directory, + save_frequency=self.save_frequency, + clipped_double_q=self.clipped_double_q, + **nafnet_kwargs) + self.q_main_model_1.create_buffers(per_flag=self.per_flag, prio_info=self.prio_info) + + # self.current_model = self.q_main_model_1 + # Set same initial values in all networks + # self.q_main_model_2.q_model.set_weights(weights=self.q_main_model_1.q_model.get_weights()) + # Set same initial values in all networks + self.q_target_model_1 = QModel(obs_box=self.obs_box, act_box=self.action_box, + name='q_target_model_1', + directory=self.directory, + **nafnet_kwargs) + self.q_target_model_1.q_model.set_weights(weights=self.q_main_model_1.q_model.get_weights()) + + if self.clipped_double_q: + self.q_main_model_2 = QModel(obs_box=self.obs_box, act_box=self.action_box, learning_rate=learning_rate, + name='q_main_model_2', + directory=self.directory, + save_frequency=self.save_frequency, + clipped_double_q=self.clipped_double_q, + **nafnet_kwargs) + self.q_main_model_2.create_buffers(per_flag=self.per_flag, prio_info=self.prio_info) + self.q_target_model_2 = QModel(obs_box=self.obs_box, act_box=self.action_box, + name='q_target_model_2', + directory=self.directory, ) + self.q_target_model_2.q_model.set_weights(weights=self.q_main_model_2.q_model.get_weights()) + + # TODO: change to one network + # if self.clipped_double_q: + 
self.q_main_model_1.set_models(self.q_target_model_1, self.q_target_model_2) + self.q_main_model_2.set_models(self.q_target_model_2, self.q_target_model_1) + else: + self.q_main_model_1.set_models(self.q_target_model_1) + + + + self.counter = 0 + + def predict(self, model, state, is_train): + + if is_train and model.replay_buffer.size < self.warm_up_steps: + print(10 * 'inits ') + action = model.de_normalize(np.random.uniform(-1, 1, self.action_size), model.act_box) + # print(action) + return np.array(action) + elif is_train: + action = model.normalize(model.get_action(state=state), model.act_box) + noise = self.noise_function(self.idx_episode) * np.random.randn(self.action_size) + if self.q_smoothing is None: + return_value = model.de_normalize(np.clip(action + noise, -1, 1), model.act_box) + else: + return_value = model.de_normalize(np.clip(np.clip(action + + noise, -1, 1) + np.clip(self.q_smoothing * + np.random.randn( + self.action_size), + -self.q_smoothing, + self.q_smoothing), + -1, 1), + model.act_box) + return np.array(return_value) + else: + action = model.get_action(state=state) + return action + + def verification(self, **kwargs): + print('Verification phase') + if 'environment' in kwargs: + self.env = kwargs.get('environment') + if 'max_episodes' in kwargs: + self.max_episodes = kwargs.get('max_episodes') + if 'max_steps' in kwargs: + self.max_steps = kwargs.get('max_steps') + self.run(is_train=False) + self.q_main_model_1.replay_buffer.save_to_pkl(name='buffer_data_verification.pkl', directory=self.directory) + print('Saving verification buffer...') + + def training(self, **kwargs): + print('Training phase') + if 'warm_up_steps' in kwargs: + self.warm_up_steps = kwargs.get('warm_up_steps') + else: + self.warm_up_steps = 0 + + if 'initial_episode_length' in kwargs: + self.initial_episode_length = kwargs.get('initial_episode_length') + else: + self.initial_episode_length = 5 + if 'environment' in kwargs: + self.env = kwargs.get('environment') + if 'max_episodes' in kwargs: + self.max_episodes = kwargs.get('max_episodes') + if 'max_steps' in kwargs: + self.max_steps = kwargs.get('max_steps') + + self.run(is_train=True) + + def add_trajectory_data(self, state, action, reward, done): + index = self.idx_episode + self.rewards[index].append(reward) + self.actions[index].append(action) + self.states[index].append(state) + self.dones[index].append(done) + + def store_trajectories_to_pkl(self, name, directory): + out_put_writer = open(directory + name, 'wb') + pickle.dump(self.states, out_put_writer, -1) + pickle.dump(self.actions, out_put_writer, -1) + pickle.dump(self.rewards, out_put_writer, -1) + pickle.dump(self.dones, out_put_writer, -1) + out_put_writer.close() + + def init_trajectory_data(self, state): + self.rewards.append([]) + self.actions.append([]) + self.states.append([]) + self.dones.append([]) + self.add_trajectory_data(state=state, action=None, done=None, reward=None) + + def run(self, is_train=True): + for index in tqdm(range(0, self.max_episodes)): + self.idx_episode = index + # if self.clipped_double_q is not None: + # self.model_switcher(self.idx_episode) + + o = self.env.reset() + # For the trajectory storage + self.init_trajectory_data(state=o) + + for t in range(0, self.max_steps): + # 1. 
predict + a = np.squeeze(self.predict(self.q_main_model_1, o, is_train)) + o2, r, d, _ = self.env.step(a) + self.add_trajectory_data(state=o2, action=a, done=d, reward=r) + if is_train: + self.q_main_model_1.replay_buffer.store(o, a, r, o2, d) + if self.clipped_double_q: + self.q_main_model_2.replay_buffer.store(o, a, r, o2, d) + o = o2 + d = False if t == self.max_steps - 1 else d + + if t % self.initial_episode_length == 0 and self.q_main_model_1.replay_buffer.size <= self.warm_up_steps: + o = self.env.reset() + self.init_trajectory_data(state=o) + print('Initial reset at ', t) + + # 2. train + if is_train and self.q_main_model_1.replay_buffer.size > self.warm_up_steps: + # try: + self.update_q(self.q_main_model_1) + if self.clipped_double_q: + self.update_q(self.q_main_model_2) + if d: + break + + def train_model(self, model): + # beta_decay = self.beta_decay_function(self.idx_episode) + # decay = self.decay_function(self.idx_episode) + + # if self.per_flag: + # if True: # model is self.q_main_model_1: + # batch, priority_info = model.replay_buffer.sample_batch(batch_size=self.batch_size, beta=beta_decay) + # else: + # batch, priority_info = model.replay_buffer.sample_normal(batch_size=self.batch_size) + # else: + batch = model.replay_buffer.sample_batch(200) + + # o = batch['obs1'] + o2 = batch['obs2'] + a = batch['acts'] + # r = batch['rews'] + # d = batch['done'] + # + # v = self.get_v(o2) + # target_y = self.discount * np.squeeze(v) * (1 - d) + r + # input = tf.keras.layers.concatenate([o2, a], axis=1, dtype=tf.float64) + v = self.q_target_model_1.model_value_estimate([o2, a]) + # v_2 = self.q_target_model_2.model_value_estimate([o2, a]) + # print(tf.reduce_mean(v)) + # print(tf.reduce_mean(v_2)) + + # if self.per_flag: + # if True: # model is self.q_main_model_1: + # sample_weights = tf.convert_to_tensor(priority_info[0], dtype=tf.float64) + # loss = model.train_model(o, a, target_y, sample_weight=sample_weights)[-1] + # update_prios = (loss * decay + 1e-16) * np.ones(priority_info[0].shape) + # model.replay_buffer.update_priorities(idxes=priority_info[1], priorities=update_prios) + # else: + # loss = model.train_model_1(10000)[-1] + # else: + # loss = model.train_model(o, a, target_y)[-1] + loss = model.train_model(**self.training_info)[-1] + + # model.set_polyak_weights(self.q_main_model_1.get_weights(), polyak=self.polyak) + # if self.clipped_double_q is not None: + # model.set_polyak_weights(self.q_main_model_2.get_weights(), polyak=self.polyak) + + return v, loss + + # def model_switcher(self, number): + # if number % 1 == 0: + # self.model_switch = 2 if self.model_switch == 1 else 1 + # if self.model_switch == 1: + # self.current_model = self.q_main_model_1 + # else: + # self.current_model = self.q_main_model_2 + + def update_q(self, model): + vs = [] + losses = [] + self.counter += 1 + + # for i in range(self.update_repeat): + # print('i', i, model) + v, loss = self.train_model(model=model) + if model == self.q_main_model_1: + vs.append(v) + losses.append(loss) + + if (self.counter) % self.save_frequency == 0: + # self.q_target_model_1.save_model(directory=self.directory) + number = str(self.counter).zfill(4) + self.q_main_model_1.replay_buffer.save_to_pkl(name=f'buffer_data_'+number+'.pkl', + directory=self.directory + "data/") + self.store_trajectories_to_pkl(name=f'trajectory_data_'+number+'.pkl', + directory=self.directory + "data/") + print('Saving buffer...') + # if model == self.q_main_model_1: + self.vs.append(np.mean(vs)) + self.losses.append(np.mean(losses)) + + # 
def get_v(self, o2): + # v_1 = self.q_target_model_1.get_value_estimate(o2) + # if self.clipped_double_q is not None: + # v_2 = self.q_target_model_2.get_value_estimate(o2) + # v = np.where(v_1 < v_2, v_1, v_2) + # + # # print('vs: ', np.mean(o2), np.mean(v_1), np.mean(v_2)) + # else: + # v = v_1 + # return v + + +if __name__ == '__main__': + print('start') + # test_state = np.random.random((1, 2)) + # + # q_main_model = QModel(2, 2) + # q_target_model = QModel(2, 2) + # + # print('main', q_main_model.get_action(test_state)) + # print('main', q_main_model.get_value_estimate(test_state)) + # + # print('target', q_target_model.get_action(test_state)) + # print('target', q_target_model.get_value_estimate(test_state)) + # + # q_target_model.set_weights(q_main_model.get_weights()) + # + # print('target', q_target_model.get_action(test_state)) + # print('target', q_target_model.get_value_estimate(test_state)) + # + # batch_x = np.random.random((5, 4)) + # batch_y = np.random.random((5, 4)) + # hist = q_main_model.q_model.fit(batch_x, batch_y) + # print(hist.history['loss']) + # + # print('main', q_main_model.get_action(test_state)) + # print('main', q_main_model.get_value_estimate(test_state)) + # + # print('target', q_target_model.get_action(test_state)) + # print('target', q_target_model.get_value_estimate(test_state)) + # + # + # q_target_model.set_weights(q_main_model.get_weights()) + # + # print('target', q_target_model.get_action(test_state)) + # print('target', q_target_model.get_value_estimate(test_state)) + # + # weights = (q_target_model.get_weights()) + # keras.utils.plot_model(model, 'my_first_model.png') + # keras.utils.plot_model(model_get_action, 'model_get_action.png') diff --git a/Data_Experiments/2020_07_20_NAF@FERMI/naf2_notsoold.py b/Data_Experiments/2020_07_20_NAF@FERMI/naf2_notsoold.py new file mode 100644 index 0000000..0a04fa7 --- /dev/null +++ b/Data_Experiments/2020_07_20_NAF@FERMI/naf2_notsoold.py @@ -0,0 +1,965 @@ +import os +import pickle +import time + +import tensorflow as tf +from tensorflow import keras + +tf.keras.backend.set_floatx('float64') +import numpy as np +from tqdm import tqdm + + +class ReplayBuffer: + """ + A simple FIFO experience replay buffer for NAF_debug agents. 
+ """ + + def __init__(self, obs_dim, act_dim, size): + self.obs1_buf = np.zeros([size, obs_dim], dtype=np.float64) + self.obs2_buf = np.zeros([size, obs_dim], dtype=np.float64) + self.acts_buf = np.zeros([size, act_dim], dtype=np.float64) + self.rews_buf = np.zeros(size, dtype=np.float64) + self.done_buf = np.zeros(size, dtype=np.float64) + self.ptr, self.size, self.max_size = 0, 0, size + + def store(self, obs, act, rew, next_obs, done): + self.obs1_buf[self.ptr] = obs + self.obs2_buf[self.ptr] = next_obs + self.acts_buf[self.ptr] = act + self.rews_buf[self.ptr] = rew + self.done_buf[self.ptr] = done + self.ptr = (self.ptr + 1) % self.max_size + self.size = min(self.size + 1, self.max_size) + + def sample_batch(self, batch_size=32): + if self.size < batch_size: + idxs = np.arange(self.size) + else: + idxs = np.random.randint(0, self.size, size=batch_size) + + return dict(obs1=self.obs1_buf[idxs], + obs2=self.obs2_buf[idxs], + acts=self.acts_buf[idxs], + rews=self.rews_buf[idxs], + done=self.done_buf[idxs]) + + def save_to_pkl(self, name, directory): + idxs = np.arange(self.size) + buffer_data = dict(obs1=self.obs1_buf[idxs], + obs2=self.obs2_buf[idxs], + acts=self.acts_buf[idxs], + rews=self.rews_buf[idxs], + done=self.done_buf[idxs]) + f = open(directory + name, "wb") + pickle.dump(buffer_data, f) + f.close() + + def read_from_pkl(self, name, directory): + with open(directory + name, 'rb') as f: + buffer_data = pickle.load(f) + obs1s,obs2s,acts,rews,dones = [buffer_data[key] for key in buffer_data] + for i in range(len(obs1s)): + self.store(obs1s[i], acts[i], rews[i], obs2s[i], dones[i]) + # print(self.size) + + + +# class ReplayBufferPER(PrioritizedReplayBuffer): +# """ +# A simple FIFO experience replay buffer for NAF_debug agents. +# """ +# +# def __init__(self, obs_dim, act_dim, size, prio_info): +# self.alpha = prio_info.get('alpha') +# self.beta = prio_info.get('beta') +# super(ReplayBufferPER, self).__init__(size, self.alpha) +# self.ptr, self.size, self.max_size = 0, 0, size +# +# def store(self, obs, act, rew, next_obs, done): +# super(ReplayBufferPER, self).add(obs, act, rew, next_obs, done, 1) +# self.ptr = (self.ptr + 1) % self.max_size +# self.size = min(self.size + 1, self.max_size) +# +# def sample_normal(self, batch_size): +# if self.size < batch_size: +# batch_size = self.size +# obs1, acts, rews, obs2, done, gammas, weights, idxs = super(ReplayBufferPER, self).sample_normal_rand( +# batch_size) +# return dict(obs1=obs1, +# obs2=obs2, +# acts=acts, +# rews=rews, +# done=done), [weights, idxs] +# +# def sample_batch(self, batch_size=32, **kwargs): +# if 'beta' in kwargs: +# self.beta = kwargs.get('beta') +# if self.size < batch_size: +# batch_size = self.size +# obs1, acts, rews, obs2, done, gammas, weights, idxs = super(ReplayBufferPER, self).sample_normal_rand( +# batch_size) +# else: +# obs1, acts, rews, obs2, done, gammas, weights, idxs = super(ReplayBufferPER, self).sample(batch_size, +# self.beta) +# return dict(obs1=obs1, +# obs2=obs2, +# acts=acts, +# rews=rews, +# done=done), [weights, idxs] + + +def basic_loss_function(y_true, y_pred): + return tf.math.reduce_mean(y_true - y_pred) + + +# obs_dim = 2 +# act_dim = 2 +# action = tf.Variable(np.ones(act_dim), dtype=float) +hidden_sizes = (100, 100) + + +class QModel: + + def __init__(self, obs_box=2, act_box=2, **kwargs): + if 'directory' in kwargs: + self.directory = kwargs.get('directory') + + if 'save_frequency' in kwargs: + self.save_frequency = kwargs.get('save_frequency') + else: + self.save_frequency = 500 + 
+ if 'hidden_sizes' in kwargs: + self.hidden_sizes = kwargs.get('hidden_sizes') + else: + self.hidden_sizes = (100, 100) + + if 'early_stopping' in kwargs: + self.callback = tf.keras.callbacks.EarlyStopping(monitor='mae', + patience=kwargs.get('early_stopping')) + else: + self.callback = tf.keras.callbacks.EarlyStopping(monitor='mae', patience=2) + + if 'name' in kwargs: + self.__name__ = kwargs.get('name') + print(self.__name__) + + if 'clipped_double_q' in kwargs: + self.clipped_double_q = kwargs.get('clipped_double_q') + else: + self.clipped_double_q = False + # print(self.__name__ ) + + self.init = True + + self.act_box = act_box + self.obs_box = obs_box + self.act_dim = act_box.shape[0] + self.obs_dim = obs_box.shape[0] + + # create a shared network for the variables + inputs_state = keras.Input(shape=(self.obs_dim,), name="state_input") + inputs_action = keras.Input(shape=(self.act_dim,), name="action_input") + + # h = inputs[:, 0:obs_dim] + h = self.normalize(inputs_state, box=self.obs_box) + for hidden_dim in hidden_sizes: + h = self.fc(h, hidden_dim) + V = self.fc(h, 1, name='V') + + l = self.fc(h, (self.act_dim * (self.act_dim + 1) / 2)) + mu = self.fc(h, self.act_dim, name='mu') + + # action = inputs[:, obs_dim:] + action = self.normalize(inputs_action, box=self.act_box) + # rescale action to tanh + + pivot = 0 + rows = [] + for idx in range(self.act_dim): + count = self.act_dim - idx + diag_elem = tf.exp(tf.slice(l, (0, pivot), (-1, 1))) + non_diag_elems = tf.slice(l, (0, pivot + 1), (-1, count - 1)) + row = tf.pad(tensor=tf.concat((diag_elem, non_diag_elems), 1), paddings=((0, 0), (idx, 0))) + rows.append(row) + pivot += count + L = tf.transpose(a=tf.stack(rows, axis=1), perm=(0, 2, 1)) + P = tf.matmul(L, tf.transpose(a=L, perm=(0, 2, 1))) + tmp = tf.expand_dims(action - mu, -1) + A = -tf.multiply(tf.matmul(tf.transpose(a=tmp, perm=[0, 2, 1]), + tf.matmul(P, tmp)), tf.constant(0.5, dtype=tf.float64)) + A = tf.reshape(A, [-1, 1]) + Q = tf.add(A, V) + + if 'learning_rate' in kwargs: + self.learning_rate = kwargs.get('learning_rate') + del kwargs['learning_rate'] + else: + self.learning_rate = 1e-3 + # print('learning rate', self.learning_rate ) + if 'directory' in kwargs: + self.directory = kwargs.get('directory') + else: + self.directory = None + + initial_learning_rate = 0.005 + lr_schedule = keras.optimizers.schedules.ExponentialDecay( + initial_learning_rate, decay_steps=1000, decay_rate=0.99, staircase=True + ) + lr_schedule = self.learning_rate + self.optimizer = keras.optimizers.Adam(learning_rate=lr_schedule) + + self.q_model = self.CustomModel(inputs=[inputs_state, inputs_action], outputs=Q, mother_class=self) + # self.q_model.compile(keras.optimizers.Adam(learning_rate=self.learning_rate), loss=MSE) + # optimizer = keras.optimizers.Adam(learning_rate=self.learning_rate) + self.q_model.compile(optimizer=self.optimizer, loss="mse", metrics=["mae"]) + + # Action output + # self.model_get_action = keras.Model(inputs=self.q_model.layers[0].input, + # outputs=self.q_model.get_layer(name='mu').output) + self.model_get_action = keras.Model(inputs=[inputs_state, inputs_action], + outputs=self.q_model.get_layer(name='mu').output) + + # Value output + self.model_value_estimate = keras.Model(inputs=[inputs_state, inputs_action], + outputs=self.q_model.get_layer(name='V').output) + + self.storage_management() + # self.q_model.summary() + + def storage_management(self): + checkpoint_dir = self.directory + self.__name__ + "/" + self.ckpt = tf.train.Checkpoint(step=tf.Variable(1), 
model=self.q_model) + self.manager = tf.train.CheckpointManager(self.ckpt, checkpoint_dir, max_to_keep=3) + self.ckpt.restore(self.manager.latest_checkpoint) + if self.manager.latest_checkpoint: + print("Restored from {}".format(self.manager.latest_checkpoint)) + else: + print("Initializing from scratch.") + + def normalize(self, input, box): + low = tf.convert_to_tensor(box.low, dtype=tf.float64) + high = tf.convert_to_tensor(box.high, dtype=tf.float64) + return tf.math.scalar_mul(tf.convert_to_tensor(2, dtype=tf.float64), + tf.math.add(tf.convert_to_tensor(-0.5, dtype=tf.float64), + tf.multiply(tf.math.add(input, -low), 1 / (high - low)))) + + def de_normalize(self, input, box): + low = tf.convert_to_tensor(box.low, dtype=tf.float64) + high = tf.convert_to_tensor(box.high, dtype=tf.float64) + input = tf.convert_to_tensor(input, dtype=tf.float64) + return tf.math.add( + tf.multiply(tf.math.add(tf.math.scalar_mul(tf.convert_to_tensor(0.5, dtype=tf.float64), input), + tf.convert_to_tensor(0.5, dtype=tf.float64)), + (high - low)), low) + + def fc(self, x, hidden_size, name=None): + layer = keras.layers.Dense(hidden_size, activation=tf.nn.tanh, + kernel_initializer=tf.compat.v1.random_uniform_initializer(-0.01, 0.01), + kernel_regularizer=None, + bias_initializer=tf.compat.v1.constant_initializer(0.0), name=name) + return layer(x) + + def get_action(self, state): + state = np.array([state], dtype='float64') + actions = tf.zeros(shape=(tf.shape(state)[0], self.act_dim), dtype=tf.float64) + return self.de_normalize(self.model_get_action.predict([state, actions]), self.act_box) + + def get_value_estimate(self, state): + actions = tf.zeros(shape=(tf.shape(state)[0], self.act_dim), dtype=tf.float64) + return self.model_value_estimate.predict([state, actions]) + + def set_polyak_weights(self, weights, polyak=0.999, **kwargs): + if 'name' in kwargs: + print(10 * ' updating:', kwargs.get('name')) + weights_old = self.get_weights() + weights_new = [polyak * weights_old[i] + (1 - polyak) * weights[i] for i in range(len(weights))] + self.q_model.set_weights(weights=weights_new) + + def get_weights(self): + return self.q_model.get_weights() + + def save_model(self, directory): + try: + self.q_model.save(filepath=directory, overwrite=True) + except: + print('Saving failed') + + # def train_model(self, batch_s, batch_a, batch_y, **kwargs): + # # batch_x = np.concatenate((batch_s, batch_a), axis=1) + # n_split = int(5 * len(batch_s) / 5) + # batch_s_train, batch_a_train, batch_y_train = batch_s[:n_split], batch_a[:n_split], batch_y[:n_split] + # batch_s_val, batch_a_val, batch_y_val = batch_s[n_split:], batch_a[n_split:], batch_y[n_split:] + # x_batch_train = tf.keras.layers.concatenate([batch_s_train, batch_a_train], + # axis=1, dtype=tf.float64) + # y_batch_train = tf.convert_to_tensor(batch_y_train, dtype=tf.float64) + # + # train_dataset = tf.data.Dataset.from_tensor_slices((x_batch_train, y_batch_train)) + # train_dataset = train_dataset.repeat(50).shuffle(buffer_size=1024, reshuffle_each_iteration=True).batch(15) + # + # # x_batch_train = tf.keras.layers.concatenate([batch_s_val, batch_a_val], + # # axis=1, dtype=tf.float64) + # # y_batch_train = tf.convert_to_tensor(batch_y_val, dtype=tf.float64) + # + # # val_dataset = tf.data.Dataset.from_tensor_slices((x_batch_train, y_batch_train)) + # # val_dataset = val_dataset.repeat(50).shuffle(buffer_size=1024, reshuffle_each_iteration=True).batch(10) + # + # # if x_batch_train.shape[0]<50 else 25 + # epochs = 6 + # self.callback = 
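normalize maps a Box-bounded vector onto [-1, 1] and de_normalize inverts it; the tensor arithmetic above is equivalent to the following NumPy round trip (the bounds here are assumed purely for the example):

import numpy as np
from gym.spaces import Box

box = Box(low=np.array([-2.0, 0.0]), high=np.array([2.0, 10.0]), dtype=np.float64)
x = np.array([1.0, 2.5])
x_norm = 2.0 * ((x - box.low) / (box.high - box.low) - 0.5)      # -> [ 0.5, -0.5]
x_back = (0.5 * x_norm + 0.5) * (box.high - box.low) + box.low   # -> [ 1.0,  2.5]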
tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=2) + # batch_size = 10 # x_batch_train.shape[0] + # hist = self.q_model.fit(x_batch_train, y_batch_train, + # validation_split=0.1, + # steps_per_epoch=2, + # verbose=0, + # batch_size=batch_size, + # callbacks=[self.callback], + # shuffle=True, + # epochs=epochs, + # # validation_data=val_dataset, + # # validation_steps=3, + # **kwargs) + # return_value = hist.history['loss'] + # return return_value + + # def train_model(self, batch_s, batch_a, y_batch_train, **kwargs): + # # x_batch_train = np.concatenate((batch_s, batch_a), axis=1) + # x_batch_train = tf.keras.layers.concatenate([batch_s, batch_a], + # axis=1, dtype=tf.float64) + # y_batch_train = tf.convert_to_tensor(y_batch_train, dtype=tf.float64) + # + # return self.train_step(x_batch_train, y_batch_train, **kwargs) + + # @tf.function(experimental_relax_shapes=True) + + class CustomCallback(keras.callbacks.Callback): + + def __init__(self, patience=0): + # super(self.CustomCallback, self).__init__() + super().__init__() + self.patience = patience + # best_weights to store the weights at which the minimum loss occurs. + self.best_weights = None + + def on_train_begin(self, logs=None): + # The number of epoch it has waited when loss is no longer minimum. + self.wait = 0 + # The epoch the training stops at. + self.stopped_epoch = 0 + # Initialize the best as infinity. + self.best = np.Inf + + def on_epoch_end(self, epoch, logs=None): + current = logs.get("loss") + # if np.less(current, self.best): + # self.best = current + # self.wait = 0 + # # Record the best weights if current results is better (less). + # self.best_weights = self.model.get_weights() + # else: + # self.wait += 1 + # if self.wait >= self.patience: + # self.stopped_epoch = epoch + # self.model.stop_training = True + # # print("Restoring model weights from the end of the best epoch.") + # self.model.set_weights(self.best_weights) + self.q_target.set_polyak_weights(self.model.get_weights(), + polyak=0.9995) + # print('updating...', self.model.__name__) + + # def on_train_end(self, logs=None): + # if self.stopped_epoch > 0: + # print("Epoch %05d: early stopping" % (self.stopped_epoch + 1)) + # self.q_target.set_polyak_weights(self.model.get_weights(), + # polyak=0.999) + # print('end of training') + + # def on_train_batch_end(self, batch, logs=None): + # keys = list(logs.keys()) + # + # # self.q_target.set_polyak_weights(self.model.get_weights(), + # # polyak=0.999) + # # print('updated', self.q_target.__name__) + # print("...Training: end of batch {}; got log keys: {}".format(batch, keys)) + # # print(self.model.y_target) + + # def get_estimate(self, o2, d, r): + # self.discount = 0.999 + # v_1 = self.q_target_first.get_value_estimate(o2) + # v_2 = self.q_target_second.get_value_estimate(o2) + # v = tf.where(v_1 < v_2, v_1, v_2) + # return self.discount * tf.squeeze(v) * (1 - d) + r + + def train_model(self, **kwargs): + if 'polyak' in kwargs: + self.polyak = kwargs.get('polyak') + del kwargs['polyak'] + else: + self.polyak = 0.999 + if 'batch_size' in kwargs: + self.batch_size = kwargs.get('batch_size') + else: + self.batch_size = 100 + if 'epochs' not in kwargs: + # self.epochs = kwargs.get('epochs') + kwargs['epochs'] = 2 + if 'steps_per_epoch' not in kwargs: + # self.steps_per_epoch = kwargs.get('steps_per_epoch') + kwargs['steps_per_epoch'] = 10 + # else: + # self.steps_per_epoch = 10 + if 'discount' in kwargs: + self.polyak = kwargs.get('discount') + del kwargs['discount'] + else: + self.polyak = 0.999 + 
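The CustomCallback above nudges the target network towards the main network after every epoch; numerically this is just an exponential moving average of the weight lists, as in set_polyak_weights. A small sketch with dummy weight arrays (shapes are arbitrary):

import numpy as np

polyak = 0.9995
target_w = [np.ones((2, 2)), np.zeros(2)]   # stand-ins for the target network's weights
main_w = [np.zeros((2, 2)), np.ones(2)]     # stand-ins for the main network's weights
new_w = [polyak * t + (1.0 - polyak) * m for t, m in zip(target_w, main_w)]
# the target moves 0.05% of the way towards the main network on each call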
batch = self.replay_buffer.sample_batch(batch_size=1000000) + # batch, prios = self.replay_buffer.sample_batch(batch_size=batch_size) + # nr = self.replay_buffer.size + # + # beta = lambda nr: max(1e-16, 1 - nr / 1000) + # decay_function = lambda nr: max(0, 1 - nr / 1000) + # beta_decay = beta(nr) + # print(beta_decay) + # batch, priority_info = self.replay_buffer.sample_batch(batch_size=30, beta=beta_decay) + # sample_weights = priority_info[0].astype('float64') + # batch['sample_weights'] = sample_weights + # + batch['obs1'] = batch['obs1'].astype('float64') + batch['obs2'] = batch['obs2'].astype('float64') + batch['acts'] = batch['acts'].astype('float64') + batch['rews'] = batch['rews'].astype('float64') + batch['done'] = np.where(batch['done'], 1, 0).astype('float64') + + # batch['y_target'] = self.get_estimate(o2, d, r) + # batch['x_batch_train'] = x_batch_train + # print(batch) + dataset = tf.data.Dataset.from_tensor_slices(batch).repeat(200).shuffle(buffer_size=10000) + train_dataset = dataset.batch(self.batch_size) + # print([element['obs1'] for element in train_dataset.take(2)]) + + # val_dataset = dataset.take(10) + + # if False: + # # if self.replay_buffer.size % 50 == 0 or self.init: + # epochs = 50 + # dataset = tf.data.Dataset.from_tensor_slices(batch).shuffle(buffer_size=1024) + # train_dataset = dataset.batch(10) + # self.callback = self.CustomCallback(patience=0) + # self.callback.q_target = self.q_target_first + # + # # self.callback.q_model = self.q_model + # early_stop = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=2, verbose=1) + # + # hist = self.q_model.fit(train_dataset, + # # validation_split=0.1, + # verbose=1, + # # batch_size=batch_size, + # callbacks=[self.callback, early_stop], + # shuffle=True, + # epochs=epochs, + # # validation_data=val_dataset, + # # validation_steps=2, + # **kwargs) + # self.init = False + # else: + # # epochs = 2 + self.callback = self.CustomCallback(patience=0) + self.callback.q_target = self.q_target_first + # self.save_frequency = 5 + + # checkpoint_callback = [ + # keras.callbacks.ModelCheckpoint( + # # Path where to save the model + # filepath=self.directory+self.__name__ +"/mymodel.tf", + # save_weights_only=True, + # save_freq=self.save_frequency, + # # save_best_only=True, # Only save a model if `val_loss` has improved. 
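train_model drains the whole buffer in one sample_batch call and lets tf.data handle shuffling and mini-batching. The pipeline boils down to the following pattern (toy shapes, assumed for illustration):

import numpy as np
import tensorflow as tf

batch = {'obs1': np.random.randn(500, 4).astype('float64'),
         'acts': np.random.randn(500, 2).astype('float64'),
         'rews': np.random.randn(500).astype('float64')}
dataset = tf.data.Dataset.from_tensor_slices(batch).repeat(200).shuffle(buffer_size=10000)
train_dataset = dataset.batch(100)
for element in train_dataset.take(1):
    print(element['obs1'].shape)   # (100, 4)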
+ # # monitor="loss", + # verbose=1, + # ) + # ] + # self.callback.q_model = self.q_model + # early_stop = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=2, verbose=0) + # TODO: implement saving + hist = self.q_model.fit(train_dataset, + # sample_weights=sample_weights, + # validation_split=0.1, + # steps_per_epoch=self.steps_per_epoch, + verbose=0, + # batch_size=batch_size, + callbacks=[self.callback],# , checkpoint_callback], + shuffle=True, + # epochs=self.epochs, + # validation_data=val_dataset, + # validation_steps=2, + **kwargs) + # update the targets + # self.q_target_first.set_polyak_weights(self.q_model.get_weights(), + # polyak=0.999) + + # loss = model.train_model(o, a, target_y, sample_weight=sample_weights)[-1] + if int(self.ckpt.step) % self.save_frequency == 0: + save_path = self.manager.save() + print("Saved checkpoint for step {}: {}".format(int(self.ckpt.step), save_path)) + save_path_target = self.q_target_first.manager.save() + print("Saved checkpoint for step {}: {}".format(int(self.ckpt.step), save_path_target)) + self.ckpt.step.assign_add(1) + return_value = hist.history['loss'] + # decay = decay_function(nr) + # update_prios = (return_value[-1] * decay + 1e-16) * np.ones(priority_info[0].shape) + # self.replay_buffer.update_priorities(idxes=priority_info[1], priorities=update_prios) + + return return_value + + def set_models(self, q_target_1, q_target_2=None): + self.q_target_first = q_target_1 + if q_target_2 is not None: + self.q_target_second = q_target_2 + + class CustomModel(keras.Model): + + def __init__(self, *args, **kwargs): + self.mother_class = kwargs.get('mother_class') + self.__name__ = self.mother_class.__name__ + del kwargs['mother_class'] + super().__init__(*args, **kwargs) + if 'discount' in kwargs: + self.discount = tf.constant(kwargs.get('discount'), dtype=tf.float64) + del kwargs['discount'] + else: + self.discount = tf.constant(0.999, dtype=tf.float64) + + def train_step(self, batch): + o = batch['obs1'] + o2 = batch['obs2'] + a = batch['acts'] + r = batch['rews'] + d = batch['done'] + # target_y = self.mother_class.get_estimate(o2, d, r) + # y_target = batch['y_target'] + + # a_zero = tf.multiply(a, tf.constant(0, dtype=tf.float64)) + v_1 = self.mother_class.q_target_first.model_value_estimate([o2, a]) # , training=False) + if self.mother_class.clipped_double_q: + v_2 = self.mother_class.q_target_second.model_value_estimate([o2, a]) # , training=False) + v = tf.squeeze(tf.where(tf.math.less(v_1, v_2), v_1, v_2)) + # print('double', self.mother_class.__name__) + else: + v = tf.squeeze(v_1) + # print('single', self.mother_class.__name__) + y_target = tf.add(tf.multiply(tf.math.scalar_mul(self.discount, v), + tf.add(tf.constant(1, dtype=tf.float64), + tf.math.scalar_mul(-1, d))), r) + + # print('target', tf.reduce_mean(y_target)) + + # Iterate over the batches of the dataset. + # if 'sample_weight' in kwargs: + # sample_weight = kwargs.get('sample_weight') + with tf.GradientTape() as tape: + # Run the forward pass of the layer. + # The operations that the layer applies + # to its inputs are going to be recorded + # on the GradientTape. + y_pred = self([o, a], training=True) # Logits for this minibatch + # Compute the loss value for this minibatch. + loss = self.compiled_loss( + y_target, + y_pred, + # sample_weight=batch['sample_weights'] + ) + # Use the gradient tape to automatically retrieve + # the gradients of the trainable variables with respect to the loss. 
+ # Compute gradients + trainable_vars = self.trainable_weights + gradients = tape.gradient(loss, trainable_vars) + # Run one step of gradient descent by updating + # the value of the variables to minimize the loss. + # Update weights + self.optimizer.apply_gradients(zip(gradients, trainable_vars)) + # Update the metrics. + # Metrics are configured in `compile()`. + self.compiled_metrics.update_state(y_target, y_pred) + + # self.mother_class.q_target.set_polyak_weights(self.mother_class.q_model.get_weights(), + # polyak=0.999) + return {m.name: m.result() for m in self.metrics} + + def create_buffers(self, per_flag, prio_info): + if not (per_flag): + self.replay_buffer = ReplayBuffer(obs_dim=self.obs_dim, act_dim=self.act_dim, size=int(1e6)) + + try: + self.replay_buffer.read_from_pkl(name='buffer_data.pkl', directory=self.directory) + print('Buffer data loaded for ' + self.__name__) + except: + print('Buffer data empty for ' + self.__name__) + + # else: + # self.replay_buffer = ReplayBufferPER(obs_dim=self.obs_dim, act_dim=self.act_dim, size=int(1e6), + # prio_info=prio_info) + + +class NAF(object): + def __init__(self, env, training_info=dict(), pretune=None, + noise_info=dict(), save_frequency=500, directory=None, is_continued=False, + clipped_double_q=2, q_smoothing=0.01, **nafnet_kwargs): + ''' + :param env: open gym environment to be solved + :param directory: directory were weigths are saved + :param stat: statistic class to handle tensorflow and statitics + :param discount: discount factor + :param batch_size: batch size for the training + :param learning_rate: learning rate + :param max_steps: maximal steps per episode + :param update_repeat: iteration per step of training + :param max_episodes: maximum number of episodes + :param polyak: polyac averaging + :param pretune: list of tuples of state action reward next state done + :param prio_info: parameters to handle the prioritizing of the buffer + :param nafnet_kwargs: keywords to handle the network and training + :param noise_info: dict with noise_function + :param clipped_double_q: use the clipped double q trick with switching all clipped_double_q steps + :param q_smoothing: add small noise on actions to smooth the training + ''' + self.clipped_double_q = clipped_double_q + self.q_smoothing = q_smoothing + self.losses2 = [] + self.vs2 = [] + self.model_switch = 1 + + self.directory = directory + self.save_frequency = save_frequency + + self.losses = [] + self.pretune = pretune + # self.prio_info = prio_info + self.prio_info = dict() + self.per_flag = bool(self.prio_info) + # self.per_flag = False + print('PER is:', self.per_flag) + + self.env = env + + if 'noise_function' in noise_info: + self.noise_function = noise_info.get('noise_function') + else: + self.noise_function = lambda nr: 1 / (nr + 1) + + + self.action_box = env.action_space + self.action_size = self.action_box.shape[0] + self.obs_box = env.observation_space + + self.max_steps = 1000 + # self.update_repeat = update_repeat + + self.idx_episode = None + self.vs = [] + if 'decay_function' in self.prio_info: + self.decay_function = self.prio_info.get('decay_function') + else: + if 'beta' in self.prio_info: + self.decay_function = lambda nr: self.prio_info.get('beta') + else: + self.decay_function = lambda nr: 1. 
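The gradient step above follows the standard pattern for overriding keras.Model.train_step in TF2: compute the loss under a GradientTape, apply the gradients with the compiled optimizer, and update the compiled metrics. A stripped-down sketch of that pattern on a hypothetical regression model (not the NAF network itself):

import tensorflow as tf
from tensorflow import keras

class SketchModel(keras.Model):
    def train_step(self, data):
        x, y = data
        with tf.GradientTape() as tape:
            y_pred = self(x, training=True)
            loss = self.compiled_loss(y, y_pred)
        grads = tape.gradient(loss, self.trainable_weights)
        self.optimizer.apply_gradients(zip(grads, self.trainable_weights))
        self.compiled_metrics.update_state(y, y_pred)
        return {m.name: m.result() for m in self.metrics}

inputs = keras.Input(shape=(4,))
outputs = keras.layers.Dense(1)(inputs)
model = SketchModel(inputs=inputs, outputs=outputs)   # functional graph, custom train loop
model.compile(optimizer='adam', loss='mse', metrics=['mae'])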
+ + if 'beta_decay' in self.prio_info: + self.beta_decay_function = self.prio_info.get('beta_decay') + # elif self.per_flag: + # self.beta_decay_function = lambda nr: max(1e-12, prio_info.get('beta_start') - nr / 100) + else: + self.beta_decay_function = lambda nr: 1 + + self.training_info = training_info + + if 'learning_rate' in training_info: + learning_rate = training_info.get('learning_rate') + del training_info['learning_rate'] + else: + learning_rate = 1e-3 + + self.q_main_model_1 = QModel(obs_box=self.obs_box, act_box=self.action_box, learning_rate=learning_rate, + name='q_main_model_1', + directory=self.directory, + save_frequency=self.save_frequency, + clipped_double_q=self.clipped_double_q, + **nafnet_kwargs) + self.q_main_model_1.create_buffers(per_flag=self.per_flag, prio_info=self.prio_info) + + # self.current_model = self.q_main_model_1 + # Set same initial values in all networks + # self.q_main_model_2.q_model.set_weights(weights=self.q_main_model_1.q_model.get_weights()) + # Set same initial values in all networks + self.q_target_model_1 = QModel(obs_box=self.obs_box, act_box=self.action_box, + name='q_target_model_1', + directory=self.directory, + **nafnet_kwargs) + self.q_target_model_1.q_model.set_weights(weights=self.q_main_model_1.q_model.get_weights()) + + if self.clipped_double_q: + self.q_main_model_2 = QModel(obs_box=self.obs_box, act_box=self.action_box, learning_rate=learning_rate, + name='q_main_model_2', + directory=self.directory, + save_frequency=self.save_frequency, + clipped_double_q=self.clipped_double_q, + **nafnet_kwargs) + self.q_main_model_2.create_buffers(per_flag=self.per_flag, prio_info=self.prio_info) + self.q_target_model_2 = QModel(obs_box=self.obs_box, act_box=self.action_box, + name='q_target_model_2', + directory=self.directory,) + self.q_target_model_2.q_model.set_weights(weights=self.q_main_model_2.q_model.get_weights()) + + # TODO: change to one network + # if self.clipped_double_q: + self.q_main_model_1.set_models(self.q_target_model_1, self.q_target_model_2) + self.q_main_model_2.set_models(self.q_target_model_2, self.q_target_model_1) + else: + self.q_main_model_1.set_models(self.q_target_model_1) + + if not(is_continued): + # try: + # print(self.directory) + # self.q_target_model_1.q_model = tf.keras.models.load_model(filepath=self.directory) + # print('Successfully loaded', 10 * ' -') + # except: + # print('Failed to load', 10 * ' *') + # if not os.path.exists(self.directory): + # os.makedirs(self.directory) + # else: + if not os.path.exists(self.directory): + os.makedirs(self.directory) + elif not (self.directory): + for f in os.listdir(self.directory): + print('Deleting: ', self.directory + '/' + f) + os.remove(self.directory + '/' + f) + time.sleep(.5) + else: + if not os.path.exists(self.directory): + print('Creating directory: ', self.directory) + os.makedirs(self.directory) + + self.counter = 0 + + def predict(self, model, state, is_train): + + if is_train and model.replay_buffer.size < self.warm_up_steps: + print(10 * 'inits ') + action = model.de_normalize(np.random.uniform(-1, 1, self.action_size), model.act_box) + # print(action) + return np.array(action) + elif is_train: + action = model.normalize(model.get_action(state=state), model.act_box) + noise = self.noise_function(self.idx_episode) * np.random.randn(self.action_size) + if self.q_smoothing is None: + return_value = model.de_normalize(np.clip(action + noise, -1, 1), model.act_box) + else: + return_value = model.de_normalize(np.clip(np.clip(action + + noise, -1, 1) + 
np.clip(self.q_smoothing * + np.random.randn( + self.action_size), + -self.q_smoothing, + self.q_smoothing), + -1, 1), + model.act_box) + return np.array(return_value) + else: + action = model.get_action(state=state) + return action + + def verification(self, **kwargs): + print('Verification phase') + if 'environment' in kwargs: + self.env = kwargs.get('environment') + if 'max_episodes' in kwargs: + self.max_episodes = kwargs.get('max_episodes') + if 'max_steps' in kwargs: + self.max_steps = kwargs.get('max_steps') + self.run(is_train=False) + + + def training(self, **kwargs): + print('Training phase') + if 'warm_up_steps' in kwargs: + self.warm_up_steps = kwargs.get('warm_up_steps') + else: + self.warm_up_steps = 0 + + if 'initial_episode_length' in kwargs: + self.initial_episode_length = kwargs.get('initial_episode_length') + else: + self.initial_episode_length = 5 + if 'environment' in kwargs: + self.env = kwargs.get('environment') + if 'max_episodes' in kwargs: + self.max_episodes = kwargs.get('max_episodes') + if 'max_steps' in kwargs: + self.max_steps = kwargs.get('max_steps') + + self.run(is_train=True) + + def run(self, is_train=True): + for index in tqdm(range(0, self.max_episodes)): + self.idx_episode = index + # if self.clipped_double_q is not None: + # self.model_switcher(self.idx_episode) + + o = self.env.reset() + # if self.training_stop is not None: + # is_train = False if self.training_stop < self.idx_episode else True + # print("starting the tests") + for t in range(0, self.max_steps): + # 1. predict + # if self.q_smoothing is not None: + # a = np.clip(self.predict(self.q_main_model_1, o, is_train)[0] + + # np.clip(self.q_smoothing * np.random.randn(self.action_size), + # -self.q_smoothing, self.q_smoothing), -1, 1) + # else: + a = np.squeeze(self.predict(self.q_main_model_1, o, is_train)) + o2, r, d, _ = self.env.step(a) + if is_train: + self.q_main_model_1.replay_buffer.store(o, a, r, o2, d) + if self.clipped_double_q: + self.q_main_model_2.replay_buffer.store(o, a, r, o2, d) + o = o2 + d = False if t == self.max_steps - 1 else d + + if t % self.initial_episode_length == 0 and self.q_main_model_1.replay_buffer.size <= self.warm_up_steps: + o = self.env.reset() + print('Initial reset at ', t) + + # 2. 
train + if is_train and self.q_main_model_1.replay_buffer.size > self.warm_up_steps: + # try: + self.update_q(self.q_main_model_1) + if self.clipped_double_q: + self.update_q(self.q_main_model_2) + # except: + # print('wait:', self.q_main_model_1.replay_buffer.size) + if d: + break + + def train_model(self, model): + # beta_decay = self.beta_decay_function(self.idx_episode) + # decay = self.decay_function(self.idx_episode) + + # if self.per_flag: + # if True: # model is self.q_main_model_1: + # batch, priority_info = model.replay_buffer.sample_batch(batch_size=self.batch_size, beta=beta_decay) + # else: + # batch, priority_info = model.replay_buffer.sample_normal(batch_size=self.batch_size) + # else: + batch = model.replay_buffer.sample_batch(200) + + # o = batch['obs1'] + o2 = batch['obs2'] + a = batch['acts'] + # r = batch['rews'] + # d = batch['done'] + # + # v = self.get_v(o2) + # target_y = self.discount * np.squeeze(v) * (1 - d) + r + # input = tf.keras.layers.concatenate([o2, a], axis=1, dtype=tf.float64) + v = self.q_target_model_1.model_value_estimate([o2, a]) + # v_2 = self.q_target_model_2.model_value_estimate([o2, a]) + # print(tf.reduce_mean(v)) + # print(tf.reduce_mean(v_2)) + + # if self.per_flag: + # if True: # model is self.q_main_model_1: + # sample_weights = tf.convert_to_tensor(priority_info[0], dtype=tf.float64) + # loss = model.train_model(o, a, target_y, sample_weight=sample_weights)[-1] + # update_prios = (loss * decay + 1e-16) * np.ones(priority_info[0].shape) + # model.replay_buffer.update_priorities(idxes=priority_info[1], priorities=update_prios) + # else: + # loss = model.train_model_1(10000)[-1] + # else: + # loss = model.train_model(o, a, target_y)[-1] + loss = model.train_model(**self.training_info)[-1] + + # model.set_polyak_weights(self.q_main_model_1.get_weights(), polyak=self.polyak) + # if self.clipped_double_q is not None: + # model.set_polyak_weights(self.q_main_model_2.get_weights(), polyak=self.polyak) + + return v, loss + + # def model_switcher(self, number): + # if number % 1 == 0: + # self.model_switch = 2 if self.model_switch == 1 else 1 + # if self.model_switch == 1: + # self.current_model = self.q_main_model_1 + # else: + # self.current_model = self.q_main_model_2 + + def update_q(self, model): + vs = [] + losses = [] + self.counter += 1 + + # for i in range(self.update_repeat): + # print('i', i, model) + v, loss = self.train_model(model=model) + if model == self.q_main_model_1: + vs.append(v) + losses.append(loss) + + if (self.counter) % self.save_frequency == 0: + # self.q_target_model_1.save_model(directory=self.directory) + self.q_main_model_1.replay_buffer.save_to_pkl(name='buffer_data.pkl', directory=self.directory) + print('Saving buffer...') + # if model == self.q_main_model_1: + self.vs.append(np.mean(vs)) + self.losses.append(np.mean(losses)) + + # def get_v(self, o2): + # v_1 = self.q_target_model_1.get_value_estimate(o2) + # if self.clipped_double_q is not None: + # v_2 = self.q_target_model_2.get_value_estimate(o2) + # v = np.where(v_1 < v_2, v_1, v_2) + # + # # print('vs: ', np.mean(o2), np.mean(v_1), np.mean(v_2)) + # else: + # v = v_1 + # return v + + +if __name__ == '__main__': + print('start') + # test_state = np.random.random((1, 2)) + # + # q_main_model = QModel(2, 2) + # q_target_model = QModel(2, 2) + # + # print('main', q_main_model.get_action(test_state)) + # print('main', q_main_model.get_value_estimate(test_state)) + # + # print('target', q_target_model.get_action(test_state)) + # print('target', 
q_target_model.get_value_estimate(test_state)) + # + # q_target_model.set_weights(q_main_model.get_weights()) + # + # print('target', q_target_model.get_action(test_state)) + # print('target', q_target_model.get_value_estimate(test_state)) + # + # batch_x = np.random.random((5, 4)) + # batch_y = np.random.random((5, 4)) + # hist = q_main_model.q_model.fit(batch_x, batch_y) + # print(hist.history['loss']) + # + # print('main', q_main_model.get_action(test_state)) + # print('main', q_main_model.get_value_estimate(test_state)) + # + # print('target', q_target_model.get_action(test_state)) + # print('target', q_target_model.get_value_estimate(test_state)) + # + # + # q_target_model.set_weights(q_main_model.get_weights()) + # + # print('target', q_target_model.get_action(test_state)) + # print('target', q_target_model.get_value_estimate(test_state)) + # + # weights = (q_target_model.get_weights()) + # keras.utils.plot_model(model, 'my_first_model.png') + # keras.utils.plot_model(model_get_action, 'model_get_action.png') diff --git a/Data_Experiments/2020_07_20_NAF@FERMI/naf2_old.py b/Data_Experiments/2020_07_20_NAF@FERMI/naf2_old.py new file mode 100644 index 0000000..8cc0742 --- /dev/null +++ b/Data_Experiments/2020_07_20_NAF@FERMI/naf2_old.py @@ -0,0 +1,892 @@ +import os +import pickle +# import random +import time + +import tensorflow as tf +from tensorflow import keras + +tf.keras.backend.set_floatx('float64') +import numpy as np +# import tensorflow_docs as tfdocs +# from tensorflow_core.python.keras.losses import MSE +# from tensorflow.python.keras.losses import MSE +from tqdm import tqdm + + +# from pernaf.pernaf.utils.prioritised_experience_replay import PrioritizedReplayBuffer + + +class ReplayBuffer: + """ + A simple FIFO experience replay buffer for NAF_debug agents. + """ + + def __init__(self, obs_dim, act_dim, size): + self.obs1_buf = np.zeros([size, obs_dim], dtype=np.float64) + self.obs2_buf = np.zeros([size, obs_dim], dtype=np.float64) + self.acts_buf = np.zeros([size, act_dim], dtype=np.float64) + self.rews_buf = np.zeros(size, dtype=np.float64) + self.done_buf = np.zeros(size, dtype=np.float64) + self.ptr, self.size, self.max_size = 0, 0, size + + def store(self, obs, act, rew, next_obs, done): + self.obs1_buf[self.ptr] = obs + self.obs2_buf[self.ptr] = next_obs + self.acts_buf[self.ptr] = act + self.rews_buf[self.ptr] = rew + self.done_buf[self.ptr] = done + self.ptr = (self.ptr + 1) % self.max_size + self.size = min(self.size + 1, self.max_size) + + def sample_batch(self, batch_size=32): + if self.size < batch_size: + idxs = np.arange(self.size) + else: + idxs = np.random.randint(0, self.size, size=batch_size) + + return dict(obs1=self.obs1_buf[idxs], + obs2=self.obs2_buf[idxs], + acts=self.acts_buf[idxs], + rews=self.rews_buf[idxs], + done=self.done_buf[idxs]) + + def save_to_pkl(self, name, directory): + buffer_data = dict(obs1=self.obs1_buf, + obs2=self.obs2_buf, + acts=self.acts_buf, + rews=self.rews_buf, + done=self.done_buf) + f = open(directory + name, "wb") + pickle.dump(buffer_data, f) + f.close() + + +# class ReplayBufferPER(PrioritizedReplayBuffer): +# """ +# A simple FIFO experience replay buffer for NAF_debug agents. 
+# """ +# +# def __init__(self, obs_dim, act_dim, size, prio_info): +# self.alpha = prio_info.get('alpha') +# self.beta = prio_info.get('beta') +# super(ReplayBufferPER, self).__init__(size, self.alpha) +# self.ptr, self.size, self.max_size = 0, 0, size +# +# def store(self, obs, act, rew, next_obs, done): +# super(ReplayBufferPER, self).add(obs, act, rew, next_obs, done, 1) +# self.ptr = (self.ptr + 1) % self.max_size +# self.size = min(self.size + 1, self.max_size) +# +# def sample_normal(self, batch_size): +# if self.size < batch_size: +# batch_size = self.size +# obs1, acts, rews, obs2, done, gammas, weights, idxs = super(ReplayBufferPER, self).sample_normal_rand( +# batch_size) +# return dict(obs1=obs1, +# obs2=obs2, +# acts=acts, +# rews=rews, +# done=done), [weights, idxs] +# +# def sample_batch(self, batch_size=32, **kwargs): +# if 'beta' in kwargs: +# self.beta = kwargs.get('beta') +# if self.size < batch_size: +# batch_size = self.size +# obs1, acts, rews, obs2, done, gammas, weights, idxs = super(ReplayBufferPER, self).sample_normal_rand( +# batch_size) +# else: +# obs1, acts, rews, obs2, done, gammas, weights, idxs = super(ReplayBufferPER, self).sample(batch_size, +# self.beta) +# return dict(obs1=obs1, +# obs2=obs2, +# acts=acts, +# rews=rews, +# done=done), [weights, idxs] + + +def basic_loss_function(y_true, y_pred): + return tf.math.reduce_mean(y_true - y_pred) + + +# obs_dim = 2 +# act_dim = 2 +# action = tf.Variable(np.ones(act_dim), dtype=float) +hidden_sizes = (100, 100) + + +class QModel: + + def __init__(self, obs_box=2, act_box=2, **kwargs): + if 'hidden_sizes' in kwargs: + self.hidden_sizes = kwargs.get('hidden_sizes') + else: + self.hidden_sizes = (100, 100) + + if 'early_stopping' in kwargs: + self.callback = tf.keras.callbacks.EarlyStopping(monitor='mae', + patience=kwargs.get('early_stopping')) + else: + self.callback = tf.keras.callbacks.EarlyStopping(monitor='mae', patience=2) + + if 'name' in kwargs: + self.__name__ = kwargs.get('name') + print(self.__name__) + + if 'clipped_double_q' in kwargs: + self.clipped_double_q = kwargs.get('clipped_double_q') + else: + self.clipped_double_q = False + # print(self.__name__ ) + + self.init = True + + self.act_box = act_box + self.obs_box = obs_box + self.act_dim = act_box.shape[0] + self.obs_dim = obs_box.shape[0] + + # create a shared network for the variables + inputs_state = keras.Input(shape=(self.obs_dim,), name="state_input") + inputs_action = keras.Input(shape=(self.act_dim,), name="action_input") + + # h = inputs[:, 0:obs_dim] + h = self.normalize(inputs_state, box=self.obs_box) + for hidden_dim in hidden_sizes: + h = self.fc(h, hidden_dim) + V = self.fc(h, 1, name='V') + + l = self.fc(h, (self.act_dim * (self.act_dim + 1) / 2)) + mu = self.fc(h, self.act_dim, name='mu') + + # action = inputs[:, obs_dim:] + action = self.normalize(inputs_action, box=self.act_box) + # rescale action to tanh + + pivot = 0 + rows = [] + for idx in range(self.act_dim): + count = self.act_dim - idx + diag_elem = tf.exp(tf.slice(l, (0, pivot), (-1, 1))) + non_diag_elems = tf.slice(l, (0, pivot + 1), (-1, count - 1)) + row = tf.pad(tensor=tf.concat((diag_elem, non_diag_elems), 1), paddings=((0, 0), (idx, 0))) + rows.append(row) + pivot += count + L = tf.transpose(a=tf.stack(rows, axis=1), perm=(0, 2, 1)) + P = tf.matmul(L, tf.transpose(a=L, perm=(0, 2, 1))) + tmp = tf.expand_dims(action - mu, -1) + A = -tf.multiply(tf.matmul(tf.transpose(a=tmp, perm=[0, 2, 1]), + tf.matmul(P, tmp)), tf.constant(0.5, dtype=tf.float64)) + A = 
tf.reshape(A, [-1, 1]) + Q = tf.add(A, V) + + if 'learning_rate' in kwargs: + self.learning_rate = kwargs.get('learning_rate') + else: + self.learning_rate = 1e-3 + # print('learning rate', self.learning_rate ) + if 'directory' in kwargs: + self.directory = kwargs.get('directory') + else: + self.directory = None + + initial_learning_rate = 0.005 + lr_schedule = keras.optimizers.schedules.ExponentialDecay( + initial_learning_rate, decay_steps=1000, decay_rate=0.99, staircase=True + ) + lr_schedule = self.learning_rate + self.optimizer = keras.optimizers.Adam(learning_rate=lr_schedule) + + self.q_model = self.CustomModel(inputs=[inputs_state, inputs_action], outputs=Q, mother_class=self) + # self.q_model.compile(keras.optimizers.Adam(learning_rate=self.learning_rate), loss=MSE) + # optimizer = keras.optimizers.Adam(learning_rate=self.learning_rate) + self.q_model.compile(optimizer=self.optimizer, loss="mse", metrics=["mae"]) + + # Action output + # self.model_get_action = keras.Model(inputs=self.q_model.layers[0].input, + # outputs=self.q_model.get_layer(name='mu').output) + self.model_get_action = keras.Model(inputs=[inputs_state, inputs_action], + outputs=self.q_model.get_layer(name='mu').output) + + # Value output + self.model_value_estimate = keras.Model(inputs=[inputs_state, inputs_action], + outputs=self.q_model.get_layer(name='V').output) + + # self.q_model.summary() + + def normalize(self, input, box): + low = tf.convert_to_tensor(box.low, dtype=tf.float64) + high = tf.convert_to_tensor(box.high, dtype=tf.float64) + return tf.math.scalar_mul(tf.convert_to_tensor(2, dtype=tf.float64), + tf.math.add(tf.convert_to_tensor(-0.5, dtype=tf.float64), + tf.multiply(tf.math.add(input, -low), 1 / (high - low)))) + + def de_normalize(self, input, box): + low = tf.convert_to_tensor(box.low, dtype=tf.float64) + high = tf.convert_to_tensor(box.high, dtype=tf.float64) + input = tf.convert_to_tensor(input, dtype=tf.float64) + return tf.math.add( + tf.multiply(tf.math.add(tf.math.scalar_mul(tf.convert_to_tensor(0.5, dtype=tf.float64), input), + tf.convert_to_tensor(0.5, dtype=tf.float64)), + (high - low)), low) + + def fc(self, x, hidden_size, name=None): + layer = keras.layers.Dense(hidden_size, activation=tf.nn.tanh, + kernel_initializer=tf.compat.v1.random_uniform_initializer(-0.01, 0.01), + kernel_regularizer=None, + bias_initializer=tf.compat.v1.constant_initializer(0.0), name=name) + return layer(x) + + def get_action(self, state): + state = np.array([state], dtype='float64') + actions = tf.zeros(shape=(tf.shape(state)[0], self.act_dim), dtype=tf.float64) + return self.de_normalize(self.model_get_action.predict([state, actions]), self.act_box) + + def get_value_estimate(self, state): + actions = tf.zeros(shape=(tf.shape(state)[0], self.act_dim), dtype=tf.float64) + return self.model_value_estimate.predict([state, actions]) + + def set_polyak_weights(self, weights, polyak=0.999, **kwargs): + if 'name' in kwargs: + print(10 * ' updating:', kwargs.get('name')) + weights_old = self.get_weights() + weights_new = [polyak * weights_old[i] + (1 - polyak) * weights[i] for i in range(len(weights))] + self.q_model.set_weights(weights=weights_new) + + def get_weights(self): + return self.q_model.get_weights() + + def save_model(self, directory): + try: + self.q_model.save(filepath=directory, overwrite=True) + except: + print('Saving failed') + + # def train_model(self, batch_s, batch_a, batch_y, **kwargs): + # # batch_x = np.concatenate((batch_s, batch_a), axis=1) + # n_split = int(5 * len(batch_s) / 5) + # 
batch_s_train, batch_a_train, batch_y_train = batch_s[:n_split], batch_a[:n_split], batch_y[:n_split] + # batch_s_val, batch_a_val, batch_y_val = batch_s[n_split:], batch_a[n_split:], batch_y[n_split:] + # x_batch_train = tf.keras.layers.concatenate([batch_s_train, batch_a_train], + # axis=1, dtype=tf.float64) + # y_batch_train = tf.convert_to_tensor(batch_y_train, dtype=tf.float64) + # + # train_dataset = tf.data.Dataset.from_tensor_slices((x_batch_train, y_batch_train)) + # train_dataset = train_dataset.repeat(50).shuffle(buffer_size=1024, reshuffle_each_iteration=True).batch(15) + # + # # x_batch_train = tf.keras.layers.concatenate([batch_s_val, batch_a_val], + # # axis=1, dtype=tf.float64) + # # y_batch_train = tf.convert_to_tensor(batch_y_val, dtype=tf.float64) + # + # # val_dataset = tf.data.Dataset.from_tensor_slices((x_batch_train, y_batch_train)) + # # val_dataset = val_dataset.repeat(50).shuffle(buffer_size=1024, reshuffle_each_iteration=True).batch(10) + # + # # if x_batch_train.shape[0]<50 else 25 + # epochs = 6 + # self.callback = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=2) + # batch_size = 10 # x_batch_train.shape[0] + # hist = self.q_model.fit(x_batch_train, y_batch_train, + # validation_split=0.1, + # steps_per_epoch=2, + # verbose=0, + # batch_size=batch_size, + # callbacks=[self.callback], + # shuffle=True, + # epochs=epochs, + # # validation_data=val_dataset, + # # validation_steps=3, + # **kwargs) + # return_value = hist.history['loss'] + # return return_value + + # def train_model(self, batch_s, batch_a, y_batch_train, **kwargs): + # # x_batch_train = np.concatenate((batch_s, batch_a), axis=1) + # x_batch_train = tf.keras.layers.concatenate([batch_s, batch_a], + # axis=1, dtype=tf.float64) + # y_batch_train = tf.convert_to_tensor(y_batch_train, dtype=tf.float64) + # + # return self.train_step(x_batch_train, y_batch_train, **kwargs) + + # @tf.function(experimental_relax_shapes=True) + + class CustomCallback(keras.callbacks.Callback): + + def __init__(self, patience=0): + # super(self.CustomCallback, self).__init__() + super().__init__() + self.patience = patience + # best_weights to store the weights at which the minimum loss occurs. + self.best_weights = None + + def on_train_begin(self, logs=None): + # The number of epoch it has waited when loss is no longer minimum. + self.wait = 0 + # The epoch the training stops at. + self.stopped_epoch = 0 + # Initialize the best as infinity. + self.best = np.Inf + + def on_epoch_end(self, epoch, logs=None): + current = logs.get("loss") + # if np.less(current, self.best): + # self.best = current + # self.wait = 0 + # # Record the best weights if current results is better (less). 
+ # self.best_weights = self.model.get_weights() + # else: + # self.wait += 1 + # if self.wait >= self.patience: + # self.stopped_epoch = epoch + # self.model.stop_training = True + # # print("Restoring model weights from the end of the best epoch.") + # self.model.set_weights(self.best_weights) + self.q_target.set_polyak_weights(self.model.get_weights(), + polyak=0.9995) + # print('updating...', self.model.__name__) + + # def on_train_end(self, logs=None): + # if self.stopped_epoch > 0: + # print("Epoch %05d: early stopping" % (self.stopped_epoch + 1)) + # self.q_target.set_polyak_weights(self.model.get_weights(), + # polyak=0.999) + # print('end of training') + + # def on_train_batch_end(self, batch, logs=None): + # keys = list(logs.keys()) + # + # # self.q_target.set_polyak_weights(self.model.get_weights(), + # # polyak=0.999) + # # print('updated', self.q_target.__name__) + # print("...Training: end of batch {}; got log keys: {}".format(batch, keys)) + # # print(self.model.y_target) + + # def get_estimate(self, o2, d, r): + # self.discount = 0.999 + # v_1 = self.q_target_first.get_value_estimate(o2) + # v_2 = self.q_target_second.get_value_estimate(o2) + # v = tf.where(v_1 < v_2, v_1, v_2) + # return self.discount * tf.squeeze(v) * (1 - d) + r + + def train_model_1(self, **kwargs): + if 'polyak' in kwargs: + self.polyak = kwargs.get('polyak') + else: + self.polyak = 0.999 + if 'batch_size' in kwargs: + self.batch_size = kwargs.get('batch_size') + else: + self.batch_size = 100 + if 'epochs' in kwargs: + self.epochs = kwargs.get('epochs') + else: + self.epochs = 2 + if 'steps_per_epoch' in kwargs: + self.steps_per_epoch = kwargs.get('steps_per_epoch') + else: + self.steps_per_epoch = 10 + + batch = self.replay_buffer.sample_batch(batch_size=1000000) + # batch, prios = self.replay_buffer.sample_batch(batch_size=batch_size) + # nr = self.replay_buffer.size + # + # beta = lambda nr: max(1e-16, 1 - nr / 1000) + # decay_function = lambda nr: max(0, 1 - nr / 1000) + # beta_decay = beta(nr) + # print(beta_decay) + # batch, priority_info = self.replay_buffer.sample_batch(batch_size=30, beta=beta_decay) + # sample_weights = priority_info[0].astype('float64') + # batch['sample_weights'] = sample_weights + # + batch['obs1'] = batch['obs1'].astype('float64') + batch['obs2'] = batch['obs2'].astype('float64') + batch['acts'] = batch['acts'].astype('float64') + batch['rews'] = batch['rews'].astype('float64') + batch['done'] = np.where(batch['done'], 1, 0).astype('float64') + + # batch['y_target'] = self.get_estimate(o2, d, r) + # batch['x_batch_train'] = x_batch_train + # print(batch) + dataset = tf.data.Dataset.from_tensor_slices(batch).repeat(200).shuffle(buffer_size=10000) + train_dataset = dataset.batch(self.batch_size) + # print([element['obs1'] for element in train_dataset.take(2)]) + + # val_dataset = dataset.take(10) + + # if False: + # # if self.replay_buffer.size % 50 == 0 or self.init: + # epochs = 50 + # dataset = tf.data.Dataset.from_tensor_slices(batch).shuffle(buffer_size=1024) + # train_dataset = dataset.batch(10) + # self.callback = self.CustomCallback(patience=0) + # self.callback.q_target = self.q_target_first + # + # # self.callback.q_model = self.q_model + # early_stop = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=2, verbose=1) + # + # hist = self.q_model.fit(train_dataset, + # # validation_split=0.1, + # verbose=1, + # # batch_size=batch_size, + # callbacks=[self.callback, early_stop], + # shuffle=True, + # epochs=epochs, + # # validation_data=val_dataset, + # # 
validation_steps=2, + # **kwargs) + # self.init = False + # else: + # # epochs = 2 + self.callback = self.CustomCallback(patience=0) + self.callback.q_target = self.q_target_first + self.save_frequency = 100 + checkpoint_callback = [ + keras.callbacks.ModelCheckpoint( + # Path where to save the model + # The two parameters below mean that we will overwrite + # the current checkpoint if and only if + # the `val_loss` score has improved. + # The saved model name will include the current epoch. + filepath="checkpoints/mymodel_{epoch}", + save_weights_only=True, + save_freq=self.save_frequency, + # save_best_only=True, # Only save a model if `val_loss` has improved. + # monitor="loss", + verbose=1, + ) + ] + # self.callback.q_model = self.q_model + # early_stop = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=2, verbose=0) + # TODO: implement saving + hist = self.q_model.fit(train_dataset, + # sample_weights=sample_weights, + # validation_split=0.1, + steps_per_epoch=self.steps_per_epoch, + verbose=0, + # batch_size=batch_size, + callbacks=[self.callback], # , checkpoint_callback], + shuffle=True, + epochs=self.epochs, + # validation_data=val_dataset, + # validation_steps=2, + **kwargs) + # update the targets + # self.q_target_first.set_polyak_weights(self.q_model.get_weights(), + # polyak=0.999) + + # loss = model.train_model(o, a, target_y, sample_weight=sample_weights)[-1] + return_value = hist.history['loss'] + # decay = decay_function(nr) + # update_prios = (return_value[-1] * decay + 1e-16) * np.ones(priority_info[0].shape) + # self.replay_buffer.update_priorities(idxes=priority_info[1], priorities=update_prios) + + return return_value + + def set_models(self, q_target_1, q_target_2=None): + self.q_target_first = q_target_1 + if q_target_2 is not None: + self.q_target_second = q_target_2 + + class CustomModel(keras.Model): + + def __init__(self, *args, **kwargs): + self.mother_class = kwargs.get('mother_class') + self.__name__ = self.mother_class.__name__ + del kwargs['mother_class'] + super().__init__(*args, **kwargs) + self.discount = tf.constant(0.999, dtype=tf.float64) + + def train_step(self, batch): + o = batch['obs1'] + o2 = batch['obs2'] + a = batch['acts'] + r = batch['rews'] + d = batch['done'] + # target_y = self.mother_class.get_estimate(o2, d, r) + # y_target = batch['y_target'] + + # a_zero = tf.multiply(a, tf.constant(0, dtype=tf.float64)) + v_1 = self.mother_class.q_target_first.model_value_estimate([o2, a]) # , training=False) + if self.mother_class.clipped_double_q: + v_2 = self.mother_class.q_target_second.model_value_estimate([o2, a]) # , training=False) + v = tf.squeeze(tf.where(tf.math.less(v_1, v_2), v_1, v_2)) + # print('double', self.mother_class.__name__) + else: + v = tf.squeeze(v_1) + # print('single', self.mother_class.__name__) + y_target = tf.add(tf.multiply(tf.math.scalar_mul(self.discount, v), + tf.add(tf.constant(1, dtype=tf.float64), + tf.math.scalar_mul(-1, d))), r) + + # print('target', tf.reduce_mean(y_target)) + + # Iterate over the batches of the dataset. + # if 'sample_weight' in kwargs: + # sample_weight = kwargs.get('sample_weight') + with tf.GradientTape() as tape: + # Run the forward pass of the layer. + # The operations that the layer applies + # to its inputs are going to be recorded + # on the GradientTape. + y_pred = self([o, a], training=True) # Logits for this minibatch + # Compute the loss value for this minibatch. 
+ loss = self.compiled_loss( + y_target, + y_pred, + # sample_weight=batch['sample_weights'] + ) + # Use the gradient tape to automatically retrieve + # the gradients of the trainable variables with respect to the loss. + # Compute gradients + trainable_vars = self.trainable_weights + gradients = tape.gradient(loss, trainable_vars) + # Run one step of gradient descent by updating + # the value of the variables to minimize the loss. + # Update weights + self.optimizer.apply_gradients(zip(gradients, trainable_vars)) + # Update the metrics. + # Metrics are configured in `compile()`. + self.compiled_metrics.update_state(y_target, y_pred) + + # self.mother_class.q_target.set_polyak_weights(self.mother_class.q_model.get_weights(), + # polyak=0.999) + return {m.name: m.result() for m in self.metrics} + + def create_buffers(self, per_flag, prio_info): + if not (per_flag): + self.replay_buffer = ReplayBuffer(obs_dim=self.obs_dim, act_dim=self.act_dim, size=int(1e6)) + # print('normal buffer') + # else: + # self.replay_buffer = ReplayBufferPER(obs_dim=self.obs_dim, act_dim=self.act_dim, size=int(1e6), + # prio_info=prio_info) + + +class NAF(object): + def __init__(self, env, + max_steps, max_episodes, training_info=dict(), pretune=None, prio_info=dict(), + noise_info=dict(), save_frequency=500, directory=None, is_continued=False, + clipped_double_q=2, q_smoothing=0.01, warm_up_steps=None, training_stop=None, **nafnet_kwargs): + ''' + :param env: open gym environment to be solved + :param directory: directory were weigths are saved + :param stat: statistic class to handle tensorflow and statitics + :param discount: discount factor + :param batch_size: batch size for the training + :param learning_rate: learning rate + :param max_steps: maximal steps per episode + :param update_repeat: iteration per step of training + :param max_episodes: maximum number of episodes + :param polyak: polyac averaging + :param pretune: list of tuples of state action reward next state done + :param prio_info: parameters to handle the prioritizing of the buffer + :param nafnet_kwargs: keywords to handle the network and training + :param noise_info: dict with noise_function + :param clipped_double_q: use the clipped double q trick with switching all clipped_double_q steps + :param q_smoothing: add small noise on actions to smooth the training + ''' + self.clipped_double_q = clipped_double_q + self.q_smoothing = q_smoothing + self.losses2 = [] + self.vs2 = [] + self.model_switch = 1 + + self.directory = directory + self.save_frequency = save_frequency + + self.losses = [] + self.pretune = pretune + self.prio_info = prio_info + self.per_flag = bool(self.prio_info) + print('PER is:', self.per_flag) + + self.env = env + + if 'noise_function' in noise_info: + self.noise_function = noise_info.get('noise_function') + else: + self.noise_function = lambda nr: 1 / (nr + 1) + if training_stop is not None: + self.training_stop = training_stop + else: + self.training_stop = None + + self.action_box = env.action_space + self.action_size = self.action_box.shape[0] + self.obs_box = env.observation_space + + self.max_steps = max_steps + # self.update_repeat = update_repeat + self.max_episodes = max_episodes + self.idx_episode = None + self.vs = [] + if warm_up_steps is None: + self.warm_up_steps = 0 + else: + self.warm_up_steps = warm_up_steps + + if 'decay_function' in prio_info: + self.decay_function = prio_info.get('decay_function') + else: + if 'beta' in prio_info: + self.decay_function = lambda nr: prio_info.get('beta') + else: + 
self.decay_function = lambda nr: 1. + + if 'beta_decay' in prio_info: + self.beta_decay_function = prio_info.get('beta_decay') + # elif self.per_flag: + # self.beta_decay_function = lambda nr: max(1e-12, prio_info.get('beta_start') - nr / 100) + else: + self.beta_decay_function = lambda nr: 1 + + self.training_info = training_info + + if 'learning_rate' in training_info: + learning_rate = training_info.get('learning_rate') + else: + learning_rate = 1e-3 + + self.q_main_model_1 = QModel(obs_box=self.obs_box, act_box=self.action_box, learning_rate=learning_rate, + name='q_main_model_1', + clipped_double_q=self.clipped_double_q, + **nafnet_kwargs) + self.q_main_model_1.create_buffers(per_flag=self.per_flag, prio_info=prio_info) + + # self.current_model = self.q_main_model_1 + # Set same initial values in all networks + # self.q_main_model_2.q_model.set_weights(weights=self.q_main_model_1.q_model.get_weights()) + # Set same initial values in all networks + self.q_target_model_1 = QModel(obs_box=self.obs_box, act_box=self.action_box, name='q_target_model_1', + **nafnet_kwargs) + self.q_target_model_1.q_model.set_weights(weights=self.q_main_model_1.q_model.get_weights()) + + if self.clipped_double_q: + self.q_main_model_2 = QModel(obs_box=self.obs_box, act_box=self.action_box, learning_rate=learning_rate, + name='q_main_model_2', + clipped_double_q=self.clipped_double_q, + **nafnet_kwargs) + self.q_main_model_2.create_buffers(per_flag=self.per_flag, prio_info=prio_info) + self.q_target_model_2 = QModel(obs_box=self.obs_box, act_box=self.action_box, + name='q_target_model_2') + self.q_target_model_2.q_model.set_weights(weights=self.q_main_model_2.q_model.get_weights()) + + # TODO: change to one network + # if self.clipped_double_q: + self.q_main_model_1.set_models(self.q_target_model_1, self.q_target_model_2) + self.q_main_model_2.set_models(self.q_target_model_2, self.q_target_model_1) + else: + self.q_main_model_1.set_models(self.q_target_model_1) + + if is_continued: + try: + print(self.directory) + self.q_target_model_1.q_model = tf.keras.models.load_model(filepath=self.directory) + print('Successfully loaded', 10 * ' -') + except: + print('Failed to load', 10 * ' *') + if not os.path.exists(self.directory): + os.makedirs(self.directory) + else: + if not os.path.exists(self.directory): + os.makedirs(self.directory) + elif not (self.directory): + for f in os.listdir(self.directory): + print('Deleting: ', self.directory + '/' + f) + os.remove(self.directory + '/' + f) + time.sleep(.5) + + self.counter = 0 + + def predict(self, model, state, is_train): + + if is_train and model.replay_buffer.size < self.warm_up_steps: + print(10 * 'inits ') + action = model.de_normalize(np.random.uniform(-1, 1, self.action_size), model.act_box) + # print(action) + return np.array(action) + elif is_train: + action = model.normalize(model.get_action(state=state), model.act_box) + noise = self.noise_function(self.idx_episode) * np.random.randn(self.action_size) + if self.q_smoothing is None: + return_value = model.de_normalize(np.clip(action + noise, -1, 1), model.act_box) + else: + return_value = model.de_normalize(np.clip(np.clip(action + + noise, -1, 1) + np.clip(self.q_smoothing * + np.random.randn( + self.action_size), + -self.q_smoothing, + self.q_smoothing), + -1, 1), + model.act_box) + return np.array(return_value) + else: + action = model.get_action(state=state) + return action + + def verification(self, steps): + self.run(is_train=False) + + def run(self, is_train=True): + initial_max = 5 + + for index in 
tqdm(range(0, self.max_episodes)): + self.idx_episode = index + # if self.clipped_double_q is not None: + # self.model_switcher(self.idx_episode) + + o = self.env.reset() + if self.training_stop is not None: + is_train = False if self.training_stop < self.idx_episode else True + # print("starting the tests") + for t in range(0, self.max_steps): + # 1. predict + # if self.q_smoothing is not None: + # a = np.clip(self.predict(self.q_main_model_1, o, is_train)[0] + + # np.clip(self.q_smoothing * np.random.randn(self.action_size), + # -self.q_smoothing, self.q_smoothing), -1, 1) + # else: + a = np.squeeze(self.predict(self.q_main_model_1, o, is_train)) + o2, r, d, _ = self.env.step(a) + if is_train: + self.q_main_model_1.replay_buffer.store(o, a, r, o2, d) + if self.clipped_double_q: + self.q_main_model_2.replay_buffer.store(o, a, r, o2, d) + o = o2 + d = False if t == self.max_steps - 1 else d + + if t % initial_max == 0 and self.q_main_model_1.replay_buffer.size <= self.warm_up_steps: + o = self.env.reset() + print('initial reset at ', t) + + # 2. train + if is_train and self.q_main_model_1.replay_buffer.size > self.warm_up_steps: + # try: + self.update_q(self.q_main_model_1) + if self.clipped_double_q: + self.update_q(self.q_main_model_2) + # except: + # print('wait:', self.q_main_model_1.replay_buffer.size) + if d: + break + + def train_model(self, model): + # beta_decay = self.beta_decay_function(self.idx_episode) + # decay = self.decay_function(self.idx_episode) + + # if self.per_flag: + # if True: # model is self.q_main_model_1: + # batch, priority_info = model.replay_buffer.sample_batch(batch_size=self.batch_size, beta=beta_decay) + # else: + # batch, priority_info = model.replay_buffer.sample_normal(batch_size=self.batch_size) + # else: + batch = model.replay_buffer.sample_batch(200) + + # o = batch['obs1'] + o2 = batch['obs2'] + a = batch['acts'] + # r = batch['rews'] + # d = batch['done'] + # + # v = self.get_v(o2) + # target_y = self.discount * np.squeeze(v) * (1 - d) + r + # input = tf.keras.layers.concatenate([o2, a], axis=1, dtype=tf.float64) + v = self.q_target_model_1.model_value_estimate([o2, a]) + # v_2 = self.q_target_model_2.model_value_estimate([o2, a]) + # print(tf.reduce_mean(v)) + # print(tf.reduce_mean(v_2)) + + # if self.per_flag: + # if True: # model is self.q_main_model_1: + # sample_weights = tf.convert_to_tensor(priority_info[0], dtype=tf.float64) + # loss = model.train_model(o, a, target_y, sample_weight=sample_weights)[-1] + # update_prios = (loss * decay + 1e-16) * np.ones(priority_info[0].shape) + # model.replay_buffer.update_priorities(idxes=priority_info[1], priorities=update_prios) + # else: + # loss = model.train_model_1(10000)[-1] + # else: + # loss = model.train_model(o, a, target_y)[-1] + loss = model.train_model_1(**self.training_info)[-1] + + # model.set_polyak_weights(self.q_main_model_1.get_weights(), polyak=self.polyak) + # if self.clipped_double_q is not None: + # model.set_polyak_weights(self.q_main_model_2.get_weights(), polyak=self.polyak) + + return v, loss + + # def model_switcher(self, number): + # if number % 1 == 0: + # self.model_switch = 2 if self.model_switch == 1 else 1 + # if self.model_switch == 1: + # self.current_model = self.q_main_model_1 + # else: + # self.current_model = self.q_main_model_2 + + def update_q(self, model): + vs = [] + losses = [] + self.counter += 1 + + # for i in range(self.update_repeat): + # print('i', i, model) + v, loss = self.train_model(model=model) + if model == self.q_main_model_1: + vs.append(v) + 
losses.append(loss) + + if self.counter % self.save_frequency == 0: + self.q_target_model_1.save_model(directory=self.directory) + self.q_main_model_1.replay_buffer.save_to_pkl(name='test.pkl', directory=self.directory) + if model == self.q_main_model_1: + self.vs.append(np.mean(vs)) + self.losses.append(np.mean(losses)) + + # def get_v(self, o2): + # v_1 = self.q_target_model_1.get_value_estimate(o2) + # if self.clipped_double_q is not None: + # v_2 = self.q_target_model_2.get_value_estimate(o2) + # v = np.where(v_1 < v_2, v_1, v_2) + # + # # print('vs: ', np.mean(o2), np.mean(v_1), np.mean(v_2)) + # else: + # v = v_1 + # return v + + +if __name__ == '__main__': + print('start') + # test_state = np.random.random((1, 2)) + # + # q_main_model = QModel(2, 2) + # q_target_model = QModel(2, 2) + # + # print('main', q_main_model.get_action(test_state)) + # print('main', q_main_model.get_value_estimate(test_state)) + # + # print('target', q_target_model.get_action(test_state)) + # print('target', q_target_model.get_value_estimate(test_state)) + # + # q_target_model.set_weights(q_main_model.get_weights()) + # + # print('target', q_target_model.get_action(test_state)) + # print('target', q_target_model.get_value_estimate(test_state)) + # + # batch_x = np.random.random((5, 4)) + # batch_y = np.random.random((5, 4)) + # hist = q_main_model.q_model.fit(batch_x, batch_y) + # print(hist.history['loss']) + # + # print('main', q_main_model.get_action(test_state)) + # print('main', q_main_model.get_value_estimate(test_state)) + # + # print('target', q_target_model.get_action(test_state)) + # print('target', q_target_model.get_value_estimate(test_state)) + # + # + # q_target_model.set_weights(q_main_model.get_weights()) + # + # print('target', q_target_model.get_action(test_state)) + # print('target', q_target_model.get_value_estimate(test_state)) + # + # weights = (q_target_model.get_weights()) + # keras.utils.plot_model(model, 'my_first_model.png') + # keras.utils.plot_model(model_get_action, 'model_get_action.png') diff --git a/Data_Experiments/2020_07_20_NAF@FERMI/new_test_file.py b/Data_Experiments/2020_07_20_NAF@FERMI/new_test_file.py new file mode 100644 index 0000000..a1f3675 --- /dev/null +++ b/Data_Experiments/2020_07_20_NAF@FERMI/new_test_file.py @@ -0,0 +1,177 @@ +import os +import pickle +import pandas as pd +import random + +import time + +import gym +import matplotlib.pyplot as plt +import numpy as np +import tensorflow as tf + +#from local_fel_simulated_env import FelLocalEnv +from laser_trajectory_control_env import LaserTrajectoryControlEnv +from naf2 import NAF +from pernaf.pernaf.utils.statistic import Statistic + +# from simple_environment import simpleEnv + +# from pendulum import PendulumEnv as simpleEnv +# set random seed +random_seed = 111 +# set random seed + +np.random.seed(random_seed) +random.seed(random_seed) + +# from simulated_tango import SimTangoConnection +from tango_connection import TangoConnection + +random_seed = 123 +# set random seed +# tf.set_random_seed(random_seed) +np.random.seed(random_seed) +random.seed(random_seed) + +# tango = SimTangoConnection() +# env = FelLocalEnv(tango=tango) +conf_file = '/home/niky/FERMI/2020_07_20/configuration/conf_eos.json' +tango = TangoConnection(conf_file=conf_file) +env = LaserTrajectoryControlEnv(tango=tango) + +directory = "checkpoints/test_implementation/" + +label = 'New NAF' + +directory = "checkpoints/new_test/" + + +# TODO: Test the loading + +def plot_results(env, label): + # plotting + print('now plotting') + rewards = 
env.rewards + initial_states = env.initial_conditions + + iterations = [] + final_rews = [] + # starts = [] + sum_rews = [] + mean_rews = [] + # init_states = pd.read_pickle('/Users/shirlaen/PycharmProjects/DeepLearning/spinningup/Environments/initData') + + for i in range(len(rewards)): + if (len(rewards[i]) > 0): + final_rews.append(rewards[i][len(rewards[i]) - 1]) + # starts.append(-np.sqrt(np.mean(np.square(initial_states[i])))) + iterations.append(len(rewards[i])) + sum_rews.append(np.sum(rewards[i])) + mean_rews.append(np.mean(rewards[i])) + plot_suffix = "" # f', number of iterations: {env.TOTAL_COUNTER}, Linac4 time: {env.TOTAL_COUNTER / 600:.1f} h' + + fig, axs = plt.subplots(2, 1) + + ax = axs[0] + color = 'blue' + ax.plot(iterations, c=color) + ax.set_ylabel('steps', color=color) + ax.tick_params(axis='y', labelcolor=color) + ax1 = plt.twinx(ax) + color = 'k' + ax1.plot(np.cumsum(iterations), c=color) + ax1.set_ylabel('cumulative steps', color=color) + ax.set_title('Iterations' + plot_suffix) + # fig.suptitle(label, fontsize=12) + + ax = axs[1] + color = 'red' + # ax.plot(starts, c=color) + ax.set_ylabel('starts', color=color) + ax.tick_params(axis='y', labelcolor=color) + ax.set_title('Final reward per episode') # + plot_suffix) + ax.set_xlabel('Episodes (1)') + + ax1 = plt.twinx(ax) + color = 'lime' + ax1.set_ylabel('finals', color=color) # we already handled the x-label with ax1 + ax1.tick_params(axis='y', labelcolor=color) + ax1.plot(final_rews, color=color) + + fig.tight_layout() + plt.savefig(label + '.pdf') + plt.show() + + fig, ax = plt.subplots(1, 1) + color = 'blue' + ax.plot(sum_rews, color) + ax.set_ylabel('cum. reward', color=color) + ax.tick_params(axis='y', labelcolor=color) + ax1 = plt.twinx(ax) + color = 'lime' + ax1.plot(mean_rews, c=color) + ax1.set_ylabel('mean', color=color) # we already handled the x-label with ax1 + ax1.tick_params(axis='y', labelcolor=color) + plt.show() + + +def plot_convergence(agent, label): + losses, vs = agent.losses, agent.vs + losses2, vs2 = agent.losses2, agent.vs2 + + fig, ax = plt.subplots() + ax.set_title(label) + ax.set_xlabel('# steps') + + color = 'tab:blue' + ax.semilogy(losses, color=color) + ax.semilogy(losses2, color=color) + ax.tick_params(axis='y', labelcolor=color) + ax.set_ylabel('td_loss', color=color) + # ax.set_ylim(0, 1) + + ax1 = plt.twinx(ax) + # ax1.set_ylim(-2, 1) + color = 'lime' + ax1.set_ylabel('V', color=color) # we already handled the x-label with ax1 + ax1.tick_params(axis='y', labelcolor=color) + ax1.plot(vs, color=color) + ax1.plot(vs2, color=color) + plt.savefig(label + 'convergence' + '.pdf') + plt.show() + + +if __name__ == '__main__': + max_steps = 500 + max_episodes = 20 + is_train = True + is_continued = True#False if is_train else True + + nafnet_kwargs = dict(hidden_sizes=[100, 100], activation=tf.nn.tanh + , weight_init=tf.random_uniform_initializer(-0.05, 0.05, seed=random_seed)) + + noise_info = dict(noise_function=lambda nr: max(0., 1 * (1 - (nr / 100)))) + + # prio_info = dict(alpha=.25, beta=.8, decay_function=lambda nr: max(1e-6, (1 - (nr / 25))), + # beta_decay=lambda nr: max(1e-6, (1 - (nr / 25)))) + prio_info = dict() + # the target network is updated at the end of each episode + # the number of episodes is executed each step in the environment + training_info = dict(polyak=0.9995, epoches=5, steps_per_epoch=10, batch_size=100, + learning_rate=1e-3, discount=0.999) + # filename = 'Scan_data.obj' + # filehandler = open(filename, 'rb') + # scan_data = pickle.load(filehandler) + + # 
init the agent + agent = NAF(env=env, directory=directory, max_steps=max_steps, max_episodes=max_episodes, prio_info=prio_info, + noise_info=noise_info, is_continued=is_continued, q_smoothing=0.01, clipped_double_q=True, + warm_up_steps=25, save_frequency=25, **nafnet_kwargs) + # run the agent + agent.training(warm_up_steps=200, ) + agent.verification(max_episodes=1000, max_steps=0) + + # plot the results + plot_convergence(agent=agent, label=label) + plot_results(env, label) diff --git a/Data_Experiments/2020_07_20_NAF@FERMI/new_test_file_old.py b/Data_Experiments/2020_07_20_NAF@FERMI/new_test_file_old.py new file mode 100644 index 0000000..bc8f8af --- /dev/null +++ b/Data_Experiments/2020_07_20_NAF@FERMI/new_test_file_old.py @@ -0,0 +1,185 @@ +import os +import pickle +import pandas as pd +import random + +import time + +import gym +import matplotlib.pyplot as plt +import numpy as np +import tensorflow as tf + +#from local_fel_simulated_env import FelLocalEnv +from laser_trajectory_control_env import LaserTrajectoryControlEnv +from naf2 import NAF +from pernaf.pernaf.utils.statistic import Statistic + +# from simple_environment import simpleEnv + +# from pendulum import PendulumEnv as simpleEnv +# set random seed +random_seed = 111 +# set random seed + +np.random.seed(random_seed) +random.seed(random_seed) + +# from simulated_tango import SimTangoConnection +from tango_connection import TangoConnection + +random_seed = 123 +# set random seed +# tf.set_random_seed(random_seed) +np.random.seed(random_seed) +random.seed(random_seed) + +# tango = SimTangoConnection() +# env = FelLocalEnv(tango=tango) +conf_file = '/home/niky/FERMI/2020_07_20/configuration/conf_eos.json' +tango = TangoConnection(conf_file=conf_file) +env = LaserTrajectoryControlEnv(tango=tango) + +directory = "checkpoints/test_implementation/" + +label = 'New NAF' + +directory = "checkpoints/new_test/" + + +# TODO: Test the loading + +def plot_results(env, label): + # plotting + print('now plotting') + rewards = env.rewards + initial_states = env.initial_conditions + + iterations = [] + final_rews = [] + # starts = [] + sum_rews = [] + mean_rews = [] + # init_states = pd.read_pickle('/Users/shirlaen/PycharmProjects/DeepLearning/spinningup/Environments/initData') + + for i in range(len(rewards)): + if (len(rewards[i]) > 0): + final_rews.append(rewards[i][len(rewards[i]) - 1]) + # starts.append(-np.sqrt(np.mean(np.square(initial_states[i])))) + iterations.append(len(rewards[i])) + sum_rews.append(np.sum(rewards[i])) + mean_rews.append(np.mean(rewards[i])) + plot_suffix = "" # f', number of iterations: {env.TOTAL_COUNTER}, Linac4 time: {env.TOTAL_COUNTER / 600:.1f} h' + + fig, axs = plt.subplots(2, 1) + + ax = axs[0] + color = 'blue' + ax.plot(iterations, c=color) + ax.set_ylabel('steps', color=color) + ax.tick_params(axis='y', labelcolor=color) + ax1 = plt.twinx(ax) + color = 'k' + ax1.plot(np.cumsum(iterations), c=color) + ax1.set_ylabel('cumulative steps', color=color) + ax.set_title('Iterations' + plot_suffix) + # fig.suptitle(label, fontsize=12) + + ax = axs[1] + color = 'red' + # ax.plot(starts, c=color) + ax.set_ylabel('starts', color=color) + ax.tick_params(axis='y', labelcolor=color) + ax.set_title('Final reward per episode') # + plot_suffix) + ax.set_xlabel('Episodes (1)') + + ax1 = plt.twinx(ax) + color = 'lime' + ax1.set_ylabel('finals', color=color) # we already handled the x-label with ax1 + ax1.tick_params(axis='y', labelcolor=color) + ax1.plot(final_rews, color=color) + + fig.tight_layout() + plt.savefig(label + 
'.pdf') + plt.show() + + fig, ax = plt.subplots(1, 1) + color = 'blue' + ax.plot(sum_rews, color) + ax.set_ylabel('cum. reward', color=color) + ax.tick_params(axis='y', labelcolor=color) + ax1 = plt.twinx(ax) + color = 'lime' + ax1.plot(mean_rews, c=color) + ax1.set_ylabel('mean', color=color) # we already handled the x-label with ax1 + ax1.tick_params(axis='y', labelcolor=color) + plt.show() + + +def plot_convergence(agent, label): + losses, vs = agent.losses, agent.vs + losses2, vs2 = agent.losses2, agent.vs2 + + fig, ax = plt.subplots() + ax.set_title(label) + ax.set_xlabel('# steps') + + color = 'tab:blue' + ax.semilogy(losses, color=color) + ax.semilogy(losses2, color=color) + ax.tick_params(axis='y', labelcolor=color) + ax.set_ylabel('td_loss', color=color) + # ax.set_ylim(0, 1) + + ax1 = plt.twinx(ax) + # ax1.set_ylim(-2, 1) + color = 'lime' + ax1.set_ylabel('V', color=color) # we already handled the x-label with ax1 + ax1.tick_params(axis='y', labelcolor=color) + ax1.plot(vs, color=color) + ax1.plot(vs2, color=color) + plt.savefig(label + 'convergence' + '.pdf') + plt.show() + + +if __name__ == '__main__': + # discount = 0.999 + # batch_size = 1000000 + # learning_rate = 1e-3 + max_steps = 500 + # update_repeat = 1 + max_episodes = 200 # 20 # for debugging + # polyak = 0.999 + is_train = True + is_continued = False if is_train else True + + nafnet_kwargs = dict(hidden_sizes=[100, 100], activation=tf.nn.tanh + , weight_init=tf.random_uniform_initializer(-0.05, 0.05, seed=random_seed)) + + noise_info = dict(noise_function=lambda nr: max(0., 1 * (1 - (nr / 100)))) + + # prio_info = dict(alpha=.25, beta=.8, decay_function=lambda nr: max(1e-6, (1 - (nr / 25))), + # beta_decay=lambda nr: max(1e-6, (1 - (nr / 25)))) + prio_info = dict() + # the target network is updated at the end of each episode + # the number of episodes is executed each step in the environment + # + #training_info = dict(polyak=0.9995, epoches=5, steps_per_epoch=10, batch_size=100, + # learning_rate=1e-3, discount=0.999) + training_info = dict(polyak=0.9995, epoches=10, steps_per_epoch=10, batch_size=64, + learning_rate=1e-3, discount=0.999) + # filename = 'Scan_data.obj' + # filehandler = open(filename, 'rb') + # scan_data = pickle.load(filehandler) + + # init the agent + agent = NAF(env=env, directory=directory, max_steps=max_steps, max_episodes=max_episodes, prio_info=prio_info, + noise_info=noise_info, is_continued=is_continued, q_smoothing=0.01, clipped_double_q=True, + warm_up_steps=25, training_stop=100, save_frequency=500, **nafnet_kwargs) + # run the agent + agent.run(is_train) + # agent.verification(steps=100) + + # plot the results + plot_convergence(agent=agent, label=label) + plot_results(env, label) diff --git a/Data_Experiments/2020_07_20_NAF@FERMI/run_naf2.py b/Data_Experiments/2020_07_20_NAF@FERMI/run_naf2.py new file mode 100644 index 0000000..7e3463b --- /dev/null +++ b/Data_Experiments/2020_07_20_NAF@FERMI/run_naf2.py @@ -0,0 +1,172 @@ +import os +import pickle +import random +import matplotlib.pyplot as plt +import numpy as np +import tensorflow as tf + +# from local_fel_simulated_env import FelLocalEnv +from laser_trajectory_control_env import LaserTrajectoryControlEnv +# from simulated_tango import SimTangoConnection +from tango_connection import TangoConnection +from naf2 import NAF + +# set random seed +random_seed = 111 +np.random.seed(random_seed) +random.seed(random_seed) + +# tango = SimTangoConnection() +# env = FelLocalEnv(tango=tango) +conf_file = 
'/home/niky/FERMI/2020_07_20/configuration/conf_fel.json' +tango = TangoConnection(conf_file=conf_file) +env = LaserTrajectoryControlEnv(tango=tango) + +def plot_results(env, file_name): + # plotting + print('Now plotting') + rewards = env.rewards + initial_rewards = env.init_rewards + # print('initial_rewards :', initial_rewards) + + iterations = [] + final_rews = [] + starts = [] + sum_rews = [] + mean_rews = [] + + for i in range(len(rewards)): + if (len(rewards[i]) > 0): + final_rews.append(rewards[i][len(rewards[i]) - 1]) + starts.append(initial_rewards[i]) + iterations.append(len(rewards[i])) + sum_rews.append(np.sum(rewards[i])) + mean_rews.append(np.mean(rewards[i])) + plot_suffix = "" # f', number of iterations: {env.TOTAL_COUNTER}, Linac4 time: {env.TOTAL_COUNTER / 600:.1f} h' + + fig, axs = plt.subplots(2, 1) + + ax = axs[0] + color = 'blue' + ax.plot(iterations, c=color) + ax.set_ylabel('steps', color=color) + ax.tick_params(axis='y', labelcolor=color) + ax1 = plt.twinx(ax) + color = 'k' + ax1.plot(np.cumsum(iterations), c=color) + ax1.set_ylabel('cumulative steps', color=color) + ax.set_title('Iterations' + plot_suffix) + # fig.suptitle(label, fontsize=12) + + ax = axs[1] + color = 'red' + ax.plot(starts, c=color) + ax.set_ylabel('starts', color=color) + ax.axhline(-0.05, ls=':', color='r') + ax.tick_params(axis='y', labelcolor=color) + ax.set_title('Final reward per episode') # + plot_suffix) + ax.set_xlabel('# episode') + + ax1 = plt.twinx(ax) + color = 'lime' + ax1.set_ylabel('finals', color=color) + ax1.axhline(-0.05, ls=':', color=color) + ax1.tick_params(axis='y', labelcolor=color) + ax1.plot(final_rews, color=color) + + fig.tight_layout() + plt.savefig(file_name + '_episodes.pdf') + plt.show() + + fig, ax = plt.subplots(1, 1) + color = 'blue' + ax.plot(sum_rews, color) + ax.set_ylabel('cum. 
reward', color=color) + ax.set_xlabel('# episode') + ax.tick_params(axis='y', labelcolor=color) + ax1 = plt.twinx(ax) + color = 'lime' + ax1.plot(mean_rews, c=color) + ax1.set_ylabel('mean reward', color=color) # we already handled the x-label with ax1 + ax1.tick_params(axis='y', labelcolor=color) + plt.savefig(file_name + '_rewards.pdf') + plt.show() + +def plot_convergence(agent, file_name): + losses, vs = agent.losses, agent.vs + # losses2, vs2 = agent.losses2, agent.vs2 + + fig, ax = plt.subplots() + # ax.set_title(label) + ax.set_xlabel('# steps') + + color = 'tab:blue' + + # ax.semilogy(losses2, color=color) + ax.tick_params(axis='y', labelcolor=color) + ax.set_ylabel('td_loss', color=color) + ax.semilogy(losses, color=color) + # ax.set_ylim(0, 1) + + ax1 = plt.twinx(ax) + # ax1.set_ylim(-2, 1) + color = 'lime' + ax1.set_ylabel('V', color=color) # we already handled the x-label with ax1 + ax1.tick_params(axis='y', labelcolor=color) + ax1.plot(vs, color=color) + # ax1.plot(vs2, color=color) + plt.savefig(file_name + '_convergence' + '.pdf') + plt.show() + +if __name__ == '__main__': + + directory = "checkpoints/test_new/" + + is_continued = False # False if is_train else True + + nafnet_kwargs = dict(hidden_sizes=[100, 100], activation=tf.nn.tanh + , weight_init=tf.random_uniform_initializer(-0.05, 0.05, seed=random_seed)) + + noise_info = dict(noise_function=lambda nr: max(0., 1-(nr/200))) + + # the target network is updated at the end of each episode + # the number of episodes is executed each step in the environment + training_info = dict(polyak=0.999, epochs=2, steps_per_epoch=5, batch_size=100, + learning_rate=1e-3, discount=0.9999) + + # init the agent + agent = NAF(env=env, directory=directory, noise_info=noise_info, + is_continued=is_continued, q_smoothing=0.05, clipped_double_q=True, + training_info=training_info, save_frequency=25, + **nafnet_kwargs) + + # run the agent training + agent.training(warm_up_steps=0, initial_episode_length=5, max_episodes=100, max_steps=500) + # run the agent verification + agent.verification(max_episodes=50, max_steps=25) + + # plot the results + files = [] + for f in os.listdir(directory): + if 'plot_data' in f and 'pkl' in f: + files.append(f) + print(files) + if len(files) > 0: + file_name = directory + f'plot_data_{len(files)}' + else: + file_name = directory + 'plot_data_0' + + plot_convergence(agent=agent, file_name=file_name) + plot_results(env, file_name=file_name) + + out_put_writer = open(file_name + '.pkl', 'wb') + out_rewards = env.rewards + out_inits = env.initial_conditions + out_losses, out_vs = agent.losses, agent.vs + + pickle.dump(out_rewards, out_put_writer, -1) + pickle.dump(out_inits, out_put_writer, -1) + + pickle.dump(out_losses, out_put_writer, -1) + pickle.dump(out_vs, out_put_writer, -1) + out_put_writer.close() diff --git a/Data_Experiments/2020_07_20_NAF@FERMI/run_test_naf2.py b/Data_Experiments/2020_07_20_NAF@FERMI/run_test_naf2.py new file mode 100644 index 0000000..ac36e04 --- /dev/null +++ b/Data_Experiments/2020_07_20_NAF@FERMI/run_test_naf2.py @@ -0,0 +1,172 @@ +import os +import pickle +import random +import matplotlib.pyplot as plt +import numpy as np +import tensorflow as tf + +# from local_fel_simulated_env import FelLocalEnv +from laser_trajectory_control_env import LaserTrajectoryControlEnv +# from simulated_tango import SimTangoConnection +from tango_connection import TangoConnection +from naf2 import NAF + +# set random seed +random_seed = 123 +np.random.seed(random_seed) +random.seed(random_seed) + +# 
tango = SimTangoConnection() +# env = FelLocalEnv(tango=tango) +conf_file = '/home/niky/FERMI/2020_07_20/configuration/conf_fel.json' +tango = TangoConnection(conf_file=conf_file) +env = LaserTrajectoryControlEnv(tango=tango) + +def plot_results(env, file_name): + # plotting + print('Now plotting') + rewards = env.rewards + initial_rewards = env.init_rewards + # print('initial_rewards :', initial_rewards) + + iterations = [] + final_rews = [] + starts = [] + sum_rews = [] + mean_rews = [] + + for i in range(len(rewards)): + if (len(rewards[i]) > 0): + final_rews.append(rewards[i][len(rewards[i]) - 1]) + starts.append(initial_rewards[i]) + iterations.append(len(rewards[i])) + sum_rews.append(np.sum(rewards[i])) + mean_rews.append(np.mean(rewards[i])) + plot_suffix = "" # f', number of iterations: {env.TOTAL_COUNTER}, Linac4 time: {env.TOTAL_COUNTER / 600:.1f} h' + + fig, axs = plt.subplots(2, 1) + + ax = axs[0] + color = 'blue' + ax.plot(iterations, c=color) + ax.set_ylabel('steps', color=color) + ax.tick_params(axis='y', labelcolor=color) + ax1 = plt.twinx(ax) + color = 'k' + ax1.plot(np.cumsum(iterations), c=color) + ax1.set_ylabel('cumulative steps', color=color) + ax.set_title('Iterations' + plot_suffix) + # fig.suptitle(label, fontsize=12) + + ax = axs[1] + color = 'red' + ax.plot(starts, c=color) + ax.set_ylabel('starts', color=color) + ax.axhline(-0.05, ls=':', color='r') + ax.tick_params(axis='y', labelcolor=color) + ax.set_title('Final reward per episode') # + plot_suffix) + ax.set_xlabel('# episode') + + ax1 = plt.twinx(ax) + color = 'lime' + ax1.set_ylabel('finals', color=color) # we already handled the x-label with ax1 + ax1.tick_params(axis='y', labelcolor=color) + ax1.plot(final_rews, color=color) + + fig.tight_layout() + plt.savefig(file_name + '_episodes.pdf') + plt.show() + + fig, ax = plt.subplots(1, 1) + color = 'blue' + ax.plot(sum_rews, color) + ax.set_ylabel('cum. 
reward', color=color) + ax.set_xlabel('# episode') + ax.tick_params(axis='y', labelcolor=color) + ax1 = plt.twinx(ax) + color = 'lime' + ax1.plot(mean_rews, c=color) + ax1.set_ylabel('mean reward', color=color) # we already handled the x-label with ax1 + ax1.tick_params(axis='y', labelcolor=color) + plt.savefig(file_name + '_rewards.pdf') + plt.show() + +def plot_convergence(agent, file_name): + losses, vs = agent.losses, agent.vs + # losses2, vs2 = agent.losses2, agent.vs2 + + fig, ax = plt.subplots() + # ax.set_title(label) + ax.set_xlabel('# steps') + + color = 'tab:blue' + + # ax.semilogy(losses2, color=color) + ax.tick_params(axis='y', labelcolor=color) + ax.set_ylabel('td_loss', color=color) + ax.semilogy(losses, color=color) + # ax.set_ylim(0, 1) + + ax1 = plt.twinx(ax) + # ax1.set_ylim(-2, 1) + color = 'lime' + ax1.set_ylabel('V', color=color) # we already handled the x-label with ax1 + ax1.tick_params(axis='y', labelcolor=color) + ax1.plot(vs, color=color) + # ax1.plot(vs2, color=color) + plt.savefig(file_name + '_convergence' + '.pdf') + plt.show() + +if __name__ == '__main__': + + directory = "checkpoints/fel_run_0/" + + is_continued = True # False # False if is_train else True + + nafnet_kwargs = dict(hidden_sizes=[100, 100], activation=tf.nn.tanh + , weight_init=tf.random_uniform_initializer(-0.05, 0.05, seed=random_seed)) + + noise_info = dict(noise_function=lambda nr: max(0., 1 * (1 - (nr / 50)))) + + # the target network is updated at the end of each episode + # the number of episodes is executed each step in the environment + training_info = dict(polyak=0.9995, epochs=2, steps_per_epoch=5, batch_size=100, + learning_rate=1e-3, discount=0.999) + + # init the agent + agent = NAF(env=env, directory=directory, noise_info=noise_info, + is_continued=is_continued, q_smoothing=0.05, clipped_double_q=True, + training_info=training_info, save_frequency=25, + **nafnet_kwargs) + + # run the agent training + # agent.training(warm_up_steps=50, initial_episode_length=10, max_episodes=50, max_steps=500) + agent.training(warm_up_steps=0, initial_episode_length=10, max_episodes=25, max_steps=500) + # run the agent verification + agent.verification(max_episodes=30, max_steps=20) + + # plot the results + files = [] + for f in os.listdir(directory): + if 'plot_data' in f and 'pkl' in f: + files.append(f) + print(files) + if len(files) > 0: + file_name = directory + f'plot_data_{len(files)}' + else: + file_name = directory + 'plot_data_0' + + plot_convergence(agent=agent, file_name=file_name) + plot_results(env, file_name=file_name) + + out_put_writer = open(file_name + '.pkl', 'wb') + out_rewards = env.rewards + out_inits = env.initial_conditions + out_losses, out_vs = agent.losses, agent.vs + + pickle.dump(out_rewards, out_put_writer, -1) + pickle.dump(out_inits, out_put_writer, -1) + + pickle.dump(out_losses, out_put_writer, -1) + pickle.dump(out_vs, out_put_writer, -1) + out_put_writer.close() diff --git a/Data_Experiments/2020_07_20_NAF@FERMI/show_progress_presi_gsi.py b/Data_Experiments/2020_07_20_NAF@FERMI/show_progress_presi_gsi.py new file mode 100644 index 0000000..ba4a47f --- /dev/null +++ b/Data_Experiments/2020_07_20_NAF@FERMI/show_progress_presi_gsi.py @@ -0,0 +1,214 @@ +import os +import pickle +import numpy as np +import matplotlib.pyplot as plt + + +def load_pickle_logging(file_name): + directory = 'checkpoints/' + file_name + '/' + files = [] + directory = directory + 'data/' + for f in os.listdir(directory): + if 'trajectory_data' in f and 'pkl' in f: + files.append(f) + 
files.sort() + print(files[-1]) + + with open(directory + files[-1], 'rb') as f: + states = pickle.load(f) + actions = pickle.load(f) + rewards = pickle.load(f) + dones = pickle.load(f) + return states, actions, rewards, dones + + +def load_pickle_final(file_name): + directory = 'checkpoints/' + file_name + '/' + file = 'plot_data_0.pkl' + + with open(directory + file, 'rb') as f: + rews = pickle.load(f) + inits = pickle.load(f) + losses = pickle.load(f) + v_s = pickle.load(f) + return rews, inits, losses, v_s + + +file_name = 'FEL_training_100_double_q_Tango_11' +states_0, actions_0, rewards_0, dones_0 = load_pickle_logging(file_name) +file_name = 'FEL_training_100_double_q_Tango_11_bis' +states_1, actions_1, rewards_1, dones_1 = load_pickle_logging(file_name) +rewards = [rewards_0, rewards_1] + +file_name_s = 'FEL_training_100_single_q_Tango_11' +states_s0, actions_s0, rewards_s0, dones_s0 = load_pickle_logging(file_name_s) +file_name_s = 'FEL_training_100_single_q_Tango_11_bis' +states_s, actions_s, rewards_s, dones_s = load_pickle_logging(file_name_s) +rewards_s = [rewards_s, rewards_s0] + + +def read_rewards(rewards0): + iterations_all = [] + final_rews_all = [] + mean_rews_all = [] + for k in range(len(rewards0)): + rewards = rewards0[k] + + iterations = [] + final_rews = [] + mean_rews = [] + for i in range(len(rewards)): + if len(rewards[i]) > 0: + final_rews.append(rewards[i][len(rewards[i]) - 1]) + iterations.append(len(rewards[i])) + try: + mean_rews.append(np.sum(rewards[i][1:])) + except: + mean_rews.append([]) + iterations_all.append(iterations) + final_rews_all.append(final_rews) + mean_rews_all.append(mean_rews) + + iterations = np.mean(np.array(iterations_all), axis=0) + final_rews = np.mean(np.array(final_rews_all), axis=0) + mean_rews = np.mean(np.array(mean_rews_all), axis=0) + return iterations, final_rews, mean_rews + + +def plot_results(rewards, rewards_s, plot_name): + # plotting + print('Now plotting') + # rewards = env.rewards + # initial_rewards = env.init_rewards + # print('initial_rewards :', initial_rewards) + + iterations, final_rews, mean_rews = read_rewards(rewards) + iterations_s, final_rews_s, mean_rews_s = read_rewards(rewards_s) + + plot_suffix = "" # f', number of iterations: {env.TOTAL_COUNTER}, Linac4 time: {env.TOTAL_COUNTER / 600:.1f} h' + fig, axs = plt.subplots(2, 1, sharex=True) + + ax = axs[0] + ax.axvspan(0,100, alpha=0.2, color='coral') + color = 'blue' + ax.plot(iterations, c=color) + ax.plot(iterations_s, c=color, ls=':') + ax.set_ylabel('steps', color=color) + ax.tick_params(axis='y', labelcolor=color) + ax1 = plt.twinx(ax) + color = 'k' + ax1.plot(np.cumsum(iterations), c=color) + ax1.plot(np.cumsum(iterations_s), c=color, ls=':') + ax1.set_ylabel('cumulative steps', color=color) + ax.set_title('Iterations' + plot_suffix) + # fig.suptitle(label, fontsize=12) + + ax = axs[1] + ax.axvspan(0, 100, alpha=0.2, color='coral') + color = 'blue' + # ax.plot(starts, c=color) + ax.plot(mean_rews, c=color) + ax.plot(mean_rews_s, c=color, ls=':') + ax.set_ylabel('cum. 
return', color=color) + # ax.axhline(-0.05, ls=':', color='r') + ax.tick_params(axis='y', labelcolor=color) + ax.set_title('Reward per episode') # + plot_suffix) + ax.set_xlabel('episodes') + + ax1 = plt.twinx(ax) + color = 'lime' + ax1.plot(final_rews[:-1], color=color) + ax1.plot(final_rews_s[:-1], color=color, ls=':') + + ax1.set_ylabel('final return', color=color) + ax1.axhline(-0.05, ls=':', color=color) + ax1.tick_params(axis='y', labelcolor=color) + + # fig.tight_layout() + # plt.savefig(file_name + '_episodes.pdf') + + fig.align_labels() + fig.tight_layout() + # fig.suptitle('NonUniformImage class', fontsize='large') + plt.savefig(plot_name + '_episodes.pdf') + plt.show() + + # fig, ax = plt.subplots(1) + # color = 'blue' + # ax.plot(sum_rews, color) + # ax.set_ylabel('cum. reward', color=color) + # ax.set_xlabel('# episode') + # ax.tick_params(axis='y', labelcolor=color) + # ax1 = plt.twinx(ax) + # color = 'lime' + # ax1.plot(mean_rews, c=color) + # ax1.set_ylabel('mean reward', color=color) # we already handled the x-label with ax1 + # ax1.tick_params(axis='y', labelcolor=color) + # # plt.savefig(file_name + '_rewards.pdf') + # plt.show() + + +label = 'FERMI_all_experiments_NAF' +# +# plot_results(rewards, rewards_s, label) + + +def read_losses_v_s(losses0, v_s0, max_length): + losses_all = [] + v_s_all = [] + for k in range(len(losses0)): + + losses = losses0[k] + print(len(losses)) + v_s = v_s0[k] + losses_all.append(losses[:max_length]) + v_s_all.append(v_s[:max_length]) + losses = np.mean(losses_all, axis=0) + v_s = np.mean(v_s_all, axis=0) + return losses, v_s + + +file_name = 'FEL_training_100_double_q_Tango_11' +rews0, inits0, losses0, v_s0 = load_pickle_final(file_name) +file_name = 'FEL_training_100_double_q_Tango_11_bis' +rews1, inits1, losses1, v_s1 = load_pickle_final(file_name) +losses, v_s = read_losses_v_s([losses0, losses1], [v_s0, v_s1], 691) +rewards = [rews0, rews1] + +file_name = 'FEL_training_100_single_q_Tango_11' +rews0, inits0, losses0, v_s0 = load_pickle_final(file_name) +file_name = 'FEL_training_100_single_q_Tango_11_bis' +rews1, inits1, losses1, v_s1 = load_pickle_final(file_name) +losses_s, v_s_s = read_losses_v_s([losses0, losses1], [v_s0, v_s1], 691) + +rewards_s = [rews0, rews1] + + +def plot_convergence(losses, v_s, losses_s, v_s_s, label): + fig, ax = plt.subplots() + ax.set_title(label) + ax.set_xlabel('steps') + + color = 'tab:blue' + ax.semilogy(losses, color=color) + ax.semilogy(losses_s, color=color, ls=':') + ax.tick_params(axis='y', labelcolor=color) + ax.set_ylabel('td_loss', color=color) + # ax.set_ylim(0, 1) + + ax1 = plt.twinx(ax) + # ax1.set_ylim(-2, 1) + color = 'lime' + + ax1.set_ylabel('V', color=color) # we already handled the x-label with ax1 + ax1.tick_params(axis='y', labelcolor=color) + ax1.plot(v_s, color=color) + ax1.plot(v_s_s, color=color, ls=':') + plt.savefig(label + 'convergence' + '.pdf') + plt.show() + +plot_convergence(losses, v_s, losses_s, v_s_s, label) + +label = 'FERMI_all_experiments_NAF' + +plot_results(rewards, rewards_s, label) \ No newline at end of file diff --git a/Data_Experiments/2020_07_20_NAF@FERMI/spinning_up.py b/Data_Experiments/2020_07_20_NAF@FERMI/spinning_up.py new file mode 100644 index 0000000..44bf5af --- /dev/null +++ b/Data_Experiments/2020_07_20_NAF@FERMI/spinning_up.py @@ -0,0 +1,108 @@ +import random + +import matplotlib.pyplot as plt +import numpy as np +import pandas as pd +import tensorflow as tf +# import PyQt5 +from spinup import td3_tf1 as td3 +from spinup import sac_tf1 as sac 
+from spinup.algos.tf1.ppo.ppo import ppo +from spinup.algos.tf1.trpo.trpo import trpo + +from local_fel_simulated_env import FelLocalEnv +# from pendulum import PendulumEnv as simpleEnv +# set random seed +from simulated_tango import SimTangoConnection + +random_seed = 111 +# set random seed +tf.set_random_seed(random_seed) +np.random.seed(random_seed) +random.seed(random_seed) + +tango = SimTangoConnection() +env = FelLocalEnv(tango=tango) +env_fn = lambda: env + +label = 'Sim_Tango' +directory = "checkpoints/test_implementation/" + + +def plot_results(env, label='def'): + # plotting + print('now plotting') + rewards = env.rewards + initial_states = env.initial_conditions + + iterations = [] + final_rews = [] + # starts = [] + sum_rews=[] + mean_rews = [] + # init_states = pd.read_pickle('/Users/shirlaen/PycharmProjects/DeepLearning/spinningup/Environments/initData') + + for i in range(len(rewards)): + if (len(rewards[i]) > 0): + final_rews.append(rewards[i][len(rewards[i]) - 1]) + # starts.append(-np.sqrt(np.mean(np.square(initial_states[i])))) + iterations.append(len(rewards[i])) + sum_rews.append(np.sum(rewards[i])) + mean_rews.append(np.mean(rewards[i])) + plot_suffix = ""#f', number of iterations: {env.TOTAL_COUNTER}, Linac4 time: {env.TOTAL_COUNTER / 600:.1f} h' + + fig, axs = plt.subplots(2, 1, constrained_layout=True) + + ax=axs[0] + ax.plot(iterations) + ax.set_title('Iterations' + plot_suffix) + fig.suptitle(label, fontsize=12) + + ax = axs[1] + ax.plot(final_rews, 'r--') + + ax.set_title('Final reward per episode') # + plot_suffix) + ax.set_xlabel('Episodes (1)') + + # ax1 = plt.twinx(ax) + # color = 'lime' + # ax1.set_ylabel('starts', color=color) # we already handled the x-label with ax1 + # ax1.tick_params(axis='y', labelcolor=color) + # ax1.plot(starts, color=color) + plt.savefig(label+'.pdf') + # fig.tight_layout() + plt.show() + + + fig, axs = plt.subplots(1, 1) + axs.plot(sum_rews) + ax1 = plt.twinx(axs) + ax1.plot(mean_rews,c='lime') + plt.show() + +output_dir = 'logging/debug/' + +logger_kwargs = dict(output_dir=output_dir, exp_name='niky') +# + +nafnet_kwargs = dict(hidden_sizes=[100, 100]) +agent = sac(env_fn=env_fn, epochs=50, steps_per_epoch=2000, + logger_kwargs=logger_kwargs, start_steps=2000, seed=random_seed) + +# agent = ppo(env_fn=env_fn, epochs=50, steps_per_epoch=5000, ac_kwargs=nafnet_kwargs, +# logger_kwargs=logger_kwargs, gamma=0.9999, save_freq=10000) + + +plot_name = 'Stats' +name = plot_name +data = pd.read_csv(output_dir + '/progress.txt', sep="\t") + +data.index = data['TotalEnvInteracts'] +data_plot = data[['EpLen', 'MinEpRet', 'AverageEpRet']] +data_plot.plot(secondary_y=['MinEpRet', 'AverageEpRet']) + +plt.title(name) +# plt.savefig(name + '.pdf') +plt.show() + +plot_results(env) \ No newline at end of file diff --git a/Data_Experiments/2020_07_20_NAF@FERMI/tango_connection.py b/Data_Experiments/2020_07_20_NAF@FERMI/tango_connection.py new file mode 100644 index 0000000..4a79983 --- /dev/null +++ b/Data_Experiments/2020_07_20_NAF@FERMI/tango_connection.py @@ -0,0 +1,247 @@ +import json +import time +import numpy as np +import PyTango as tango + +class TangoConnection: + + def __init__(self, conf_file, **kwargs): + + # load json configuration file + with open(conf_file) as f: + self.conf_data = json.load(f) + + self.system = self.conf_data['system'] + + # get actuators data + conf_actuators = self.conf_data['actuators'] + self.actuators_data= self.get_confdata(conf_actuators) + self.actuators_device_num = self.actuators_data[0] + 
self.actuators_device_list = self.actuators_data[1] + self.actuators_device_attr_num = self.actuators_data[2] + self.actuators_device_attr_list = self.actuators_data[3] + + self.actuators_size = np.sum(self.actuators_device_attr_num) + self.state_size = self.actuators_size.copy() + self.action_size = self.actuators_size.copy() + self.state = np.zeros(self.state_size) + + # get sensors data + conf_sensors = self.conf_data['sensors'] + self.sensors_data = self.get_confdata(conf_sensors) + self.sensors_device_num = self.sensors_data[0] + self.sensors_device_list = self.sensors_data[1] + self.sensors_device_attr_num = self.sensors_data[2] + self.sensors_device_attr_list = self.sensors_data[3] + + self.sensors_size = np.sum(self.sensors_device_attr_num) + self.intensity = np.zeros(1) + + # get spectrometer data + conf_spectrometer = self.conf_data['spectrometer'] + self.spectrometer_data = self.get_confdata(conf_spectrometer) + self.spectrometer_device_num = self.spectrometer_data[0] + self.spectrometer_device_list = self.spectrometer_data[1] + self.spectrometer_device_attr_num = self.spectrometer_data[2] + self.spectrometer_device_attr_list = self.spectrometer_data[3] + + # get security data + conf_security = self.conf_data['security'] + self.security_data = self.get_confdata(conf_security) + self.security_device_num = self.security_data[0] + self.security_device_list = self.security_data[1] + self.security_device_attr_num = self.security_data[2] + self.security_device_attr_list = self.security_data[3] + self.security_threshold = 100. + + if 'num_samples' in kwargs: + self.num_samples = kwargs.get('num_samples') + else: + self.num_samples = 11 # 25 # 51 # 25 + + self.pause = 0.5 + 0.02*self.num_samples + # self.pause = 0.5 + 0.02*self.num_samples + 1 + + if 'target_state' in kwargs: + self.target_actuators = kwargs.get('target_state') + else: + self.target_actuators = 131072 * np.ones(self.actuators_size) + + if self.system == 'sequencer': + self.set_state(self.target_actuators) + self.target_position = self.get_position() + + # read initial values for actuators and sensors + self.init_state = self.get_state() + self.init_intensity = self.get_intensity() + + self.state = self.init_state.copy() + self.intensity = self.init_intensity.copy() + + + def get_confdata(self, conf_dev): + dev_list, dev_attr_num, dev_attr_list = [], [], [] + dev_num = len(conf_dev) + for j in range(dev_num): + dev_data = conf_dev[j] + dev_name = dev_data['host'] + dev_data['address'] + dev = tango.DeviceProxy(dev_name) + dev_attr = dev_data['attributes'] + + dev_list.append(dev) + dev_attr_num.append(len(dev_attr)) + dev_attr_list.append(dev_attr) + return [dev_num, dev_list, dev_attr_num, dev_attr_list] + + def get_position(self): + position = np.zeros(self.sensors_size) + for i in range(self.sensors_device_num): + dev = self.sensors_device_list[i] + for j in range(self.sensors_device_attr_num[i]): + idx = self.sensors_device_num * i + j + attr_name = self.sensors_device_attr_list[i][j] + position[idx] = dev.read_attribute(attr_name).value + + return position + + def set_state(self, state): + self.check_charge() + self.set_actuators(state) + self.state = state + + + def get_state(self): + self.check_charge() + state = self.get_actuators() + self.state = state + return state + + def set_actuators(self, actuators_val): + + for i in range(self.actuators_device_num): + dev = self.actuators_device_list[i] + for j in range(self.actuators_device_attr_num[i]): + idx = self.actuators_device_num * i + j + attr_name = 
self.actuators_device_attr_list[i][j] + attr_val = actuators_val[idx] + dev.write_attribute(attr_name, attr_val) + + time.sleep(self.pause) + pass + + def get_actuators(self): + attr_val = np.zeros(self.actuators_size) + for i in range(self.actuators_device_num): + dev = self.actuators_device_list[i] + for j in range(self.actuators_device_attr_num[i]): + idx = self.actuators_device_num * i + j + attr_name = self.actuators_device_attr_list[i][j] + attr_val[idx] = dev.read_attribute(attr_name).value + return attr_val + + def get_sensors(self): + attr_val = [] + + if self.system == 'fel': + attr_val = np.zeros(self.sensors_size) + attr_val_seq = np.zeros((self.sensors_size, self.num_samples)) + for i in range(self.sensors_device_num): + dev = self.sensors_device_list[i] + for j in range(self.sensors_device_attr_num[i]): + idx = self.sensors_device_num * i + j + attr_name = self.sensors_device_attr_list[i][j] + attr_val_seq[idx] = dev.command_inout(attr_name, [0, int(self.num_samples)]) + attr_val[idx] = np.median(attr_val_seq[idx]) + + elif self.system == 'sequencer': + position = self.get_position() + screen_intensity = np.zeros(self.sensors_device_num) + for i in range(self.sensors_device_num): + screen_position = position[self.sensors_device_num * i:self.sensors_device_num * i + 2] + target_position = self.target_position[self.sensors_device_num * i:self.sensors_device_num * i + 2] + difference = screen_position - target_position + distance = np.sqrt(np.power(difference, 2)) + if any(distance > 0.1): + screen_intensity[i] = 0.0 + else: + den = 2 * np.power(0.04, 2) + screen_intensity[i] = np.exp(-np.sum(np.power(difference, 2)) / den) + attr_val = screen_intensity + #''' + elif self.system == 'eos': + attr_val = np.zeros(self.sensors_size) + attr_val_seq = np.zeros((self.sensors_size, self.num_samples)) + idx = 0 + for i in range(self.sensors_device_num): + dev = self.sensors_device_list[i] + for j in range(self.sensors_device_attr_num[i]): + # idx = self.sensors_device_num * i + j + attr_name = self.sensors_device_attr_list[i][j] + attr_val_seq[idx] = dev.command_inout(attr_name, [0, int(self.num_samples)]) + attr_val[idx] = np.median(attr_val_seq[idx]) + idx += 1 + #''' + return attr_val + + def get_intensity(self): + self.check_charge() + attr_val = self.get_sensors() + intensity = np.prod(attr_val) + self.intensity = intensity + return intensity + + def get_image(self): + self.check_charge() + attr_val = [] + for i in range(self.spectrometer_device_num): + dev = self.spectrometer_device_list[i] + for j in range(self.spectrometer_device_attr_num[i]): + # idx = self.spectrometer_device_num * i + j + attr_name = self.spectrometer_device_attr_list[i][j] + attr_val.append(dev.read_attribute(attr_name).value) + return attr_val[0] + + def get_security_check(self): + attr_val = [] + for i in range(self.security_device_num): + dev = self.security_device_list[i] + for j in range(self.spectrometer_device_attr_num[i]): + # idx = self.security_device_num * i + j + attr_name = self.security_device_attr_list[i][j] + attr_val.append(dev.read_attribute(attr_name).value) + return attr_val[0] + + def check_charge(self): + if self.system == 'fel': + #if self.system in ['eos', 'fel']: + # print('\nSECURITY CHECK\n') + flag = 0 + charge = self.get_security_check() + #while charge < 100.: + while charge < self.security_threshold: + flag = 1 + print('\nwait...\n') + time.sleep(5) + charge = self.get_security_check() + + if flag: + print('FEL is coming back!\nWait 1 minute more...\n') + time.sleep(60) + + + 
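# --- Editor's sketch (annotation, not part of the original file or diff) ---
# TangoConnection.get_confdata() expects every section of the JSON configuration
# ('actuators', 'sensors', 'spectrometer', 'security') to be a list of device
# entries: the Tango device name is built as host + address, and 'attributes'
# lists the attribute (or command) names read or written per device; 'system'
# selects the readout branch ('fel', 'eos' or 'sequencer').  All hosts,
# addresses and attribute names below are hypothetical placeholders, not the
# values used at FERMI.
example_conf = {
    "system": "eos",
    "actuators": [
        {"host": "srv-tango:20000/", "address": "laser/piezo/mirror1",
         "attributes": ["position_x", "position_y"]},
    ],
    "sensors": [
        {"host": "srv-tango:20000/", "address": "diagnostics/ccd/screen1",
         "attributes": ["intensity_acquisition"]},
    ],
    "spectrometer": [
        {"host": "srv-tango:20000/", "address": "diagnostics/spectrometer/1",
         "attributes": ["image"]},
    ],
    "security": [
        {"host": "srv-tango:20000/", "address": "machine/charge/monitor1",
         "attributes": ["charge"]},
    ],
}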
+if __name__ == '__main__': + + # sequencer + # system = 'sequencer' + # path = '/home/niky/PycharmProjects/FERMI/devel/sequencer_new/configuration/' + + # fel + system = 'eos' + path = '/home/niky/FERMI/2020_07_20/configuration/' + conf_file = 'conf_'+system+'.json' + + filename = path+conf_file + + tng = TangoConnection(conf_file=filename) + diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/AE_TRPO_stable_tests.py b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/AE_TRPO_stable_tests.py new file mode 100644 index 0000000..6460de3 --- /dev/null +++ b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/AE_TRPO_stable_tests.py @@ -0,0 +1,1720 @@ +import os +import pickle +from datetime import datetime + +import gym +import matplotlib.pyplot as plt +import numpy as np +import pandas as pd +import tensorflow as tf + +from laser_trajectory_control_env import LaserTrajectoryControlEnv +from tango_connection import TangoConnection + +# set random seed +random_seed = 111 +np.random.seed(random_seed) + + +conf_file = '/home/niky/FERMI/2020_10_06/configuration/conf_fel2.json' +tango = TangoConnection(conf_file=conf_file) +real_env = LaserTrajectoryControlEnv(tango=tango) + +# Todo: Add info to buffer and test ensemble advantage +# We will take the avarage cumulative reward and the entropy of the trajectories as an observable, +# as well as the reward on the measured data during the data collection + + +# For Niky Hyper parameters: +#------------------------------------------------------ +steps_per_env = 15 +init_random_steps = 50 +# num_epochs = int((300 - init_random_steps) / (steps_per_env)) + 1 +num_epochs = 20 +# num_epochs = int((125-init_random_steps)/(steps_per_env)) +print('Number of epoches: ', num_epochs) + +hidden_sizes = [100, 100] +cr_lr = 1e-3 +gamma = 0.9999 +lam = 0.95 + +max_training_iterations = 50 + +delay_before_convergence_check = 1 + +# number_envs = 1 +critic_iter = 40 +delta = 0.01 +algorithm = 'TRPO' +conj_iters = 15 +minibatch_size = 100 +simulated_steps = 1000 + +mb_lr = 1e-3 +model_batch_size = 5 # train the dynamics model + +num_ensemble_models = 3 +model_iter = 25 + +# How often to check the progress of the network training +# e.g. lambda it, episode: (it + 1) % max(3, (ep+1)*2) == 0 +# dynamic_wait_time = lambda it, ep: True # (it + 1) % 1 == 0 # +dynamic_wait_time = lambda it, ep: (it + 1) % 2 == 0 # + +# Set the priors for the anchor method: +# TODO: How to set these correctly? 
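# Editor's note (worked example, not part of the original file): with the priors
# defined just below, init_stddev_1_w = sqrt(1) = 1.0, init_stddev_1_b = 1.0 and
# init_stddev_2_w = 1/sqrt(100) = 0.1, together with data_noise = 1e-4, the
# anchoring coefficients evaluate to
#     lambda_anchor = 1e-4 / [1.0**2, 1.0**2, 0.1**2] = [1e-4, 1e-4, 1e-2]
# i.e. the output-layer weights of each ensemble member are pulled back towards
# their anchor values about 100x more strongly than the hidden-layer parameters.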
+init_params = dict(init_stddev_1_w=np.sqrt(1), + init_stddev_1_b=np.sqrt(1), + init_stddev_2_w=1 / np.sqrt(100)) + +data_noise = 1e-4 # estimated noise variance + +lambda_anchor = data_noise / (np.array([init_params['init_stddev_1_w'], + init_params['init_stddev_1_b'], + init_params['init_stddev_2_w']]) ** 2) + +training_steps_model = 4000 + +# method = 'random' +method = 'pessimistic' + +# Create the logging directory: +project_directory = 'Data_logging/AE_TRPO/at_1/' + +#------------------------------------------------------ +#------------------------------------------------------ +#------------------------------------------------------ + + +hyp_str_all = '-nr_steps_' + str(steps_per_env) + '-cr_lr' + str(cr_lr) + '-crit_it_' + str( + critic_iter) + '-d_' + str(delta) + '-conj_iters_' + str(conj_iters) + '-n_ep_' + str(num_epochs) + \ + '-mini_bs_' + str(minibatch_size) + '-m_bs_' + str(model_batch_size) + \ + '-mb_lr_' + str(mb_lr) + \ + '-sim_steps_' + str(simulated_steps) + \ + '-m_iter_' + str(model_iter) + '-ensnr_' + str(num_ensemble_models) + '-init_' + str( + init_random_steps) + '/' +project_directory = project_directory + hyp_str_all + +# To label the plots: +hyp_str_all = '-nr_steps_' + str(steps_per_env) + '-cr_lr' + str(cr_lr) + '-crit_it_' + str( + critic_iter) + '-d_' + str(delta) + '-conj_iters_' + str(conj_iters) + '-n_ep_' + str(num_epochs) + \ + '\n-mini_bs_' + str(minibatch_size) + '-m_bs_' + str(model_batch_size) + \ + '-mb_lr_' + str(mb_lr) + \ + '-sim_steps_' + str(simulated_steps) + \ + '-m_iter_' + str(model_iter) + \ + '\n-ensnr_' + str(num_ensemble_models) +if not os.path.isdir(project_directory): + os.makedirs(project_directory) + print("created folder : ", project_directory) + + +# Class for data storage during the tests +class TrajectoryBuffer(): + '''Class for data storage during the tests''' + + def __init__(self, name, directory): + self.save_frequency = 100000 + self.directory = directory + self.name = name + self.rews = [] + self.obss = [] + self.acts = [] + self.dones = [] + self.info = "" + self.idx = -1 + + def new_trajectory(self, obs): + self.idx += 1 + self.rews.append([]) + self.acts.append([]) + self.obss.append([]) + self.dones.append([]) + self.store_step(obs=obs) + + def store_step(self, obs=None, act=None, rew=None, done=None): + self.rews[self.idx].append(rew) + self.obss[self.idx].append(obs) + self.acts[self.idx].append(act) + self.dones[self.idx].append(done) + + if self.__len__() % self.save_frequency == 0: + self.save_buffer() + + def __len__(self): + assert (len(self.rews) == len(self.obss) == len(self.acts) == len(self.dones)) + return len(self.obss) + + def save_buffer(self, **kwargs): + if 'info' in kwargs: + self.info = kwargs.get('info') + now = datetime.now() + # clock_time = "{}_{}_{}_{}_".format(now.day, now.hour, now.minute, now.second) + clock_time = f'{now.month:0>2}_{now.day:0>2}_{now.hour:0>2}_{now.minute:0>2}_{now.second:0>2}_' + data = dict(obss=self.obss, + acts=self.acts, + rews=self.rews, + dones=self.dones, + info=self.info) + # print('saving...', data) + out_put_writer = open(self.directory + clock_time + self.name, 'wb') + pickle.dump(data, out_put_writer, -1) + # pickle.dump(self.actions, out_put_writer, -1) + out_put_writer.close() + + def get_data(self): + return dict(obss=self.obss, + acts=self.acts, + rews=self.rews, + dones=self.dones, + info=self.info) + + +class MonitoringEnv(gym.Wrapper): + ''' + Gym Wrapper to store information for scaling to correct scpace and for post analysis. 
+ ''' + + def __init__(self, env, **kwargs): + gym.Wrapper.__init__(self, env) + self.data_dict = dict() + self.environment_usage = 'default' + self.directory = project_directory + self.data_dict[self.environment_usage] = TrajectoryBuffer(name=self.environment_usage, + directory=self.directory) + self.current_buffer = self.data_dict.get(self.environment_usage) + + self.test_env_flag = False + if 'test_env' in kwargs: + self.test_env_flag = True + + def reset(self, **kwargs): + init_obs = self.env.reset(**kwargs) + # print('Reset Env: ', (init_obs),10*'-- ') + self.current_buffer.new_trajectory(init_obs) + init_obs = self.scale_state_env(init_obs) + # print('Reset Menv: ', (init_obs)) + return init_obs + + def step(self, action): + # print('a', action) + action = self.descale_action_env(action) + # print('as', action) + ob, reward, done, info = self.env.step(action) + # print('Env: ', reward) + # print('Env: ', ob, 'r:', reward, done) + self.current_buffer.store_step(obs=ob, act=action, rew=reward, done=done) + ob = self.scale_state_env(ob) + reward = self.rew_scale(reward) + # if done: + # print('Menv: ', ob, 'r:', reward, done) + # print('Menv: ', reward) + return ob, reward, done, info + + def set_usage(self, usage): + self.environment_usage = usage + if usage in self.data_dict: + self.current_buffer = self.data_dict.get(usage) + else: + self.data_dict[self.environment_usage] = TrajectoryBuffer(name=self.environment_usage, + directory=self.directory) + self.current_buffer = self.data_dict.get(usage) + + def close_usage(self, usage): + # Todo: Implement to save complete data + self.current_buffer = self.data_dict.get(usage) + self.current_buffer.save_buffer() + + def scale_state_env(self, ob): + scale = (self.env.observation_space.high - self.env.observation_space.low) + return (2 * ob - (self.env.observation_space.high + self.env.observation_space.low)) / scale + + def descale_action_env(self, act): + scale = (self.env.action_space.high - self.env.action_space.low) + return (scale * act + self.env.action_space.high + self.env.action_space.low) / 2 + + def rew_scale(self, rew): + # we only scale for the network training: + if not self.test_env_flag: + rew = rew * 2 + 1 + # if self.test_env_flag: + # '''Rescale reward from [-1,0] to [-1,1] for the training of the network in case of tests''' + # rew = rew * 2 + 1 + # if rew < -1: + # print('Hallo was geht: ', rew) + # else: + # print('Okay...', rew) + return rew + + def save_current_buffer(self, info=''): + self.current_buffer = self.data_dict.get(self.environment_usage) + self.current_buffer.save_buffer(info=info) + print('Saved current buffer', self.environment_usage) + + def set_directory(self, directory): + self.directory = directory + + +env_monitored = MonitoringEnv(env=real_env) + + +def make_env(**kwargs): + '''Create the environement''' + return MonitoringEnv(env=real_env, **kwargs) + + +def mlp(x, hidden_layers, output_layer, activation, last_activation=None, init_params=dict()): + ''' + Multi-layer perceptron with init conditions for anchor method + ''' + if init_params: + layer_1_w = tf.layers.Dense(hidden_layers[0], + activation=tf.tanh, # try using tf.nn.relu, tf.erf, tf.nn.tanh etc. 
+ kernel_initializer=tf.random_normal_initializer(mean=0., + stddev=init_params.get( + 'init_stddev_1_w')), + bias_initializer=tf.random_normal_initializer(mean=0., + stddev=init_params.get( + 'init_stddev_1_b'))) + + layer_1 = layer_1_w.apply(x) + + layer_2_w = tf.layers.Dense(hidden_layers[1], + activation=tf.tanh, # try using tf.nn.relu, tf.erf, tf.nn.tanh etc. + kernel_initializer=tf.random_normal_initializer(mean=0., + stddev=init_params.get( + 'init_stddev_1_w')), + bias_initializer=tf.random_normal_initializer(mean=0., + stddev=init_params.get( + 'init_stddev_1_b'))) + + layer_2 = layer_2_w.apply(layer_1) + + output_w = tf.layers.Dense(output_layer, + activation=tf.tanh, + use_bias=False, + kernel_initializer=tf.random_normal_initializer(mean=0., + stddev=init_params.get( + 'init_stddev_2_w'))) + + return output_w.apply(layer_2) + else: + for l in hidden_layers: + x = tf.layers.dense(x, units=l, activation=activation) + return tf.layers.dense(x, units=output_layer, activation=last_activation) + + +def softmax_entropy(logits): + ''' + Softmax Entropy + ''' + return -tf.reduce_sum(tf.nn.softmax(logits, axis=-1) * tf.nn.log_softmax(logits, axis=-1), axis=-1) + + +def gaussian_log_likelihood(ac, mean, log_std): + ''' + Gaussian Log Likelihood + ''' + log_p = ((ac - mean) ** 2 / (tf.exp(log_std) ** 2 + 1e-9) + 2 * log_std) + np.log(2 * np.pi) + return -0.5 * tf.reduce_sum(log_p, axis=-1) + + +def conjugate_gradient(A, b, x=None, iters=10): + ''' + Conjugate gradient method: approximate the solution of Ax=b + It solve Ax=b without forming the full matrix, just compute the matrix-vector product (The Fisher-vector product) + NB: A is not the full matrix but is a useful matrix-vector product between the averaged Fisher information matrix and arbitrary vectors + Descibed in Appendix C.1 of the TRPO paper + ''' + if x is None: + x = np.zeros_like(b) + + r = A(x) - b + p = -r + for _ in range(iters): + a = np.dot(r, r) / (np.dot(p, A(p)) + 1e-8) + x += a * p + r_n = r + a * A(p) + b = np.dot(r_n, r_n) / (np.dot(r, r) + 1e-8) + p = -r_n + b * p + r = r_n + return x + + +def gaussian_DKL(mu_q, log_std_q, mu_p, log_std_p): + ''' + Gaussian KL divergence in case of a diagonal covariance matrix + ''' + return tf.reduce_mean(tf.reduce_sum( + 0.5 * (log_std_p - log_std_q + tf.exp(log_std_q - log_std_p) + (mu_q - mu_p) ** 2 / tf.exp(log_std_p) - 1), + axis=1)) + + +def backtracking_line_search(Dkl, delta, old_loss, p=0.8): + ''' + Backtracking line searc. It look for a coefficient s.t. 
the constraint on the DKL is satisfied + It has both to + - improve the non-linear objective + - satisfy the constraint + ''' + ## Explained in Appendix C of the TRPO paper + a = 1 + it = 0 + + new_dkl, new_loss = Dkl(a) + while (new_dkl > delta) or (new_loss > old_loss): + a *= p + it += 1 + new_dkl, new_loss = Dkl(a) + + return a + + +def GAE(rews, v, v_last, gamma=0.99, lam=0.95): + ''' + Generalized Advantage Estimation + ''' + assert len(rews) == len(v) + vs = np.append(v, v_last) + d = np.array(rews) + gamma * vs[1:] - vs[:-1] + gae_advantage = discounted_rewards(d, 0, gamma * lam) + return gae_advantage + + +def discounted_rewards(rews, last_sv, gamma): + ''' + Discounted reward to go + Parameters: + ---------- + rews: list of rewards + last_sv: value of the last state + gamma: discount value + ''' + rtg = np.zeros_like(rews, dtype=np.float64) + rtg[-1] = rews[-1] + gamma * last_sv + for i in reversed(range(len(rews) - 1)): + rtg[i] = rews[i] + gamma * rtg[i + 1] + return rtg + + +def flatten_list(tensor_list): + ''' + Flatten a list of tensors + ''' + return tf.concat([flatten(t) for t in tensor_list], axis=0) + + +def flatten(tensor): + ''' + Flatten a tensor + ''' + return tf.reshape(tensor, shape=(-1,)) + + +def test_agent(env_test, agent_op, num_games=10): + ''' + Test an agent 'agent_op', 'num_games' times + Return mean and std + ''' + games_r = [] + games_length = [] + games_dones = [] + for _ in range(num_games): + d = False + game_r = 0 + o = env_test.reset() + game_length = 0 + while not d: + a_s, _ = agent_op([o]) + o, r, d, _ = env_test.step(a_s) + game_r += r + game_length += 1 + + games_r.append(game_r) + games_length.append(game_length) + games_dones.append(d) + return np.mean(games_r), np.std(games_r), np.mean(games_length), np.mean(games_dones) + + +class Buffer(): + ''' + Class to store the experience from a unique policy + ''' + + def __init__(self, gamma=0.99, lam=0.95): + self.gamma = gamma + self.lam = lam + self.adv = [] + self.ob = [] + self.ac = [] + self.rtg = [] + + def store(self, temp_traj, last_sv): + ''' + Add temp_traj values to the buffers and compute the advantage and reward to go + Parameters: + ----------- + temp_traj: list where each element is a list that contains: observation, reward, action, state-value + last_sv: value of the last state (Used to Bootstrap) + ''' + # store only if there are temporary trajectories + if len(temp_traj) > 0: + self.ob.extend(temp_traj[:, 0]) + rtg = discounted_rewards(temp_traj[:, 1], last_sv, self.gamma) + self.adv.extend(GAE(temp_traj[:, 1], temp_traj[:, 3], last_sv, self.gamma, self.lam)) + self.rtg.extend(rtg) + self.ac.extend(temp_traj[:, 2]) + + def get_batch(self): + # standardize the advantage values + norm_adv = (self.adv - np.mean(self.adv)) / (np.std(self.adv) + 1e-10) + return np.array(self.ob), np.array(np.expand_dims(self.ac, -1)), np.array(norm_adv), np.array(self.rtg) + + def __len__(self): + assert (len(self.adv) == len(self.ob) == len(self.ac) == len(self.rtg)) + return len(self.ob) + + +class FullBuffer(): + def __init__(self): + self.rew = [] + self.obs = [] + self.act = [] + self.nxt_obs = [] + self.done = [] + + self.train_idx = [] + self.valid_idx = [] + self.full_idx = [] + self.idx = 0 + + def store(self, obs, act, rew, nxt_obs, done): + self.rew.append(rew) + self.obs.append(obs) + self.act.append(act) + self.nxt_obs.append(nxt_obs) + self.done.append(done) + + self.idx += 1 + + def generate_random_dataset(self): + rnd = np.arange(len(self.obs)) + np.random.shuffle(rnd) + # proportion for 
training vs validation + self.valid_idx = rnd[: int(len(self.obs) / 3)] + self.train_idx = rnd[int(len(self.obs) / 3):] + print('Train set:', len(self.train_idx), 'Valid set:', len(self.valid_idx)) + + def get_training_batch(self): + return np.array(self.obs)[self.train_idx], np.array(np.expand_dims(self.act, -1))[self.train_idx], \ + np.array(self.rew)[self.train_idx], np.array(self.nxt_obs)[self.train_idx], np.array(self.done)[ + self.train_idx] + + def get_valid_batch(self): + return np.array(self.obs)[self.valid_idx], np.array(np.expand_dims(self.act, -1))[self.valid_idx], \ + np.array(self.rew)[self.valid_idx], np.array(self.nxt_obs)[self.valid_idx], np.array(self.done)[ + self.valid_idx] + + def get_full_batch(self): + rnd = np.arange(len(self.obs)) + np.random.shuffle(rnd) + self.full_idx = rnd + return np.array(self.obs)[self.full_idx], np.array(np.expand_dims(self.act, -1))[self.full_idx], \ + np.array(self.rew)[self.full_idx], np.array(self.nxt_obs)[self.full_idx], np.array(self.done)[ + self.full_idx] + + def __len__(self): + assert (len(self.rew) == len(self.obs) == len(self.act) == len(self.nxt_obs) == len(self.done)) + return len(self.obs) + + +def simulate_environment(env, policy, simulated_steps): + '''Lists to store rewards and length of the trajectories completed''' + buffer = Buffer(0.99, 0.95) + steps = 0 + number_episodes = 0 + + while steps < simulated_steps: + temp_buf = [] + obs = env.reset() + number_episodes += 1 + done = False + + while not done: + act, val = policy([obs]) + + obs2, rew, done, _ = env.step([act]) + + temp_buf.append([obs.copy(), rew, np.squeeze(act), np.squeeze(val)]) + + obs = obs2.copy() + steps += 1 + + if done: + buffer.store(np.array(temp_buf), 0) + temp_buf = [] + + if steps == simulated_steps: + break + + buffer.store(np.array(temp_buf), np.squeeze(policy([obs])[1])) + + print('Sim ep:', number_episodes, end=' \n') + + return buffer.get_batch(), number_episodes + + +class NetworkEnv(gym.Wrapper): + ''' + Wrapper to handle the network interaction + ''' + + def __init__(self, env, model_func=None, done_func=None, number_models=1, uncertainty_method='random'): + gym.Wrapper.__init__(self, env) + + self.uncertainty_method = uncertainty_method + if self.uncertainty_method not in ['random', 'pessimistic']: + self.uncertainty_method = 'random' + print('switched to', self.uncertainty_method, ' since ', uncertainty_method, ' is unknown.') + self.model_func = model_func + self.done_func = done_func + self.number_models = number_models + self.len_episode = 0 + # self.threshold = self.env.threshold + # print('the threshold is: ', self.threshold) + self.max_steps = env.max_steps + + def reset(self, **kwargs): + + # self.threshold = -0.05 * 2 + 1 # rescaled [-1,1] + self.len_episode = 0 + self.done = False + # kwargs['simulation'] = True + # action = self.env.reset(**kwargs) + if self.model_func is not None: + obs = np.random.uniform(-1, 1, self.env.observation_space.shape) + # print('reset', obs) + # Todo: remove + obs = self.env.reset() + # obs = self.env.reset() + else: + # obs = self.env.reset(**kwargs) + pass + # Does this work? 
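        # Editor's note (clarifying comment, not in the original file): observations
        # reaching this wrapper are already mapped to roughly [-1, 1] by
        # MonitoringEnv.scale_state_env, and step() below clips model predictions
        # with np.clip(obs, -1.0, 1.0); the commented clip here would only matter
        # for uniformly sampled start states, since this version resets from the
        # real environment instead (see the "Todo: remove" line above).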
+ # self.obs = np.clip(obs, -1.0, 1.0) + self.obs = obs.copy() + # if self.test_phase: + # print('test reset', self.obs) + # print('Reset : ',self.obs) + return self.obs + + def step(self, action): + if self.model_func is not None: + # predict the next state on a random model + + if self.uncertainty_method == 'pessimistic': + obss = [] + rews = [] + for model_idx in range(num_ensemble_models): + obs, rew = self.model_func(self.obs, [np.squeeze(action)], model_idx) + obss.append(obs) + rews.append(rew) + idx = np.argmin(rew) + # idx = np.argmax(rew) + obs = obss[idx].copy() + rew = rews[idx] + else: + obs, rew = self.model_func(self.obs, [np.squeeze(action)], np.random.randint(0, self.number_models)) + self.obs = np.clip(obs.copy(), -1.0, 1.0) + # obs_real, rew_real, _, _ = self.env.step(action) + # # + # print('Diff: ', np.linalg.norm(obs - obs_real), np.linalg.norm(rew - rew_real)) + # print('Diff: ', action, obs, obs_real, rew, rew_real) + # obs += np.random.randn(obs.shape[-1]) + # # Todo: remove + # self.env.state = self.obs + # done = rew > self.threshold + + self.len_episode += 1 + # print('threshold at:', self.threshold) + # For Niky: hardcoded reward + if rew > 0.9: # self.threshold: TODO: to be changed + self.done = True + # print("Done", rew) + if self.len_episode >= self.max_steps: + self.done = True + + # if self.done: + # print('NetEnv:', self.obs, rew) + return self.obs, (rew - 1) / 2, self.done, "" + else: + # self.obs, rew, done, _ = real_env.step(action) + # return self.obs, rew, done, "" + pass + # return env.step(action) + + +class StructEnv(gym.Wrapper): + ''' + Gym Wrapper to store information like number of steps and total reward of the last espisode. + ''' + + def __init__(self, env): + gym.Wrapper.__init__(self, env) + self.n_obs = self.env.reset() + self.total_rew = 0 + self.len_episode = 0 + + def reset(self, **kwargs): + self.n_obs = self.env.reset(**kwargs) + self.total_rew = 0 + self.len_episode = 0 + return self.n_obs.copy() + + def step(self, action): + ob, reward, done, info = self.env.step(action) + # print('reward in struct', reward) + self.total_rew += reward + self.len_episode += 1 + return ob, reward, done, info + + def get_episode_reward(self): + return self.total_rew + + def get_episode_length(self): + return self.len_episode + + +def restore_model(old_model_variables, m_variables): + # variable used as index for restoring the actor's parameters + it_v2 = tf.Variable(0, trainable=False) + + restore_m_params = [] + for m_v in m_variables: + upd_m_rsh = tf.reshape(old_model_variables[it_v2: it_v2 + tf.reduce_prod(m_v.shape)], shape=m_v.shape) + restore_m_params.append(m_v.assign(upd_m_rsh)) + it_v2 += tf.reduce_prod(m_v.shape) + + return tf.group(*restore_m_params) + + +def METRPO(env_name, hidden_sizes=[32, 32], cr_lr=5e-3, num_epochs=50, gamma=0.99, lam=0.95, number_envs=1, + critic_iter=10, steps_per_env=100, delta=0.05, algorithm='TRPO', conj_iters=10, minibatch_size=1000, + mb_lr_start=0.0001, model_batch_size=512, simulated_steps=1000, num_ensemble_models=2, model_iter=15, + init_random_steps=steps_per_env, method='random'): + ''' + Model Ensemble Trust Region Policy Optimization + The states and actions are provided by the gym environement with the correct boxs. + The reward has to be between [-1,0]. 
+ Parameters: + ----------- + env_name: Name of the environment + hidden_sizes: list of the number of hidden units for each layer + cr_lr: critic learning rate + num_epochs: number of training epochs + gamma: discount factor + lam: lambda parameter for computing the GAE + number_envs: number of "parallel" synchronous environments + # NB: it isn't distributed across multiple CPUs + critic_iter: Number of SGD iterations on the critic per epoch + steps_per_env: number of steps per environment + # NB: the total number of steps per epoch will be: steps_per_env*number_envs + delta: Maximum KL divergence between two policies. Scalar value + algorithm: type of algorithm. Either 'TRPO' or 'NPO' + conj_iters: number of conjugate gradient iterations + minibatch_size: Batch size used to train the critic + mb_lr: learning rate of the environment model + model_batch_size: batch size of the environment model + simulated_steps: number of simulated steps for each policy update + num_ensemble_models: number of models + model_iter: number of iterations without improvement before stopping training the model + ''' + # TODO: add ME-TRPO hyperparameters + + tf.reset_default_graph() + + # Create a few environments to collect the trajectories + + # envs = [StructEnv(gym.make(env_name)) for _ in range(number_envs)] + envs = [StructEnv(make_env()) for _ in range(number_envs)] + env_test = StructEnv(make_env(test_env=True)) + # env_test = gym.make(env_name) + print('env_test' * 4) + + # env_test = make_env(test=True) + # env_test = gym.wrappers.Monitor(env_test, "VIDEOS/", force=True, video_callable=lambda x: x%10 == 0) + # to be changed in real test + # env_test = FelLocalEnv(tango=tango) + # env_test.test = True + # env_test_1 = FelLocalEnv(tango=tango) + # env_test_1.test = True + + # If the scaling is not perfomed this has to be changed + low_action_space = -1 # envs[0].action_space.low + high_action_space = 1 # envs[0].action_space.high + + obs_dim = envs[0].observation_space.shape + act_dim = envs[0].action_space.shape[0] + + # print(envs[0].action_space, envs[0].observation_space, low_action_space, + # high_action_space) + + # Placeholders for model + act_ph = tf.placeholder(shape=(None, act_dim), dtype=tf.float64, name='act') + obs_ph = tf.placeholder(shape=(None, obs_dim[0]), dtype=tf.float64, name='obs') + # NEW + nobs_ph = tf.placeholder(shape=(None, obs_dim[0]), dtype=tf.float64, name='nobs') + rew_ph = tf.placeholder(shape=(None, 1), dtype=tf.float64, name='rew') + + ret_ph = tf.placeholder(shape=(None,), dtype=tf.float64, name='ret') + adv_ph = tf.placeholder(shape=(None,), dtype=tf.float64, name='adv') + old_p_log_ph = tf.placeholder(shape=(None,), dtype=tf.float64, name='old_p_log') + old_mu_ph = tf.placeholder(shape=(None, act_dim), dtype=tf.float64, name='old_mu') + old_log_std_ph = tf.placeholder(shape=(act_dim), dtype=tf.float64, name='old_log_std') + p_ph = tf.placeholder(shape=(None,), dtype=tf.float64, name='p_ph') + + # Placeholder for learning rate + mb_lr_ = tf.placeholder("float", None) # , name='mb_lr') + + # result of the conjugate gradient algorithm + cg_ph = tf.placeholder(shape=(None,), dtype=tf.float64, name='cg') + + ######################################################### + ######################## POLICY ######################### + ######################################################### + + old_model_variables = tf.placeholder(shape=(None,), dtype=tf.float64, name='old_model_variables') + + # Neural network that represent the policy + with tf.variable_scope('actor_nn'): + 
p_means = mlp(obs_ph, hidden_sizes, act_dim, tf.tanh, last_activation=tf.tanh) + p_means = tf.clip_by_value(p_means, low_action_space, high_action_space) + log_std = tf.get_variable(name='log_std', initializer=np.ones(act_dim, dtype=np.float64)) + + # Neural network that represent the value function + with tf.variable_scope('critic_nn'): + s_values = mlp(obs_ph, hidden_sizes, 1, tf.tanh, last_activation=None) + s_values = tf.squeeze(s_values) + + # Add "noise" to the predicted mean following the Gaussian distribution with standard deviation e^(log_std) + p_noisy = p_means + tf.random_normal(tf.shape(p_means), 0, 1, dtype=tf.float64) * tf.exp(log_std) + # Clip the noisy actions + a_sampl = tf.clip_by_value(p_noisy, low_action_space, high_action_space) + + # Compute the gaussian log likelihood + p_log = gaussian_log_likelihood(act_ph, p_means, log_std) + + # Measure the divergence + diverg = tf.reduce_mean(tf.exp(old_p_log_ph - p_log)) + + # ratio + ratio_new_old = tf.exp(p_log - old_p_log_ph) + # TRPO surrogate loss function + p_loss = - tf.reduce_mean(ratio_new_old * adv_ph) + + # MSE loss function + v_loss = tf.reduce_mean((ret_ph - s_values) ** 2) + # Critic optimization + v_opt = tf.train.AdamOptimizer(cr_lr).minimize(v_loss) + + def variables_in_scope(scope): + # get all trainable variables in 'scope' + return tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope) + + # Gather and flatten the actor parameters + p_variables = variables_in_scope('actor_nn') + p_var_flatten = flatten_list(p_variables) + + # Gradient of the policy loss with respect to the actor parameters + p_grads = tf.gradients(p_loss, p_variables) + p_grads_flatten = flatten_list(p_grads) + + ########### RESTORE ACTOR PARAMETERS ########### + p_old_variables = tf.placeholder(shape=(None,), dtype=tf.float64, name='p_old_variables') + # variable used as index for restoring the actor's parameters + it_v1 = tf.Variable(0, trainable=False) + restore_params = [] + + for p_v in p_variables: + upd_rsh = tf.reshape(p_old_variables[it_v1: it_v1 + tf.reduce_prod(p_v.shape)], shape=p_v.shape) + restore_params.append(p_v.assign(upd_rsh)) + it_v1 += tf.reduce_prod(p_v.shape) + + restore_params = tf.group(*restore_params) + + # gaussian KL divergence of the two policies + dkl_diverg = gaussian_DKL(old_mu_ph, old_log_std_ph, p_means, log_std) + + # Jacobian of the KL divergence (Needed for the Fisher matrix-vector product) + dkl_diverg_grad = tf.gradients(dkl_diverg, p_variables) + + dkl_matrix_product = tf.reduce_sum(flatten_list(dkl_diverg_grad) * p_ph) + print('dkl_matrix_product', dkl_matrix_product.shape) + # Fisher vector product + # The Fisher-vector product is a way to compute the A matrix without the need of the full A + Fx = flatten_list(tf.gradients(dkl_matrix_product, p_variables)) + + ## Step length + beta_ph = tf.placeholder(shape=(), dtype=tf.float64, name='beta') + # NPG update + npg_update = beta_ph * cg_ph + + ## alpha is found through line search + alpha = tf.Variable(1., trainable=False, dtype=tf.float64) + # TRPO update + trpo_update = alpha * npg_update + + #################### POLICY UPDATE ################### + # variable used as an index + it_v = tf.Variable(0, trainable=False) + p_opt = [] + # Apply the updates to the policy + for p_v in p_variables: + print(p_v) + upd_rsh = tf.reshape(trpo_update[it_v: it_v + tf.reduce_prod(p_v.shape)], shape=p_v.shape) + p_opt.append(p_v.assign_sub(upd_rsh)) + it_v += tf.reduce_prod(p_v.shape) + + p_opt = tf.group(*p_opt) + + 
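+    # Recap of the update graph assembled above (descriptive comment only):
+    # cg_ph will hold the conjugate-gradient solution x ~= H^-1 g, where H is the
+    # Fisher matrix accessed only through the Fisher-vector product Fx and g is the
+    # flattened policy gradient. The natural-gradient step is npg_update = beta * x
+    # with beta = sqrt(2 * delta / (x^T H x)) (computed later as beta_np inside
+    # policy_update), and the TRPO step rescales it by the alpha returned by the
+    # backtracking line search: trpo_update = alpha * npg_update. p_opt then
+    # subtracts this flattened update from the actor parameters, slice by slice.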
######################################################### + ######################### MODEL ######################### + ######################################################### + + # Create a new class for the model: + # NN class + class NN: + def __init__(self, x, y, y_dim, hidden_size, n, learning_rate, init_params): + self.init_params = init_params + + # set up NN + with tf.variable_scope('model_' + str(n) + '_nn'): + self.inputs = x + self.y_target = y + + self.layer_1_w = tf.layers.Dense(hidden_size, + activation=tf.tanh, + kernel_initializer=tf.random_normal_initializer(mean=0., + stddev=self.init_params.get( + 'init_stddev_1_w')), + bias_initializer=tf.random_normal_initializer(mean=0., + stddev=self.init_params.get( + 'init_stddev_1_b'))) + + self.layer_1 = self.layer_1_w.apply(self.inputs) + + self.layer_2_w = tf.layers.Dense(hidden_size, + activation=tf.tanh, + kernel_initializer=tf.random_normal_initializer(mean=0., + stddev=self.init_params.get( + 'init_stddev_1_w')), + bias_initializer=tf.random_normal_initializer(mean=0., + stddev=self.init_params.get( + 'init_stddev_1_b'))) + + self.layer_2 = self.layer_2_w.apply(self.layer_1) + + self.output_w = tf.layers.Dense(y_dim, + activation=tf.tanh, + use_bias=False, + kernel_initializer=tf.random_normal_initializer(mean=0., + stddev=self.init_params.get( + 'init_stddev_2_w'))) + # self.layer_1_w = tf.layers.Dense(hidden_size, + # activation=tf.tanh + # ) + # + # self.layer_1 = self.layer_1_w.apply(self.inputs) + # + # self.layer_2_w = tf.layers.Dense(hidden_size, + # activation=tf.tanh) + # + # self.layer_2 = self.layer_2_w.apply(self.layer_1) + # + # self.output_w = tf.layers.Dense(y_dim, + # activation=tf.tanh) + # # + + self.output = self.output_w.apply(self.layer_2) + + # set up loss and optimiser - we'll modify this later with anchoring regularisation + self.opt_method = tf.train.AdamOptimizer(learning_rate) + self.mse_ = 1 / tf.shape(self.inputs, out_type=tf.int64)[0] * \ + tf.reduce_sum(tf.square(self.y_target - self.output)) + self.loss_ = 1 / tf.shape(self.inputs, out_type=tf.int64)[0] * \ + tf.reduce_sum(tf.square(self.y_target - self.output)) + self.optimizer = self.opt_method.minimize(self.loss_) + self.optimizer_mse = self.opt_method.minimize(self.mse_) + # return self.mse_ + + def get_weights(self): + '''method to return current params''' + + ops = [self.layer_1_w.kernel, self.layer_1_w.bias, + self.layer_2_w.kernel, self.layer_2_w.bias, + self.output_w.kernel] + w1, b1, w2, b2, w = sess.run(ops) + + return w1, b1, w2, b2, w + + # def set_weights(self, variables): + # model_assign(self.n, initial_variables_models[model_idx]) + + def anchor(self, lambda_anchor): + '''regularise around initialised parameters''' + + w1, b1, w2, b2, w = self.get_weights() + + # get initial params to hold for future trainings + self.w1_init, self.b1_init, self.w2_init, self.b2_init, self.w_out_init = w1, b1, w2, b2, w + + loss_anchor = lambda_anchor[0] * tf.reduce_sum(tf.square(self.w1_init - self.layer_1_w.kernel)) + loss_anchor += lambda_anchor[1] * tf.reduce_sum(tf.square(self.b1_init - self.layer_1_w.bias)) + + loss_anchor = lambda_anchor[0] * tf.reduce_sum(tf.square(self.w2_init - self.layer_2_w.kernel)) + loss_anchor += lambda_anchor[1] * tf.reduce_sum(tf.square(self.b2_init - self.layer_2_w.bias)) + + loss_anchor += lambda_anchor[2] * tf.reduce_sum(tf.square(self.w_out_init - self.output_w.kernel)) + + # combine with original loss + # norm_val = 1/tf.shape(self.inputs)[0] + self.loss_ = self.loss_ + tf.scalar_mul(1 / 
tf.shape(self.inputs)[0], loss_anchor) + # self.loss_ = self.loss_ + tf.scalar_mul(1 / 1000, loss_anchor) + self.optimizer = self.opt_method.minimize(self.loss_) + return self.optimizer, self.loss_ + + m_opts = [] + m_losses = [] + + nobs_pred_m = [] + act_obs = tf.concat([obs_ph, act_ph], 1) + target = tf.concat([nobs_ph, rew_ph], 1) + + # computational graph of N models and the correct losses for the anchor method + m_classes = [] + for i in range(num_ensemble_models): + # TODO: Add variable size of network + hidden_sizes = [100, 100] + nobs_pred = mlp(x=act_obs, hidden_layers=hidden_sizes, output_layer=obs_dim[0] + 1, + activation=tf.tanh, last_activation=tf.tanh) # , init_params=init_params) + m_class = NN(x=tf.concat([obs_ph, act_ph], 1), y=tf.concat([nobs_ph, rew_ph], 1), y_dim=obs_dim[0] + 1, + learning_rate=1e-3, n=i, + hidden_size=100, init_params=init_params) + + nobs_pred = m_class.output + + nobs_pred_m.append(nobs_pred) + + # m_loss = tf.reduce_mean((tf.concat([nobs_ph, rew_ph], 1) - nobs_pred) ** 2) + # m_opts.append(tf.train.AdamOptimizer(learning_rate=mb_lr_).minimize(m_loss)) + # m_losses.append(m_loss) + + m_classes.append(m_class) + # m_losses.append(m_class.mse_) + # m_opts.append(m_class.optimizer_mse) + + ##################### RESTORE MODEL ###################### + initialize_models = [] + models_variables = [] + for i in range(num_ensemble_models): + m_variables = variables_in_scope('model_' + str(i) + '_nn') + # print('m_variables', m_variables) + initialize_models.append(restore_model(old_model_variables, m_variables)) + # List of weights as numpy + models_variables.append(flatten_list(m_variables)) + + ######################################################### + ##################### END MODEL ######################### + ######################################################### + # Time + now = datetime.now() + clock_time = "{}_{}_{}_{}".format(now.day, now.hour, now.minute, now.second) + print('Time:', clock_time) + + # Set scalars and hisograms for TensorBoard + tf.summary.scalar('p_loss', p_loss, collections=['train']) + tf.summary.scalar('v_loss', v_loss, collections=['train']) + tf.summary.scalar('p_divergence', diverg, collections=['train']) + tf.summary.scalar('ratio_new_old', tf.reduce_mean(ratio_new_old), collections=['train']) + tf.summary.scalar('dkl_diverg', dkl_diverg, collections=['train']) + tf.summary.scalar('alpha', alpha, collections=['train']) + tf.summary.scalar('beta', beta_ph, collections=['train']) + tf.summary.scalar('p_std_mn', tf.reduce_mean(tf.exp(log_std)), collections=['train']) + tf.summary.scalar('s_values_mn', tf.reduce_mean(s_values), collections=['train']) + tf.summary.histogram('p_log', p_log, collections=['train']) + tf.summary.histogram('p_means', p_means, collections=['train']) + tf.summary.histogram('s_values', s_values, collections=['train']) + tf.summary.histogram('adv_ph', adv_ph, collections=['train']) + tf.summary.histogram('log_std', log_std, collections=['train']) + scalar_summary = tf.summary.merge_all('train') + + tf.summary.scalar('old_v_loss', v_loss, collections=['pre_train']) + tf.summary.scalar('old_p_loss', p_loss, collections=['pre_train']) + pre_scalar_summary = tf.summary.merge_all('pre_train') + + hyp_str = '-spe_' + str(steps_per_env) + '-envs_' + str(number_envs) + '-cr_lr' + str(cr_lr) + '-crit_it_' + str( + critic_iter) + '-delta_' + str(delta) + '-conj_iters_' + str(conj_iters) + + file_writer = tf.summary.FileWriter('log_dir/' + env_name + '/' + algorithm + '_' + clock_time + '_' + hyp_str, + 
tf.get_default_graph()) + + ################################################################################################# + # Session start!!!!!!!! + # create a session + sess = tf.Session() + # initialize the variables + sess.run(tf.global_variables_initializer()) + + def action_op(o): + return sess.run([p_means, s_values], feed_dict={obs_ph: o}) + + def action_op_noise(o): + return sess.run([a_sampl, s_values], feed_dict={obs_ph: o}) + + def model_op(o, a, md_idx): + mo = sess.run(nobs_pred_m[md_idx], feed_dict={obs_ph: [o], act_ph: [a[0]]}) + return np.squeeze(mo[:, :-1]), np.squeeze(mo[:, -1]) + + def run_model_loss(model_idx, r_obs, r_act, r_nxt_obs, r_rew): + # print({'obs_ph': r_obs.shape, 'act_ph': r_act.shape, 'nobs_ph': r_nxt_obs.shape}) + r_act = np.squeeze(r_act, axis=2) + # print(r_act.shape) + r_rew = np.reshape(r_rew, (-1, 1)) + # print(r_rew.shape) + return_val = sess.run(m_loss_anchor[model_idx], + feed_dict={obs_ph: r_obs, act_ph: r_act, nobs_ph: r_nxt_obs, rew_ph: r_rew}) + # return_val = sess.run(m_losses[model_idx], + # feed_dict={obs_ph: r_obs, act_ph: r_act, nobs_ph: r_nxt_obs, rew_ph: r_rew}) + return return_val + + def run_model_opt_loss(model_idx, r_obs, r_act, r_nxt_obs, r_rew, mb_lr): + r_act = np.squeeze(r_act, axis=2) + r_rew = np.reshape(r_rew, (-1, 1)) + # return sess.run([m_opts[model_idx], m_losses[model_idx]], + # feed_dict={obs_ph: r_obs, act_ph: r_act, nobs_ph: r_nxt_obs, rew_ph: r_rew, mb_lr_: mb_lr}) + return sess.run([m_opts_anchor[model_idx], m_loss_anchor[model_idx]], + feed_dict={obs_ph: r_obs, act_ph: r_act, nobs_ph: r_nxt_obs, rew_ph: r_rew, mb_lr_: mb_lr}) + + def model_assign(i, model_variables_to_assign): + ''' + Update the i-th model's parameters + ''' + return sess.run(initialize_models[i], feed_dict={old_model_variables: model_variables_to_assign}) + + # def anchor(model_idx): + # m_classes[model_idx].anchor(lambda_anchor=lambda_anchor) + + def policy_update(obs_batch, act_batch, adv_batch, rtg_batch, it): + # log probabilities, logits and log std of the "old" policy + # "old" policy refer to the policy to optimize and that has been used to sample from the environment + act_batch = np.squeeze(act_batch, axis=2) + old_p_log, old_p_means, old_log_std = sess.run([p_log, p_means, log_std], + feed_dict={obs_ph: obs_batch, act_ph: act_batch, + adv_ph: adv_batch, ret_ph: rtg_batch}) + # get also the "old" parameters + old_actor_params = sess.run(p_var_flatten) + if it < 1: + std_vals = sess.run([log_std], feed_dict={log_std: np.ones(act_dim)}) + # print(std_vals) + # old_p_loss is later used in the line search + # run pre_scalar_summary for a summary before the optimization + old_p_loss, summary = sess.run([p_loss, pre_scalar_summary], + feed_dict={obs_ph: obs_batch, act_ph: act_batch, adv_ph: adv_batch, + ret_ph: rtg_batch, old_p_log_ph: old_p_log}) + file_writer.add_summary(summary, step_count) + + file_writer.add_summary(summary, step_count) + file_writer.flush() + + def H_f(p): + ''' + Run the Fisher-Vector product on 'p' to approximate the Hessian of the DKL + ''' + return sess.run(Fx, + feed_dict={old_mu_ph: old_p_means, old_log_std_ph: old_log_std, p_ph: p, obs_ph: obs_batch, + act_ph: act_batch, adv_ph: adv_batch, ret_ph: rtg_batch}) + + g_f = sess.run(p_grads_flatten, + feed_dict={old_mu_ph: old_p_means, obs_ph: obs_batch, act_ph: act_batch, adv_ph: adv_batch, + ret_ph: rtg_batch, old_p_log_ph: old_p_log}) + ## Compute the Conjugate Gradient so to obtain an approximation of H^(-1)*g + # Where H in reality isn't the true Hessian of 
the KL divergence but an approximation of it computed via Fisher-Vector Product (F) + conj_grad = conjugate_gradient(H_f, g_f, iters=conj_iters) + + # Compute the step length + beta_np = np.sqrt(2 * delta / (1e-10 + np.sum(conj_grad * H_f(conj_grad)))) + + def DKL(alpha_v): + ''' + Compute the KL divergence. + It optimize the function to compute the DKL. Afterwards it restore the old parameters. + ''' + sess.run(p_opt, feed_dict={beta_ph: beta_np, alpha: alpha_v, cg_ph: conj_grad, obs_ph: obs_batch, + act_ph: act_batch, adv_ph: adv_batch, old_p_log_ph: old_p_log}) + a_res = sess.run([dkl_diverg, p_loss], + feed_dict={old_mu_ph: old_p_means, old_log_std_ph: old_log_std, obs_ph: obs_batch, + act_ph: act_batch, adv_ph: adv_batch, ret_ph: rtg_batch, + old_p_log_ph: old_p_log}) + sess.run(restore_params, feed_dict={p_old_variables: old_actor_params}) + return a_res + + # Actor optimization step + # Different for TRPO or NPG + # Backtracing line search to find the maximum alpha coefficient s.t. the constraint is valid + best_alpha = backtracking_line_search(DKL, delta, old_p_loss, p=0.8) + sess.run(p_opt, feed_dict={beta_ph: beta_np, alpha: best_alpha, + cg_ph: conj_grad, obs_ph: obs_batch, act_ph: act_batch, + adv_ph: adv_batch, old_p_log_ph: old_p_log}) + + lb = len(obs_batch) + shuffled_batch = np.arange(lb) + np.random.shuffle(shuffled_batch) + + # Value function optimization steps + for _ in range(critic_iter): + # shuffle the batch on every iteration + np.random.shuffle(shuffled_batch) + for idx in range(0, lb, minibatch_size): + minib = shuffled_batch[idx:min(idx + minibatch_size, lb)] + sess.run(v_opt, feed_dict={obs_ph: obs_batch[minib], ret_ph: rtg_batch[minib]}) + + def train_model(tr_obs, tr_act, tr_nxt_obs, tr_rew, v_obs, v_act, v_nxt_obs, v_rew, + f_obs, f_act, f_nxt_obs, f_rew, + step_count, model_idx, mb_lr): + + # Get validation loss on the old model only used for monitoring + mb_valid_loss1 = run_model_loss(model_idx, v_obs, v_act, v_nxt_obs, v_rew) + + # Restore the initial random weights to have a new, clean neural network + # initial_variables_models - list stored before already in the code below - + # important for the anchor method + model_assign(model_idx, initial_variables_models[model_idx]) + + # update the anchor model losses: + # m_opts_anchor = [] + # m_loss_anchor = [] + # for i in range(num_ensemble_models): + # This stores the anchor weights to the models for the regularisation: + # opt, loss = m_classes[model_idx].anchor(lambda_anchor=lambda_anchor) + # m_opts_anchor[model_idx] = opt + # m_loss_anchor[model_idx] = loss + + # Get validation loss on the now initialized model + mb_valid_loss = run_model_loss(model_idx, v_obs, v_act, v_nxt_obs, v_rew) + + acc_m_losses = [] + last_m_losses = [] + md_params = sess.run(models_variables[model_idx]) + best_mb = {'iter': 0, 'loss': mb_valid_loss, 'params': md_params} + iteration = 0 + + # Create mini-batch for training + lb = len(f_obs) + shuffled_batch = np.arange(lb) + np.random.shuffle(shuffled_batch) + + # Run until the number of model_iter has passed from the best val loss at it on... 
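+        # Descriptive note on the loop below: the model was re-initialised above to
+        # its original random weights (model_assign with initial_variables_models)
+        # and is now re-trained on the full shuffled batch with the anchored loss
+        # m_loss_anchor[model_idx], i.e. the MSE plus an L2 penalty that pulls the
+        # weights back towards their initial values (anchored ensembling). The
+        # validation loss is recomputed every iteration; since the improvement check
+        # is commented out, best_mb always holds the most recent parameters, which
+        # are the ones restored after the loop.
+        # NOTE: in NN.anchor() the layer-2 anchor term is assigned with '=' instead
+        # of '+=', so the layer-1 kernel/bias anchor contributions are overwritten.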
+ for iteration in range(0, training_steps_model): + + # update the model on each mini-batch + last_m_losses = [] + # for idx in range(0, 20): # repeat gradient step for some time + # minib = shuffled_batch[idx:min(idx + minibatch_size, lb)] + # want all data (shuffeld) + # np.random.shuffle(shuffled_batch) + minib = shuffled_batch + if len(minib) != minibatch_size: + _, ml = run_model_opt_loss(model_idx, f_obs[minib], f_act[minib], f_nxt_obs[minib], + f_rew[minib], mb_lr=mb_lr) + acc_m_losses.append(ml) + last_m_losses.append(ml) + else: + pass + # print('Warning!') + + # Check if the loss on the validation set has improved + mb_valid_loss = run_model_loss(model_idx, v_obs, v_act, v_nxt_obs, v_rew) + # if mb_valid_loss < best_mb['loss']: + best_mb['loss'] = mb_valid_loss + best_mb['iter'] = iteration + # store the parameters to the array + best_mb['params'] = sess.run(models_variables[model_idx]) + + # it += 1 + + # if it>=10000: + # break + # print('iteration: ', it) + + # Restore the model with the lower validation loss + model_assign(model_idx, best_mb['params']) + + print('Model:{}, iter:{} -- Old Val loss:{:.6f} ' + 'New Val loss:{:.6f} -- New Train loss:{:.6f}'.format(model_idx, iteration, + mb_valid_loss1, best_mb['loss'], + np.mean(last_m_losses))) + summary = tf.Summary() + summary.value.add(tag='supplementary/m_loss', simple_value=np.mean(acc_m_losses)) + summary.value.add(tag='supplementary/iterations', simple_value=iteration) + file_writer.add_summary(summary, step_count) + file_writer.flush() + + def plot_results(env_wrapper, label, **kwargs): + # plotting + print('now plotting...') + rewards = env_wrapper.env.current_buffer.get_data()['rews'] + + # states = env_wrapper.env.current_buffer.get_data()['obss'] + # pd.DataFrame(states).hist() + # plt.show() + + # initial_states = env.initial_conditions + + iterations = [] + finals = [] + means = [] + stds = [] + + # init_states = pd.read_pickle('/Users/shirlaen/PycharmProjects/DeepLearning/spinningup/Environments/initData') + + for i in range(len(rewards)): + if (len(rewards[i]) > 1): + # finals.append(rewards[i][len(rewards[i]) - 1]) + finals.append(rewards[i][-1]) + means.append(np.mean(rewards[i][1:])) + stds.append(np.std(rewards[i][1:])) + iterations.append(len(rewards[i])) + # print(iterations) + x = range(len(iterations)) + iterations = np.array(iterations) + finals = np.array(finals) + means = np.array(means) + stds = np.array(stds) + + plot_suffix = label # , Fermi time: {env.TOTAL_COUNTER / 600:.1f} h' + + fig, axs = plt.subplots(2, 1, sharex=True) + + ax = axs[0] + ax.plot(x, iterations) + ax.set_ylabel('Iterations (1)') + ax.set_title(plot_suffix) + # fig.suptitle(label, fontsize=12) + if 'data_number' in kwargs: + ax1 = plt.twinx(ax) + color = 'lime' + ax1.set_ylabel('Mean reward', color=color) # we already handled the x-label with ax1 + ax1.tick_params(axis='y', labelcolor=color) + ax1.plot(x, kwargs.get('data_number'), color=color) + + ax = axs[1] + color = 'blue' + ax.set_ylabel('Final reward', color=color) # we already handled the x-label with ax1 + ax.tick_params(axis='y', labelcolor=color) + ax.plot(x, finals, color=color) + + ax.set_title('Final reward per episode') # + plot_suffix) + ax.set_xlabel('Episodes (1)') + + ax1 = plt.twinx(ax) + color = 'lime' + ax1.set_ylabel('Mean reward', color=color) # we already handled the x-label with ax1 + ax1.tick_params(axis='y', labelcolor=color) + ax1.fill_between(x, means - stds, means + stds, + alpha=0.5, edgecolor=color, facecolor='#FF9848') + ax1.plot(x, means, 
color=color) + + # ax.set_ylim(ax1.get_ylim()) + if 'save_name' in kwargs: + plt.savefig(kwargs.get('save_name') + '.pdf') + # fig.tight_layout() + plt.show() + + def plot_observables(data, label, **kwargs): + """plot observables during the test""" + + sim_rewards_all = np.array(data.get('sim_rewards_all')) + step_counts_all = np.array(data.get('step_counts_all')) + batch_rews_all = np.array(data.get('batch_rews_all')) + tests_all = np.array(data.get('tests_all')) + + fig, axs = plt.subplots(2, 1, sharex=True) + x = np.arange(len(batch_rews_all[0])) + ax = axs[0] + ax.step(x, batch_rews_all[0]) + ax.fill_between(x, batch_rews_all[0] - batch_rews_all[1], batch_rews_all[0] + batch_rews_all[1], + alpha=0.5) + ax.set_ylabel('rews per batch') + + ax.set_title(label) + + # plt.tw + ax2 = ax.twinx() + + color = 'lime' + ax2.set_ylabel('data points', color=color) # we already handled the x-label with ax1 + ax2.tick_params(axis='y', labelcolor=color) + ax2.step(x, step_counts_all, color=color) + + ax = axs[1] + ax.grid(True) + ax.plot(sim_rewards_all[0], ls=':') + ax.fill_between(x, sim_rewards_all[0] - sim_rewards_all[1], sim_rewards_all[0] + sim_rewards_all[1], + alpha=0.5) + + try: + ax.plot(tests_all[0]) + ax.fill_between(x, tests_all[0] - tests_all[1], tests_all[0] + tests_all[1], + alpha=0.5) + except: + pass + + ax.set_ylabel('rewards tests') + # plt.tw + ax2 = ax.twinx() + + color = 'lime' + ax2.set_ylabel('entropy', color=color) # we already handled the x-label with ax1 + ax2.tick_params(axis='y', labelcolor=color) + ax2.plot(entropy_all, color=color) + + plt.show() + + def save_data(data, **kwargs): + '''logging functon''' + # if 'directory_name' in kwargs: + # project_directory = kwargs.get('directory_name') + now = datetime.now() + clock_time = f'{now.month:0>2}_{now.day:0>2}_{now.hour:0>2}_{now.minute:0>2}_{now.second:0>2}' + out_put_writer = open(project_directory + clock_time + '_training_observables', 'wb') + pickle.dump(data, out_put_writer, -1) + out_put_writer.close() + + # variable to store the total number of steps + step_count = 0 + model_buffer = FullBuffer() + print('Env batch size:', steps_per_env, ' Batch size:', steps_per_env * number_envs) + + # Create a simulated environment + sim_env = NetworkEnv(make_env(), model_op, None, number_models=num_ensemble_models, uncertainty_method=method) + + # ------------------------------------------------------------------------------------------------------ + # -------------------------------------Try to set correct anchors--------------------------------------- + # Get the initial parameters of each model + # These are used in later epochs when we aim to re-train the models anew with the new dataset + initial_variables_models = [] + for model_var in models_variables: + # num list of parameters + initial_variables_models.append(sess.run(model_var)) + + # update the anchor model losses: + m_opts_anchor = [] + m_loss_anchor = [] + for i in range(num_ensemble_models): + # This stores the anchor weights to the models for the regularisation: + opt, loss = m_classes[i].anchor(lambda_anchor=lambda_anchor) + m_opts_anchor.append(opt) + m_loss_anchor.append(loss) + + # ------------------------------------------------------------------------------------------------------ + # -------------------------------------Try to set correct anchors--------------------------------------- + + total_iterations = 0 + + converged_flag = False + # save_data = save_data(clock_time) + sim_rewards_all = [] + sim_rewards_std_all = [] + entropy_all = [] + tests_all 
= [] + tests_std_all = [] + batch_rews_all = [] + batch_rews_std_all = [] + step_counts_all = [] + + + + for ep in range(num_epochs): + if (converged_flag): + print('Converged!!!!') + break + # lists to store rewards and length of the trajectories completed + batch_rew = [] + batch_len = [] + print('============================', ep, '============================') + # Execute in serial the environment, storing temporarily the trajectories. + for env in envs: + # Todo: Test randomization stronger if reward lower...we need a good scheme + # target_threshold ????? + init_log_std = np.ones(act_dim) * np.log(np.random.rand() * 1) + env.reset() + + # iterate over a fixed number of steps + steps_train = init_random_steps if ep == 0 else steps_per_env + # steps_train = steps_per_env + for _ in range(steps_train): + # found = False + # while not(found): + # run the policy + + if ep == 0: + # Sample random action during the first epoch + act = np.random.uniform(-1, 1, size=env.action_space.shape[-1]) + + else: + + act = sess.run(a_sampl, feed_dict={obs_ph: [env.n_obs], log_std: init_log_std}) + # act = np.clip(act + np.random.randn(act.shape[0], act.shape[1]) * 0.1, -1, 1) + + act = np.squeeze(act) + # print('act', act*12) + # take a step in the environment + obs2, rew, done, _ = env.step(np.array(act)) + + # add the new transition to the temporary buffer + model_buffer.store(env.n_obs.copy(), act, rew.copy(), obs2.copy(), done) + + env.n_obs = obs2.copy() + step_count += 1 + + if done: + batch_rew.append(env.get_episode_reward()) + batch_len.append(env.get_episode_length()) + + env.reset() + init_log_std = np.ones(act_dim) * np.log(np.random.rand() * 1) + + # if ep == 0: + # # try: + # # Initialize randomly a training and validation set + # model_buffer.generate_random_dataset() + # # get both datasets + # train_obs, train_act, train_rew, train_nxt_obs, _ = model_buffer.get_training_batch() + # valid_obs, valid_act, valid_rew, valid_nxt_obs, _ = model_buffer.get_valid_batch() + # target_threshold = max(max(valid_rew), max(train_rew)) + # # print('-- '*38, target_threshold) + # found = target_threshold>=-0.1 and step_count>=191 + # # except: + # # pass + + # save the data for plotting the collected data for the model + env.save_current_buffer() + + print('Ep:%d Rew:%.2f -- Step:%d' % (ep, np.mean(batch_rew), step_count)) + + # env_test.env.set_usage('default') + # plot_results(env_test, f'Total {total_iterations}, ' + # f'data points: {len(model_buffer.train_idx) + len(model_buffer.valid_idx)}, ' + # f'modelit: {ep}') + ############################################################ + ###################### MODEL LEARNING ###################### + ############################################################ + + # Initialize randomly a training and validation set + model_buffer.generate_random_dataset() + + # get both datasets + train_obs, train_act, train_rew, train_nxt_obs, _ = model_buffer.get_training_batch() + valid_obs, valid_act, valid_rew, valid_nxt_obs, _ = model_buffer.get_valid_batch() + full_obs, full_act, full_rew, full_nxt_obs, _ = model_buffer.get_full_batch() + + std_vals = sess.run(log_std) + print('Log Std policy:', std_vals, np.mean(std_vals)) + + target_threshold = max(max(valid_rew), max(train_rew)) + sim_env.threshold = target_threshold # min(target_threshold, -0.05) + print('maximum: ', sim_env.threshold) + + # Learning rate as function of ep + lr = lambda ep: 1e-3 - ep / num_epochs * (1e-3 - 5e-4) + mb_lr = 1e-3 # if ep < 1 else 1e-3 + # simulated_steps = simulated_steps if ep<10 
else 10000 + print('mb_lr: ', mb_lr) + for i in range(num_ensemble_models): + # train the dynamic model on the datasets just sampled + train_model(train_obs, train_act, train_nxt_obs, train_rew, valid_obs, valid_act, valid_nxt_obs, valid_rew, + full_obs, full_act, full_nxt_obs, full_rew, + step_count, i, mb_lr=mb_lr) + + ############################################################ + ###################### POLICY LEARNING ###################### + ############################################################ + + best_sim_test = -1e16 * np.ones(num_ensemble_models) + + # plot_results(env_test, f'Total {total_iterations}, ' + # f'data points: {len(model_buffer.train_idx) + len(model_buffer.valid_idx)}, ' + # f'modelit: {ep}') + + # # For Niky: perform default test! ----------------------------- + # if ep == 0: + # it = 0 + # # perform test! ----------------------------- + # env_test.env.set_usage('test') + # mn_test, mn_test_std, mn_length, mn_success = test_agent(env_test, action_op, num_games=50) + # # perform test! ----------------------------- + # label = f'Init {total_iterations}, ' + \ + # f'data points: {len(model_buffer.train_idx) + len(model_buffer.valid_idx)}, ' + \ + # f'ep: {ep}, it: {it}\n' + hyp_str_all + # # plot results of test ----------------------------- + # plot_results(env_test, label=label) + + env_test.env.set_usage('default') + + for it in range(max_training_iterations): + if converged_flag: + break + total_iterations += 1 + print('\t Policy it', it, end='..') + + ##################### MODEL SIMLUATION ##################### + # obs_batch, act_batch, adv_batch, rtg_batch = simulate_environment(sim_env, action_op_noise, simulated_steps) + batch, ep_length = simulate_environment(sim_env, action_op_noise, simulated_steps) + # verification_simulate_environment(sim_env, env_test, action_op_noise, 50) + obs_batch, act_batch, adv_batch, rtg_batch = batch + + ################# TRPO UPDATE ################ + policy_update(obs_batch, act_batch, adv_batch, rtg_batch, it) + std_vals = sess.run(log_std) + print('Log Std policy inner:', np.mean(std_vals)) + if np.mean(std_vals) < -5: + converged_flag = True + # Testing the policy on a real environment + # mn_test, mn_test_std, mn_length = test_agent(env_test, action_op, num_games=1) + # plot_results(env_test, 'ME-TRPO') + # print(' Test score: ', np.round(mn_test, 2), np.round(mn_test_std, 2), np.round(mn_length, 2)) + # mn_test, mn_test_std, mn_length = test_agent(env_test, action_op, num_games=1) + # summary = tf.Summary() + # summary.value.add(tag='test/performance', simple_value=mn_test) + # file_writer.add_summary(summary, step_count) + # file_writer.flush() + + # Test the policy on simulated environment. + # dynamic_wait_time_count = dynamic_wait_time(ep) + if dynamic_wait_time(it, ep): + + print('Iterations: ', total_iterations) + label = f'Total {total_iterations}, ' + \ + f'data points: {len(model_buffer.train_idx) + len(model_buffer.valid_idx)}, ' + \ + f'ep: {ep}, it: {it}\n' + hyp_str_all + + # # For Niky: perform test! ----------------------------- + # env_test.env.set_usage('test') + # mn_test, mn_test_std, mn_length, mn_success = test_agent(env_test, action_op, num_games=50) + # # perform test! 
----------------------------- + # + # # plot results of test ----------------------------- + # plot_results(env_test, label=label) + # + # # saving test buffer ----------------------------- + # # env_test.save_current_buffer(info=label) + # print(' Test score: ', np.round(mn_test, 2), np.round(mn_test_std, 2), + # np.round(mn_length, 2), np.round(mn_success, 2)) + # + # # save the data for plotting the tests + # tests_all.append(mn_test) + # tests_std_all.append(mn_test_std) + # + # # perform test end! ----------------------------- + # + # + + env_test.env.set_usage('default') + + # Start of test on the simulation: + print('Simulated test:', end=' ** ') + + sim_rewards = [] + for i in range(num_ensemble_models): + sim_m_env = NetworkEnv(make_env(), model_op, None, i + 1) + mn_sim_rew, _, _, _ = test_agent(sim_m_env, action_op, num_games=25) + mn_sim_rew = np.round(mn_sim_rew, 2) + sim_rewards.append(mn_sim_rew) + print(mn_sim_rew, end=' ** ') + + print("") + # Stop of test on the simulation + + # collecting/summarizing the data for plotting the progress ------------------- + entropy_all.append(np.mean(std_vals)) + step_counts_all.append(step_count) + + sim_rewards = np.array(sim_rewards) + sim_rewards_all.append(np.mean(sim_rewards)) + sim_rewards_std_all.append(np.std(sim_rewards)) + + batch_rews_all.append(np.mean(batch_rew)) + batch_rews_std_all.append(np.std(batch_rew)) + + data = dict(sim_rewards_all=[sim_rewards_all, sim_rewards_std_all], + entropy_all=entropy_all, + step_counts_all=step_counts_all, + batch_rews_all=[batch_rews_all, batch_rews_std_all], + tests_all=[tests_all, tests_std_all], # data of test with real env + info=label) + + # save the data for plotting the progress ------------------- + save_data(data=data) + + # plotting the progress ------------------- + plot_observables(data=data, label=label) + + # stop training if the policy hasn't improved + if np.sum(best_sim_test >= sim_rewards) > int(num_ensemble_models * 2/3): + # or (len(sim_rewards[sim_rewards >= 990]) > int(num_ensemble_models * 0.7)): + if it > delay_before_convergence_check and ep < num_epochs - 1: + # Test the entropy measure as convergence criterion + # if np.diff(entropy_all)[-1] < 0: + # print('break') + # break + print('Break...') + break + else: + best_sim_test = sim_rewards + # Final verification: + # env_final = FelLocalEnv(tango=tango) + # env_final.test = True + # env.TOTAL_COUNTER = len(model_buffer.train_idx) + len(model_buffer.valid_idx) + # mn_test, mn_test_std, mn_length = test_agent(env_final, action_op, num_games=100) + # plot_results(env_final, 'ME-TRPO', save_name='Fermi') + + + # For Niky: perform final test! ----------------------------- + env_test.env.set_usage('final') + mn_test, mn_test_std, mn_length, _ = test_agent(env_test, action_op, num_games=50) + + label = f'Verification : total {total_iterations}, ' + \ + f'data points: {len(model_buffer.train_idx) + len(model_buffer.valid_idx)}, ' + \ + f'ep: {ep}, it: {it}\n' + \ + f'rew: {mn_test}, std: {mn_test_std}' + plot_results(env_test, label=label) + + env_test.save_current_buffer(info=label) + + # For Niky: end final test! ----------------------------- + + # env_test.env.set_usage('default') + + # closing environments.. 
+ for env in envs: + env.close() + file_writer.close() + + +if __name__ == '__main__': + METRPO('', hidden_sizes=hidden_sizes, cr_lr=cr_lr, gamma=gamma, lam=lam, num_epochs=num_epochs, + steps_per_env=steps_per_env, + number_envs=1, critic_iter=critic_iter, delta=delta, algorithm='AETRPO', conj_iters=conj_iters, + minibatch_size=minibatch_size, + mb_lr_start=mb_lr, model_batch_size=model_batch_size, simulated_steps=simulated_steps, + num_ensemble_models=num_ensemble_models, model_iter=model_iter, init_random_steps=init_random_steps, + method=method) + # plot the results + +# important notes: +# Storage +# Hyperparameters +# Scaling + +# Changes: +# No init steps and less step per env 31 instead of 51 and the number of iterations is dynamic diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/Last_run.txt b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/Last_run.txt new file mode 100644 index 0000000..7ac041b --- /dev/null +++ b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/Last_run.txt @@ -0,0 +1,3826 @@ +(tf1) niky@pc-hptest:~/FERMI/2020_10_06$ python -i ME_TRPO_stable.py +Number of epoches: 28 +created folder : Data_logging/ME_TRPO_stable/mts_5/-nr_steps_15-cr_lr0.0001-crit_it_15-d_0.05-conj_iters_15-n_ep_28-mini_bs_500-m_bs_5-mb_lr_0.001-sim_steps_2000-m_iter_15-ensnr_9-init_45/ +WARNING:tensorflow:From ME_TRPO_stable.py:652: The name tf.reset_default_graph is deprecated. Please use tf.compat.v1.reset_default_graph instead. + +env_testenv_testenv_testenv_test +WARNING:tensorflow:From ME_TRPO_stable.py:681: The name tf.placeholder is deprecated. Please use tf.compat.v1.placeholder instead. + +WARNING:tensorflow:From ME_TRPO_stable.py:707: The name tf.variable_scope is deprecated. Please use tf.compat.v1.variable_scope instead. + +WARNING:tensorflow:From ME_TRPO_stable.py:246: dense (from tensorflow.python.layers.core) is deprecated and will be removed in a future version. +Instructions for updating: +Use keras.layers.Dense instead. +WARNING:tensorflow:From /home/niky/anaconda3/envs/tf1/lib/python3.7/site-packages/tensorflow_core/python/layers/core.py:187: Layer.apply (from tensorflow.python.keras.engine.base_layer) is deprecated and will be removed in a future version. +Instructions for updating: +Please use `layer.__call__` method instead. +WARNING:tensorflow:From ME_TRPO_stable.py:710: The name tf.get_variable is deprecated. Please use tf.compat.v1.get_variable instead. + +WARNING:tensorflow:From ME_TRPO_stable.py:718: The name tf.random_normal is deprecated. Please use tf.random.normal instead. + +WARNING:tensorflow:From ME_TRPO_stable.py:736: The name tf.train.AdamOptimizer is deprecated. Please use tf.compat.v1.train.AdamOptimizer instead. + +WARNING:tensorflow:From /home/niky/anaconda3/envs/tf1/lib/python3.7/site-packages/tensorflow_core/python/ops/math_grad.py:1375: where (from tensorflow.python.ops.array_ops) is deprecated and will be removed in a future version. +Instructions for updating: +Use tf.where in 2.0, which has the same broadcast rule as np.where +WARNING:tensorflow:From ME_TRPO_stable.py:740: The name tf.get_collection is deprecated. Please use tf.compat.v1.get_collection instead. + +WARNING:tensorflow:From ME_TRPO_stable.py:740: The name tf.GraphKeys is deprecated. Please use tf.compat.v1.GraphKeys instead. + +WARNING:tensorflow:Variable += will be deprecated. Use variable.assign_add if you want assignment to the variable value or 'x = x + y' if you want a new python Tensor object. 
+dkl_matrix_product () + +WARNING:tensorflow:Variable += will be deprecated. Use variable.assign_add if you want assignment to the variable value or 'x = x + y' if you want a new python Tensor object. + + + + + + +WARNING:tensorflow:Variable += will be deprecated. Use variable.assign_add if you want assignment to the variable value or 'x = x + y' if you want a new python Tensor object. +Time: 7_8_35_59 +WARNING:tensorflow:From ME_TRPO_stable.py:944: The name tf.summary.scalar is deprecated. Please use tf.compat.v1.summary.scalar instead. + +WARNING:tensorflow:From ME_TRPO_stable.py:953: The name tf.summary.histogram is deprecated. Please use tf.compat.v1.summary.histogram instead. + +WARNING:tensorflow:From ME_TRPO_stable.py:958: The name tf.summary.merge_all is deprecated. Please use tf.compat.v1.summary.merge_all instead. + +WARNING:tensorflow:From ME_TRPO_stable.py:967: The name tf.summary.FileWriter is deprecated. Please use tf.compat.v1.summary.FileWriter instead. + +WARNING:tensorflow:From ME_TRPO_stable.py:968: The name tf.get_default_graph is deprecated. Please use tf.compat.v1.get_default_graph instead. + +WARNING:tensorflow:From ME_TRPO_stable.py:973: The name tf.Session is deprecated. Please use tf.compat.v1.Session instead. + +2020-10-07 08:36:00.111724: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcuda.so.1 +2020-10-07 08:36:00.141857: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:983] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero +2020-10-07 08:36:00.142324: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1618] Found device 0 with properties: +name: Quadro K2200 major: 5 minor: 0 memoryClockRate(GHz): 1.124 +pciBusID: 0000:01:00.0 +2020-10-07 08:36:00.142473: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcudart.so.10.0 +2020-10-07 08:36:00.143326: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcublas.so.10.0 +2020-10-07 08:36:00.144046: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcufft.so.10.0 +2020-10-07 08:36:00.144227: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcurand.so.10.0 +2020-10-07 08:36:00.145134: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcusolver.so.10.0 +2020-10-07 08:36:00.145803: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcusparse.so.10.0 +2020-10-07 08:36:00.147902: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcudnn.so.7 +2020-10-07 08:36:00.147991: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:983] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero +2020-10-07 08:36:00.148499: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:983] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero +2020-10-07 08:36:00.148775: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1746] Adding visible gpu devices: 0 +2020-10-07 08:36:00.149010: I tensorflow/core/platform/cpu_feature_guard.cc:142] Your CPU supports instructions that this TensorFlow 
binary was not compiled to use: SSE4.1 SSE4.2 AVX AVX2 FMA +2020-10-07 08:36:00.153045: I tensorflow/core/platform/profile_utils/cpu_utils.cc:94] CPU Frequency: 3399905000 Hz +2020-10-07 08:36:00.153238: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x556c62e2b9e0 initialized for platform Host (this does not guarantee that XLA will be used). Devices: +2020-10-07 08:36:00.153250: I tensorflow/compiler/xla/service/service.cc:176] StreamExecutor device (0): Host, Default Version +2020-10-07 08:36:00.153353: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:983] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero +2020-10-07 08:36:00.153687: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1618] Found device 0 with properties: +name: Quadro K2200 major: 5 minor: 0 memoryClockRate(GHz): 1.124 +pciBusID: 0000:01:00.0 +2020-10-07 08:36:00.153718: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcudart.so.10.0 +2020-10-07 08:36:00.153730: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcublas.so.10.0 +2020-10-07 08:36:00.153742: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcufft.so.10.0 +2020-10-07 08:36:00.153752: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcurand.so.10.0 +2020-10-07 08:36:00.153763: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcusolver.so.10.0 +2020-10-07 08:36:00.153774: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcusparse.so.10.0 +2020-10-07 08:36:00.153785: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcudnn.so.7 +2020-10-07 08:36:00.153826: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:983] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero +2020-10-07 08:36:00.154302: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:983] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero +2020-10-07 08:36:00.154621: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1746] Adding visible gpu devices: 0 +2020-10-07 08:36:00.154648: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcudart.so.10.0 +2020-10-07 08:36:00.178604: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1159] Device interconnect StreamExecutor with strength 1 edge matrix: +2020-10-07 08:36:00.178626: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1165] 0 +2020-10-07 08:36:00.178632: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1178] 0: N +2020-10-07 08:36:00.178766: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:983] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero +2020-10-07 08:36:00.179110: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:983] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero +2020-10-07 08:36:00.179348: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:983] successful NUMA node read from 
SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero +2020-10-07 08:36:00.179570: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1304] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 3611 MB memory) -> physical GPU (device: 0, name: Quadro K2200, pci bus id: 0000:01:00.0, compute capability: 5.0) +2020-10-07 08:36:00.180848: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x556c64af3290 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices: +2020-10-07 08:36:00.180863: I tensorflow/compiler/xla/service/service.cc:176] StreamExecutor device (0): Quadro K2200, Compute Capability 5.0 +WARNING:tensorflow:From ME_TRPO_stable.py:975: The name tf.global_variables_initializer is deprecated. Please use tf.compat.v1.global_variables_initializer instead. + +Env batch size: 15 Batch size: 15 +============================ 0 ============================ +step 1 state [0.77383333 0.50233333 0.12733333 0.5935 ] a [-0.05515504 -0.01065683 0.04487708 -0.03411245] r -0.6816039691269542 +step 2 state [0.71533333 0.42266667 0.114 0.54983333] a [-0.05847284 -0.07958695 -0.01329592 -0.04355298] r -0.501393114884196 +step 3 state [0.68816667 0.50433333 0.07016667 0.48 ] a [-0.0270573 0.08178541 -0.04371226 -0.06980122] r -0.9128878775933583 +step 4 state [0.71633333 0.5245 0.0325 0.47433333] a [ 0.02826671 0.02020715 -0.03762441 -0.00562977] r -0.9759565177879035 +step 5 state [0.65266667 0.45333333 0.09916667 0.52316667] a [-0.06360537 -0.07100707 0.0667957 0.04899376] r -0.8748885249722018 +step 6 state [0.70933333 0.50583333 0.18083333 0.536 ] a [0.05676161 0.05253458 0.08182581 0.01287897] r -0.5346316189510176 +step 7 state [0.7615 0.49266667 0.102 0.52833333] a [ 0.05229449 -0.01311369 -0.07875867 -0.00764389] r -0.9172962220812412 +step 8 state [0.69566667 0.5455 0.13483333 0.53916667] a [-0.06577899 0.05287001 0.03295462 0.0108809 ] r -0.8179269583976627 +step 9 state [0.658 0.6285 0.0745 0.55833333] a [-0.03762878 0.08307896 -0.06032632 0.01923595] r -0.969162616887803 +step 10 state [0.6555 0.61266667 0.11233333 0.52866667] a [-0.00249395 -0.01582883 0.03797485 -0.02953101] r -0.9252142583957406 +step 1 state [0.87783333 0.66233333 0.50683333 0.0915 ] a [-0.03017227 0.07453697 0.06978473 0.0523074 ] r -0.551275124233533 +step 2 state [0.80016667 0.73616667 0.58183333 0.1425 ] a [-0.07765382 0.07389548 0.07507319 0.05109851] r -0.3753260232485953 +step 3 state [0.797 0.81383333 0.56783333 0.11216667] a [-0.00312033 0.0777932 -0.01383682 -0.03019331] r -0.5258031834702579 +step 4 state [0.71633333 0.73683333 0.49333333 0.05 ] a [-0.08059286 -0.07696968 -0.07448506 -0.06200653] r -0.878114533122619 +step 5 state [0.6385 0.69116667 0.5005 0. ] a [-0.07776947 -0.04559648 0.00731815 -0.05356237] r -0.9610608243614431 +step 6 state [0.585 0.63266667 0.531 0. 
] a [-0.05344254 -0.0583978 0.03050219 -0.00516524] r -0.9687439237407531 +step 7 state [0.62966667 0.66066667 0.52116667 0.01583333] a [ 0.04470799 0.02813568 -0.00978864 0.01587091] r -0.9378423702963535 +step 8 state [0.62316667 0.73783333 0.46233333 0.0835 ] a [-0.00639056 0.07721115 -0.0586981 0.06780219] r -0.897203053158666 +step 9 state [0.62266667 0.75316667 0.493 0.02966667] a [-0.00034586 0.01541641 0.03077633 -0.05375822] r -0.9524381835104636 +step 10 state [0.68333333 0.75016667 0.42116667 0.007 ] a [ 0.06078091 -0.0029852 -0.07177637 -0.02254478] r -0.9705203439039172 +step 1 state [0.99183333 0.52066667 0.22816667 1. ] a [-0.00465608 -0.05341919 -0.05412003 0.07485493] r -0.9825766806281452 +step 2 state [1. 0.45966667 0.29933333 1. ] a [ 0.02594705 -0.06083408 0.07129764 0.05696527] r -0.9866056246376356 +step 3 state [0.92533333 0.41016667 0.36133333 1. ] a [-0.07453733 -0.04938608 0.06208361 0.02897468] r -0.9824016759297428 +step 4 state [0.8915 0.44916667 0.381 0.95166667] a [-0.03378791 0.03906885 0.01972338 -0.04825013] r -0.9683969427442473 +step 5 state [0.94183333 0.37566667 0.349 0.90283333] a [ 0.0503441 -0.07348215 -0.03196278 -0.04877208] r -0.9153130915596717 +step 6 state [1. 0.44383333 0.37833333 0.83866667] a [ 0.07869847 0.06823643 0.02940903 -0.06413565] r -0.6226464027510876 +step 7 state [1. 0.37566667 0.39983333 0.90566667] a [ 0.07633011 -0.06804778 0.02163274 0.06715009] r -0.8689537118976468 +step 8 state [0.97316667 0.401 0.47066667 0.8845 ] a [-0.02670045 0.02546738 0.07085975 -0.02100275] r -0.9044087326577651 +step 9 state [1. 0.467 0.49066667 0.82633333] a [ 0.04522736 0.06602745 0.02002968 -0.05815665] r -0.6126709855370955 +step 10 state [0.979 0.4895 0.48383333 0.83166667] a [-0.02084671 0.02264009 -0.00682154 0.00537303] r -0.719468412659382 +step 1 state [0.08116667 0.98483333 0.40333333 0.737 ] a [ 0.02664366 -0.00391276 -0.00756751 -0.06253375] r -0.23053131005136673 +step 2 state [0.0665 0.947 0.37766667 0.816 ] a [-0.01458796 -0.03781738 -0.02554319 0.07906238] r -0.38547296633061623 +step 3 state [0.019 0.97733333 0.2995 0.87183333] a [-0.04740324 0.03047382 -0.07803167 0.05589716] r -0.8446434483418115 +step 4 state [0.0525 1. 
0.31466667 0.82466667] a [ 0.03360872 0.0776573 0.0153027 -0.0471091 ] r -0.7092699308722978 +step 5 state [0.089 0.92166667 0.301 0.78816667] a [ 0.03653771 -0.07818834 -0.01354867 -0.03646248] r -0.5060205486437506 +step 6 state [0.02333333 0.90933333 0.35766667 0.72316667] a [-0.06562594 -0.01227563 0.05676292 -0.06493144] r -0.22735535513949556 +step 7 state [0.10433333 0.86483333 0.3945 0.74983333] a [ 0.08110874 -0.04445111 0.03690193 0.02681534] r -0.14465352301940793 +step 8 state [0.03383333 0.89766667 0.35533333 0.81666667] a [-0.07037388 0.03288072 -0.03911563 0.06697306] r -0.2627195171992097 +step 9 state [0.08683333 0.88083333 0.40866667 0.76616667] a [ 0.05304398 -0.01679447 0.0533809 -0.05036415] r -0.10553877983114468 +step 10 state [0.0905 0.86616667 0.4795 0.82033333] a [ 0.00375418 -0.01458753 0.07097231 0.05423008] r -0.10801581832432772 +step 1 state [0.33916667 0.91033333 0.28566667 0.60433333] a [-0.06386851 -0.08140128 0.06885841 0.00206625] r -0.5123042051448277 +step 2 state [0.39633333 0.84266667 0.33616667 0.53133333] a [ 0.05718035 -0.06752555 0.0505711 -0.07296197] r -0.2732587212968628 +step 3 state [0.41883333 0.92066667 0.319 0.55783333] a [ 0.02258241 0.07813019 -0.01709463 0.0265978 ] r -0.38733061280175163 +step 4 state [0.37116667 0.9855 0.4 0.5975 ] a [-0.04752221 0.0649776 0.08110527 0.0397791 ] r -0.17291911278483563 +step 5 state [0.39466667 0.94416667 0.39583333 0.55883333] a [ 0.02358111 -0.04130727 -0.00403184 -0.03865722] r -0.193970785104694 +Saved current buffer default +Ep:0 Rew:-4.11 -- Step:45 +Train set: 36 Valid set: 9 +Log Std policy: [1. 1. 1. 1.] 1.0 +maximum: 0.7889224403377106 +mb_lr: 0.001 +2020-10-07 08:37:05.145852: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcublas.so.10.0 +Model:0, iter:26 -- Old Val loss:0.474854 New Val loss:0.042130 -- New Train loss:0.025215 +WARNING:tensorflow:From ME_TRPO_stable.py:1152: The name tf.Summary is deprecated. Please use tf.compat.v1.Summary instead. 
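The "Model:0, iter:26 -- Old Val loss ... New Val loss ... New Train loss" record above, and the Model:1 through Model:8 records that follow, summarize one round of dynamics-model fitting per ensemble member: the validation loss before this round ("Old Val loss"), the validation loss after it ("New Val loss"), the final training loss, and the number of fitting iterations used on the current buffer split (e.g. "Train set: 36 Valid set: 9"). The sketch below is only an illustration of how such per-model records could be produced; it assumes a hypothetical fit_epoch/val_loss model interface and patience-based early stopping on the held-out split, and it is not the code in ME_TRPO_stable.py.

# Illustrative sketch only -- not the actual ME_TRPO_stable.py implementation.
# Assumes each ensemble member exposes hypothetical fit_epoch(train) and
# val_loss(valid) methods; early stopping uses a simple patience counter.
def train_ensemble(models, train, valid, max_iters=200, patience=5):
    for k, model in enumerate(models):
        old_val = model.val_loss(valid)          # validation loss before this round
        best_val, since_best, it = float("inf"), 0, 0
        for it in range(1, max_iters + 1):
            train_loss = model.fit_epoch(train)  # one pass over the replay data
            val_loss = model.val_loss(valid)
            if val_loss < best_val:              # track the best validation loss so far
                best_val, since_best = val_loss, 0
            else:
                since_best += 1
            if since_best >= patience:           # stop once validation stops improving
                break
        print(f"Model:{k}, iter:{it} -- Old Val loss:{old_val:.6f} "
              f"New Val loss:{val_loss:.6f} -- New Train loss:{train_loss:.6f}")

With buffers this small, stopping each member on its own validation split rather than on training loss is what keeps the "New Train loss" values far below the "New Val loss" values without letting the models overfit completely, which is presumably why the logged iteration counts vary so widely between members.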
+ +Model:1, iter:25 -- Old Val loss:0.429767 New Val loss:0.029031 -- New Train loss:0.012858 +Model:2, iter:21 -- Old Val loss:0.475154 New Val loss:0.028172 -- New Train loss:0.015545 +Model:3, iter:22 -- Old Val loss:0.494809 New Val loss:0.027610 -- New Train loss:0.016759 +Model:4, iter:21 -- Old Val loss:0.412139 New Val loss:0.034971 -- New Train loss:0.014043 +Model:5, iter:18 -- Old Val loss:0.428813 New Val loss:0.042377 -- New Train loss:0.015077 +Model:6, iter:22 -- Old Val loss:0.377994 New Val loss:0.029579 -- New Train loss:0.024371 +Model:7, iter:24 -- Old Val loss:0.490942 New Val loss:0.040260 -- New Train loss:0.024728 +Model:8, iter:29 -- Old Val loss:0.278464 New Val loss:0.053221 -- New Train loss:0.018404 + Policy it 0..Sim ep: 212 +Log Std policy inner: 0.99750084 + Policy it 1..Sim ep: 207 +Log Std policy inner: 1.0082792 + Policy it 2..Sim ep: 215 +Log Std policy inner: 0.97984093 +Iterations: 3 +Simulated test: ** -5.813951534347143 ** -5.4350458119159155 ** -4.5849144306522795 ** -4.7916208831878615 ** -4.812536174015549 ** -4.98183790332987 ** -4.389322760710493 ** -4.480410088832723 ** -4.444944402886322 ** + Policy it 3..Sim ep: 217 +Log Std policy inner: 0.9189714 + Policy it 4..Sim ep: 217 +Log Std policy inner: 0.8443409 + Policy it 5..Sim ep: 217 +Log Std policy inner: 0.74179363 +Iterations: 6 +Simulated test: ** -4.80905131772699 ** -3.4353258156655646 ** -3.9230968470239898 ** -3.682867781899986 ** -3.3803388585677023 ** -3.373952116947621 ** -3.5064990504435265 ** -3.7577251352986787 ** -3.520490172005666 ** + Policy it 6..Sim ep: 223 +Log Std policy inner: 0.63291514 + Policy it 7..Sim ep: 223 +Log Std policy inner: 0.5307928 + Policy it 8..Sim ep: 241 +Log Std policy inner: 0.43903077 +Iterations: 9 +Simulated test: ** -4.350149533637014 ** -2.595009361169068 ** -3.2949609169061294 ** -2.6912391276485867 ** -2.947102738520625 ** -3.132098125780758 ** -2.6767915086069842 ** -2.848493107750546 ** -2.719590749454219 ** + Policy it 9..Sim ep: 238 +Log Std policy inner: 0.34795994 + Policy it 10..Sim ep: 241 +Log Std policy inner: 0.28147912 + Policy it 11..Sim ep: 252 +Log Std policy inner: 0.22793701 +Iterations: 12 +Simulated test: ** -4.445698681476934 ** -2.796153685345489 ** -2.7351853413549545 ** -2.62563913162885 ** -2.346199430981069 ** -2.6019813301420074 ** -2.4186448338610353 ** -2.7036700334957278 ** -2.477746718698181 ** + Policy it 12..Sim ep: 241 +Log Std policy inner: 0.15786104 + Policy it 13..Sim ep: 236 +Log Std policy inner: 0.10983217 + Policy it 14..Sim ep: 241 +Log Std policy inner: 0.092568964 +Iterations: 15 +Simulated test: ** -4.442844196392253 ** -2.5933409737993496 ** -2.416870782780461 ** -2.223882289406611 ** -2.2650687457225285 ** -2.288969077602378 ** -2.0437484242615755 ** -1.979220107368892 ** -2.3076159590733005 ** + Policy it 15..Sim ep: 255 +Log Std policy inner: 0.041406915 + Policy it 16..Sim ep: 244 +Log Std policy inner: -0.001682654 + Policy it 17..Sim ep: 249 +Log Std policy inner: -0.0632378 +Iterations: 18 +Simulated test: ** -4.6094849921180865 ** -2.6579150981345445 ** -2.2242233906534965 ** -2.299147743405192 ** -1.9656244806881296 ** -2.1541195743053687 ** -2.168244373906637 ** -2.130592123712995 ** -2.0662274413224075 ** + Policy it 18..Sim ep: 251 +Log Std policy inner: -0.099277586 + Policy it 19..Sim ep: 251 +Log Std policy inner: -0.15458497 + Policy it 20..Sim ep: 246 +Log Std policy inner: -0.1820266 +Iterations: 21 +Simulated test: ** -4.440477866844449 ** -1.9975197314168327 ** 
-2.3047749877907338 ** -2.0569594959320967 ** -2.0100735856452956 ** -2.2583842558780454 ** -2.071751360089984 ** -1.9148221116838977 ** -1.940669871901191 ** + Policy it 21..Sim ep: 245 +Log Std policy inner: -0.28026158 + Policy it 22..Sim ep: 254 +Log Std policy inner: -0.3130417 + Policy it 23..Sim ep: 249 +Log Std policy inner: -0.3294967 +Iterations: 24 +Simulated test: ** -4.790292178649688 ** -2.3808009936718735 ** -2.346426137216622 ** -1.922871613399475 ** -2.114387945192575 ** -2.2514308138890193 ** -1.9641614879388363 ** -2.1605899400776254 ** -1.9023837791822735 ** + Policy it 24..Sim ep: 261 +Log Std policy inner: -0.38643056 + Policy it 25..Sim ep: 257 +Log Std policy inner: -0.4436145 + Policy it 26..Sim ep: 255 +Log Std policy inner: -0.4581002 +Iterations: 27 +Simulated test: ** -4.516041028958425 ** -2.4756270666833733 ** -2.1952644387236795 ** -2.21099977086531 ** -2.013950303221354 ** -2.421707143309177 ** -2.3136673495091964 ** -2.061446057197172 ** -1.9655282605905087 ** + Policy it 27..Sim ep: 259 +Log Std policy inner: -0.486961 + Policy it 28..Sim ep: 254 +Log Std policy inner: -0.51484984 + Policy it 29..Sim ep: 264 +Log Std policy inner: -0.53455675 +Iterations: 30 +Simulated test: ** -4.704023914124118 ** -2.075625673176837 ** -2.2343514259019868 ** -2.062769030935888 ** -1.966803817055188 ** -2.3180057604127797 ** -2.030530234638718 ** -2.059871277513448 ** -1.9533480016095564 ** + Policy it 30..Sim ep: 251 +Log Std policy inner: -0.5764799 + Policy it 31..Sim ep: 258 +Log Std policy inner: -0.5738221 + Policy it 32..Sim ep: 256 +Log Std policy inner: -0.597875 +Iterations: 33 +Simulated test: ** -4.753796093875717 ** -2.1520649109469376 ** -2.0630353468313114 ** -2.0871067716903053 ** -1.753921146672219 ** -2.3108373788007883 ** -1.9248513036733492 ** -2.0574561202689075 ** -1.8846574860299006 ** + Policy it 33..Sim ep: 254 +Log Std policy inner: -0.6208415 + Policy it 34..Sim ep: 253 +Log Std policy inner: -0.6357076 + Policy it 35..Sim ep: 252 +Log Std policy inner: -0.6686546 +Iterations: 36 +Simulated test: ** -4.691010219935561 ** -2.595345248010708 ** -2.2209728623571574 ** -2.0818441512587014 ** -2.0168619881826455 ** -2.180099321191956 ** -2.1125674433948007 ** -1.9143464248022064 ** -1.9038672729278914 ** + Policy it 36..Sim ep: 260 +Log Std policy inner: -0.6965925 + Policy it 37..Sim ep: 270 +Log Std policy inner: -0.7009095 + Policy it 38..Sim ep: 256 +Log Std policy inner: -0.69972765 +Iterations: 39 +Simulated test: ** -4.757820628066401 ** -2.4809979062271306 ** -2.0540355230402203 ** -2.2846501090738456 ** -1.7376177821744931 ** -2.004254025100963 ** -2.0271791065018623 ** -1.8540983201481867 ** -1.784735689368681 ** + Policy it 39..Sim ep: 268 +Log Std policy inner: -0.7211797 + Policy it 40..Sim ep: 259 +Log Std policy inner: -0.74159396 + Policy it 41..Sim ep: 253 +Log Std policy inner: -0.7527119 +Iterations: 42 +Simulated test: ** -4.755625585476082 ** -2.098606061207829 ** -2.151980871070409 ** -2.0900144930015085 ** -2.0207771267002683 ** -2.2070407795050415 ** -2.074162896030466 ** -2.1092706402327166 ** -1.8610466724727304 ** + Policy it 42..Sim ep: 264 +Log Std policy inner: -0.74640507 + Policy it 43..Sim ep: 254 +Log Std policy inner: -0.75069773 + Policy it 44..Sim ep: 260 +Log Std policy inner: -0.75360024 +Iterations: 45 +Simulated test: ** -4.695246048333356 ** -2.446256172281719 ** -2.3007449507759885 ** -2.135886157938512 ** -2.204078751409834 ** -2.359917959490558 ** -1.9394522763608257 ** -1.8733140926621854 ** 
-1.8124838591096342 ** + Policy it 45..Sim ep: 265 +Log Std policy inner: -0.7686681 + Policy it 46..Sim ep: 268 +Log Std policy inner: -0.77485 + Policy it 47..Sim ep: 269 +Log Std policy inner: -0.7789106 +Iterations: 48 +Simulated test: ** -4.618843174544163 ** -2.4622883176300094 ** -2.2394876792409923 ** -2.245783177993435 ** -1.908770011162851 ** -2.04197716580471 ** -1.8518042365089058 ** -1.7343105586261662 ** -1.8781765684555285 ** + Policy it 48..Sim ep: 266 +Log Std policy inner: -0.7910764 + Policy it 49..Sim ep: 267 +Log Std policy inner: -0.8005116 +============================ 1 ============================ +step 1 state [0.56166667 0.49483333 0.02866667 0.80516667] a [-0.08333333 -0.0051256 -0.015651 -0.08333333] r -0.9964922052957922 +step 2 state [0.47833333 0.4655 0.112 0.735 ] a [-0.08333333 -0.02925509 0.08333333 -0.07006303] r -0.9389998462421647 +step 3 state [0.43016667 0.54883333 0.19533333 0.65166667] a [-0.04811342 0.08333333 0.08333333 -0.08333333] r -0.5595040876691946 +step 4 state [0.407 0.58 0.27266667 0.56833333] a [-0.02311062 0.03121696 0.07749555 -0.08333333] r -0.18473287429267982 +step 5 state [0.32366667 0.64266667 0.30883333 0.5865 ] a [-0.08333333 0.06282984 0.03630917 0.01829877] r -0.1423477906640762 +step 6 state [0.407 0.64116667 0.379 0.5975 ] a [ 0.08333333 -0.00136524 0.07029532 0.01105594] r -0.037061248749851616 +step 1 state [0.6505 0.8145 0.073 0.22183333] a [-0.07064401 0.08333333 -0.03446369 -0.0530035 ] r -0.9985075040645419 +step 2 state [0.56716667 0.85466667 0. 0.1385 ] a [-0.08333333 0.04019338 -0.08333333 -0.08333333] r -0.9999883294651919 +step 3 state [0.48383333 0.848 0. 0.12683333] a [-0.08333333 -0.00655757 -0.08333333 -0.0116392 ] r -1.000012846734026 +step 4 state [4.00500000e-01 8.13666667e-01 5.00000000e-04 4.35000000e-02] a [-0.08333333 -0.03420589 0.00051012 -0.08333333] r -1.000101108901828 +step 5 state [0.31716667 0.86133333 0. 0.05016667] a [-0.08333333 0.04781291 -0.08333333 0.00676537] r -1.0000863985405275 +step 6 state [0.23383333 0.8985 0.08333333 0.06333333] a [-0.08333333 0.03728322 0.08333333 0.01318203] r -1.000253115968598 +step 7 state [0.25983333 0.98183333 0.082 0.0745 ] a [ 0.02602102 0.08333333 -0.00119838 0.01117611] r -1.000096205448061 +step 8 state [0.2105 1. 0.15033333 0.1245 ] a [-0.0492541 0.0564478 0.06835748 0.05015472] r -0.9997578671381534 +step 9 state [0.218 1. 
0.2325 0.07366667] a [ 0.00752884 0.01965125 0.08231984 -0.05083318] r -0.9990910003349442 +Saved current buffer default +Ep:1 Rew:0.28 -- Step:60 +Train set: 48 Valid set: 12 +Log Std policy: [-0.4748453 -0.7897477 -2.556345 0.6188916] -0.8005116 +maximum: 0.9258775025002968 +mb_lr: 0.001 +Model:0, iter:29 -- Old Val loss:0.112086 New Val loss:0.073301 -- New Train loss:0.016899 +Model:1, iter:28 -- Old Val loss:0.094141 New Val loss:0.065526 -- New Train loss:0.014356 +Model:2, iter:25 -- Old Val loss:0.085518 New Val loss:0.058330 -- New Train loss:0.025114 +Model:3, iter:19 -- Old Val loss:0.074101 New Val loss:0.085280 -- New Train loss:0.014158 +Model:4, iter:19 -- Old Val loss:0.102135 New Val loss:0.122201 -- New Train loss:0.020396 +Model:5, iter:20 -- Old Val loss:0.094666 New Val loss:0.114598 -- New Train loss:0.017929 +Model:6, iter:127 -- Old Val loss:0.093814 New Val loss:0.030728 -- New Train loss:0.002048 +Model:7, iter:19 -- Old Val loss:0.100550 New Val loss:0.089229 -- New Train loss:0.019700 +Model:8, iter:19 -- Old Val loss:0.097593 New Val loss:0.069860 -- New Train loss:0.020756 + Policy it 0..Sim ep: 357 +Log Std policy inner: -0.8228998 + Policy it 1..Sim ep: 367 +Log Std policy inner: -0.8225193 + Policy it 2..Sim ep: 378 +Log Std policy inner: -0.8227291 +Iterations: 53 +Simulated test: ** -1.4356739869015291 ** -1.2715794191067107 ** -1.456037475685589 ** -1.5614465325139464 ** -1.3973183986265212 ** -1.5403147627908038 ** -1.8680316753464286 ** -1.7754986609614571 ** -2.2863914612459486 ** + Policy it 3..Sim ep: 365 +Log Std policy inner: -0.8356264 + Policy it 4..Sim ep: 367 +Log Std policy inner: -0.81415564 + Policy it 5..Sim ep: 356 +Log Std policy inner: -0.82227933 +Iterations: 56 +Simulated test: ** -1.1810306875081733 ** -1.0963658653129824 ** -1.2212186978646786 ** -1.243115461312118 ** -1.5792233621352352 ** -1.792266210722446 ** -1.6280953302257695 ** -1.8913130949158221 ** -1.6392576217255554 ** + Policy it 6..Sim ep: 389 +Log Std policy inner: -0.82725585 + Policy it 7..Sim ep: 356 +Log Std policy inner: -0.8302105 + Policy it 8..Sim ep: 357 +Log Std policy inner: -0.8418595 +Iterations: 59 +Simulated test: ** -1.2521601735136938 ** -1.031514164954424 ** -1.2726646229834295 ** -1.2161409711511806 ** -1.3624348997347988 ** -1.7406026489287614 ** -1.8179666869505309 ** -1.576365414787142 ** -1.7274842910282313 ** + Policy it 9..Sim ep: 384 +Log Std policy inner: -0.84265697 + Policy it 10..Sim ep: 373 +Log Std policy inner: -0.8510432 + Policy it 11..Sim ep: 366 +Log Std policy inner: -0.8435656 +Iterations: 62 +Simulated test: ** -1.08018119253451 ** -0.7477249265252612 ** -1.540730379585875 ** -1.386401754996332 ** -1.4290346975432475 ** -1.651638292913558 ** -1.8089798436767888 ** -1.9195685265479552 ** -1.642327350312844 ** + Policy it 12..Sim ep: 359 +Log Std policy inner: -0.85092294 + Policy it 13..Sim ep: 364 +Log Std policy inner: -0.839536 + Policy it 14..Sim ep: 376 +Log Std policy inner: -0.83172286 +Iterations: 65 +Simulated test: ** -1.2846757329907268 ** -1.0609639463271014 ** -1.4716011329484173 ** -1.0657471617066767 ** -2.030981143456884 ** -1.6430655062710866 ** -1.5726976310613099 ** -1.770027236422611 ** -1.9059787017875351 ** + Policy it 15..Sim ep: 374 +Log Std policy inner: -0.83657694 + Policy it 16..Sim ep: 365 +Log Std policy inner: -0.8346825 + Policy it 17..Sim ep: 389 +Log Std policy inner: -0.83787876 +Iterations: 68 +Simulated test: ** -1.6306338090857024 ** -1.0673520929994993 ** -1.4074585617752746 ** 
-1.4022245624358767 ** -1.7568369447859005 ** -1.9210647093484294 ** -1.5750881761079654 ** -2.2234737153036988 ** -2.045826443200931 ** +break +============================ 2 ============================ +step 1 state [0.16883333 0.769 0.683 0.4745 ] a [-0.03811318 0.07583613 0.07373738 -0.0167883 ] r -0.14800441635618333 +step 2 state [0.095 0.85233333 0.69116667 0.44383333] a [-0.07371554 0.08333333 0.00827779 -0.03058845] r -0.16756639687232933 +step 3 state [0.01166667 0.93566667 0.7745 0.44283333] a [-0.08333333 0.08333333 0.08333333 -0.00099673] r -0.2921895765819501 +step 4 state [0. 0.99966667 0.81033333 0.4375 ] a [-0.08333333 0.06406632 0.03597114 -0.00520634] r -0.3145302357765668 +step 5 state [0. 1. 0.89366667 0.38483333] a [-0.08333333 0.05999929 0.08333333 -0.05256504] r -0.8285922943243339 +step 6 state [0. 1. 0.96616667 0.43683333] a [-0.08170436 0.07985363 0.07250407 0.05207671] r -0.933371280930764 +step 7 state [0. 1. 1. 0.45733333] a [-0.07820639 0.08333333 0.08333333 0.02050089] r -0.9707023386726701 +step 8 state [0. 1. 1. 0.49833333] a [-0.06981592 0.0741421 0.0742554 0.04110684] r -0.9736314502751817 +step 9 state [0. 1. 1. 0.45266667] a [-0.02486939 0.08333333 0.08333333 -0.04556402] r -0.9774533810233771 +step 10 state [0. 1. 1. 0.52116667] a [-0.08333333 0.08333333 0.03142315 0.06854187] r -0.9673275926831616 +step 1 state [0.96566667 0.09183333 0.916 0.2495 ] a [0.04179244 0.07775351 0.08141864 0.08333333] r -0.9998703790705015 +step 2 state [1. 0.1695 0.99933333 0.33216667] a [0.04029499 0.07773684 0.08333333 0.08272824] r -1.0000669228369978 +step 3 state [1. 0.23883333 1. 0.4155 ] a [0.05282481 0.06939608 0.08333333 0.08333333] r -0.999973564547912 +step 4 state [1. 0.30083333 1. 0.498 ] a [0.05882757 0.06210951 0.08059925 0.08258645] r -0.9999539401975543 +step 5 state [1. 0.35116667 1. 
0.57916667] a [0.0669063 0.0504968 0.08247616 0.08120055] r -1.0000570584523834 +Saved current buffer default +Ep:2 Rew:-3.15 -- Step:75 +Train set: 60 Valid set: 15 +Log Std policy: [-0.43646926 -0.7065726 -2.7545714 0.5460983 ] -0.83787876 +maximum: 0.9258775025002968 +mb_lr: 0.001 +Model:0, iter:22 -- Old Val loss:0.132788 New Val loss:0.099607 -- New Train loss:0.023651 +Model:1, iter:104 -- Old Val loss:0.128051 New Val loss:0.065392 -- New Train loss:0.002597 +Model:2, iter:48 -- Old Val loss:0.127599 New Val loss:0.054527 -- New Train loss:0.006423 +Model:3, iter:25 -- Old Val loss:0.137228 New Val loss:0.095313 -- New Train loss:0.030459 +Model:4, iter:129 -- Old Val loss:0.142483 New Val loss:0.053285 -- New Train loss:0.001585 +Model:5, iter:66 -- Old Val loss:0.134818 New Val loss:0.079339 -- New Train loss:0.004303 +Model:6, iter:137 -- Old Val loss:0.100901 New Val loss:0.048587 -- New Train loss:0.000854 +Model:7, iter:176 -- Old Val loss:0.123946 New Val loss:0.041547 -- New Train loss:0.000987 +Model:8, iter:57 -- Old Val loss:0.126519 New Val loss:0.064162 -- New Train loss:0.008283 + Policy it 0..Sim ep: 221 +Log Std policy inner: -0.8314397 + Policy it 1..Sim ep: 217 +Log Std policy inner: -0.8378755 + Policy it 2..Sim ep: 219 +Log Std policy inner: -0.83980787 +Iterations: 71 +Simulated test: ** -5.945927179121063 ** -4.578923993988428 ** -5.141090461548301 ** -5.175614754013659 ** -6.140197771117673 ** -5.564815958380932 ** -5.700026827994443 ** -5.77539687036493 ** -5.784111243754888 ** + Policy it 3..Sim ep: 222 +Log Std policy inner: -0.8596039 + Policy it 4..Sim ep: 214 +Log Std policy inner: -0.85423166 + Policy it 5..Sim ep: 225 +Log Std policy inner: -0.85624313 +Iterations: 74 +Simulated test: ** -5.712425285588251 ** -4.918815142428212 ** -4.9429070832784054 ** -4.790687208501622 ** -5.1891128730739 ** -4.9989759862388015 ** -5.290081389608677 ** -5.07321034331806 ** -5.572040872856305 ** + Policy it 6..Sim ep: 221 +Log Std policy inner: -0.8603689 + Policy it 7..Sim ep: 220 +Log Std policy inner: -0.86058897 + Policy it 8..Sim ep: 216 +Log Std policy inner: -0.8521157 +Iterations: 77 +Simulated test: ** -5.669751100860303 ** -4.483110695814685 ** -4.703179901478579 ** -4.758046488160617 ** -5.106315485208761 ** -5.0795260524153125 ** -5.751526451353566 ** -5.5381921315914955 ** -5.500511391853215 ** + Policy it 9..Sim ep: 220 +Log Std policy inner: -0.8793448 + Policy it 10..Sim ep: 222 +Log Std policy inner: -0.87716997 + Policy it 11..Sim ep: 226 +Log Std policy inner: -0.883881 +Iterations: 80 +Simulated test: ** -6.02658388231881 ** -5.238706317762408 ** -4.615114538278431 ** -5.008599928958283 ** -5.201596653028392 ** -5.554849480761768 ** -5.626037394443119 ** -5.619756602213601 ** -5.38202897089388 ** + Policy it 12..Sim ep: 222 +Log Std policy inner: -0.88542795 + Policy it 13..Sim ep: 219 +Log Std policy inner: -0.8852748 + Policy it 14..Sim ep: 221 +Log Std policy inner: -0.8842494 +Iterations: 83 +Simulated test: ** -5.738724979730323 ** -4.326038150913082 ** -4.73593454012298 ** -5.474843702562502 ** -5.28402309994679 ** -5.130318734893808 ** -5.66272581741694 ** -5.648011050818022 ** -5.376323009548941 ** + Policy it 15..Sim ep: 227 +Log Std policy inner: -0.8890927 + Policy it 16..Sim ep: 224 +Log Std policy inner: -0.9035735 + Policy it 17..Sim ep: 220 +Log Std policy inner: -0.8956868 +Iterations: 86 +Simulated test: ** -5.262823858358752 ** -4.933171188847627 ** -4.579498295556841 ** -4.889935916197137 ** -5.141304167933995 ** 
-5.174358802102506 ** -5.403697683429345 ** -4.961979220172506 ** -5.361984479578678 ** + Policy it 18..Sim ep: 229 +Log Std policy inner: -0.8918924 + Policy it 19..Sim ep: 222 +Log Std policy inner: -0.8851151 + Policy it 20..Sim ep: 227 +Log Std policy inner: -0.9155339 +Iterations: 89 +Simulated test: ** -5.307868660405511 ** -4.769193268250092 ** -4.310345165545004 ** -4.714635496288538 ** -5.16013679711381 ** -5.11318361139507 ** -5.598349601880182 ** -5.245404591687256 ** -5.6279655211605135 ** + Policy it 21..Sim ep: 222 +Log Std policy inner: -0.91016704 + Policy it 22..Sim ep: 224 +Log Std policy inner: -0.91113293 + Policy it 23..Sim ep: 221 +Log Std policy inner: -0.92145777 +Iterations: 92 +Simulated test: ** -5.585439425465884 ** -4.701306958474452 ** -4.406376175129553 ** -5.0225207916402725 ** -5.301968613188946 ** -4.776221902474935 ** -5.182540231006569 ** -5.5191348615736935 ** -5.430868751288217 ** + Policy it 24..Sim ep: 230 +Log Std policy inner: -0.9141788 + Policy it 25..Sim ep: 222 +Log Std policy inner: -0.8989945 + Policy it 26..Sim ep: 221 +Log Std policy inner: -0.9225768 +Iterations: 95 +Simulated test: ** -5.365293544644955 ** -4.264704668664344 ** -4.685643331850879 ** -4.625419918174739 ** -5.231333226836141 ** -5.021431860403681 ** -4.930079044757877 ** -4.798727383576334 ** -5.494394337946432 ** + Policy it 27..Sim ep: 216 +Log Std policy inner: -0.9267136 + Policy it 28..Sim ep: 225 +Log Std policy inner: -0.9106288 + Policy it 29..Sim ep: 224 +Log Std policy inner: -0.90865415 +Iterations: 98 +Simulated test: ** -5.65686865246913 ** -4.57091353102136 ** -4.5715063311287665 ** -5.331751343390788 ** -5.032006846026052 ** -5.253777140045859 ** -5.286154333005834 ** -4.997797671423177 ** -4.919776612631977 ** + Policy it 30..Sim ep: 219 +Log Std policy inner: -0.91736615 + Policy it 31..Sim ep: 227 +Log Std policy inner: -0.9213168 + Policy it 32..Sim ep: 229 +Log Std policy inner: -0.9150454 +Iterations: 101 +Simulated test: ** -5.500212972358567 ** -4.75004459539894 ** -4.820874235655938 ** -5.086643494275049 ** -5.190893995640217 ** -4.906019015497296 ** -5.041570163359865 ** -5.2381038611358965 ** -4.646109833397204 ** + Policy it 33..Sim ep: 220 +Log Std policy inner: -0.9213085 + Policy it 34..Sim ep: 222 +Log Std policy inner: -0.9111488 + Policy it 35..Sim ep: 223 +Log Std policy inner: -0.90788037 +Iterations: 104 +Simulated test: ** -5.218308593238471 ** -4.371168684198637 ** -4.630351655482664 ** -4.8054888022626985 ** -5.370893295006099 ** -5.127123221566435 ** -5.319556914376007 ** -5.116555063364504 ** -4.91714142606972 ** + Policy it 36..Sim ep: 219 +Log Std policy inner: -0.89849883 + Policy it 37..Sim ep: 223 +Log Std policy inner: -0.8965944 + Policy it 38..Sim ep: 222 +Log Std policy inner: -0.9135427 +Iterations: 107 +Simulated test: ** -5.036544068432413 ** -5.003612834821833 ** -4.631143134538142 ** -5.331680446663377 ** -5.108564661492128 ** -5.20436630461074 ** -5.092927287518978 ** -5.113665065373061 ** -5.234229791751713 ** + Policy it 39..Sim ep: 230 +Log Std policy inner: -0.9189936 + Policy it 40..Sim ep: 222 +Log Std policy inner: -0.9100193 + Policy it 41..Sim ep: 223 +Log Std policy inner: -0.9172621 +Iterations: 110 +Simulated test: ** -4.949844806205656 ** -4.444324626124581 ** -4.428963084164716 ** -4.668252957950026 ** -5.146693647695356 ** -5.195416461966816 ** -5.366446514837444 ** -5.048087294297293 ** -5.240122042424918 ** + Policy it 42..Sim ep: 220 +Log Std policy inner: -0.9189043 + Policy it 43..Sim ep: 221 +Log 
Std policy inner: -0.9071228 + Policy it 44..Sim ep: 216 +Log Std policy inner: -0.9051794 +Iterations: 113 +Simulated test: ** -5.9527985000883925 ** -4.73336388775555 ** -4.355884451012826 ** -4.875852204799303 ** -4.455465364596166 ** -5.076665313717386 ** -5.0601455473713575 ** -4.813732573913003 ** -5.354474418240425 ** + Policy it 45..Sim ep: 222 +Log Std policy inner: -0.89718795 + Policy it 46..Sim ep: 227 +Log Std policy inner: -0.87648994 + Policy it 47..Sim ep: 229 +Log Std policy inner: -0.86045265 +Iterations: 116 +Simulated test: ** -5.292295915463473 ** -4.459257056564093 ** -4.737723550300871 ** -5.035404909027857 ** -5.151151567028719 ** -5.158340693659848 ** -5.1095844998286335 ** -4.343958498450229 ** -5.062922868206806 ** + Policy it 48..Sim ep: 220 +Log Std policy inner: -0.87127334 + Policy it 49..Sim ep: 220 +Log Std policy inner: -0.88754827 +============================ 3 ============================ +step 1 state [0.88616667 0.57566667 1. 0.08683333] a [-0.08333333 0.08333333 0.07258059 0.08333333] r -0.9997919661847519 +step 2 state [0.85566667 0.659 1. 0.12566667] a [-0.03038076 0.08333333 0.05755655 0.03885976] r -0.9997183346958193 +step 3 state [0.77233333 0.63033333 1. 0.209 ] a [-0.08333333 -0.02857189 0.0368486 0.08333333] r -0.999492531463093 +step 4 state [0.74783333 0.71366667 1. 0.29233333] a [-0.024476 0.08333333 0.08333333 0.08333333] r -0.9987611339575084 +step 5 state [0.6645 0.797 1. 0.37566667] a [-0.08333333 0.08333333 0.08333333 0.08333333] r -0.9977647166562269 +step 6 state [0.608 0.85566667 1. 0.33583333] a [-0.05641049 0.05875768 0.07039693 -0.0398257 ] r -0.9959980711650094 +step 7 state [0.61533333 0.929 1. 0.29 ] a [ 0.00746007 0.07347532 0.08333333 -0.04577919] r -0.9938208707020573 +step 8 state [0.55716667 0.9005 1. 0.31116667] a [-0.05810114 -0.02844442 0.08333333 0.0212502 ] r -0.9942908470410214 +step 9 state [0.5165 0.95566667 1. 0.24333333] a [-0.04056971 0.05527936 0.08333333 -0.06780808] r -0.9937654818671138 +step 10 state [0.59983333 0.9485 1. 0.278 ] a [ 0.08333333 -0.00715316 0.07693313 0.03474054] r -0.9920338652317956 +step 1 state [0.60116667 0.62033333 0.32033333 0.55733333] a [-0.04218961 0.07562493 0.03674735 0.01558917] r -0.06171464511805769 +step 2 state [0.51783333 0.69783333 0.3805 0.64066667] a [-0.08333333 0.07766537 0.06018166 0.08333333] r 0.0030953767555674805 +step 1 state [0.22133333 0.91183333 0.1495 0.18916667] a [-0.08333333 0.08333333 0.08333333 0.01678016] r -0.9986211962911812 +step 2 state [0.16583333 0.99516667 0.23283333 0.19683333] a [-0.05535197 0.08333333 0.08333333 0.00778582] r -0.9975443120202724 +step 3 state [0.0825 1. 
0.29666667 0.14716667] a [-0.08333333 0.05175982 0.06399327 -0.0496069 ] r -0.9978196653868885 +Saved current buffer default +Ep:3 Rew:-4.02 -- Step:90 +Train set: 72 Valid set: 18 +Log Std policy: [-1.1033645 -0.14156072 -2.7338643 0.42859617] -0.88754827 +maximum: 1.006190753511135 +mb_lr: 0.001 +Model:0, iter:85 -- Old Val loss:0.059399 New Val loss:0.032249 -- New Train loss:0.003739 +Model:1, iter:17 -- Old Val loss:0.021082 New Val loss:0.075382 -- New Train loss:0.037934 +Model:2, iter:53 -- Old Val loss:0.038416 New Val loss:0.037697 -- New Train loss:0.008493 +Model:3, iter:86 -- Old Val loss:0.051453 New Val loss:0.035673 -- New Train loss:0.002839 +Model:4, iter:57 -- Old Val loss:0.012710 New Val loss:0.036061 -- New Train loss:0.003443 +Model:5, iter:62 -- Old Val loss:0.048192 New Val loss:0.022076 -- New Train loss:0.003357 +Model:6, iter:43 -- Old Val loss:0.011806 New Val loss:0.037310 -- New Train loss:0.006163 +Model:7, iter:54 -- Old Val loss:0.007530 New Val loss:0.043510 -- New Train loss:0.004545 +Model:8, iter:38 -- Old Val loss:0.044334 New Val loss:0.040360 -- New Train loss:0.007167 + Policy it 0..Sim ep: 227 +Log Std policy inner: -0.8945862 + Policy it 1..Sim ep: 221 +Log Std policy inner: -0.9043374 + Policy it 2..Sim ep: 232 +Log Std policy inner: -0.90336055 +Iterations: 121 +Simulated test: ** -4.815174209161778 ** -5.642420413893415 ** -5.480169421762985 ** -5.865383342310087 ** -5.714411128645879 ** -6.2615897467848844 ** -5.447799236892024 ** -5.646381181261968 ** -5.502345305335475 ** + Policy it 3..Sim ep: 218 +Log Std policy inner: -0.8999077 + Policy it 4..Sim ep: 219 +Log Std policy inner: -0.90975815 + Policy it 5..Sim ep: 224 +Log Std policy inner: -0.9147357 +Iterations: 124 +Simulated test: ** -4.934236898992676 ** -6.3378597823635205 ** -5.781006384142675 ** -5.5974350564833735 ** -5.324747053156316 ** -5.488626488489972 ** -5.981716793735977 ** -5.795678494732419 ** -5.417911624039989 ** + Policy it 6..Sim ep: 213 +Log Std policy inner: -0.923118 + Policy it 7..Sim ep: 221 +Log Std policy inner: -0.9271617 + Policy it 8..Sim ep: 219 +Log Std policy inner: -0.9231882 +Iterations: 127 +Simulated test: ** -4.811960753655294 ** -5.853178686675383 ** -5.505351606842596 ** -5.642088178046979 ** -5.603253907621838 ** -5.526366562769981 ** -5.811192351195786 ** -5.655835647638305 ** -5.680239235998597 ** + Policy it 9..Sim ep: 220 +Log Std policy inner: -0.9419521 + Policy it 10..Sim ep: 217 +Log Std policy inner: -0.94754153 + Policy it 11..Sim ep: 224 +Log Std policy inner: -0.9642065 +Iterations: 130 +Simulated test: ** -4.837925751073053 ** -5.11505861335434 ** -5.684220790030667 ** -6.180220509237552 ** -5.479396150325192 ** -5.822972546014935 ** -5.875824222843512 ** -5.518011694808374 ** -4.688814754408086 ** + Policy it 12..Sim ep: 213 +Log Std policy inner: -0.9734342 + Policy it 13..Sim ep: 226 +Log Std policy inner: -0.9820564 + Policy it 14..Sim ep: 219 +Log Std policy inner: -0.98602194 +Iterations: 133 +Simulated test: ** -4.84728590161365 ** -5.7601637194844075 ** -5.631211429111427 ** -5.279724484691396 ** -5.714982873878907 ** -5.769995888103731 ** -5.6720322830718946 ** -5.646464011827484 ** -5.612750856292551 ** + Policy it 15..Sim ep: 221 +Log Std policy inner: -0.9985979 + Policy it 16..Sim ep: 218 +Log Std policy inner: -0.97969174 + Policy it 17..Sim ep: 222 +Log Std policy inner: -0.96080476 +Iterations: 136 +Simulated test: ** -4.691719256400829 ** -5.626909750927589 ** -5.616892035822384 ** -5.9510907992572175 ** 
-5.254989611054189 ** -5.744008898698375 ** -5.738013937821961 ** -5.631056569439534 ** -5.470693214982748 ** + Policy it 18..Sim ep: 219 +Log Std policy inner: -0.94208753 + Policy it 19..Sim ep: 221 +Log Std policy inner: -0.9450697 + Policy it 20..Sim ep: 219 +Log Std policy inner: -0.93373066 +Iterations: 139 +Simulated test: ** -4.6833721317921295 ** -5.452843980596517 ** -5.460233992291615 ** -6.06236345673562 ** -5.197240838273137 ** -5.690513500154484 ** -5.364754566392512 ** -5.330161823583767 ** -5.24029694206547 ** + Policy it 21..Sim ep: 222 +Log Std policy inner: -0.95415545 + Policy it 22..Sim ep: 222 +Log Std policy inner: -0.95014405 + Policy it 23..Sim ep: 221 +Log Std policy inner: -0.95414627 +Iterations: 142 +Simulated test: ** -4.851393572598463 ** -5.969133142101928 ** -5.400035237642005 ** -5.2825243119325025 ** -5.910826163976453 ** -5.997438944296445 ** -5.847545433755731 ** -5.442743355495622 ** -5.115143563635647 ** + Policy it 24..Sim ep: 218 +Log Std policy inner: -0.93666637 + Policy it 25..Sim ep: 222 +Log Std policy inner: -0.9554857 + Policy it 26..Sim ep: 219 +Log Std policy inner: -0.9497647 +Iterations: 145 +Simulated test: ** -3.829891003710218 ** -5.577491127385292 ** -5.494659005545546 ** -5.7664899014361435 ** -5.761037940042879 ** -5.414840116532577 ** -5.445663322006585 ** -4.856189203182002 ** -4.681759134695458 ** + Policy it 27..Sim ep: 220 +Log Std policy inner: -0.94053614 + Policy it 28..Sim ep: 217 +Log Std policy inner: -0.9401485 + Policy it 29..Sim ep: 220 +Log Std policy inner: -0.92908716 +Iterations: 148 +Simulated test: ** -4.60519990589979 ** -5.661559199580515 ** -5.393048832415371 ** -5.192022241995437 ** -5.786285236327967 ** -5.817181897105765 ** -4.752533982189197 ** -5.0255586990086885 ** -5.103016026281985 ** + Policy it 30..Sim ep: 223 +Log Std policy inner: -0.9329593 + Policy it 31..Sim ep: 227 +Log Std policy inner: -0.92842305 + Policy it 32..Sim ep: 222 +Log Std policy inner: -0.9246879 +Iterations: 151 +Simulated test: ** -4.38747430064017 ** -5.388884217141313 ** -5.539659737905604 ** -5.714643536096555 ** -4.855802596859867 ** -6.090225116855581 ** -5.219077927003382 ** -5.675476541350945 ** -5.515727797155268 ** + Policy it 33..Sim ep: 228 +Log Std policy inner: -0.9489333 + Policy it 34..Sim ep: 218 +Log Std policy inner: -0.95782286 + Policy it 35..Sim ep: 221 +Log Std policy inner: -0.9577624 +Iterations: 154 +Simulated test: ** -4.107333890239243 ** -5.493248643584084 ** -4.936675895728404 ** -5.593015852660174 ** -5.548731136908755 ** -6.086368668650976 ** -5.060125341420062 ** -5.205328857802088 ** -5.220423468443332 ** + Policy it 36..Sim ep: 226 +Log Std policy inner: -0.95862216 + Policy it 37..Sim ep: 230 +Log Std policy inner: -0.97222334 + Policy it 38..Sim ep: 232 +Log Std policy inner: -0.9789926 +Iterations: 157 +Simulated test: ** -4.033628189036389 ** -5.415640711892629 ** -5.073794037036132 ** -5.400185658327537 ** -5.448543450774159 ** -5.595477890757466 ** -5.315196805202868 ** -5.353188515889924 ** -5.136185711528815 ** + Policy it 39..Sim ep: 224 +Log Std policy inner: -1.0004745 + Policy it 40..Sim ep: 225 +Log Std policy inner: -1.0082316 + Policy it 41..Sim ep: 225 +Log Std policy inner: -1.0183599 +Iterations: 160 +Simulated test: ** -4.03002094427502 ** -4.959266917236382 ** -4.8321375791021275 ** -5.7221368116670055 ** -5.552676236628904 ** -5.329881999022327 ** -5.61144485902827 ** -4.700108045860543 ** -5.335728732203716 ** + Policy it 42..Sim ep: 229 +Log Std policy inner: -1.0253232 + 
Policy it 43..Sim ep: 219 +Log Std policy inner: -1.0218682 + Policy it 44..Sim ep: 228 +Log Std policy inner: -1.0417982 +Iterations: 163 +Simulated test: ** -4.334943954276387 ** -5.842379658008867 ** -5.215746056291391 ** -5.809424904335611 ** -5.176679377865512 ** -5.542999997026055 ** -5.202783556600917 ** -5.455670791454613 ** -4.690969813720439 ** + Policy it 45..Sim ep: 232 +Log Std policy inner: -1.0557345 + Policy it 46..Sim ep: 217 +Log Std policy inner: -1.0658288 + Policy it 47..Sim ep: 223 +Log Std policy inner: -1.055358 +Iterations: 166 +Simulated test: ** -3.8842192345776128 ** -5.570014043026604 ** -5.506201620332431 ** -5.620489551653154 ** -4.974490140541457 ** -5.47918777876941 ** -5.58833789774988 ** -5.208302066718461 ** -5.264874899470015 ** + Policy it 48..Sim ep: 232 +Log Std policy inner: -1.0636368 + Policy it 49..Sim ep: 223 +Log Std policy inner: -1.0979403 +============================ 4 ============================ +step 1 state [0.798 0.87466667 0.39266667 0.53483333] a [-0.08292852 0.08067396 0.08333333 0.06754427] r -0.14365761459924065 +step 2 state [0.733 0.958 0.46966667 0.611 ] a [-0.06495955 0.08333333 0.07703282 0.07619104] r -0.044486905353175565 +step 1 state [0.37933333 0.0925 0.391 0.78516667] a [-0.07383872 0.08333333 0.03099495 0.07676583] r -0.09554991647515632 +step 2 state [0.30433333 0.16766667 0.47433333 0.8685 ] a [-0.07494842 0.07528533 0.08333333 0.08333333] r -0.308987448376862 +step 3 state [0.30233333 0.251 0.55766667 0.95183333] a [-0.00187396 0.08333333 0.08333333 0.08333333] r -0.9275335774755614 +step 4 state [0.25083333 0.33433333 0.62533333 1. ] a [-0.05143824 0.08333333 0.0677491 0.08333333] r -0.979185763583277 +step 5 state [0.1675 0.38683333 0.65616667 1. ] a [-0.08333333 0.05262825 0.03089353 0.08106803] r -0.9741629198221388 +step 6 state [0.15633333 0.46483333 0.7395 1. ] a [-0.0110533 0.07801953 0.08333333 0.07763247] r -0.9853380075528269 +step 7 state [0.073 0.54816667 0.82283333 1. ] a [-0.08333333 0.08333333 0.08333333 0.08215605] r -0.993929250831107 +step 8 state [0. 0.56883333 0.8775 1. ] a [-0.08333333 0.02073555 0.0548136 0.08333333] r -0.9974840093885128 +step 9 state [0. 0.64816667 0.96083333 1. ] a [-0.08333333 0.07947721 0.08333333 0.04572143] r -0.9997718002934575 +step 10 state [0. 0.7315 1. 1. ] a [-0.08333333 0.08333333 0.08333333 0.02036624] r -0.9997471992379785 +step 1 state [0.33833333 0.92033333 0.484 0.55316667] a [-0.06289766 0.08333333 0.03887611 0.08333333] r 0.017708454603186263 +step 1 state [0.55833333 1. 0.56 0.92566667] a [ 0.04877511 0.08333333 -0.00787861 -0.04323686] r -0.6823177930404456 +step 2 state [0.6095 1. 
0.546 0.84233333] a [ 0.05127925 0.06444004 -0.01395671 -0.08333333] r -0.32525500617344894 +Saved current buffer default +Ep:4 Rew:-1.29 -- Step:105 +Train set: 84 Valid set: 21 +Log Std policy: [-1.5917503 0.72631776 -3.073843 -0.45248598] -1.0979403 +maximum: 1.0354169092063725 +mb_lr: 0.001 +Model:0, iter:40 -- Old Val loss:0.062504 New Val loss:0.053043 -- New Train loss:0.010285 +Model:1, iter:45 -- Old Val loss:0.121353 New Val loss:0.067920 -- New Train loss:0.006542 +Model:2, iter:18 -- Old Val loss:0.067887 New Val loss:0.149141 -- New Train loss:0.057203 +Model:3, iter:70 -- Old Val loss:0.035315 New Val loss:0.036823 -- New Train loss:0.003003 +Model:4, iter:55 -- Old Val loss:0.045890 New Val loss:0.046956 -- New Train loss:0.003203 +Model:5, iter:52 -- Old Val loss:0.049082 New Val loss:0.039075 -- New Train loss:0.002968 +Model:6, iter:116 -- Old Val loss:0.051818 New Val loss:0.030793 -- New Train loss:0.001259 +Model:7, iter:76 -- Old Val loss:0.063455 New Val loss:0.041433 -- New Train loss:0.002427 +Model:8, iter:63 -- Old Val loss:0.071240 New Val loss:0.047992 -- New Train loss:0.002291 + Policy it 0..Sim ep: 216 +Log Std policy inner: -1.096933 + Policy it 1..Sim ep: 218 +Log Std policy inner: -1.0741684 + Policy it 2..Sim ep: 219 +Log Std policy inner: -1.0556308 +Iterations: 171 +Simulated test: ** -7.728430311569245 ** -7.499357388780918 ** -8.25998828395066 ** -8.05199015508406 ** -7.69117722811643 ** -7.138581976034911 ** -7.58617467242293 ** -7.556646177286748 ** -6.730049884597538 ** + Policy it 3..Sim ep: 215 +Log Std policy inner: -1.073195 + Policy it 4..Sim ep: 213 +Log Std policy inner: -1.0721823 + Policy it 5..Sim ep: 219 +Log Std policy inner: -1.0652318 +Iterations: 174 +Simulated test: ** -7.5896627142746 ** -7.427760086600319 ** -8.280194626799785 ** -7.554965030176099 ** -7.886742049950408 ** -7.548131670129951 ** -7.037370062095579 ** -7.885425054403604 ** -6.931647134134546 ** + Policy it 6..Sim ep: 217 +Log Std policy inner: -1.0329003 + Policy it 7..Sim ep: 217 +Log Std policy inner: -1.0278959 + Policy it 8..Sim ep: 216 +Log Std policy inner: -1.0157747 +Iterations: 177 +Simulated test: ** -7.121559894979 ** -7.530487102488987 ** -8.055328827546909 ** -7.958616297000553 ** -7.7206662610592325 ** -7.789927370704245 ** -7.103210529780772 ** -7.491382371108048 ** -7.4154934444185345 ** + Policy it 9..Sim ep: 222 +Log Std policy inner: -1.0152361 + Policy it 10..Sim ep: 218 +Log Std policy inner: -0.99137104 + Policy it 11..Sim ep: 212 +Log Std policy inner: -0.9994453 +Iterations: 180 +Simulated test: ** -7.3611766472714955 ** -7.581299267795402 ** -8.136800828928244 ** -7.941117228118237 ** -7.242946307638777 ** -7.53860091422277 ** -7.723339326197747 ** -7.520741841411218 ** -7.501682770629413 ** + Policy it 12..Sim ep: 215 +Log Std policy inner: -0.99451476 + Policy it 13..Sim ep: 216 +Log Std policy inner: -0.9915443 + Policy it 14..Sim ep: 211 +Log Std policy inner: -1.0109594 +Iterations: 183 +Simulated test: ** -7.1250285712548065 ** -7.574928321213228 ** -8.318541573011316 ** -8.166195328913163 ** -7.517102275820216 ** -7.589690617583692 ** -7.232148165972903 ** -7.180212949502748 ** -7.0103078672499395 ** + Policy it 15..Sim ep: 220 +Log Std policy inner: -1.0066448 + Policy it 16..Sim ep: 213 +Log Std policy inner: -0.98806584 + Policy it 17..Sim ep: 221 +Log Std policy inner: -0.9771968 +Iterations: 186 +Simulated test: ** -7.279671456832439 ** -7.647228148035065 ** -8.343078947765898 ** -8.033273273618834 ** -8.121461448576301 ** 
-7.939975818140665 ** -7.637949089915492 ** -7.646831871140749 ** -7.02046109255869 ** +break +============================ 5 ============================ +step 1 state [0.45616667 0.25816667 1. 0.67183333] a [-0.07871609 0.08333333 0.08333333 0.07501935] r -0.9999833933069423 +step 2 state [0.386 0.3415 1. 0.75383333] a [-0.0701032 0.08333333 0.07974547 0.08214137] r -1.0002241366507498 +step 3 state [0.30266667 0.42483333 1. 0.83716667] a [-0.08333333 0.08333333 0.08333333 0.08333333] r -1.000096395284648 +step 4 state [0.24283333 0.49433333 1. 0.9205 ] a [-0.05973483 0.06950475 0.08333333 0.08333333] r -1.000111106251086 +step 5 state [0.18066667 0.55816667 1. 0.977 ] a [-0.06208566 0.06384103 0.08333333 0.05652322] r -1.0000718112673654 +step 6 state [0.10616667 0.6415 1. 1. ] a [-0.07444955 0.08333333 0.07721833 0.08333333] r -1.000032516283645 +step 7 state [0.02283333 0.724 1. 1. ] a [-0.08333333 0.08263314 0.08333333 0.02639771] r -1.0000521637755053 +step 8 state [0. 0.79416667 1. 1. ] a [-0.05786754 0.07018577 0.06645226 0.03122526] r -0.9999490144432389 +step 9 state [0. 0.82616667 1. 1. ] a [-0.08039674 0.03204282 0.08234229 0.04892342] r -0.9997083326679508 +step 10 state [0. 0.9095 1. 1. ] a [-0.08315096 0.08333333 0.08333333 0.02385841] r -0.9995167696223134 +step 1 state [0.54633333 0.39883333 0.54266667 0.8855 ] a [0.01226958 0.08333333 0.08333333 0.00738193] r -0.672028220455821 +step 2 state [0.5285 0.48216667 0.626 0.86083333] a [-0.01769733 0.08333333 0.08333333 -0.02456608] r -0.7831589956918967 +step 3 state [0.44516667 0.53016667 0.70783333 0.94316667] a [-0.08333333 0.04804947 0.08197066 0.082363 ] r -0.9783154140797975 +step 4 state [0.36183333 0.6135 0.79116667 0.9525 ] a [-0.08333333 0.08333333 0.08333333 0.0093515 ] r -0.9925900269879533 +step 5 state [0.35366667 0.69683333 0.79466667 1. 
] a [-0.00805187 0.08333333 0.00356716 0.07316039] r -0.9924083817490628 +Saved current buffer default +Ep:5 Rew:-10.00 -- Step:120 +Train set: 96 Valid set: 24 +Log Std policy: [-1.4163584 0.8227917 -3.241868 -0.07335263] -0.9771968 +maximum: 1.0354169092063725 +mb_lr: 0.001 +Model:0, iter:68 -- Old Val loss:0.018978 New Val loss:0.010280 -- New Train loss:0.003096 +Model:1, iter:163 -- Old Val loss:0.018291 New Val loss:0.013032 -- New Train loss:0.000834 +Model:2, iter:131 -- Old Val loss:0.059587 New Val loss:0.006857 -- New Train loss:0.000920 +Model:3, iter:126 -- Old Val loss:0.009959 New Val loss:0.012574 -- New Train loss:0.001001 +Model:4, iter:131 -- Old Val loss:0.008764 New Val loss:0.008689 -- New Train loss:0.001204 +Model:5, iter:17 -- Old Val loss:0.005774 New Val loss:0.123863 -- New Train loss:0.042342 +Model:6, iter:98 -- Old Val loss:0.004600 New Val loss:0.008017 -- New Train loss:0.001471 +Model:7, iter:92 -- Old Val loss:0.006105 New Val loss:0.008043 -- New Train loss:0.001800 +Model:8, iter:176 -- Old Val loss:0.006868 New Val loss:0.012301 -- New Train loss:0.000661 + Policy it 0..Sim ep: 221 +Log Std policy inner: -0.9697589 + Policy it 1..Sim ep: 222 +Log Std policy inner: -0.9714205 + Policy it 2..Sim ep: 218 +Log Std policy inner: -0.94243544 +Iterations: 189 +Simulated test: ** -5.848737671072595 ** -6.74240133320447 ** -6.956274328071158 ** -7.338505566606764 ** -6.845825619343668 ** -7.150635552951135 ** -7.0528489413266655 ** -7.195322966080567 ** -6.686975330681162 ** + Policy it 3..Sim ep: 219 +Log Std policy inner: -0.9295354 + Policy it 4..Sim ep: 213 +Log Std policy inner: -0.9011086 + Policy it 5..Sim ep: 219 +Log Std policy inner: -0.8851435 +Iterations: 192 +Simulated test: ** -6.888997476978111 ** -6.610738687000703 ** -7.348496426531346 ** -7.581902811140753 ** -6.816841757381335 ** -7.327645576819777 ** -6.902525949226693 ** -7.178380843512714 ** -7.128893338296329 ** + Policy it 6..Sim ep: 217 +Log Std policy inner: -0.8550108 + Policy it 7..Sim ep: 221 +Log Std policy inner: -0.84462893 + Policy it 8..Sim ep: 214 +Log Std policy inner: -0.83255446 +Iterations: 195 +Simulated test: ** -6.316515260599553 ** -6.456373264407157 ** -6.952602561298991 ** -7.2130098837148395 ** -6.583467100072303 ** -6.069730790354661 ** -6.96347414478194 ** -6.192611842752667 ** -6.622414648458362 ** + Policy it 9..Sim ep: 220 +Log Std policy inner: -0.825425 + Policy it 10..Sim ep: 223 +Log Std policy inner: -0.8139028 + Policy it 11..Sim ep: 221 +Log Std policy inner: -0.7977727 +Iterations: 198 +Simulated test: ** -6.4042617561930095 ** -6.222904024371528 ** -6.7668502965156225 ** -6.580739600267261 ** -6.824741438825003 ** -6.609447342433268 ** -6.014838178099598 ** -6.730261025712825 ** -6.447703864165814 ** + Policy it 12..Sim ep: 215 +Log Std policy inner: -0.73402923 + Policy it 13..Sim ep: 217 +Log Std policy inner: -0.7278572 + Policy it 14..Sim ep: 220 +Log Std policy inner: -0.7231856 +Iterations: 201 +Simulated test: ** -5.566266899253241 ** -7.056917006012518 ** -6.726073862332851 ** -6.366226092432625 ** -6.411740457924315 ** -6.6880353760672735 ** -6.787113734394661 ** -6.613482472427422 ** -7.168632646413752 ** + Policy it 15..Sim ep: 224 +Log Std policy inner: -0.73909277 + Policy it 16..Sim ep: 217 +Log Std policy inner: -0.7202597 + Policy it 17..Sim ep: 223 +Log Std policy inner: -0.7283689 +Iterations: 204 +Simulated test: ** -6.3061331406608225 ** -6.14330424747197 ** -6.159308194676414 ** -6.7286276804632505 ** -6.489008626160212 ** 
-6.709402448501351 ** -6.487248391670291 ** -6.669604991292581 ** -6.779719580276869 ** + Policy it 18..Sim ep: 221 +Log Std policy inner: -0.7400486 + Policy it 19..Sim ep: 219 +Log Std policy inner: -0.7182689 + Policy it 20..Sim ep: 221 +Log Std policy inner: -0.71996903 +Iterations: 207 +Simulated test: ** -5.878602141004522 ** -6.945355969892553 ** -6.409094248558395 ** -6.145546568596619 ** -6.517902809064835 ** -6.347048689196818 ** -6.373006335410755 ** -7.086345450051012 ** -6.119048497747863 ** + Policy it 21..Sim ep: 224 +Log Std policy inner: -0.7226668 + Policy it 22..Sim ep: 215 +Log Std policy inner: -0.71803826 + Policy it 23..Sim ep: 224 +Log Std policy inner: -0.70853233 +Iterations: 210 +Simulated test: ** -6.531731308698654 ** -6.45644323971821 ** -6.6459391392167895 ** -6.630791722844588 ** -6.640619029709196 ** -6.488151288991794 ** -6.4126193912443705 ** -6.773151338457828 ** -6.113887585916091 ** + Policy it 24..Sim ep: 222 +Log Std policy inner: -0.7198065 + Policy it 25..Sim ep: 227 +Log Std policy inner: -0.7080144 + Policy it 26..Sim ep: 222 +Log Std policy inner: -0.67813486 +Iterations: 213 +Simulated test: ** -5.587905780406436 ** -5.937705195562448 ** -6.023967432756908 ** -6.39356523927534 ** -6.660886850766838 ** -6.013621654876042 ** -6.181214885853697 ** -6.627359950885584 ** -6.355601613107138 ** + Policy it 27..Sim ep: 220 +Log Std policy inner: -0.6354015 + Policy it 28..Sim ep: 230 +Log Std policy inner: -0.64665216 + Policy it 29..Sim ep: 216 +Log Std policy inner: -0.6350003 +Iterations: 216 +Simulated test: ** -6.33895926483674 ** -6.172036331188865 ** -6.465962071284885 ** -6.637537536256714 ** -6.502999838553951 ** -6.8609662891243355 ** -6.293465067373472 ** -6.56598077739589 ** -7.20968149331864 ** +break +============================ 6 ============================ +step 1 state [0.4595 0.64516667 0.842 0.07766667] a [-0.06105764 0.08333333 0.04854871 0.05132144] r -0.9776365932186573 +step 2 state [0.37616667 0.7105 0.92533333 0.105 ] a [-0.08333333 0.06544201 0.08333333 0.02744578] r -0.9910905325686873 +step 3 state [0.379 0.62716667 1. 0.11466667] a [ 0.00297303 -0.08333333 0.08333333 0.00980962] r -0.998433844394755 +step 4 state [0.30566667 0.59983333 1. 0.06616667] a [-0.07325415 -0.0273318 0.08333333 -0.04842339] r -0.9985715771638932 +step 5 state [0.22233333 0.59733333 1. 0.07216667] a [-0.08333333 -0.0024268 0.08333333 0.00613228] r -0.9984043302299397 +step 6 state [0.14 0.56033333 1. 0. ] a [-0.08222041 -0.03697133 0.038254 -0.08333333] r -0.9982370870491475 +step 7 state [0.05666667 0.61283333 1. 0. ] a [-0.08333333 0.0525333 0.03606003 -0.08333333] r -0.9983044142009568 +step 8 state [0. 0.56933333 1. 0. ] a [-0.08333333 -0.04341689 0.08333333 -0.05795715] r -0.9989818908119423 +step 9 state [0. 0.61283333 0.9765 0. ] a [-0.0567205 0.0436462 -0.02339139 -0.08333333] r -0.9986382408898844 +step 10 state [0. 0.577 0.9145 0.0265] a [-0.08333333 -0.03574564 -0.06184289 0.02655047] r -0.9962134568959244 +step 1 state [0. 0.63233333 0.93283333 0.1495 ] a [-0.08333333 0.05769771 0.08333333 -0.00956804] r -0.9966109618483645 +step 2 state [0. 0.6985 1. 0.1825] a [-0.08333333 0.06632793 0.08333333 0.03315076] r -0.9982013242607451 +step 3 state [0. 0.766 1. 0.24466667] a [-0.08333333 0.06760179 0.08333333 0.06232887] r -0.998495873081453 +step 4 state [0. 0.83916667 1. 0.22383333] a [-0.0725872 0.07323726 0.0611657 -0.02077031] r -0.9974649895199024 +step 5 state [0. 0.9225 1. 
0.17066667] a [-0.08333333 0.08333333 0.02538381 -0.05314247] r -0.9971901140539747 +Saved current buffer default +Ep:6 Rew:-9.91 -- Step:135 +Train set: 108 Valid set: 27 +Log Std policy: [-0.9959612 0.48174956 -2.4142418 0.38845232] -0.6350003 +maximum: 1.0354169092063725 +mb_lr: 0.001 +Model:0, iter:67 -- Old Val loss:0.009315 New Val loss:0.020855 -- New Train loss:0.002325 +Model:1, iter:45 -- Old Val loss:0.030086 New Val loss:0.029386 -- New Train loss:0.002970 +Model:2, iter:62 -- Old Val loss:0.005366 New Val loss:0.017000 -- New Train loss:0.002387 +Model:3, iter:105 -- Old Val loss:0.013468 New Val loss:0.021796 -- New Train loss:0.000800 +Model:4, iter:36 -- Old Val loss:0.012824 New Val loss:0.012154 -- New Train loss:0.004332 +Model:5, iter:41 -- Old Val loss:0.111023 New Val loss:0.017954 -- New Train loss:0.004750 +Model:6, iter:62 -- Old Val loss:0.026815 New Val loss:0.018814 -- New Train loss:0.002200 +Model:7, iter:123 -- Old Val loss:0.008961 New Val loss:0.027566 -- New Train loss:0.001498 +Model:8, iter:34 -- Old Val loss:0.011049 New Val loss:0.030841 -- New Train loss:0.003538 + Policy it 0..Sim ep: 218 +Log Std policy inner: -0.6341361 + Policy it 1..Sim ep: 211 +Log Std policy inner: -0.62730086 + Policy it 2..Sim ep: 217 +Log Std policy inner: -0.63789237 +Iterations: 219 +Simulated test: ** -7.869369224971742 ** -7.2325188231165525 ** -6.909238515848992 ** -6.904671405552653 ** -7.234860864530783 ** -6.913714010766707 ** -7.067903764461807 ** -6.957884401520714 ** -7.246959014495369 ** + Policy it 3..Sim ep: 216 +Log Std policy inner: -0.66439736 + Policy it 4..Sim ep: 211 +Log Std policy inner: -0.6598607 + Policy it 5..Sim ep: 219 +Log Std policy inner: -0.6545788 +Iterations: 222 +Simulated test: ** -6.45323640064802 ** -7.100879036879633 ** -7.0501371159462725 ** -7.210030824022834 ** -7.34970635799109 ** -7.104197195246815 ** -7.033698957320302 ** -7.342352066019084 ** -7.039824829185672 ** + Policy it 6..Sim ep: 210 +Log Std policy inner: -0.6548027 + Policy it 7..Sim ep: 216 +Log Std policy inner: -0.6902428 + Policy it 8..Sim ep: 216 +Log Std policy inner: -0.6818647 +Iterations: 225 +Simulated test: ** -6.67196671853133 ** -7.517590395566076 ** -7.196486825308529 ** -7.085383266825229 ** -7.549926918437995 ** -6.480078379465267 ** -6.573688447473105 ** -6.620900518028066 ** -6.532195990670007 ** + Policy it 9..Sim ep: 218 +Log Std policy inner: -0.69358927 + Policy it 10..Sim ep: 218 +Log Std policy inner: -0.69684964 + Policy it 11..Sim ep: 214 +Log Std policy inner: -0.730623 +Iterations: 228 +Simulated test: ** -7.229450155352242 ** -7.787253444287926 ** -6.886688499386073 ** -7.283822814433369 ** -7.481378358839429 ** -6.724161320824642 ** -6.9135401455685495 ** -7.322409943413804 ** -7.061893424761947 ** +break +============================ 7 ============================ +step 1 state [0.11766667 0.46 0.62383333 1. ] a [-0.08333333 0.0274998 0.07246543 0.08333333] r -0.9648393522515115 +step 2 state [0.0765 0.54333333 0.70716667 1. ] a [-0.04102743 0.08333333 0.08333333 0.08333333] r -0.9819130384397787 +step 3 state [0. 0.62666667 0.7455 1. ] a [-0.08333333 0.08333333 0.03842201 0.08333333] r -0.9826104092662757 +step 4 state [0. 0.71 0.68983333 1. ] a [-0.06359451 0.08333333 -0.0556126 0.05468729] r -0.9484955876313831 +step 5 state [0. 0.71433333 0.77133333 0.91666667] a [-0.01775894 0.00448819 0.08158046 -0.08333333] r -0.962539256310639 +step 6 state [0. 
0.79766667 0.85466667 0.86716667] a [-0.0073857 0.08333333 0.08333333 -0.04942092] r -0.9753653501580607 +step 7 state [0. 0.881 0.938 0.885] a [-0.08333333 0.08333333 0.08333333 0.01794192] r -0.9961708734833996 +step 8 state [0. 0.90016667 0.91983333 0.80166667] a [-0.05165786 0.01919709 -0.01808281 -0.08333333] r -0.9818928599945475 +step 9 state [0. 0.9835 1. 0.79133333] a [-0.03706595 0.08333333 0.08333333 -0.01021043] r -0.9968388798927321 +step 10 state [0. 1. 1. 0.73383333] a [-0.04788996 0.08333333 0.01390861 -0.05738212] r -0.9938931618386158 +step 1 state [0.0395 0.7855 0.6925 0.82783333] a [-0.08333333 0.08333333 0.00434572 0.08333333] r -0.40531232223556257 +step 2 state [0. 0.70216667 0.74316667 0.87433333] a [-0.08333333 -0.08333333 0.05072408 0.04658214] r -0.8532424279633555 +step 3 state [0. 0.7855 0.8265 0.95766667] a [-0.03227909 0.08333333 0.08333333 0.08333333] r -0.9904110038902983 +step 4 state [0.075 0.816 0.90766667 0.89983333] a [ 0.07516176 0.03054637 0.0812976 -0.05782157] r -0.9961414056495127 +step 5 state [0. 0.89933333 0.991 0.96766667] a [-0.08333333 0.08333333 0.08333333 0.06799879] r -0.9992283011137199 +Saved current buffer default +Ep:7 Rew:-9.57 -- Step:150 +Train set: 120 Valid set: 30 +Log Std policy: [-0.83663243 0.0357984 -2.2501538 0.12849571] -0.730623 +maximum: 1.0354169092063725 +mb_lr: 0.001 +Model:0, iter:47 -- Old Val loss:0.006578 New Val loss:0.016046 -- New Train loss:0.002878 +Model:1, iter:57 -- Old Val loss:0.009857 New Val loss:0.010492 -- New Train loss:0.002467 +Model:2, iter:43 -- Old Val loss:0.007272 New Val loss:0.010426 -- New Train loss:0.003166 +Model:3, iter:43 -- Old Val loss:0.003494 New Val loss:0.007973 -- New Train loss:0.002674 +Model:4, iter:96 -- Old Val loss:0.008936 New Val loss:0.013754 -- New Train loss:0.001225 +Model:5, iter:44 -- Old Val loss:0.009133 New Val loss:0.006846 -- New Train loss:0.004024 +Model:6, iter:97 -- Old Val loss:0.004491 New Val loss:0.014587 -- New Train loss:0.001407 +Model:7, iter:28 -- Old Val loss:0.004711 New Val loss:0.015332 -- New Train loss:0.004085 +Model:8, iter:50 -- Old Val loss:0.015580 New Val loss:0.023189 -- New Train loss:0.002517 + Policy it 0..Sim ep: 214 +Log Std policy inner: -0.739878 + Policy it 1..Sim ep: 212 +Log Std policy inner: -0.7505743 + Policy it 2..Sim ep: 220 +Log Std policy inner: -0.7649376 +Iterations: 231 +Simulated test: ** -7.075930379261263 ** -6.229300639582798 ** -6.447870303993113 ** -6.483544105719775 ** -6.826932771342399 ** -6.085419232686982 ** -7.149637015682528 ** -6.459884637278737 ** -6.730178200820956 ** + Policy it 3..Sim ep: 223 +Log Std policy inner: -0.82417685 + Policy it 4..Sim ep: 220 +Log Std policy inner: -0.8298615 + Policy it 5..Sim ep: 217 +Log Std policy inner: -0.8368079 +Iterations: 234 +Simulated test: ** -5.506562806544825 ** -6.054979759472481 ** -6.4558760132352475 ** -6.631491260165348 ** -6.42307985921856 ** -6.670674382774159 ** -6.867280070327688 ** -6.313853577664122 ** -7.018487004991621 ** + Policy it 6..Sim ep: 218 +Log Std policy inner: -0.83684003 + Policy it 7..Sim ep: 219 +Log Std policy inner: -0.84773326 + Policy it 8..Sim ep: 226 +Log Std policy inner: -0.86178315 +Iterations: 237 +Simulated test: ** -6.551697412315989 ** -6.5382868617307395 ** -6.025054758491461 ** -6.87022848182387 ** -6.869077971815131 ** -7.283980895908317 ** -6.655337728881277 ** -6.241184567422606 ** -6.391083579427214 ** + Policy it 9..Sim ep: 222 +Log Std policy inner: -0.8717991 + Policy it 10..Sim ep: 226 +Log Std policy 
inner: -0.9072531 + Policy it 11..Sim ep: 215 +Log Std policy inner: -0.9670486 +Iterations: 240 +Simulated test: ** -6.758468954805284 ** -6.850994498264044 ** -6.217168403831311 ** -6.845581603284227 ** -6.583263076427393 ** -6.077425624661846 ** -6.731149478851584 ** -6.787283037886954 ** -7.137366407895461 ** + Policy it 12..Sim ep: 222 +Log Std policy inner: -0.98541176 + Policy it 13..Sim ep: 218 +Log Std policy inner: -0.94020826 + Policy it 14..Sim ep: 218 +Log Std policy inner: -0.9749316 +Iterations: 243 +Simulated test: ** -6.835946172093973 ** -6.526453369557857 ** -7.269474920653738 ** -6.6731801593024285 ** -6.3389719595410865 ** -7.211486561002675 ** -6.907667064933339 ** -6.308893164684996 ** -7.198778573680902 ** + Policy it 15..Sim ep: 226 +Log Std policy inner: -1.0263629 + Policy it 16..Sim ep: 230 +Log Std policy inner: -1.0493561 + Policy it 17..Sim ep: 215 +Log Std policy inner: -1.053001 +Iterations: 246 +Simulated test: ** -6.833627407089807 ** -6.135547832858283 ** -6.687773186285049 ** -6.2342911752872165 ** -5.643106778787915 ** -6.901084779412777 ** -6.397620792509988 ** -6.857900066872826 ** -7.109838126858522 ** + Policy it 18..Sim ep: 218 +Log Std policy inner: -1.0349449 + Policy it 19..Sim ep: 227 +Log Std policy inner: -1.014108 + Policy it 20..Sim ep: 222 +Log Std policy inner: -1.0396354 +Iterations: 249 +Simulated test: ** -6.179231077590957 ** -6.264921236308291 ** -7.024058403415839 ** -6.360137112811207 ** -6.403138583163964 ** -6.886073422324698 ** -5.17805716374889 ** -6.763959689529147 ** -6.543123335652053 ** + Policy it 21..Sim ep: 236 +Log Std policy inner: -1.0719929 + Policy it 22..Sim ep: 222 +Log Std policy inner: -1.0730315 + Policy it 23..Sim ep: 223 +Log Std policy inner: -1.1003125 +Iterations: 252 +Simulated test: ** -6.859790170961642 ** -5.542040897342522 ** -6.585909891036572 ** -7.057990971742547 ** -5.667754745380953 ** -5.691971328158397 ** -6.082041803585598 ** -6.133613524609245 ** -6.938357100908179 ** + Policy it 24..Sim ep: 225 +Log Std policy inner: -1.131126 + Policy it 25..Sim ep: 218 +Log Std policy inner: -1.1669322 + Policy it 26..Sim ep: 221 +Log Std policy inner: -1.2107602 +Iterations: 255 +Simulated test: ** -6.874238347215578 ** -6.935412433964666 ** -6.340334873187821 ** -6.07535525948042 ** -5.3822183724551 ** -5.154949901074869 ** -6.166779521460412 ** -6.031595991590584 ** -6.554271267431322 ** + Policy it 27..Sim ep: 231 +Log Std policy inner: -1.1904758 + Policy it 28..Sim ep: 234 +Log Std policy inner: -1.1762348 + Policy it 29..Sim ep: 231 +Log Std policy inner: -1.1840609 +Iterations: 258 +Simulated test: ** -5.737825618364295 ** -5.884645655401982 ** -6.1889049148675985 ** -6.621234865171136 ** -5.490044208955951 ** -6.417675388510106 ** -6.577883207749109 ** -6.020355788929737 ** -5.91903478815686 ** + Policy it 30..Sim ep: 219 +Log Std policy inner: -1.2179593 + Policy it 31..Sim ep: 230 +Log Std policy inner: -1.2142694 + Policy it 32..Sim ep: 225 +Log Std policy inner: -1.1887337 +Iterations: 261 +Simulated test: ** -5.880569546296028 ** -5.751069515408017 ** -5.893053577116225 ** -5.417338360786671 ** -6.445337556162849 ** -6.839359580548189 ** -5.9567051998412355 ** -5.867270734923659 ** -6.087623094761511 ** + Policy it 33..Sim ep: 228 +Log Std policy inner: -1.1298459 + Policy it 34..Sim ep: 226 +Log Std policy inner: -1.1442845 + Policy it 35..Sim ep: 223 +Log Std policy inner: -1.130064 +Iterations: 264 +Simulated test: ** -6.38507386721496 ** -5.667862142683007 ** -6.179171035215404 ** 
-6.221147881040816 ** -6.003307686843909 ** -6.056461467567133 ** -5.882170929327549 ** -5.984484529722249 ** -5.667518528500804 ** + Policy it 36..Sim ep: 228 +Log Std policy inner: -1.1431403 + Policy it 37..Sim ep: 228 +Log Std policy inner: -1.1202205 + Policy it 38..Sim ep: 228 +Log Std policy inner: -1.137487 +Iterations: 267 +Simulated test: ** -6.63213458288461 ** -5.951131151290319 ** -6.376950431133737 ** -5.903712778475601 ** -5.707755799220177 ** -5.932147441408597 ** -6.166816759541543 ** -5.735390336955898 ** -6.649764574218425 ** + Policy it 39..Sim ep: 224 +Log Std policy inner: -1.1547527 + Policy it 40..Sim ep: 223 +Log Std policy inner: -1.1810695 + Policy it 41..Sim ep: 229 +Log Std policy inner: -1.1974363 +Iterations: 270 +Simulated test: ** -6.920388126175385 ** -5.609706001798623 ** -5.926406769984169 ** -6.121725200565997 ** -6.016260839935858 ** -5.047491058362648 ** -6.199802164373686 ** -6.078618093817495 ** -6.142857445657719 ** + Policy it 42..Sim ep: 228 +Log Std policy inner: -1.2186061 + Policy it 43..Sim ep: 226 +Log Std policy inner: -1.2065203 + Policy it 44..Sim ep: 227 +Log Std policy inner: -1.2163006 +Iterations: 273 +Simulated test: ** -6.158945416659117 ** -5.766456356591079 ** -6.169758372814394 ** -5.955601944265072 ** -5.713826472110814 ** -4.91761788426491 ** -5.64034756742767 ** -6.436668072792527 ** -6.553568796783802 ** + Policy it 45..Sim ep: 227 +Log Std policy inner: -1.2123488 + Policy it 46..Sim ep: 243 +Log Std policy inner: -1.2378304 + Policy it 47..Sim ep: 227 +Log Std policy inner: -1.253916 +Iterations: 276 +Simulated test: ** -5.972605782623432 ** -5.212319503636682 ** -6.177185952403233 ** -5.401780085449572 ** -5.595033094780519 ** -5.7663447003276085 ** -5.635197726453189 ** -5.387134131797938 ** -5.392835894005839 ** + Policy it 48..Sim ep: 226 +Log Std policy inner: -1.2585598 + Policy it 49..Sim ep: 230 +Log Std policy inner: -1.27614 +============================ 8 ============================ +step 1 state [0.22966667 0.78666667 0.59433333 0.64533333] a [ 0.05298962 0.02522057 -0.06457154 0.0034074 ] r 0.03849362061246331 +step 1 state [0.42066667 0.36916667 0.08833333 0.10266667] a [-0.04591144 0.07928555 -0.05849537 0.06269497] r -0.9952168094243352 +step 2 state [0.33733333 0.371 0.01516667 0.186 ] a [-0.08333333 0.00185298 -0.07301143 0.08333333] r -0.9982894440286292 +step 3 state [0.38383333 0.34816667 0.0945 0.186 ] a [ 4.65774337e-02 -2.27450927e-02 7.93833435e-02 2.12242206e-05] r -0.9809036111016889 +step 4 state [0.30883333 0.4315 0.11933333 0.17316667] a [-0.07497013 0.08333333 0.02484583 -0.01273879] r -0.9890809525360997 +step 5 state [0.32383333 0.51483333 0.14116667 0.24366667] a [0.01500642 0.08333333 0.02190554 0.07060453] r -0.9708698237605804 +step 6 state [0.2405 0.5015 0.12416667 0.31783333] a [-0.08333333 -0.01330704 -0.01695076 0.07418419] r -0.9789924289301147 +step 7 state [0.15716667 0.4905 0.1585 0.26133333] a [-0.08333333 -0.01094487 0.03434322 -0.05649031] r -0.9791572238089336 +step 8 state [0.07383333 0.57383333 0.155 0.34466667] a [-0.08333333 0.08333333 -0.00343547 0.08333333] r -0.9599421107964049 +step 9 state [0.05383333 0.562 0.23833333 0.428 ] a [-0.01995823 -0.01170283 0.08333333 0.08333333] r -0.565969225824772 +step 10 state [0. 
0.64533333 0.28683333 0.51133333] a [-0.08254606 0.08333333 0.04858086 0.08333333] r -0.3147798252704793 +step 1 state [0.56633333 0.23116667 0.77183333 0.6285 ] a [-0.05880973 0.08333333 0.06776452 0.03503031] r -0.9713264931177845 +step 2 state [0.483 0.3145 0.85516667 0.71183333] a [-0.08333333 0.08333333 0.08333333 0.08333333] r -0.9962784100907885 +step 3 state [0.39966667 0.39783333 0.9385 0.79516667] a [-0.08333333 0.08333333 0.08333333 0.08333333] r -0.9996096334619398 +step 4 state [0.31633333 0.4535 0.98766667 0.8785 ] a [-0.08333333 0.05573495 0.04928867 0.08333333] r -0.9999637071329629 +Saved current buffer default +Ep:8 Rew:-3.19 -- Step:165 +Train set: 132 Valid set: 33 +Log Std policy: [-1.2772123 -1.6159383 -1.1516927 -1.0597168] -1.27614 +maximum: 1.0769872412249266 +mb_lr: 0.001 +Model:0, iter:76 -- Old Val loss:0.031308 New Val loss:0.070175 -- New Train loss:0.001244 +Model:1, iter:50 -- Old Val loss:0.020289 New Val loss:0.064793 -- New Train loss:0.002645 +Model:2, iter:35 -- Old Val loss:0.036956 New Val loss:0.053860 -- New Train loss:0.004799 +Model:3, iter:27 -- Old Val loss:0.024174 New Val loss:0.063203 -- New Train loss:0.005530 +Model:4, iter:32 -- Old Val loss:0.040825 New Val loss:0.065556 -- New Train loss:0.004962 +Model:5, iter:46 -- Old Val loss:0.036672 New Val loss:0.062275 -- New Train loss:0.002967 +Model:6, iter:55 -- Old Val loss:0.032199 New Val loss:0.076804 -- New Train loss:0.002157 +Model:7, iter:33 -- Old Val loss:0.035099 New Val loss:0.053482 -- New Train loss:0.004815 +Model:8, iter:57 -- Old Val loss:0.036956 New Val loss:0.071471 -- New Train loss:0.001895 + Policy it 0..Sim ep: 214 +Log Std policy inner: -1.2614723 + Policy it 1..Sim ep: 221 +Log Std policy inner: -1.2619703 + Policy it 2..Sim ep: 230 +Log Std policy inner: -1.2956076 +Iterations: 281 +Simulated test: ** -5.9154358967900045 ** -5.607767906600493 ** -5.825385672767879 ** -5.7825224676902875 ** -5.50286251256126 ** -5.674316593422555 ** -5.470836413850775 ** -5.722005441051442 ** -5.9293603613856245 ** + Policy it 3..Sim ep: 223 +Log Std policy inner: -1.341992 + Policy it 4..Sim ep: 220 +Log Std policy inner: -1.3568732 + Policy it 5..Sim ep: 222 +Log Std policy inner: -1.3414447 +Iterations: 284 +Simulated test: ** -5.110321991208475 ** -5.784301069222856 ** -5.834317748394387 ** -6.597116442755796 ** -5.573635008046404 ** -5.478849440928025 ** -6.167168490402982 ** -5.635858163475059 ** -5.736654309523292 ** + Policy it 6..Sim ep: 226 +Log Std policy inner: -1.3461063 + Policy it 7..Sim ep: 221 +Log Std policy inner: -1.3113725 + Policy it 8..Sim ep: 219 +Log Std policy inner: -1.2956781 +Iterations: 287 +Simulated test: ** -5.39107880145777 ** -5.563069674541184 ** -5.282739878387074 ** -5.913973490266362 ** -5.776788642807515 ** -5.827032230321784 ** -5.620581584571628 ** -5.369477356360876 ** -6.034744732079562 ** + Policy it 9..Sim ep: 226 +Log Std policy inner: -1.2601666 + Policy it 10..Sim ep: 229 +Log Std policy inner: -1.2648945 + Policy it 11..Sim ep: 223 +Log Std policy inner: -1.2760807 +Iterations: 290 +Simulated test: ** -5.221368772810238 ** -4.762462281593762 ** -6.040697259237058 ** -6.154109766394831 ** -5.590322461309842 ** -5.297979973362526 ** -5.3345837446523365 ** -5.948305162181787 ** -5.5345756734651514 ** + Policy it 12..Sim ep: 227 +Log Std policy inner: -1.2795877 + Policy it 13..Sim ep: 229 +Log Std policy inner: -1.2972158 + Policy it 14..Sim ep: 227 +Log Std policy inner: -1.2570375 +Iterations: 293 +Simulated test: ** -5.63916416849941 
** -4.999299177104258 ** -5.255721647528117 ** -5.041110476443136 ** -5.338745686075999 ** -5.359268644151744 ** -5.700069945558207 ** -5.609796099369414 ** -5.318256587991782 ** + Policy it 15..Sim ep: 226 +Log Std policy inner: -1.2683522 + Policy it 16..Sim ep: 230 +Log Std policy inner: -1.2337191 + Policy it 17..Sim ep: 224 +Log Std policy inner: -1.2293674 +Iterations: 296 +Simulated test: ** -4.472440794556751 ** -5.285158389740973 ** -5.591026213360019 ** -5.613506421814673 ** -5.1509122216457035 ** -5.10809492769913 ** -5.248587924095919 ** -5.4695961617567805 ** -5.298359978042426 ** + Policy it 18..Sim ep: 221 +Log Std policy inner: -1.2039182 + Policy it 19..Sim ep: 236 +Log Std policy inner: -1.2061968 + Policy it 20..Sim ep: 232 +Log Std policy inner: -1.215647 +Iterations: 299 +Simulated test: ** -4.562362954549026 ** -4.6337766119359 ** -5.782928629554808 ** -5.118122596987523 ** -4.775127342592459 ** -4.398422788373137 ** -5.171455690371804 ** -5.447418321811129 ** -5.0426614337973295 ** + Policy it 21..Sim ep: 226 +Log Std policy inner: -1.2068365 + Policy it 22..Sim ep: 238 +Log Std policy inner: -1.2163467 + Policy it 23..Sim ep: 234 +Log Std policy inner: -1.2348359 +Iterations: 302 +Simulated test: ** -5.164044317934604 ** -4.992701227866346 ** -5.074647097190027 ** -5.339175185459316 ** -5.52574767125072 ** -5.783522345316596 ** -4.825178678713565 ** -5.385441693312023 ** -5.118410609819548 ** + Policy it 24..Sim ep: 228 +Log Std policy inner: -1.2444385 + Policy it 25..Sim ep: 236 +Log Std policy inner: -1.2733375 + Policy it 26..Sim ep: 237 +Log Std policy inner: -1.2867402 +Iterations: 305 +Simulated test: ** -4.6514920138422164 ** -4.496135172665817 ** -4.950436442254577 ** -5.244910797719495 ** -5.059424332871568 ** -5.069807490889798 ** -5.927117782128043 ** -5.364170592259615 ** -4.989316556039267 ** + Policy it 27..Sim ep: 239 +Log Std policy inner: -1.2790132 + Policy it 28..Sim ep: 231 +Log Std policy inner: -1.3080683 + Policy it 29..Sim ep: 229 +Log Std policy inner: -1.31125 +Iterations: 308 +Simulated test: ** -5.266998996259353 ** -5.179592306285631 ** -4.42708935849223 ** -5.117235554884683 ** -5.513768735564081 ** -4.949639781874139 ** -5.042493809179868 ** -5.490727158421651 ** -4.942373574750963 ** + Policy it 30..Sim ep: 234 +Log Std policy inner: -1.335475 + Policy it 31..Sim ep: 230 +Log Std policy inner: -1.3352563 + Policy it 32..Sim ep: 238 +Log Std policy inner: -1.3351269 +Iterations: 311 +Simulated test: ** -5.320606136944844 ** -5.271466742485063 ** -5.224254337038146 ** -5.30545163730887 ** -5.017005814205622 ** -4.972562555950135 ** -5.341733282697387 ** -5.157319521009922 ** -5.0475092079269235 ** +break +============================ 9 ============================ +step 1 state [0.6735 0.23683333 0.67233333 0.91833333] a [-0.08333333 0.06821615 0.08333333 0.05692398] r -0.9963780651704468 +step 2 state [0.59016667 0.32016667 0.71983333 1. ] a [-0.08333333 0.08333333 0.04763453 0.08333333] r -0.9991384589826245 +step 3 state [0.50683333 0.4035 0.80316667 1. ] a [-0.08333333 0.08333333 0.08333333 0.07228646] r -0.9995658386076333 +step 4 state [0.4235 0.48683333 0.8865 1. ] a [-0.08333333 0.08333333 0.08333333 0.08113874] r -0.9998900593711374 +step 5 state [0.3425 0.55133333 0.96433333 1. ] a [-0.08088258 0.06465472 0.07787336 0.05032939] r -0.9998851469353267 +step 6 state [0.42583333 0.468 1. 1. ] a [ 0.08333333 -0.08333333 0.0805892 0.02578268] r -1.0002092425245657 +step 7 state [0.47883333 0.38466667 1. 1. 
] a [ 0.0531278 -0.08333333 0.08060243 0.04312839] r -1.0002436090163203 +step 8 state [0.56216667 0.3315 1. 1. ] a [ 0.08333333 -0.05303567 0.08333333 0.08333333] r -1.0002485185151422 +step 9 state [0.58766667 0.25833333 1. 1. ] a [ 0.02553286 -0.07302548 0.08333333 0.08333333] r -1.0001306905434124 +step 10 state [0.671 0.175 1. 1. ] a [ 0.08333333 -0.08333333 0.08333333 0.05033217] r -1.000037410065793 +step 1 state [0.079 0.17783333 0.16516667 0.32083333] a [-0.07607132 0.08333333 0.08333333 0.07387534] r -0.5033370900761144 +step 2 state [0. 0.2075 0.23583333 0.39383333] a [-0.08333333 0.02975158 0.07083122 0.07300692] r -0.24874194855497633 +step 3 state [0. 0.29083333 0.18316667 0.43716667] a [-0.02749481 0.08333333 -0.05256349 0.04339463] r -0.2919389713657231 +step 4 state [0. 0.3425 0.258 0.5205] a [-0.03627913 0.05167402 0.07493946 0.08333333] r -0.0822600923029092 +step 5 state [0. 0.42583333 0.31583333 0.60383333] a [-0.08333333 0.08333333 0.05791972 0.08333333] r 0.0770248943627696 +Saved current buffer default +Ep:9 Rew:-3.54 -- Step:180 +Train set: 144 Valid set: 36 +Log Std policy: [-1.4666332 -2.1128166 0.14819315 -1.9092511 ] -1.3351269 +maximum: 1.1540497887255392 +mb_lr: 0.001 +Model:0, iter:32 -- Old Val loss:0.025796 New Val loss:0.055386 -- New Train loss:0.004831 +Model:1, iter:42 -- Old Val loss:0.020196 New Val loss:0.032827 -- New Train loss:0.004144 +Model:2, iter:37 -- Old Val loss:0.028238 New Val loss:0.028534 -- New Train loss:0.003993 +Model:3, iter:56 -- Old Val loss:0.051619 New Val loss:0.029773 -- New Train loss:0.001934 +Model:4, iter:31 -- Old Val loss:0.026135 New Val loss:0.034855 -- New Train loss:0.005531 +Model:5, iter:113 -- Old Val loss:0.023617 New Val loss:0.032641 -- New Train loss:0.000604 +Model:6, iter:22 -- Old Val loss:0.026493 New Val loss:0.052342 -- New Train loss:0.005896 +Model:7, iter:39 -- Old Val loss:0.026629 New Val loss:0.028695 -- New Train loss:0.003255 +Model:8, iter:63 -- Old Val loss:0.026074 New Val loss:0.025194 -- New Train loss:0.001441 + Policy it 0..Sim ep: 239 +Log Std policy inner: -1.3142918 + Policy it 1..Sim ep: 249 +Log Std policy inner: -1.3485262 + Policy it 2..Sim ep: 245 +Log Std policy inner: -1.3527349 +Iterations: 314 +Simulated test: ** -7.035768718463369 ** -6.767683692388236 ** -6.243127854062477 ** -6.249172213927377 ** -5.648143993092235 ** -5.500557541913004 ** -5.662764012960833 ** -5.7065242485783525 ** -5.573644285284681 ** + Policy it 3..Sim ep: 244 +Log Std policy inner: -1.3759329 + Policy it 4..Sim ep: 258 +Log Std policy inner: -1.37362 + Policy it 5..Sim ep: 255 +Log Std policy inner: -1.3626547 +Iterations: 317 +Simulated test: ** -7.13919180134777 ** -6.793567861469928 ** -5.5059588344371875 ** -5.811584998390171 ** -5.186027720263228 ** -5.780590557034884 ** -5.745665466953069 ** -5.531294616300147 ** -5.5365843412495455 ** + Policy it 6..Sim ep: 254 +Log Std policy inner: -1.3697741 + Policy it 7..Sim ep: 247 +Log Std policy inner: -1.3853354 + Policy it 8..Sim ep: 243 +Log Std policy inner: -1.3792036 +Iterations: 320 +Simulated test: ** -6.892314064854291 ** -6.5878975333762355 ** -6.048239682270214 ** -5.646600879232865 ** -5.888920319918543 ** -5.58194050565362 ** -5.545792429400754 ** -5.5488771965383785 ** -5.997883695734199 ** + Policy it 9..Sim ep: 253 +Log Std policy inner: -1.4123871 + Policy it 10..Sim ep: 243 +Log Std policy inner: -1.4344711 + Policy it 11..Sim ep: 244 +Log Std policy inner: -1.4475899 +Iterations: 323 +Simulated test: ** -7.247048581214622 ** 
-6.687784046919696 ** -5.356025612058584 ** -6.227140933961055 ** -5.43074242630857 ** -5.450292658484541 ** -5.2811819560569715 ** -5.946161738049995 ** -5.2447675967728715 ** + Policy it 12..Sim ep: 245 +Log Std policy inner: -1.4492623 + Policy it 13..Sim ep: 244 +Log Std policy inner: -1.4852448 + Policy it 14..Sim ep: 252 +Log Std policy inner: -1.4940093 +Iterations: 326 +Simulated test: ** -7.457662394694053 ** -6.854641807274893 ** -5.880431227628142 ** -5.847485048731324 ** -5.413292016297346 ** -5.412871209220903 ** -5.214348050304689 ** -5.3999478035490025 ** -5.669742405379656 ** + Policy it 15..Sim ep: 245 +Log Std policy inner: -1.479994 + Policy it 16..Sim ep: 238 +Log Std policy inner: -1.473659 + Policy it 17..Sim ep: 245 +Log Std policy inner: -1.4583083 +Iterations: 329 +Simulated test: ** -6.435403930885368 ** -6.363080602327828 ** -5.921068074792275 ** -5.664209975134582 ** -5.494369317351374 ** -5.067847843334311 ** -5.762355559068965 ** -5.043612839716952 ** -5.716919399431791 ** + Policy it 18..Sim ep: 246 +Log Std policy inner: -1.4714742 + Policy it 19..Sim ep: 255 +Log Std policy inner: -1.4868678 + Policy it 20..Sim ep: 253 +Log Std policy inner: -1.5035083 +Iterations: 332 +Simulated test: ** -6.805984759842977 ** -6.2648921196593435 ** -6.011013599012804 ** -6.055150592516875 ** -5.734408484869636 ** -4.961418144764611 ** -5.537225648541935 ** -5.46498543344438 ** -5.930863180705782 ** + Policy it 21..Sim ep: 247 +Log Std policy inner: -1.501362 + Policy it 22..Sim ep: 246 +Log Std policy inner: -1.487319 + Policy it 23..Sim ep: 252 +Log Std policy inner: -1.4921474 +Iterations: 335 +Simulated test: ** -6.576378054119996 ** -6.982760663339868 ** -6.252117561938649 ** -6.378705710854847 ** -5.1986972130776845 ** -4.910535996161052 ** -5.58418819944025 ** -5.744299247525633 ** -5.902854647598579 ** + Policy it 24..Sim ep: 243 +Log Std policy inner: -1.5068778 + Policy it 25..Sim ep: 255 +Log Std policy inner: -1.4928318 + Policy it 26..Sim ep: 253 +Log Std policy inner: -1.4736862 +Iterations: 338 +Simulated test: ** -6.790438791267807 ** -6.696634468622506 ** -5.850346773257479 ** -5.5269764719926755 ** -5.266540918382234 ** -5.010365808864008 ** -5.410205966795329 ** -5.318231917631347 ** -5.337443829472177 ** + Policy it 27..Sim ep: 240 +Log Std policy inner: -1.4543345 + Policy it 28..Sim ep: 245 +Log Std policy inner: -1.4892614 + Policy it 29..Sim ep: 241 +Log Std policy inner: -1.4953567 +Iterations: 341 +Simulated test: ** -6.9870956014562395 ** -6.79652489460539 ** -5.2021757680823795 ** -6.428077445873059 ** -6.1476179216936 ** -5.601442516993266 ** -5.558395151065197 ** -5.638512327724602 ** -4.860878207045607 ** +break +============================ 10 ============================ +step 1 state [0.23133333 0.121 0.4415 0.11316667] a [-0.08333333 0.08333333 -0.08333333 0.07904451] r -0.9133522023393907 +step 2 state [0.18533333 0.20433333 0.35816667 0.16066667] a [-0.04590009 0.08333333 -0.08333333 0.04765205] r -0.8444081934808414 +step 3 state [0.102 0.28766667 0.35533333 0.23483333] a [-0.08333333 0.08333333 -0.00283056 0.07429221] r -0.5942478211608786 +step 4 state [0.01866667 0.33283333 0.3215 0.31816667] a [-0.08333333 0.04525377 -0.0336932 0.08333333] r -0.359247614245814 +step 5 state [0. 0.41616667 0.3025 0.257 ] a [-0.08333333 0.08333333 -0.01887657 -0.06100396] r -0.6955290450956644 +step 6 state [0. 0.4995 0.25 0.2875] a [-0.08333333 0.08333333 -0.05244268 0.03051555] r -0.7660860084729102 +step 7 state [0. 
0.57016667 0.2 0.37083333] a [-0.06775189 0.0708036 -0.04995783 0.08333333] r -0.6055397081046443 +step 8 state [0. 0.6535 0.207 0.43683333] a [-0.02131889 0.08333333 0.00716266 0.06606407] r -0.5451236147492513 +step 9 state [0. 0.73683333 0.29033333 0.52016667] a [-0.08333333 0.08333333 0.08333333 0.08333333] r -0.20036447336081542 +step 10 state [0. 0.813 0.322 0.567] a [-0.08333333 0.0762296 0.03181033 0.04687697] r -0.21067603899916876 +step 1 state [0.52516667 0.54616667 0. 0.00916667] a [-0.02205891 0.08243082 -0.06216575 -0.03371823] r -1.0000226714666516 +step 2 state [0.44183333 0.6295 0.02533333 0. ] a [-0.08333333 0.08333333 0.02548971 -0.05122475] r -1.0000275787787172 +step 3 state [0.36516667 0.71283333 0.00416667 0.025 ] a [-0.0766629 0.08333333 -0.02108196 0.02505259] r -0.999934339849473 +step 4 state [0.28183333 0.6895 0. 0.08966667] a [-0.08333333 -0.0232098 -0.06547878 0.06468551] r -1.000106095771765 +step 5 state [0.1985 0.77283333 0. 0.14533333] a [-0.08333333 0.08333333 -0.08333333 0.05570139] r -1.0001505288819812 +Saved current buffer default +Ep:10 Rew:-1.47 -- Step:195 +Train set: 156 Valid set: 39 +Log Std policy: [-1.7536619 -2.3254843 0.32392627 -2.2262065 ] -1.4953567 +maximum: 1.1540497887255392 +mb_lr: 0.001 +Model:0, iter:78 -- Old Val loss:0.032016 New Val loss:0.016522 -- New Train loss:0.001545 +Model:1, iter:71 -- Old Val loss:0.023626 New Val loss:0.016919 -- New Train loss:0.001373 +Model:2, iter:119 -- Old Val loss:0.027454 New Val loss:0.009610 -- New Train loss:0.000878 +Model:3, iter:128 -- Old Val loss:0.016659 New Val loss:0.017275 -- New Train loss:0.000669 +Model:4, iter:36 -- Old Val loss:0.026790 New Val loss:0.026449 -- New Train loss:0.005564 +Model:5, iter:121 -- Old Val loss:0.024587 New Val loss:0.015540 -- New Train loss:0.000683 +Model:6, iter:73 -- Old Val loss:0.033147 New Val loss:0.029826 -- New Train loss:0.001840 +Model:7, iter:86 -- Old Val loss:0.025195 New Val loss:0.022398 -- New Train loss:0.001611 +Model:8, iter:56 -- Old Val loss:0.021348 New Val loss:0.014081 -- New Train loss:0.001868 + Policy it 0..Sim ep: 283 +Log Std policy inner: -1.4872303 + Policy it 1..Sim ep: 269 +Log Std policy inner: -1.525423 + Policy it 2..Sim ep: 275 +Log Std policy inner: -1.5701447 +Iterations: 344 +Simulated test: ** -5.8855892356159165 ** -5.369192079220957 ** -5.789472084473818 ** -4.558261390051339 ** -4.780741367189912 ** -4.562546079584863 ** -5.3208645162452015 ** -5.627450551195361 ** -4.284679091131547 ** + Policy it 3..Sim ep: 273 +Log Std policy inner: -1.5814364 + Policy it 4..Sim ep: 275 +Log Std policy inner: -1.5854559 + Policy it 5..Sim ep: 260 +Log Std policy inner: -1.5952637 +Iterations: 347 +Simulated test: ** -5.99221459653927 ** -5.104481176364934 ** -5.907115074477624 ** -4.980187640297227 ** -4.996757277315482 ** -5.147459109311458 ** -4.77116376823571 ** -4.725036548010539 ** -4.4885160111100415 ** + Policy it 6..Sim ep: 273 +Log Std policy inner: -1.5975437 + Policy it 7..Sim ep: 267 +Log Std policy inner: -1.6128168 + Policy it 8..Sim ep: 274 +Log Std policy inner: -1.6407435 +Iterations: 350 +Simulated test: ** -5.913793428119971 ** -4.4600265771651175 ** -5.592985547669232 ** -4.97644192387932 ** -4.269345885349903 ** -5.204448189372197 ** -4.9427748552314 ** -4.884363863549661 ** -4.838353817112511 ** + Policy it 9..Sim ep: 261 +Log Std policy inner: -1.6353154 + Policy it 10..Sim ep: 264 +Log Std policy inner: -1.6538124 + Policy it 11..Sim ep: 264 +Log Std policy inner: -1.6663389 +Iterations: 353 
+Simulated test: ** -6.293896444526909 ** -6.114544516809983 ** -5.260888779725792 ** -4.589428322177846 ** -4.3530528624361615 ** -4.445434945935849 ** -4.373972051204182 ** -4.493493590882863 ** -4.809225211113226 ** + Policy it 12..Sim ep: 283 +Log Std policy inner: -1.6781329 + Policy it 13..Sim ep: 278 +Log Std policy inner: -1.6875757 + Policy it 14..Sim ep: 277 +Log Std policy inner: -1.6881796 +Iterations: 356 +Simulated test: ** -5.787157639906509 ** -5.8138210613915 ** -4.755749723319895 ** -4.491010011027101 ** -4.528691344668623 ** -4.64239845026168 ** -4.546525247567334 ** -4.9601500884164125 ** -5.237130564552499 ** + Policy it 15..Sim ep: 264 +Log Std policy inner: -1.6791269 + Policy it 16..Sim ep: 270 +Log Std policy inner: -1.6742737 + Policy it 17..Sim ep: 271 +Log Std policy inner: -1.697093 +Iterations: 359 +Simulated test: ** -5.587129883926828 ** -4.6093566688010466 ** -4.892870360830566 ** -5.136217143698014 ** -4.911591867747484 ** -4.153079087763326 ** -4.149498692083871 ** -4.647020899889903 ** -4.573239599009394 ** + Policy it 18..Sim ep: 258 +Log Std policy inner: -1.7233273 + Policy it 19..Sim ep: 265 +Log Std policy inner: -1.7094574 + Policy it 20..Sim ep: 280 +Log Std policy inner: -1.7125609 +Iterations: 362 +Simulated test: ** -5.783867007024819 ** -4.229529877398163 ** -4.857802416365594 ** -5.25543651732849 ** -5.063494062868558 ** -4.653899989541096 ** -5.232927613624051 ** -4.975451330539218 ** -4.480161067063454 ** + Policy it 21..Sim ep: 256 +Log Std policy inner: -1.6998785 + Policy it 22..Sim ep: 260 +Log Std policy inner: -1.7373717 + Policy it 23..Sim ep: 270 +Log Std policy inner: -1.7465553 +Iterations: 365 +Simulated test: ** -6.151121516787389 ** -5.132461313153181 ** -4.849716333814431 ** -4.636684785715188 ** -4.3934966270904985 ** -4.646759548168629 ** -4.4156298923460415 ** -4.3428458535039685 ** -4.302711642894428 ** + Policy it 24..Sim ep: 256 +Log Std policy inner: -1.7592392 + Policy it 25..Sim ep: 271 +Log Std policy inner: -1.7666717 + Policy it 26..Sim ep: 277 +Log Std policy inner: -1.7579858 +Iterations: 368 +Simulated test: ** -6.185065902032656 ** -4.586833791786339 ** -3.980442660301924 ** -4.450531902157236 ** -4.593856520415284 ** -4.682695685983635 ** -3.7848233885900116 ** -4.472804445946822 ** -4.176387025525328 ** + Policy it 27..Sim ep: 275 +Log Std policy inner: -1.7585429 + Policy it 28..Sim ep: 271 +Log Std policy inner: -1.7814736 + Policy it 29..Sim ep: 270 +Log Std policy inner: -1.7916954 +Iterations: 371 +Simulated test: ** -6.285831741548755 ** -5.065227924654027 ** -4.708841022355482 ** -4.274834952475503 ** -4.5345032358367465 ** -4.332930473538581 ** -4.704613296219613 ** -4.42203134582378 ** -5.432813837137073 ** + Policy it 30..Sim ep: 272 +Log Std policy inner: -1.7899911 + Policy it 31..Sim ep: 268 +Log Std policy inner: -1.7938856 + Policy it 32..Sim ep: 259 +Log Std policy inner: -1.8165827 +Iterations: 374 +Simulated test: ** -5.75598296778553 ** -4.621114984820597 ** -4.767002460405347 ** -4.14580965907604 ** -3.8837578122352716 ** -3.282086520093726 ** -4.889804361290299 ** -4.3686916863056835 ** -4.852042821911164 ** + Policy it 33..Sim ep: 269 +Log Std policy inner: -1.8245994 + Policy it 34..Sim ep: 267 +Log Std policy inner: -1.8532875 + Policy it 35..Sim ep: 265 +Log Std policy inner: -1.8535976 +Iterations: 377 +Simulated test: ** -5.814574957421282 ** -4.680251636309549 ** -4.591258714694414 ** -4.720739745072206 ** -3.8511868333554595 ** -4.562442533986177 ** -4.090359901788179 ** 
-3.979627114823088 ** -3.8344728461519115 ** + Policy it 36..Sim ep: 268 +Log Std policy inner: -1.8577278 + Policy it 37..Sim ep: 268 +Log Std policy inner: -1.8739562 + Policy it 38..Sim ep: 275 +Log Std policy inner: -1.873275 +Iterations: 380 +Simulated test: ** -5.622999608153477 ** -4.93054307885177 ** -5.6687731479050125 ** -4.73207805308979 ** -4.028035617964342 ** -4.349687304494437 ** -4.717400748729561 ** -4.70861469257623 ** -4.267171429255978 ** +break +============================ 11 ============================ +step 1 state [0.45616667 0.438 0.37233333 0.52716667] a [-0.08333333 0.08333333 0.01495398 0.08333333] r -0.0062617131735462905 +step 1 state [0.419 0.9565 0.15333333 0.92483333] a [-0.08333333 0.08333333 0.08333333 0.06652081] r -0.9908028649847141 +step 2 state [0.365 1. 0.17583333 1. ] a [-0.05395175 0.08333333 0.0225248 0.08333333] r -0.9916296870987517 +step 3 state [0.304 1. 0.25916667 1. ] a [-0.06085023 0.08333333 0.08333333 0.04550561] r -0.9680244672769251 +step 4 state [0.22533333 1. 0.31683333 1. ] a [-0.0785475 0.03728358 0.05780521 0.08333333] r -0.939085585893505 +step 5 state [0.16583333 1. 0.40016667 1. ] a [-0.05934372 0.08333333 0.08333333 0.0202603 ] r -0.8302645298419039 +step 6 state [0.10166667 1. 0.4835 1. ] a [-0.06415744 0.08333333 0.08333333 0.05029067] r -0.6547312155357659 +step 7 state [0.02866667 1. 0.44816667 0.91666667] a [-0.07294869 0.08333333 -0.03523551 -0.08333333] r -0.38304613090599426 +step 8 state [0. 1. 0.41833333 0.88233333] a [-0.08008265 0.08333333 -0.02978705 -0.03432195] r -0.3286815410944621 +step 9 state [0. 1. 0.50166667 0.8575 ] a [-0.08333333 0.08333333 0.08333333 -0.02478381] r -0.20547087591232438 +step 10 state [0.00166667 1. 0.43633333 0.80083333] a [ 0.00178104 0.07671668 -0.06517389 -0.05666539] r -0.1416816411373294 +step 1 state [0.0325 0.45516667 0.95916667 0.63066667] a [-0.03625587 -0.06343679 0.01375022 0.08333333] r -0.997395301911741 +step 2 state [0.0135 0.40416667 1. 0.69866667] a [-0.01889442 -0.05084278 0.05439445 0.06801496] r -0.9995708565795346 +step 3 state [0.08 0.32083333 1. 0.7785 ] a [ 0.0665423 -0.08333333 0.04785701 0.07996275] r -1.0001258253467458 +step 4 state [0.1545 0.2375 1. 
0.86183333] a [ 0.07464162 -0.08333333 0.08333333 0.08333333] r -1.0000816242944899 +Saved current buffer default +Ep:11 Rew:-0.94 -- Step:210 +Train set: 168 Valid set: 42 +Log Std policy: [-2.3267365 -2.707216 -0.12601092 -2.3331368 ] -1.873275 +maximum: 1.1540497887255392 +mb_lr: 0.001 +Model:0, iter:42 -- Old Val loss:0.010066 New Val loss:0.015823 -- New Train loss:0.002642 +Model:1, iter:116 -- Old Val loss:0.016264 New Val loss:0.023414 -- New Train loss:0.000659 +Model:2, iter:70 -- Old Val loss:0.010028 New Val loss:0.009992 -- New Train loss:0.001142 +Model:3, iter:60 -- Old Val loss:0.011729 New Val loss:0.014682 -- New Train loss:0.001197 +Model:4, iter:76 -- Old Val loss:0.028075 New Val loss:0.018067 -- New Train loss:0.001355 +Model:5, iter:76 -- Old Val loss:0.008586 New Val loss:0.014985 -- New Train loss:0.001045 +Model:6, iter:124 -- Old Val loss:0.024362 New Val loss:0.016724 -- New Train loss:0.000395 +Model:7, iter:73 -- Old Val loss:0.013550 New Val loss:0.014110 -- New Train loss:0.001009 +Model:8, iter:83 -- Old Val loss:0.016758 New Val loss:0.022745 -- New Train loss:0.000681 + Policy it 0..Sim ep: 245 +Log Std policy inner: -1.8850797 + Policy it 1..Sim ep: 240 +Log Std policy inner: -1.8845494 + Policy it 2..Sim ep: 259 +Log Std policy inner: -1.9082427 +Iterations: 383 +Simulated test: ** -5.718183715022169 ** -5.254342783105094 ** -5.0683951183577305 ** -5.085637576290756 ** -4.84326701391954 ** -5.066008973305579 ** -4.850213729423121 ** -5.015544446512358 ** -5.058990795812569 ** + Policy it 3..Sim ep: 242 +Log Std policy inner: -1.9259969 + Policy it 4..Sim ep: 243 +Log Std policy inner: -1.948729 + Policy it 5..Sim ep: 263 +Log Std policy inner: -1.9843848 +Iterations: 386 +Simulated test: ** -5.60170544364606 ** -5.164631131004426 ** -5.020123284736765 ** -5.3185520555533 ** -5.160604337097611 ** -4.225650324174203 ** -4.856894877278246 ** -4.5506112452715755 ** -4.410443361120997 ** + Policy it 6..Sim ep: 246 +Log Std policy inner: -1.983098 + Policy it 7..Sim ep: 248 +Log Std policy inner: -1.9916747 + Policy it 8..Sim ep: 249 +Log Std policy inner: -1.997255 +Iterations: 389 +Simulated test: ** -5.561274182978086 ** -5.543509171389742 ** -4.952332763660234 ** -4.697497327773017 ** -4.9913703377335334 ** -4.497550834626891 ** -4.827987883308669 ** -4.039039079120848 ** -4.4172861578234 ** + Policy it 9..Sim ep: 248 +Log Std policy inner: -1.992902 + Policy it 10..Sim ep: 248 +Log Std policy inner: -1.9869597 + Policy it 11..Sim ep: 258 +Log Std policy inner: -1.9885603 +Iterations: 392 +Simulated test: ** -6.371597522413358 ** -5.0812568877334705 ** -5.08944809151697 ** -4.864748721361393 ** -4.342941975856084 ** -5.179185133402934 ** -4.699580243655946 ** -4.163761088307365 ** -4.746809272859537 ** + Policy it 12..Sim ep: 245 +Log Std policy inner: -2.0147305 + Policy it 13..Sim ep: 254 +Log Std policy inner: -2.033341 + Policy it 14..Sim ep: 250 +Log Std policy inner: -2.0478864 +Iterations: 395 +Simulated test: ** -5.6673416269029255 ** -5.090523396355565 ** -4.688626385611715 ** -4.427618433665484 ** -4.557674385576974 ** -5.278761352300062 ** -5.140607300990959 ** -4.291751820917707 ** -4.886857462825429 ** + Policy it 15..Sim ep: 262 +Log Std policy inner: -2.048203 + Policy it 16..Sim ep: 262 +Log Std policy inner: -2.0506248 + Policy it 17..Sim ep: 248 +Log Std policy inner: -2.054631 +Iterations: 398 +Simulated test: ** -5.438536385409534 ** -4.947882785657421 ** -4.499870726520312 ** -4.881453552695457 ** -4.342739057574509 ** 
-4.596720599262044 ** -5.133284298470244 ** -4.781146785292076 ** -4.440095089538954 ** + Policy it 18..Sim ep: 257 +Log Std policy inner: -2.0562358 + Policy it 19..Sim ep: 256 +Log Std policy inner: -2.0680192 + Policy it 20..Sim ep: 264 +Log Std policy inner: -2.0900717 +Iterations: 401 +Simulated test: ** -5.469702604252961 ** -5.297280491468264 ** -4.195564593723975 ** -4.040232239518664 ** -4.107419026906136 ** -4.628664613853761 ** -4.237199623855413 ** -4.2529985489521644 ** -4.768516876371577 ** + Policy it 21..Sim ep: 266 +Log Std policy inner: -2.092719 + Policy it 22..Sim ep: 266 +Log Std policy inner: -2.09022 + Policy it 23..Sim ep: 255 +Log Std policy inner: -2.1008115 +Iterations: 404 +Simulated test: ** -5.128587757230271 ** -4.718237626951886 ** -4.368105447883718 ** -4.912903569983318 ** -4.6768267537653445 ** -4.256038590925746 ** -4.4970197102008385 ** -3.9388030780942063 ** -4.357333443951793 ** + Policy it 24..Sim ep: 251 +Log Std policy inner: -2.1169033 + Policy it 25..Sim ep: 258 +Log Std policy inner: -2.123653 + Policy it 26..Sim ep: 274 +Log Std policy inner: -2.1334991 +Iterations: 407 +Simulated test: ** -5.304483771054074 ** -5.418415129364584 ** -4.537420240510255 ** -4.133236787987407 ** -4.485752472779714 ** -4.34975463722425 ** -4.40327157152613 ** -3.914132472618949 ** -3.659904984676978 ** + Policy it 27..Sim ep: 274 +Log Std policy inner: -2.1439269 + Policy it 28..Sim ep: 245 +Log Std policy inner: -2.1490576 + Policy it 29..Sim ep: 268 +Log Std policy inner: -2.1539435 +Iterations: 410 +Simulated test: ** -4.972719817496836 ** -4.250492272621487 ** -4.047979582854896 ** -4.227067542573204 ** -4.699901302815415 ** -4.7433749990956855 ** -3.5966001183213665 ** -4.386570990855689 ** -4.371368859785726 ** + Policy it 30..Sim ep: 264 +Log Std policy inner: -2.189729 + Policy it 31..Sim ep: 259 +Log Std policy inner: -2.1972642 + Policy it 32..Sim ep: 271 +Log Std policy inner: -2.2189531 +Iterations: 413 +Simulated test: ** -5.311906214281916 ** -4.664255052248482 ** -4.158499688562006 ** -3.945029716966674 ** -4.278914213514072 ** -3.6824502215813846 ** -4.471254342263564 ** -4.319594509971794 ** -4.036227583428845 ** + Policy it 33..Sim ep: 254 +Log Std policy inner: -2.2282693 + Policy it 34..Sim ep: 262 +Log Std policy inner: -2.2182095 + Policy it 35..Sim ep: 259 +Log Std policy inner: -2.2178228 +Iterations: 416 +Simulated test: ** -5.422300926547614 ** -4.817795365208294 ** -4.283393646159675 ** -4.133027245961712 ** -4.3739759011240675 ** -3.767156385027338 ** -3.8413291370437945 ** -4.18046609162644 ** -4.292720819510286 ** +break +============================ 12 ============================ +step 1 state [0.92833333 0.9575 0.79316667 0.23683333] a [-0.00563247 0.00254853 -0.06070983 -0.08333333] r -0.5872559438817724 +step 2 state [0.845 0.948 0.8515 0.1535] a [-0.08333333 -0.0093609 0.05846448 -0.08333333] r -0.93569892811174 +step 3 state [0.92833333 0.96983333 0.76816667 0.16216667] a [ 0.08333333 0.02187726 -0.08333333 0.00872206] r -0.5856231137070014 +step 4 state [0.95466667 1. 0.733 0.10516667] a [ 0.02633742 0.07447072 -0.03513967 -0.05689329] r -0.6594630058873234 +step 5 state [0.89416667 1. 0.64966667 0.1885 ] a [-0.06039964 0.08333333 -0.08333333 0.08333333] r -0.399206782736788 +step 6 state [0.91716667 1. 0.66466667 0.19816667] a [0.02310511 0.05292439 0.01509858 0.00974523] r -0.35327051173063895 +step 7 state [0.87016667 1. 
0.68383333 0.11483333] a [-0.04685547 0.08333333 0.01926808 -0.08333333] r -0.524354575062854 +step 8 state [0.78683333 1. 0.76716667 0.05183333] a [-0.08333333 0.08333333 0.08333333 -0.06291927] r -0.8811409597040818 +step 9 state [0.85133333 1. 0.8505 0.02133333] a [ 0.06451182 0.08333333 0.08333333 -0.03037987] r -0.9412736245594354 +step 10 state [0.80966667 1. 0.77333333 0. ] a [-0.04156027 0.05522199 -0.07704539 -0.08333333] r -0.9459288797327177 +step 1 state [0.75483333 0.663 0.4375 0.227 ] a [-0.08333333 0.0770627 -0.03094244 0.08333333] r -0.3065724331024866 +step 2 state [0.71516667 0.72383333 0.52083333 0.31033333] a [-0.03962393 0.06088663 0.08333333 0.08333333] r -0.14790354065767364 +step 3 state [0.63483333 0.80716667 0.578 0.349 ] a [-0.08026228 0.08333333 0.05718987 0.03872777] r -0.11432441830306139 +step 4 state [0.58166667 0.8905 0.66133333 0.43233333] a [-0.05312927 0.08333333 0.08333333 0.08333333] r -0.13491931335787855 +step 5 state [0.49833333 0.97383333 0.58466667 0.49 ] a [-0.08333333 0.08333333 -0.07655758 0.05772259] r 0.0024389649963045823 +Saved current buffer default +Ep:12 Rew:-0.01 -- Step:225 +Train set: 180 Valid set: 45 +Log Std policy: [-2.6108477 -2.800721 -1.1367432 -2.3229797] -2.2178228 +maximum: 1.1540497887255392 +mb_lr: 0.001 +Model:0, iter:61 -- Old Val loss:0.011175 New Val loss:0.016131 -- New Train loss:0.001244 +Model:1, iter:68 -- Old Val loss:0.013857 New Val loss:0.019771 -- New Train loss:0.000961 +Model:2, iter:43 -- Old Val loss:0.005254 New Val loss:0.017327 -- New Train loss:0.001943 +Model:3, iter:70 -- Old Val loss:0.006733 New Val loss:0.021571 -- New Train loss:0.001019 +Model:4, iter:42 -- Old Val loss:0.009946 New Val loss:0.027417 -- New Train loss:0.001908 +Model:5, iter:45 -- Old Val loss:0.009862 New Val loss:0.020543 -- New Train loss:0.001487 +Model:6, iter:51 -- Old Val loss:0.008169 New Val loss:0.012407 -- New Train loss:0.001352 +Model:7, iter:66 -- Old Val loss:0.005115 New Val loss:0.023171 -- New Train loss:0.001066 +Model:8, iter:48 -- Old Val loss:0.010436 New Val loss:0.018952 -- New Train loss:0.001465 + Policy it 0..Sim ep: 255 +Log Std policy inner: -2.213612 + Policy it 1..Sim ep: 252 +Log Std policy inner: -2.2194147 + Policy it 2..Sim ep: 250 +Log Std policy inner: -2.2160075 +Iterations: 419 +Simulated test: ** -3.6438334113475865 ** -3.9121729277819397 ** -4.163248230307363 ** -3.5067567816883094 ** -3.745637294355547 ** -4.517785567448591 ** -4.000642270582321 ** -4.59419737810269 ** -3.812637417039077 ** + Policy it 3..Sim ep: 256 +Log Std policy inner: -2.2200737 + Policy it 4..Sim ep: 249 +Log Std policy inner: -2.2392282 + Policy it 5..Sim ep: 246 +Log Std policy inner: -2.242082 +Iterations: 422 +Simulated test: ** -4.0371869644708935 ** -3.7666816409514285 ** -3.8951795677491465 ** -4.8586786858737465 ** -4.407423175624571 ** -4.039669733453775 ** -3.7503060459252446 ** -4.516402689559618 ** -4.217775999331497 ** + Policy it 6..Sim ep: 262 +Log Std policy inner: -2.25545 + Policy it 7..Sim ep: 254 +Log Std policy inner: -2.2396858 + Policy it 8..Sim ep: 254 +Log Std policy inner: -2.2562091 +Iterations: 425 +Simulated test: ** -4.062539040186093 ** -3.4936241615458856 ** -4.788889337606961 ** -3.763321932880208 ** -4.447628625461366 ** -3.6861599717638454 ** -3.9274735899036752 ** -3.5038364543014904 ** -4.23532448217622 ** + Policy it 9..Sim ep: 251 +Log Std policy inner: -2.2545986 + Policy it 10..Sim ep: 257 +Log Std policy inner: -2.263117 + Policy it 11..Sim ep: 259 +Log Std policy inner: 
-2.2653713 +Iterations: 428 +Simulated test: ** -4.123914964079159 ** -4.236529636137566 ** -3.7492185693338977 ** -3.6854383769340346 ** -4.344393762623659 ** -4.963410056879511 ** -3.7292600047827 ** -3.9951870175404474 ** -4.914943377450109 ** + Policy it 12..Sim ep: 257 +Log Std policy inner: -2.257984 + Policy it 13..Sim ep: 260 +Log Std policy inner: -2.2586718 + Policy it 14..Sim ep: 262 +Log Std policy inner: -2.269639 +Iterations: 431 +Simulated test: ** -3.4944708281569183 ** -4.373080707630143 ** -3.67168883656268 ** -4.116918420111761 ** -3.6912588003254494 ** -4.126153245560126 ** -3.829740879997844 ** -4.605185127037112 ** -3.6458408655854875 ** + Policy it 15..Sim ep: 251 +Log Std policy inner: -2.279216 + Policy it 16..Sim ep: 256 +Log Std policy inner: -2.276334 + Policy it 17..Sim ep: 261 +Log Std policy inner: -2.299789 +Iterations: 434 +Simulated test: ** -3.855813453537412 ** -4.133621239713393 ** -4.495736045492813 ** -4.0808854336489455 ** -3.7706041617483423 ** -3.318246359459299 ** -3.7823841968632768 ** -3.680995405644644 ** -3.8486486661597157 ** + Policy it 18..Sim ep: 256 +Log Std policy inner: -2.310961 + Policy it 19..Sim ep: 257 +Log Std policy inner: -2.3207083 + Policy it 20..Sim ep: 254 +Log Std policy inner: -2.3383749 +Iterations: 437 +Simulated test: ** -3.443089796137065 ** -3.750838260507444 ** -3.6019720463082194 ** -3.9448445129487664 ** -3.963688613855047 ** -3.5474403724518195 ** -3.6594584724027666 ** -4.397184645449743 ** -4.051662736868312 ** + Policy it 21..Sim ep: 267 +Log Std policy inner: -2.3580394 + Policy it 22..Sim ep: 262 +Log Std policy inner: -2.3634794 + Policy it 23..Sim ep: 257 +Log Std policy inner: -2.362862 +Iterations: 440 +Simulated test: ** -3.3683316720311995 ** -3.3401652528718113 ** -4.111402468982123 ** -4.229508443290833 ** -4.611523871903919 ** -3.35341975941672 ** -3.8975539215188473 ** -3.8016954489267665 ** -3.9759876890224404 ** + Policy it 24..Sim ep: 265 +Log Std policy inner: -2.3785098 + Policy it 25..Sim ep: 247 +Log Std policy inner: -2.3790283 + Policy it 26..Sim ep: 261 +Log Std policy inner: -2.3916926 +Iterations: 443 +Simulated test: ** -3.7353753707371653 ** -3.041071001761011 ** -4.00983146149636 ** -3.7678710048086943 ** -2.798418278991012 ** -3.5240994712582325 ** -3.893985438523814 ** -3.9797604746581055 ** -3.350092282590922 ** + Policy it 27..Sim ep: 263 +Log Std policy inner: -2.4001074 + Policy it 28..Sim ep: 257 +Log Std policy inner: -2.4069858 + Policy it 29..Sim ep: 265 +Log Std policy inner: -2.4156415 +Iterations: 446 +Simulated test: ** -3.150443958719261 ** -3.6878705937962515 ** -3.6852362195122987 ** -3.6998555886605753 ** -3.9344725412735717 ** -3.6391539225314045 ** -3.5442362088989467 ** -3.6415534083195964 ** -3.196138935931085 ** + Policy it 30..Sim ep: 258 +Log Std policy inner: -2.423258 + Policy it 31..Sim ep: 267 +Log Std policy inner: -2.4478393 + Policy it 32..Sim ep: 259 +Log Std policy inner: -2.4496045 +Iterations: 449 +Simulated test: ** -3.9226918198703786 ** -3.093235887257615 ** -4.186055034287274 ** -3.686276858301135 ** -3.771425253287889 ** -3.417463651387952 ** -4.033772168834694 ** -3.400457639994565 ** -3.4039202606794423 ** + Policy it 33..Sim ep: 263 +Log Std policy inner: -2.45261 + Policy it 34..Sim ep: 264 +Log Std policy inner: -2.4656332 + Policy it 35..Sim ep: 265 +Log Std policy inner: -2.4808033 +Iterations: 452 +Simulated test: ** -3.4849572526686825 ** -3.1851877995143876 ** -3.3203382733976468 ** -3.667385566670855 ** -3.3812336720176974 ** 
-3.201201836955734 ** -3.597743657710962 ** -3.463193461813207 ** -3.741955748540349 ** + Policy it 36..Sim ep: 261 +Log Std policy inner: -2.4821045 + Policy it 37..Sim ep: 279 +Log Std policy inner: -2.4867525 + Policy it 38..Sim ep: 264 +Log Std policy inner: -2.489933 +Iterations: 455 +Simulated test: ** -3.570570999985648 ** -3.495476631784113 ** -3.042203600470675 ** -3.6669336073100567 ** -3.2496194478739926 ** -3.612904631921556 ** -3.134457896204549 ** -4.163897986500524 ** -3.729168866048567 ** + Policy it 39..Sim ep: 273 +Log Std policy inner: -2.4934878 + Policy it 40..Sim ep: 261 +Log Std policy inner: -2.4999938 + Policy it 41..Sim ep: 276 +Log Std policy inner: -2.5194807 +Iterations: 458 +Simulated test: ** -3.577898368902388 ** -3.583112817504443 ** -4.0656693143583835 ** -3.6322194921039044 ** -3.974237142664788 ** -3.2064483005326476 ** -3.6107521734636974 ** -3.1992079606176413 ** -2.786228362452239 ** + Policy it 42..Sim ep: 256 +Log Std policy inner: -2.5151153 + Policy it 43..Sim ep: 257 +Log Std policy inner: -2.5330908 + Policy it 44..Sim ep: 265 +Log Std policy inner: -2.5347846 +Iterations: 461 +Simulated test: ** -3.6308926033170428 ** -3.3611981333000585 ** -3.2895748114283196 ** -3.8446151557285337 ** -3.4617038912558926 ** -3.5667140400694914 ** -3.501567017850466 ** -3.6126184655714315 ** -3.9330386070348324 ** + Policy it 45..Sim ep: 275 +Log Std policy inner: -2.5287437 + Policy it 46..Sim ep: 273 +Log Std policy inner: -2.5485864 + Policy it 47..Sim ep: 262 +Log Std policy inner: -2.5557716 +Iterations: 464 +Simulated test: ** -3.503360738256015 ** -3.46947609779425 ** -3.7957085039140654 ** -3.772559857630404 ** -3.789750758477894 ** -3.8313934312132187 ** -4.0599185261217645 ** -3.6069768048192783 ** -3.4546776528231566 ** + Policy it 48..Sim ep: 262 +Log Std policy inner: -2.5638773 + Policy it 49..Sim ep: 274 +Log Std policy inner: -2.5621924 +============================ 13 ============================ +step 1 state [0.5165 0.13483333 0.56033333 0.63416667] a [-0.0816634 0.08333333 0.08333333 0.07974407] r -0.2816718361078817 +step 2 state [0.44116667 0.21166667 0.64366667 0.7175 ] a [-0.07527875 0.07688812 0.08333333 0.08333333] r -0.628608830633007 +step 3 state [0.35833333 0.283 0.71166667 0.79516667] a [-0.08275715 0.071361 0.06815823 0.07778848] r -0.9604871847678422 +step 4 state [0.28083333 0.35733333 0.74666667 0.87583333] a [-0.07743168 0.07433631 0.03508573 0.08070745] r -0.9889943096073104 +step 5 state [0.1975 0.44066667 0.68666667 0.95866667] a [-0.08333333 0.08333333 -0.05996782 0.0828865 ] r -0.9869016218285938 +step 6 state [0.12416667 0.524 0.60333333 1. ] a [-0.07318082 0.08333333 -0.08333333 0.06671912] r -0.9441659562433332 +step 7 state [0.04083333 0.60733333 0.57833333 0.91666667] a [-0.08333333 0.08333333 -0.02483771 -0.08333333] r -0.5346733778948034 +step 8 state [0. 0.68766667 0.495 0.847 ] a [-0.08333333 0.08044712 -0.08333333 -0.06959659] r -0.19158210372359785 +step 9 state [0. 0.771 0.45283333 0.82083333] a [-0.08333333 0.08333333 -0.042134 -0.02614494] r -0.13056603265013844 +step 10 state [0. 0.8495 0.46533333 0.841 ] a [-0.07577579 0.07861612 0.01258039 0.02029891] r -0.13306247998930465 +step 1 state [0.53483333 0.92516667 0.15533333 0.4625 ] a [-0.07499632 0.06906182 0.08222613 0.06312318] r -0.9701303232343507 +step 2 state [0.4515 1. 0.23866667 0.54583333] a [-0.08333333 0.08167367 0.08333333 0.08333333] r -0.8017821715829524 +step 3 state [0.37166667 1. 
0.31 0.61416667] a [-0.07973625 0.08333333 0.07135879 0.06848479] r -0.35617247901699245 +step 4 state [0.291 1. 0.39333333 0.6975 ] a [-0.08063842 0.08333333 0.08333333 0.08333333] r -0.1419276595754987 +step 5 state [0.212 1. 0.47666667 0.78083333] a [-0.07896869 0.08333333 0.08333333 0.08333333] r -0.09811162306013455 +Saved current buffer default +Ep:13 Rew:-1.56 -- Step:240 +Train set: 192 Valid set: 48 +Log Std policy: [-2.9927533 -3.126045 -1.7507683 -2.3792033] -2.5621924 +maximum: 1.1540497887255392 +mb_lr: 0.001 +Model:0, iter:99 -- Old Val loss:0.004141 New Val loss:0.008184 -- New Train loss:0.000524 +Model:1, iter:63 -- Old Val loss:0.004186 New Val loss:0.010282 -- New Train loss:0.000962 +Model:2, iter:68 -- Old Val loss:0.006635 New Val loss:0.007721 -- New Train loss:0.001121 +Model:3, iter:66 -- Old Val loss:0.005667 New Val loss:0.008030 -- New Train loss:0.001020 +Model:4, iter:62 -- Old Val loss:0.011574 New Val loss:0.011715 -- New Train loss:0.001251 +Model:5, iter:71 -- Old Val loss:0.005811 New Val loss:0.009009 -- New Train loss:0.001069 +Model:6, iter:176 -- Old Val loss:0.006780 New Val loss:0.009683 -- New Train loss:0.000182 +Model:7, iter:61 -- Old Val loss:0.008761 New Val loss:0.006969 -- New Train loss:0.001124 +Model:8, iter:71 -- Old Val loss:0.006773 New Val loss:0.008090 -- New Train loss:0.000962 + Policy it 0..Sim ep: 284 +Log Std policy inner: -2.5718122 + Policy it 1..Sim ep: 311 +Log Std policy inner: -2.566557 + Policy it 2..Sim ep: 298 +Log Std policy inner: -2.5674472 +Iterations: 469 +Simulated test: ** -3.9469883162464248 ** -4.52640190266422 ** -3.8610180276085155 ** -3.5774177165556464 ** -3.0901905390998583 ** -3.479818711684784 ** -2.856316750214028 ** -3.0919022284098903 ** -3.5491807903489097 ** + Policy it 3..Sim ep: 297 +Log Std policy inner: -2.5690513 + Policy it 4..Sim ep: 281 +Log Std policy inner: -2.5651004 + Policy it 5..Sim ep: 315 +Log Std policy inner: -2.5689132 +Iterations: 472 +Simulated test: ** -3.7839585672157408 ** -3.779170881784521 ** -3.4896308982453776 ** -3.194062998990994 ** -3.8518375428090805 ** -3.25793729535304 ** -3.158667240632931 ** -3.406143161342479 ** -3.9915855854563413 ** + Policy it 6..Sim ep: 299 +Log Std policy inner: -2.5705795 + Policy it 7..Sim ep: 307 +Log Std policy inner: -2.5663073 + Policy it 8..Sim ep: 291 +Log Std policy inner: -2.57336 +Iterations: 475 +Simulated test: ** -3.21664796555473 ** -4.384362437635428 ** -3.119437176962092 ** -4.087338625221164 ** -3.756697358895326 ** -2.9237698127463227 ** -3.1082140988553872 ** -3.6901629215665164 ** -3.275739953531884 ** + Policy it 9..Sim ep: 297 +Log Std policy inner: -2.5822432 + Policy it 10..Sim ep: 302 +Log Std policy inner: -2.6007824 + Policy it 11..Sim ep: 304 +Log Std policy inner: -2.602827 +Iterations: 478 +Simulated test: ** -4.088489988120273 ** -4.348135365012568 ** -4.222346558257705 ** -3.5365186253096907 ** -3.0674965039500965 ** -2.517180041677784 ** -3.2323128292453474 ** -3.3253694869577886 ** -3.0206762015935964 ** + Policy it 12..Sim ep: 297 +Log Std policy inner: -2.603095 + Policy it 13..Sim ep: 287 +Log Std policy inner: -2.6011162 + Policy it 14..Sim ep: 291 +Log Std policy inner: -2.614026 +Iterations: 481 +Simulated test: ** -3.5416508645634166 ** -3.8973219984944443 ** -4.340629744401667 ** -3.762017271652585 ** -3.01139867328573 ** -3.4968206730182283 ** -3.6107116583775496 ** -3.297147912897635 ** -2.88041867580032 ** + Policy it 15..Sim ep: 294 +Log Std policy inner: -2.6224132 + Policy it 16..Sim ep: 298 
+Log Std policy inner: -2.629826 + Policy it 17..Sim ep: 302 +Log Std policy inner: -2.641839 +Iterations: 484 +Simulated test: ** -3.925097329186974 ** -3.5164292218443007 ** -3.4255877509177663 ** -3.3894781926705035 ** -3.2191955109848642 ** -3.465215698049869 ** -4.1327249353789375 ** -3.6864491870976055 ** -3.883102452182211 ** + Policy it 18..Sim ep: 290 +Log Std policy inner: -2.638685 + Policy it 19..Sim ep: 300 +Log Std policy inner: -2.6411264 + Policy it 20..Sim ep: 310 +Log Std policy inner: -2.6419678 +Iterations: 487 +Simulated test: ** -3.4501034445432013 ** -3.2514320461661557 ** -3.5014797476510284 ** -3.4749889812804757 ** -3.446705712322146 ** -3.5827979037119078 ** -3.7770957523223476 ** -3.0039913383265957 ** -3.410199819501431 ** + Policy it 21..Sim ep: 310 +Log Std policy inner: -2.6462395 + Policy it 22..Sim ep: 286 +Log Std policy inner: -2.6493335 + Policy it 23..Sim ep: 305 +Log Std policy inner: -2.6594627 +Iterations: 490 +Simulated test: ** -3.5062188224273267 ** -3.769365360199008 ** -3.516583447358571 ** -4.042180532147176 ** -3.6398613784369083 ** -2.762185446502408 ** -3.231928798640147 ** -3.270422034536023 ** -3.2314751834259368 ** + Policy it 24..Sim ep: 309 +Log Std policy inner: -2.659368 + Policy it 25..Sim ep: 302 +Log Std policy inner: -2.6593308 + Policy it 26..Sim ep: 284 +Log Std policy inner: -2.6578846 +Iterations: 493 +Simulated test: ** -3.5606418255309107 ** -3.2331517496472224 ** -3.6727086128533117 ** -3.7314710776810536 ** -3.5650303634814917 ** -3.2553960695666317 ** -3.4928337350254877 ** -3.710519711629022 ** -3.662992935143411 ** + Policy it 27..Sim ep: 301 +Log Std policy inner: -2.658014 + Policy it 28..Sim ep: 300 +Log Std policy inner: -2.6836998 + Policy it 29..Sim ep: 304 +Log Std policy inner: -2.6955605 +Iterations: 496 +Simulated test: ** -3.3030405843025075 ** -3.017430926350644 ** -3.7152732229582033 ** -3.7389790277226713 ** -3.180388754219748 ** -3.145739043867361 ** -3.1387182493042203 ** -3.489724072095123 ** -3.2157841639677645 ** + Policy it 30..Sim ep: 298 +Log Std policy inner: -2.7158477 + Policy it 31..Sim ep: 298 +Log Std policy inner: -2.7090814 + Policy it 32..Sim ep: 291 +Log Std policy inner: -2.7201564 +Iterations: 499 +Simulated test: ** -3.551787750455551 ** -2.934775304426876 ** -3.0783923650672658 ** -2.750674830896314 ** -3.825673776846379 ** -3.3841845411848044 ** -2.8903250300488437 ** -3.5362460483622273 ** -2.9071507757925428 ** + Policy it 33..Sim ep: 304 +Log Std policy inner: -2.7242947 + Policy it 34..Sim ep: 296 +Log Std policy inner: -2.7359896 + Policy it 35..Sim ep: 291 +Log Std policy inner: -2.7367868 +Iterations: 502 +Simulated test: ** -3.0357439052604605 ** -3.0913601085555276 ** -3.1936973594897426 ** -3.391876415195584 ** -2.9878866311442107 ** -3.3878101678201347 ** -3.795594487360213 ** -3.1683627686521505 ** -3.6571158600196942 ** + Policy it 36..Sim ep: 302 +Log Std policy inner: -2.7396839 + Policy it 37..Sim ep: 298 +Log Std policy inner: -2.7532516 + Policy it 38..Sim ep: 309 +Log Std policy inner: -2.7538176 +Iterations: 505 +Simulated test: ** -3.1153125652324523 ** -3.1434923451347276 ** -3.1329865666187833 ** -3.9841459369892256 ** -3.3135366124031136 ** -3.6400355067336934 ** -3.783496419126168 ** -2.9116882357932625 ** -3.2528697243332862 ** + Policy it 39..Sim ep: 317 +Log Std policy inner: -2.7585783 + Policy it 40..Sim ep: 314 +Log Std policy inner: -2.7571971 + Policy it 41..Sim ep: 296 +Log Std policy inner: -2.7576997 +Iterations: 508 +Simulated test: ** 
-2.980783074112842 ** -3.6099222155916504 ** -3.289947093115188 ** -3.3269834808073937 ** -3.1714016925018225 ** -2.7379741709912198 ** -3.366587822311558 ** -3.417207530643791 ** -3.329598099932773 ** + Policy it 42..Sim ep: 287 +Log Std policy inner: -2.7618933 + Policy it 43..Sim ep: 307 +Log Std policy inner: -2.7712722 + Policy it 44..Sim ep: 318 +Log Std policy inner: -2.766304 +Iterations: 511 +Simulated test: ** -3.1190461300185417 ** -3.5313485608343034 ** -3.4854918602315594 ** -3.4622558008192574 ** -2.6983176882739643 ** -3.2096447153994814 ** -3.6231036390172084 ** -3.2306827670382336 ** -3.569195263008587 ** + Policy it 45..Sim ep: 282 +Log Std policy inner: -2.7776802 + Policy it 46..Sim ep: 302 +Log Std policy inner: -2.7885654 + Policy it 47..Sim ep: 292 +Log Std policy inner: -2.7937174 +Iterations: 514 +Simulated test: ** -2.9890192233832202 ** -3.0416966395417693 ** -3.532618879158981 ** -2.63706508491945 ** -3.043852892059367 ** -3.6149158249469475 ** -2.3637119553494266 ** -2.947764967395924 ** -3.665385161489539 ** + Policy it 48..Sim ep: 317 +Log Std policy inner: -2.815598 + Policy it 49..Sim ep: 316 +Log Std policy inner: -2.8112853 +============================ 14 ============================ +step 1 state [0.1505 0.16433333 0.66483333 0.6145 ] a [-0.05661877 0.08333333 -0.07730636 0.08333333] r -0.5993682825413795 +step 2 state [0.08133333 0.22616667 0.5815 0.67933333] a [-0.06909154 0.06197502 -0.08333333 0.0649991 ] r -0.2472251325218524 +step 3 state [0. 0.3095 0.52133333 0.76266667] a [-0.08333333 0.08333333 -0.06016628 0.08333333] r -0.03935971713014297 +step 1 state [0.374 0.51233333 0.67733333 0.09533333] a [-0.08333333 0.06843309 -0.08333333 0.07858847] r -0.9665927952226101 +step 2 state [0.32916667 0.57466667 0.595 0.17866667] a [-0.04472226 0.06235061 -0.08229515 0.08333333] r -0.7723105537862044 +step 3 state [0.28966667 0.658 0.51166667 0.262 ] a [-0.03943881 0.08333333 -0.08333333 0.08333333] r -0.3986355638975386 +step 4 state [0.3445 0.6745 0.473 0.17866667] a [ 0.05488839 0.01662285 -0.03862135 -0.08333333] r -0.7320445679825061 +step 5 state [0.3105 0.75783333 0.55 0.262 ] a [-0.03395409 0.08333333 0.07708961 0.08333333] r -0.41255119326738354 +step 6 state [0.22716667 0.84116667 0.46666667 0.34533333] a [-0.08333333 0.08333333 -0.08333333 0.08333333] r -0.2980359736717312 +step 7 state [0.157 0.75783333 0.55 0.42866667] a [-0.07010533 -0.08333333 0.08333333 0.08333333] r -0.0695882462574976 +step 8 state [0.10316667 0.84116667 0.46666667 0.512 ] a [-0.05371612 0.08333333 -0.08333333 0.08333333] r -0.03770822482013991 +step 1 state [0.65433333 0.784 0.31783333 0.9655 ] a [-0.08333333 0.08333333 0.08333333 -0.01432914] r -0.8557949237599327 +step 2 state [0.571 0.86733333 0.3995 0.91016667] a [-0.08333333 0.08333333 0.08171875 -0.05521492] r -0.5096581833750202 +step 3 state [0.48766667 0.95066667 0.48 0.89733333] a [-0.08333333 0.08333333 0.08052524 -0.01270584] r -0.35243766407895305 +step 4 state [0.40716667 1. 
0.561 0.83866667] a [-0.08041949 0.08333333 0.08103899 -0.05857286] r -0.2176015389474084 +Saved current buffer default +Ep:14 Rew:0.93 -- Step:255 +Train set: 204 Valid set: 51 +Log Std policy: [-3.2432616 -3.395618 -2.0909147 -2.5153463] -2.8112853 +maximum: 1.1540497887255392 +mb_lr: 0.001 +Model:0, iter:100 -- Old Val loss:0.004227 New Val loss:0.010389 -- New Train loss:0.000445 +Model:1, iter:80 -- Old Val loss:0.003754 New Val loss:0.009178 -- New Train loss:0.000695 +Model:2, iter:68 -- Old Val loss:0.004986 New Val loss:0.012253 -- New Train loss:0.001243 +Model:3, iter:66 -- Old Val loss:0.004845 New Val loss:0.008970 -- New Train loss:0.000979 +Model:4, iter:99 -- Old Val loss:0.005157 New Val loss:0.012032 -- New Train loss:0.000455 +Model:5, iter:91 -- Old Val loss:0.006248 New Val loss:0.006707 -- New Train loss:0.000670 +Model:6, iter:44 -- Old Val loss:0.004770 New Val loss:0.009215 -- New Train loss:0.001193 +Model:7, iter:52 -- Old Val loss:0.004247 New Val loss:0.010749 -- New Train loss:0.001400 +Model:8, iter:74 -- Old Val loss:0.004922 New Val loss:0.012855 -- New Train loss:0.000849 + Policy it 0..Sim ep: 314 +Log Std policy inner: -2.8270013 + Policy it 1..Sim ep: 319 +Log Std policy inner: -2.8321123 + Policy it 2..Sim ep: 305 +Log Std policy inner: -2.843063 +Iterations: 519 +Simulated test: ** -2.6423576775347466 ** -3.0673059554077917 ** -3.0403992813429794 ** -3.683336229636916 ** -2.677361501259729 ** -2.6055560770991724 ** -2.286605847583851 ** -2.6224750345770733 ** -2.8342265713447703 ** + Policy it 3..Sim ep: 298 +Log Std policy inner: -2.8451753 + Policy it 4..Sim ep: 309 +Log Std policy inner: -2.8528357 + Policy it 5..Sim ep: 310 +Log Std policy inner: -2.8560822 +Iterations: 522 +Simulated test: ** -3.100537462607026 ** -3.748187848450616 ** -2.9405083770537748 ** -2.5854157609306276 ** -3.3745683143264613 ** -2.5264628712809643 ** -2.8062987797359527 ** -3.5143304921267555 ** -3.0267513636109653 ** + Policy it 6..Sim ep: 314 +Log Std policy inner: -2.8501585 + Policy it 7..Sim ep: 318 +Log Std policy inner: -2.8607383 + Policy it 8..Sim ep: 315 +Log Std policy inner: -2.8578086 +Iterations: 525 +Simulated test: ** -3.4274301270069554 ** -3.274160101602902 ** -2.915170269154478 ** -2.763402680023573 ** -2.7296292657998857 ** -3.253793972795829 ** -2.4794433856004616 ** -2.349631911338656 ** -2.4994710782775655 ** + Policy it 9..Sim ep: 319 +Log Std policy inner: -2.8587947 + Policy it 10..Sim ep: 307 +Log Std policy inner: -2.8673792 + Policy it 11..Sim ep: 315 +Log Std policy inner: -2.8687143 +Iterations: 528 +Simulated test: ** -3.195818093554117 ** -3.4553332094264624 ** -2.9015965092642 ** -2.5735561811411753 ** -2.6912269892002225 ** -2.49985286111827 ** -2.698898084242828 ** -2.4574661544826815 ** -2.8121140048321105 ** + Policy it 12..Sim ep: 295 +Log Std policy inner: -2.8801196 + Policy it 13..Sim ep: 318 +Log Std policy inner: -2.8840334 + Policy it 14..Sim ep: 318 +Log Std policy inner: -2.8844957 +Iterations: 531 +Simulated test: ** -3.304050663942471 ** -2.9266067897027823 ** -3.51973982560914 ** -2.875533469847869 ** -2.6063387075786886 ** -2.154790574251674 ** -3.003315442879684 ** -2.7373663015192142 ** -2.763297718563117 ** + Policy it 15..Sim ep: 308 +Log Std policy inner: -2.8852415 + Policy it 16..Sim ep: 316 +Log Std policy inner: -2.8888853 + Policy it 17..Sim ep: 316 +Log Std policy inner: -2.888096 +Iterations: 534 +Simulated test: ** -2.908583361953788 ** -2.591406906931661 ** -2.5903745192429053 ** -2.5003811236121694 ** 
-2.4691213247645645 ** -2.119548448701389 ** -2.3117408626804536 ** -2.6440697971597547 ** -2.535301995179616 ** + Policy it 18..Sim ep: 317 +Log Std policy inner: -2.9042203 + Policy it 19..Sim ep: 312 +Log Std policy inner: -2.9165053 + Policy it 20..Sim ep: 306 +Log Std policy inner: -2.921587 +Iterations: 537 +Simulated test: ** -2.5912473630346358 ** -2.856394036086276 ** -2.6551977546187118 ** -2.592151483290363 ** -2.8570366715734417 ** -2.4728485410986467 ** -2.8872685081977396 ** -2.671608108935179 ** -2.718104755510867 ** +break +============================ 15 ============================ +step 1 state [0.68166667 0.76383333 0.80533333 0.323 ] a [-0.08333333 0.08333333 -0.08333333 -0.03599527] r -0.6086725230807633 +step 2 state [0.60766667 0.81983333 0.77116667 0.301 ] a [-0.07392436 0.05604005 -0.03410036 -0.02195187] r -0.5866231518408114 +step 3 state [0.5455 0.90316667 0.68783333 0.3455 ] a [-0.06211446 0.08333333 -0.08333333 0.04466399] r -0.16270339135499257 +step 4 state [0.46216667 0.9865 0.64566667 0.42883333] a [-0.08333333 0.08333333 -0.04215937 0.08333333] r -0.07071947261971778 +step 5 state [0.41133333 1. 0.595 0.47016667] a [-0.05073143 0.08333333 -0.05054699 0.04140529] r -0.06390621060009927 +step 6 state [0.35366667 1. 0.526 0.5035 ] a [-0.05762829 0.07888676 -0.06892031 0.03343596] r -0.01608457882278358 +step 1 state [0.20266667 0.114 0.69016667 0.7665 ] a [-0.07728552 0.08333333 -0.0685293 0.08333333] r -0.8594768763666077 +step 2 state [0.19083333 0.19733333 0.60783333 0.84983333] a [-0.01177871 0.08333333 -0.08224181 0.08333333] r -0.5255153662699308 +step 3 state [0.17433333 0.28066667 0.623 0.92283333] a [-0.0164885 0.08333333 0.01528829 0.07313797] r -0.8669355648768909 +step 4 state [0.091 0.35983333 0.65033333 1. ] a [-0.08333333 0.07925016 0.02748209 0.08333333] r -0.9446980945868251 +step 5 state [0.0855 0.44316667 0.62766667 1. ] a [-0.00537647 0.08333333 -0.02254465 0.08333333] r -0.9001604780084395 +step 6 state [0.00216667 0.468 0.54433333 0.94383333] a [-0.08333333 0.02484144 -0.08333333 -0.05613189] r -0.37631031011397964 +step 7 state [0. 0.55133333 0.461 1. ] a [-0.08333333 0.08333333 -0.08333333 0.08333333] r -0.37617809728356844 +step 8 state [0. 0.63233333 0.54433333 0.99283333] a [-0.08333333 0.08107322 0.08333333 -0.00708335] r -0.4081915048853494 +step 9 state [0. 
0.71566667 0.461 0.9095 ] a [-0.01365362 0.08333333 -0.08333333 -0.08333333] r -0.15931146943849728 +Saved current buffer default +Ep:15 Rew:2.98 -- Step:270 +Train set: 216 Valid set: 54 +Log Std policy: [-3.3677573 -3.4691844 -2.289734 -2.5596728] -2.921587 +maximum: 1.1540497887255392 +mb_lr: 0.001 +Model:0, iter:62 -- Old Val loss:0.003418 New Val loss:0.009712 -- New Train loss:0.000982 +Model:1, iter:69 -- Old Val loss:0.004874 New Val loss:0.009033 -- New Train loss:0.000834 +Model:2, iter:66 -- Old Val loss:0.007454 New Val loss:0.009622 -- New Train loss:0.000984 +Model:3, iter:118 -- Old Val loss:0.005163 New Val loss:0.009311 -- New Train loss:0.000436 +Model:4, iter:85 -- Old Val loss:0.005648 New Val loss:0.009081 -- New Train loss:0.000515 +Model:5, iter:110 -- Old Val loss:0.003694 New Val loss:0.010727 -- New Train loss:0.000413 +Model:6, iter:58 -- Old Val loss:0.006547 New Val loss:0.013592 -- New Train loss:0.001056 +Model:7, iter:123 -- Old Val loss:0.005872 New Val loss:0.006277 -- New Train loss:0.000336 +Model:8, iter:52 -- Old Val loss:0.005335 New Val loss:0.010688 -- New Train loss:0.001349 + Policy it 0..Sim ep: 335 +Log Std policy inner: -2.9301488 + Policy it 1..Sim ep: 308 +Log Std policy inner: -2.927011 + Policy it 2..Sim ep: 333 +Log Std policy inner: -2.9302807 +Iterations: 540 +Simulated test: ** -2.3479268585983664 ** -2.662405961843906 ** -2.3037762925354763 ** -2.666647812263109 ** -2.822783973517362 ** -2.8422565389424563 ** -3.1270522353018166 ** -2.931773262656061 ** -2.6483247573906556 ** + Policy it 3..Sim ep: 331 +Log Std policy inner: -2.9334373 + Policy it 4..Sim ep: 320 +Log Std policy inner: -2.9284658 + Policy it 5..Sim ep: 318 +Log Std policy inner: -2.9321365 +Iterations: 543 +Simulated test: ** -2.71534201500006 ** -2.4945728179672733 ** -2.7268625424183845 ** -2.5214203820587135 ** -2.503414565264247 ** -2.2434411272290165 ** -2.7130503584921826 ** -2.6889334781793877 ** -3.0718653386455843 ** + Policy it 6..Sim ep: 316 +Log Std policy inner: -2.929199 + Policy it 7..Sim ep: 309 +Log Std policy inner: -2.923627 + Policy it 8..Sim ep: 305 +Log Std policy inner: -2.9279552 +Iterations: 546 +Simulated test: ** -2.262878658562113 ** -3.151811176027404 ** -3.147050856301794 ** -2.554805030528223 ** -2.6013751397456506 ** -2.68535405794275 ** -2.64882214827172 ** -2.5174296020017937 ** -3.0470423154183663 ** + Policy it 9..Sim ep: 299 +Log Std policy inner: -2.9275284 + Policy it 10..Sim ep: 319 +Log Std policy inner: -2.931056 + Policy it 11..Sim ep: 298 +Log Std policy inner: -2.9322994 +Iterations: 549 +Simulated test: ** -3.0240636912570333 ** -2.531999208005 ** -2.9551476870151236 ** -2.8064407643507003 ** -2.552381036684965 ** -2.7952942232892384 ** -2.694933398049325 ** -2.812270139858592 ** -2.9778210772993043 ** + Policy it 12..Sim ep: 306 +Log Std policy inner: -2.9351935 + Policy it 13..Sim ep: 301 +Log Std policy inner: -2.9363217 + Policy it 14..Sim ep: 314 +Log Std policy inner: -2.9402764 +Iterations: 552 +Simulated test: ** -2.5637398890207987 ** -2.8271538140304617 ** -2.5956697616353632 ** -2.2974706526560476 ** -2.993646326326998 ** -2.7667495224950835 ** -2.990275803406257 ** -3.5807273735679335 ** -2.495398789057508 ** + Policy it 15..Sim ep: 311 +Log Std policy inner: -2.9320374 + Policy it 16..Sim ep: 315 +Log Std policy inner: -2.926778 + Policy it 17..Sim ep: 305 +Log Std policy inner: -2.9295616 +Iterations: 555 +Simulated test: ** -2.3168610700126737 ** -2.4284031966980546 ** -3.007397836438322 ** -2.5678020405757707 
** -2.9278381552780046 ** -2.768723042840138 ** -2.6514290554099715 ** -2.7560776914842426 ** -3.211676538311876 ** + Policy it 18..Sim ep: 320 +Log Std policy inner: -2.9248466 + Policy it 19..Sim ep: 323 +Log Std policy inner: -2.9282265 + Policy it 20..Sim ep: 311 +Log Std policy inner: -2.93063 +Iterations: 558 +Simulated test: ** -2.6563664086326026 ** -2.692919684306253 ** -2.8356524105559218 ** -2.61761969856685 ** -2.533296205867082 ** -1.6569362507993355 ** -2.8960091220762116 ** -2.6586527203267907 ** -2.4476355182746192 ** + Policy it 21..Sim ep: 303 +Log Std policy inner: -2.935323 + Policy it 22..Sim ep: 317 +Log Std policy inner: -2.942159 + Policy it 23..Sim ep: 322 +Log Std policy inner: -2.9440086 +Iterations: 561 +Simulated test: ** -2.347147091827355 ** -2.983630000439007 ** -2.6826174236601217 ** -2.69594254404481 ** -3.0916573896864428 ** -2.3226506289298414 ** -3.1770885889488274 ** -2.605382348589046 ** -2.714982801610604 ** + Policy it 24..Sim ep: 307 +Log Std policy inner: -2.9493825 + Policy it 25..Sim ep: 309 +Log Std policy inner: -2.9515254 + Policy it 26..Sim ep: 304 +Log Std policy inner: -2.9496017 +Iterations: 564 +Simulated test: ** -2.419545491543249 ** -2.3061580476537347 ** -2.6293917781248455 ** -3.437743433397263 ** -2.528018160685897 ** -2.803684543017298 ** -2.5305662218807266 ** -3.279451771117747 ** -2.8712282784003764 ** + Policy it 27..Sim ep: 322 +Log Std policy inner: -2.9557855 + Policy it 28..Sim ep: 325 +Log Std policy inner: -2.9543016 + Policy it 29..Sim ep: 308 +Log Std policy inner: -2.9601982 +Iterations: 567 +Simulated test: ** -3.186792955141864 ** -3.2274612258153503 ** -3.1644541203812695 ** -2.9406386247556657 ** -2.9489600025489926 ** -2.4359790161438286 ** -3.4322105090040713 ** -2.555631772925699 ** -2.497139156984631 ** + Policy it 30..Sim ep: 303 +Log Std policy inner: -2.9662015 + Policy it 31..Sim ep: 323 +Log Std policy inner: -2.958907 + Policy it 32..Sim ep: 330 +Log Std policy inner: -2.9506288 +Iterations: 570 +Simulated test: ** -2.607329964938108 ** -1.9610245884605684 ** -2.3863063440448604 ** -2.688834394146688 ** -2.196238176464103 ** -2.542773490638938 ** -3.199126183530316 ** -3.145117052269634 ** -2.7571502539701758 ** + Policy it 33..Sim ep: 303 +Log Std policy inner: -2.963599 + Policy it 34..Sim ep: 319 +Log Std policy inner: -2.9705663 + Policy it 35..Sim ep: 327 +Log Std policy inner: -2.9756057 +Iterations: 573 +Simulated test: ** -2.2486238548438995 ** -2.673347288053483 ** -2.719376058936468 ** -2.8205288805253805 ** -2.4964783710340273 ** -3.0821284205262782 ** -3.0968247464299203 ** -2.7209847956709563 ** -2.5441107157620717 ** + Policy it 36..Sim ep: 329 +Log Std policy inner: -2.9857311 + Policy it 37..Sim ep: 328 +Log Std policy inner: -2.992956 + Policy it 38..Sim ep: 311 +Log Std policy inner: -2.9989142 +Iterations: 576 +Simulated test: ** -2.1429163747838174 ** -2.354985688859597 ** -2.7298124686302616 ** -2.382216800774913 ** -2.5364108474727254 ** -2.4587048824457454 ** -2.3161876110499726 ** -2.74045013690833 ** -2.525272158998996 ** + Policy it 39..Sim ep: 319 +Log Std policy inner: -3.003856 + Policy it 40..Sim ep: 330 +Log Std policy inner: -2.9957309 + Policy it 41..Sim ep: 320 +Log Std policy inner: -2.9918156 +Iterations: 579 +Simulated test: ** -2.5625821034563705 ** -2.5376976901903983 ** -2.469919048214797 ** -2.5616707317886176 ** -2.7812486162921415 ** -2.8271008247928693 ** -2.61814093553694 ** -2.4993895665509624 ** -2.6451414756063603 ** +break +============================ 16 
============================ +step 1 state [0.429 0.9955 0.86433333 0.19066667] a [-0.08102556 0.07057086 -0.07234982 -0.08333333] r -0.9195801131499932 +step 2 state [0.35433333 1. 0.806 0.11866667] a [-0.07451038 0.08333333 -0.05831651 -0.07186034] r -0.9262181807614952 +step 3 state [0.28233333 1. 0.72266667 0.11516667] a [-0.0719455 0.08115052 -0.08333333 -0.00346004] r -0.9257029069307199 +step 4 state [0.211 1. 0.64483333 0.109 ] a [-0.07125413 0.08333333 -0.07773357 -0.00608039] r -0.9478151085695575 +step 5 state [0.131 1. 0.56383333 0.14416667] a [-0.07986807 0.08333333 -0.08083554 0.03530228] r -0.9498683377511469 +step 6 state [0.04766667 1. 0.59083333 0.22583333] a [-0.08333333 0.08333333 0.02700066 0.08167573] r -0.8645680683668818 +step 7 state [0. 1. 0.5525 0.30433333] a [-0.06927983 0.0647785 -0.03824756 0.07863898] r -0.4661262363604729 +step 8 state [0. 1. 0.55716667 0.37116667] a [-0.07677601 0.08333333 0.00482029 0.0669876 ] r -0.28860076521583855 +step 9 state [0. 1. 0.576 0.40116667] a [-0.0704538 0.08262376 0.01893394 0.03014683] r -0.20376173555318688 +step 10 state [0. 1. 0.55083333 0.41783333] a [-0.07300358 0.07802067 -0.02511688 0.01681829] r -0.15229622135915377 +step 1 state [0.74883333 0.20583333 0.8595 0.338 ] a [-0.06998157 0.08333333 0.04944951 0.08333333] r -0.9972253744810599 +step 2 state [0.669 0.26433333 0.9395 0.42133333] a [-0.07970437 0.05854883 0.08005777 0.08333333] r -0.9994877221963225 +step 3 state [0.58566667 0.26816667 0.96416667 0.50466667] a [-0.08333333 0.0039295 0.02478742 0.08333333] r -0.9998361768972936 +step 4 state [0.50233333 0.3415 1. 0.588 ] a [-0.08333333 0.07348599 0.08333333 0.08333333] r -0.9998999784622602 +step 5 state [0.58566667 0.42483333 1. 0.67133333] a [0.08333333 0.08333333 0.03377937 0.08333333] r -0.9999144566804605 +Saved current buffer default +Ep:16 Rew:-3.29 -- Step:285 +Train set: 228 Valid set: 57 +Log Std policy: [-3.3732684 -3.593575 -2.5078123 -2.492606 ] -2.9918156 +maximum: 1.1540497887255392 +mb_lr: 0.001 +Model:0, iter:61 -- Old Val loss:0.006243 New Val loss:0.015381 -- New Train loss:0.000802 +Model:1, iter:41 -- Old Val loss:0.005646 New Val loss:0.018456 -- New Train loss:0.001468 +Model:2, iter:44 -- Old Val loss:0.004096 New Val loss:0.010119 -- New Train loss:0.001277 +Model:3, iter:39 -- Old Val loss:0.003768 New Val loss:0.009507 -- New Train loss:0.001212 +Model:4, iter:52 -- Old Val loss:0.007285 New Val loss:0.008848 -- New Train loss:0.001020 +Model:5, iter:26 -- Old Val loss:0.005099 New Val loss:0.014299 -- New Train loss:0.002167 +Model:6, iter:36 -- Old Val loss:0.012628 New Val loss:0.011920 -- New Train loss:0.001542 +Model:7, iter:45 -- Old Val loss:0.003011 New Val loss:0.012786 -- New Train loss:0.001166 +Model:8, iter:60 -- Old Val loss:0.005189 New Val loss:0.013665 -- New Train loss:0.000933 + Policy it 0..Sim ep: 299 +Log Std policy inner: -2.9919188 + Policy it 1..Sim ep: 295 +Log Std policy inner: -3.0062368 + Policy it 2..Sim ep: 300 +Log Std policy inner: -3.0190344 +Iterations: 582 +Simulated test: ** -2.231375291896984 ** -2.6119714981596918 ** -2.6344228242454117 ** -3.033860462750308 ** -3.159921806354541 ** -2.331132266204804 ** -2.538931795040844 ** -2.4723952988954263 ** -2.5757210259418937 ** + Policy it 3..Sim ep: 308 +Log Std policy inner: -3.0165992 + Policy it 4..Sim ep: 303 +Log Std policy inner: -3.0230165 + Policy it 5..Sim ep: 318 +Log Std policy inner: -3.0384507 +Iterations: 585 +Simulated test: ** -2.6499327991803874 ** -2.678066457719542 ** 
-2.142626016967697 ** -3.036737040937878 ** -2.74348615040537 ** -2.277539870995097 ** -2.7039162466931157 ** -2.4102372086641846 ** -2.5471255710266996 ** + Policy it 6..Sim ep: 293 +Log Std policy inner: -3.0421643 + Policy it 7..Sim ep: 318 +Log Std policy inner: -3.0500643 + Policy it 8..Sim ep: 290 +Log Std policy inner: -3.054415 +Iterations: 588 +Simulated test: ** -2.5028556230463437 ** -2.4532161242875734 ** -2.789890250542667 ** -2.302227779019449 ** -2.5591675629490056 ** -3.156288485636469 ** -2.7654456470021977 ** -2.2306514314218657 ** -2.382239799965173 ** + Policy it 9..Sim ep: 303 +Log Std policy inner: -3.0578165 + Policy it 10..Sim ep: 305 +Log Std policy inner: -3.0592713 + Policy it 11..Sim ep: 295 +Log Std policy inner: -3.0608902 +Iterations: 591 +Simulated test: ** -2.2470838428317803 ** -2.5843195166974327 ** -2.6355358941631857 ** -2.4935876981652108 ** -2.580890544017602 ** -2.237963661509566 ** -2.2686790824495255 ** -2.5813305458053946 ** -2.4706340597523377 ** + Policy it 12..Sim ep: 295 +Log Std policy inner: -3.0679045 + Policy it 13..Sim ep: 318 +Log Std policy inner: -3.0830703 + Policy it 14..Sim ep: 290 +Log Std policy inner: -3.0927567 +Iterations: 594 +Simulated test: ** -2.4241617643786593 ** -2.517558321885299 ** -2.5235468049347403 ** -2.1522587666730395 ** -2.198485501538962 ** -2.453269896991551 ** -2.706752009917691 ** -2.243231746224337 ** -2.5473259765584952 ** + Policy it 15..Sim ep: 313 +Log Std policy inner: -3.104615 + Policy it 16..Sim ep: 290 +Log Std policy inner: -3.1093407 + Policy it 17..Sim ep: 315 +Log Std policy inner: -3.1197877 +Iterations: 597 +Simulated test: ** -2.2133975110833126 ** -2.945112403472012 ** -2.2179195324180183 ** -2.0203810686664654 ** -2.3857405096577713 ** -2.4253234230400995 ** -2.5805321520817235 ** -2.3366170689370485 ** -2.4363248183741235 ** + Policy it 18..Sim ep: 299 +Log Std policy inner: -3.122383 + Policy it 19..Sim ep: 307 +Log Std policy inner: -3.126021 + Policy it 20..Sim ep: 308 +Log Std policy inner: -3.1297474 +Iterations: 600 +Simulated test: ** -2.4648950506735128 ** -2.4539320997695904 ** -2.156627253098413 ** -2.4290226032387 ** -2.5466135790897533 ** -2.7188118174951525 ** -2.1222759110998597 ** -2.621624978331383 ** -2.7116246860544195 ** + Policy it 21..Sim ep: 315 +Log Std policy inner: -3.13171 + Policy it 22..Sim ep: 311 +Log Std policy inner: -3.133547 + Policy it 23..Sim ep: 321 +Log Std policy inner: -3.136992 +Iterations: 603 +Simulated test: ** -2.9515971516811987 ** -2.4070975740323775 ** -2.1080964583251625 ** -2.558024080391042 ** -2.083090943007264 ** -2.2219978326419367 ** -2.0887091750581748 ** -2.2668896853004115 ** -2.5868097024830057 ** + Policy it 24..Sim ep: 300 +Log Std policy inner: -3.1472788 + Policy it 25..Sim ep: 306 +Log Std policy inner: -3.1452959 + Policy it 26..Sim ep: 317 +Log Std policy inner: -3.1532702 +Iterations: 606 +Simulated test: ** -2.0781402732711287 ** -2.457953980928287 ** -2.3245551499724386 ** -2.279364730012603 ** -2.5174168327497317 ** -2.388930069717171 ** -2.641331579670805 ** -2.2534423252532725 ** -2.1457137129228796 ** + Policy it 27..Sim ep: 331 +Log Std policy inner: -3.1433072 + Policy it 28..Sim ep: 323 +Log Std policy inner: -3.1405673 + Policy it 29..Sim ep: 305 +Log Std policy inner: -3.1544504 +Iterations: 609 +Simulated test: ** -2.4963397841947153 ** -2.7004668832733296 ** -2.439323050859384 ** -2.7197882007504814 ** -2.760345186747145 ** -2.2331674122507685 ** -2.1747199005773292 ** -2.713809828774538 ** -2.419601626007352 
** +break +============================ 17 ============================ +step 1 state [0.42466667 0.923 0.67283333 0.4345 ] a [-0.02144686 0.08333333 0.0342488 0.07475173] r -0.049015548143060084 +step 1 state [0.007 0.89366667 0.891 0.3845 ] a [-0.08333333 0.08333333 -0.04356009 0.08333333] r -0.9068419665389965 +step 2 state [0. 0.825 0.80766667 0.46783333] a [-0.02980097 -0.06866656 -0.08333333 0.08333333] r -0.5816647903239474 +step 3 state [0. 0.90833333 0.72433333 0.50533333] a [-0.06325397 0.08333333 -0.08333333 0.03758769] r -0.155236621698438 +step 4 state [0. 0.96383333 0.649 0.55133333] a [-0.08333333 0.05551001 -0.07525029 0.04614612] r 0.025338279235419092 +step 1 state [0. 1. 0.50316667 0.10416667] a [-0.07922803 0.05587028 -0.08333333 0.04964156] r -0.9755010988318251 +step 2 state [0. 1. 0.5865 0.177 ] a [-0.08333333 0.08333333 0.08333333 0.07293935] r -0.8443480637157006 +step 3 state [0. 1. 0.5505 0.21716667] a [-0.07626819 0.08333333 -0.03591592 0.04023884] r -0.7329977659997517 +step 4 state [0. 1. 0.60933333 0.29933333] a [-0.08333333 0.08333333 0.05895792 0.08231709] r -0.43376701160362774 +step 5 state [0. 1. 0.526 0.38266667] a [-0.03275277 0.08333333 -0.08333333 0.08333333] r -0.23250963531354063 +step 6 state [0. 1. 0.5115 0.45066667] a [-0.03945522 0.08333333 -0.01446058 0.0681309 ] r -0.138865722295058 +step 7 state [0. 1. 0.46716667 0.47233333] a [-0.08333333 0.07872819 -0.04423507 0.0217633 ] r -0.10531053371679833 +step 8 state [0. 1. 0.47483333 0.55566667] a [-0.0461756 0.06221706 0.00778691 0.08333333] r -0.059389196866609595 +step 9 state [0. 1. 0.48766667 0.53683333] a [-0.05444478 0.07475596 0.01296194 -0.01876413] r -0.012615033840408962 +step 1 state [0.035 0.26666667 0.46366667 0.69933333] a [-0.07525926 0.03578395 0.08333333 -0.01997582] r 0.03075469048155477 +Saved current buffer default +Ep:17 Rew:1.16 -- Step:300 +Train set: 240 Valid set: 60 +Log Std policy: [-3.5037317 -3.8402 -2.7229948 -2.5508752] -3.1544504 +maximum: 1.1540497887255392 +mb_lr: 0.001 +Model:0, iter:73 -- Old Val loss:0.003817 New Val loss:0.008977 -- New Train loss:0.000595 +Model:1, iter:77 -- Old Val loss:0.005653 New Val loss:0.012516 -- New Train loss:0.000736 +Model:2, iter:84 -- Old Val loss:0.005265 New Val loss:0.007438 -- New Train loss:0.000620 +Model:3, iter:47 -- Old Val loss:0.004277 New Val loss:0.010425 -- New Train loss:0.001255 +Model:4, iter:80 -- Old Val loss:0.003623 New Val loss:0.012206 -- New Train loss:0.000635 +Model:5, iter:56 -- Old Val loss:0.008853 New Val loss:0.012496 -- New Train loss:0.001219 +Model:6, iter:58 -- Old Val loss:0.006463 New Val loss:0.007941 -- New Train loss:0.000900 +Model:7, iter:59 -- Old Val loss:0.003956 New Val loss:0.015146 -- New Train loss:0.001000 +Model:8, iter:60 -- Old Val loss:0.005602 New Val loss:0.008573 -- New Train loss:0.000794 + Policy it 0..Sim ep: 341 +Log Std policy inner: -3.1555607 + Policy it 1..Sim ep: 313 +Log Std policy inner: -3.1612709 + Policy it 2..Sim ep: 342 +Log Std policy inner: -3.170781 +Iterations: 612 +Simulated test: ** -2.5483536235801876 ** -2.647346866731532 ** -2.0150757147069087 ** -2.4108244003308936 ** -1.931765500844922 ** -2.175627852311882 ** -2.370094932205975 ** -2.0725382095237728 ** -2.347290336894803 ** + Policy it 3..Sim ep: 321 +Log Std policy inner: -3.16859 + Policy it 4..Sim ep: 323 +Log Std policy inner: -3.177348 + Policy it 5..Sim ep: 336 +Log Std policy inner: -3.1820874 +Iterations: 615 +Simulated test: ** -2.8116339251352476 ** -2.6127369472407738 ** 
-2.452849561162875 ** -2.918441315330565 ** -2.437569643575698 ** -2.3611900184396655 ** -2.065656886881916 ** -2.3208972553373313 ** -2.2777550233714283 ** + Policy it 6..Sim ep: 336 +Log Std policy inner: -3.182753 + Policy it 7..Sim ep: 335 +Log Std policy inner: -3.1829011 + Policy it 8..Sim ep: 336 +Log Std policy inner: -3.183434 +Iterations: 618 +Simulated test: ** -2.141326265616808 ** -2.9714898444293065 ** -2.950992524911417 ** -2.586592440721579 ** -2.6718333169094692 ** -2.322035712080542 ** -2.3709804980293847 ** -2.6221647873241456 ** -1.9272718122100922 ** + Policy it 9..Sim ep: 312 +Log Std policy inner: -3.1853528 + Policy it 10..Sim ep: 349 +Log Std policy inner: -3.1928163 + Policy it 11..Sim ep: 342 +Log Std policy inner: -3.1948385 +Iterations: 621 +Simulated test: ** -2.25266577741364 ** -2.2837738533318044 ** -2.2228462569834666 ** -2.2227845383575184 ** -2.2438116073701533 ** -2.5984140646224843 ** -2.3816208183206617 ** -2.5022652462276165 ** -2.255397750060074 ** + Policy it 12..Sim ep: 318 +Log Std policy inner: -3.202072 + Policy it 13..Sim ep: 344 +Log Std policy inner: -3.2158287 + Policy it 14..Sim ep: 334 +Log Std policy inner: -3.2178752 +Iterations: 624 +Simulated test: ** -2.4565256620780564 ** -2.8957847292663064 ** -2.272599575020722 ** -2.4751283576898278 ** -2.1670593082346024 ** -2.108645543977618 ** -2.4107247407350223 ** -2.18895981116686 ** -2.3106909939087927 ** + Policy it 15..Sim ep: 333 +Log Std policy inner: -3.2216954 + Policy it 16..Sim ep: 340 +Log Std policy inner: -3.2256312 + Policy it 17..Sim ep: 344 +Log Std policy inner: -3.2267046 +Iterations: 627 +Simulated test: ** -2.6071917517852854 ** -2.3772389114520047 ** -2.4282382612908258 ** -2.381402584729658 ** -2.5867968098539857 ** -2.390349549781531 ** -2.1934152148663997 ** -1.813280059916433 ** -2.5479844658175717 ** + Policy it 18..Sim ep: 335 +Log Std policy inner: -3.2294412 + Policy it 19..Sim ep: 342 +Log Std policy inner: -3.2296014 + Policy it 20..Sim ep: 336 +Log Std policy inner: -3.225849 +Iterations: 630 +Simulated test: ** -2.5946658300590935 ** -2.355302581920987 ** -2.7351280304789545 ** -2.5890434538980482 ** -2.1731527869502316 ** -2.7503504870738835 ** -2.3466825047228483 ** -2.844523450192064 ** -2.511887516307179 ** + Policy it 21..Sim ep: 333 +Log Std policy inner: -3.2280974 + Policy it 22..Sim ep: 334 +Log Std policy inner: -3.23129 + Policy it 23..Sim ep: 339 +Log Std policy inner: -3.2379668 +Iterations: 633 +Simulated test: ** -2.3565363277110736 ** -1.8871060013922396 ** -2.0467283084150405 ** -2.210365958151524 ** -2.3996191865438594 ** -2.0996450795710553 ** -2.3444219547277316 ** -2.231358344979817 ** -2.109437326840125 ** + Policy it 24..Sim ep: 350 +Log Std policy inner: -3.2397985 + Policy it 25..Sim ep: 319 +Log Std policy inner: -3.2475214 + Policy it 26..Sim ep: 343 +Log Std policy inner: -3.246835 +Iterations: 636 +Simulated test: ** -2.2254678743908882 ** -2.700490724091651 ** -2.752473087522667 ** -2.659059954690747 ** -2.4191440701490503 ** -2.365770447917748 ** -2.532480893114116 ** -2.1210170835419557 ** -2.290172014948912 ** +break +============================ 18 ============================ +step 1 state [0.55766667 0.1905 1. 1. ] a [0.0804158 0.08333333 0.07082377 0.08333333] r -1.0001111037343091 +step 2 state [0.59716667 0.12316667 1. 1. ] a [ 0.03964618 -0.06731485 0.08333333 0.01960939] r -1.0001995154449344 +step 3 state [0.59333333 0.2065 1. 1. 
] a [-0.00377535 0.08333333 0.0473808 0.04988133] r -1.0000767214023993 +step 4 state [0.51 0.2645 1. 1. ] a [-0.08333333 0.05815839 0.08333333 0.07747955] r -1.0001651331130246 +step 5 state [0.43366667 0.26716667 1. 1. ] a [-0.07619216 0.00266881 0.08333333 0.03927481] r -1.0001160154960105 +step 6 state [0.35033333 0.3505 0.9895 1. ] a [-0.08333333 0.08333333 -0.01046662 0.08333333] r -1.0002142507300387 +step 7 state [0.394 0.43383333 0.90616667 1. ] a [ 0.04379698 0.08333333 -0.08333333 0.08333333] r -1.0001258390194134 +step 8 state [0.31066667 0.51716667 0.82283333 1. ] a [-0.08333333 0.08333333 -0.08333333 0.06920261] r -0.9985344448514509 +step 9 state [0.229 0.6005 0.7985 0.948 ] a [-0.08160828 0.08333333 -0.02429033 -0.05188968] r -0.9890529046304123 +step 10 state [0.14566667 0.68383333 0.71516667 0.86466667] a [-0.08333333 0.08333333 -0.08333333 -0.08333333] r -0.6476999810636195 +step 1 state [0.0235 0.9325 0.54116667 0.82933333] a [-0.05974906 0.08333333 0.07175133 -0.0776036 ] r -0.03450173663495559 +step 1 state [0.88716667 0.441 0.3555 0.26583333] a [-0.08333333 0.08333333 0.06984023 0.08333333] r -0.20259919600670695 +step 2 state [0.80383333 0.52433333 0.43516667 0.34633333] a [-0.08333333 0.08333333 0.07970018 0.08057373] r -0.060836199016026105 +step 3 state [0.7205 0.605 0.5165 0.42 ] a [-0.08333333 0.08072098 0.08139717 0.0738055 ] r 0.07081032629664463 +step 1 state [0.04716667 1. 0.11283333 1. ] a [-0.05094177 0.07671455 0.08011153 0.06514003] r -0.9966033896795913 +Saved current buffer default +Ep:18 Rew:-1.91 -- Step:315 +Train set: 252 Valid set: 63 +Log Std policy: [-3.6109114 -3.867273 -2.9173343 -2.5918205] -3.246835 +maximum: 1.1540497887255392 +mb_lr: 0.001 +Model:0, iter:66 -- Old Val loss:0.003820 New Val loss:0.010394 -- New Train loss:0.000806 +Model:1, iter:65 -- Old Val loss:0.002868 New Val loss:0.011792 -- New Train loss:0.000781 +Model:2, iter:64 -- Old Val loss:0.002436 New Val loss:0.011832 -- New Train loss:0.000831 +Model:3, iter:57 -- Old Val loss:0.005393 New Val loss:0.010375 -- New Train loss:0.001002 +Model:4, iter:102 -- Old Val loss:0.003815 New Val loss:0.007815 -- New Train loss:0.000369 +Model:5, iter:92 -- Old Val loss:0.005556 New Val loss:0.010032 -- New Train loss:0.000530 +Model:6, iter:87 -- Old Val loss:0.003333 New Val loss:0.009639 -- New Train loss:0.000546 +Model:7, iter:55 -- Old Val loss:0.005029 New Val loss:0.009861 -- New Train loss:0.001121 +Model:8, iter:76 -- Old Val loss:0.003954 New Val loss:0.011082 -- New Train loss:0.000681 + Policy it 0..Sim ep: 348 +Log Std policy inner: -3.2520943 + Policy it 1..Sim ep: 358 +Log Std policy inner: -3.2516222 + Policy it 2..Sim ep: 371 +Log Std policy inner: -3.2539244 +Iterations: 639 +Simulated test: ** -3.016961703420384 ** -2.5644627951667642 ** -1.9019821654258704 ** -1.7800602231814993 ** -2.452179201520048 ** -2.6437105155782774 ** -2.0473222017928494 ** -2.313569193710573 ** -2.4646141220163553 ** + Policy it 3..Sim ep: 358 +Log Std policy inner: -3.2559178 + Policy it 4..Sim ep: 356 +Log Std policy inner: -3.2598462 + Policy it 5..Sim ep: 346 +Log Std policy inner: -3.2616878 +Iterations: 642 +Simulated test: ** -2.494774153090548 ** -2.2183106884080916 ** -2.3245448188087905 ** -2.335255912723951 ** -2.565719997648848 ** -2.284652263543685 ** -2.2027198667754417 ** -2.334237245018012 ** -2.4974860503472156 ** + Policy it 6..Sim ep: 355 +Log Std policy inner: -3.2649322 + Policy it 7..Sim ep: 349 +Log Std policy inner: -3.2640052 + Policy it 8..Sim ep: 359 +Log Std 
policy inner: -3.2676735 +Iterations: 645 +Simulated test: ** -2.5039778636943084 ** -2.4588999926950783 ** -1.9095673148380592 ** -2.183129595599603 ** -2.101784368492663 ** -1.976420968385064 ** -2.1151829724386335 ** -2.103205363410525 ** -2.537742689041188 ** + Policy it 9..Sim ep: 335 +Log Std policy inner: -3.2690523 + Policy it 10..Sim ep: 364 +Log Std policy inner: -3.2719798 + Policy it 11..Sim ep: 348 +Log Std policy inner: -3.2761776 +Iterations: 648 +Simulated test: ** -2.7665191850322297 ** -2.1342400352767434 ** -2.413582571364823 ** -2.5682808495534117 ** -2.3789381218203927 ** -2.47570185828954 ** -2.4291791395796465 ** -2.4579436030535726 ** -2.2463106674140727 ** +break +============================ 19 ============================ +step 1 state [0.8905 0.5175 0.3265 0.56833333] a [-0.08333333 0.08333333 0.08333333 0.07514555] r -0.10320027183947222 +step 2 state [0.88116667 0.47766667 0.40983333 0.65166667] a [-0.00930966 -0.03973584 0.08333333 0.08333333] r -0.10596911393082709 +step 3 state [0.79783333 0.51183333 0.49316667 0.73183333] a [-0.08333333 0.03430275 0.08333333 0.0802287 ] r -0.202147654196556 +step 4 state [0.84633333 0.59516667 0.5765 0.81516667] a [0.04861956 0.08333333 0.08333333 0.08333333] r -0.4438321603146773 +step 5 state [0.81133333 0.6785 0.65983333 0.76616667] a [-0.03490995 0.08333333 0.08333333 -0.04892914] r -0.3925368201770383 +step 6 state [0.728 0.71133333 0.7385 0.71033333] a [-0.08333333 0.03288598 0.07866864 -0.05577495] r -0.3073680322802884 +step 7 state [0.64466667 0.79466667 0.6895 0.67283333] a [-0.08333333 0.08333333 -0.04886565 -0.03747376] r -0.12931122439644926 +step 8 state [0.64566667 0.79816667 0.60616667 0.64183333] a [ 0.00102793 0.00350506 -0.08333333 -0.03088895] r -0.009513914368146525 +step 1 state [0.55716667 0.81033333 0.92166667 0.35633333] a [-0.08333333 -0.08333333 -0.05834568 -0.08333333] r -0.927949488759626 +step 2 state [0.47383333 0.79583333 0.83833333 0.3735 ] a [-0.08333333 -0.01449836 -0.08333333 0.01716826] r -0.5934691848055833 +step 3 state [0.3905 0.87916667 0.755 0.41416667] a [-0.08333333 0.08333333 -0.08333333 0.04077021] r -0.24949918054227016 +step 4 state [0.30716667 0.9625 0.67166667 0.463 ] a [-0.08333333 0.08333333 -0.08333333 0.04887077] r -0.03591373426812383 +step 1 state [0.6875 0.88166667 0.24066667 0.8445 ] a [-0.08254442 0.08328059 0.08333333 0.07916225] r -0.9461957736690357 +step 2 state [0.60416667 0.965 0.31966667 0.92266667] a [-0.08333333 0.08333333 0.07903713 0.07817944] r -0.9414007442913582 +step 3 state [0.52083333 1. 
0.4015 0.94933333] a [-0.08330696 0.08333333 0.08186145 0.0268226 ] r -0.8699407536384366 +Saved current buffer default +Ep:19 Rew:2.50 -- Step:330 +Train set: 264 Valid set: 66 +Log Std policy: [-3.6399791 -3.8795576 -2.9266484 -2.6585252] -3.2761776 +maximum: 1.1540497887255392 +mb_lr: 0.001 +Model:0, iter:59 -- Old Val loss:0.003902 New Val loss:0.010888 -- New Train loss:0.000799 +Model:1, iter:47 -- Old Val loss:0.003901 New Val loss:0.011581 -- New Train loss:0.001186 +Model:2, iter:24 -- Old Val loss:0.004477 New Val loss:0.012627 -- New Train loss:0.002316 +Model:3, iter:32 -- Old Val loss:0.004293 New Val loss:0.010375 -- New Train loss:0.001653 +Model:4, iter:36 -- Old Val loss:0.005631 New Val loss:0.011253 -- New Train loss:0.001374 +Model:5, iter:47 -- Old Val loss:0.004740 New Val loss:0.012492 -- New Train loss:0.001271 +Model:6, iter:65 -- Old Val loss:0.005841 New Val loss:0.014061 -- New Train loss:0.000768 +Model:7, iter:74 -- Old Val loss:0.005111 New Val loss:0.012442 -- New Train loss:0.000777 +Model:8, iter:24 -- Old Val loss:0.005862 New Val loss:0.013281 -- New Train loss:0.002131 + Policy it 0..Sim ep: 342 +Log Std policy inner: -3.275371 + Policy it 1..Sim ep: 327 +Log Std policy inner: -3.2858644 + Policy it 2..Sim ep: 332 +Log Std policy inner: -3.2826157 +Iterations: 651 +Simulated test: ** -3.1917600814494653 ** -2.1734284890675917 ** -2.126794112112839 ** -2.0994068139593582 ** -2.388129200221738 ** -2.076322691444366 ** -2.227303542881273 ** -2.306553944936022 ** -2.0983096589444905 ** + Policy it 3..Sim ep: 341 +Log Std policy inner: -3.2927458 + Policy it 4..Sim ep: 320 +Log Std policy inner: -3.2969327 + Policy it 5..Sim ep: 343 +Log Std policy inner: -3.305934 +Iterations: 654 +Simulated test: ** -2.7694041204079984 ** -2.4319464951101692 ** -2.212550695416285 ** -2.5977575168572367 ** -2.4981091266300064 ** -2.233384975544177 ** -2.0808717367239296 ** -2.0160074968403205 ** -2.519461109599797 ** + Policy it 6..Sim ep: 322 +Log Std policy inner: -3.308638 + Policy it 7..Sim ep: 337 +Log Std policy inner: -3.3089764 + Policy it 8..Sim ep: 337 +Log Std policy inner: -3.3168168 +Iterations: 657 +Simulated test: ** -3.26972483055084 ** -1.6834119047457352 ** -2.5035707038283 ** -2.1788420041254724 ** -2.2325773776334246 ** -2.769370487239212 ** -2.3272962519910654 ** -2.5045742755383253 ** -2.4076461834460496 ** + Policy it 9..Sim ep: 328 +Log Std policy inner: -3.3171089 + Policy it 10..Sim ep: 334 +Log Std policy inner: -3.316123 + Policy it 11..Sim ep: 336 +Log Std policy inner: -3.311607 +Iterations: 660 +Simulated test: ** -2.887409309106879 ** -2.3823620029498125 ** -2.196997964195907 ** -2.524069729485782 ** -1.834017189599108 ** -2.1265873278980143 ** -3.0223118194146084 ** -2.2135274578863755 ** -1.963357459633844 ** + Policy it 12..Sim ep: 332 +Log Std policy inner: -3.3076956 + Policy it 13..Sim ep: 322 +Log Std policy inner: -3.3099227 + Policy it 14..Sim ep: 327 +Log Std policy inner: -3.3126404 +Iterations: 663 +Simulated test: ** -2.385941691286862 ** -2.7685686113516565 ** -2.112297527078772 ** -2.1937671195040456 ** -2.065257940259762 ** -2.203005329193547 ** -2.4299794554145775 ** -2.0533329137274996 ** -2.3524323896312853 ** + Policy it 15..Sim ep: 334 +Log Std policy inner: -3.308537 + Policy it 16..Sim ep: 328 +Log Std policy inner: -3.311698 + Policy it 17..Sim ep: 339 +Log Std policy inner: -3.3132894 +Iterations: 666 +Simulated test: ** -2.4561879859818148 ** -2.6018236701563002 ** -2.404436435396783 ** -1.8810752484062687 ** 
-1.9431866808701306 ** -2.118188060550019 ** -2.1714568148209947 ** -2.1269623537070586 ** -2.635909912349598 ** + Policy it 18..Sim ep: 331 +Log Std policy inner: -3.310341 + Policy it 19..Sim ep: 334 +Log Std policy inner: -3.3152637 + Policy it 20..Sim ep: 345 +Log Std policy inner: -3.314907 +Iterations: 669 +Simulated test: ** -2.750289137768559 ** -2.466286414403585 ** -2.12261591832852 ** -2.5745484633362503 ** -1.995809208159335 ** -2.6544549722177906 ** -1.845999207375571 ** -2.1280159856658427 ** -2.0608259215834552 ** + Policy it 21..Sim ep: 327 +Log Std policy inner: -3.3135912 + Policy it 22..Sim ep: 345 +Log Std policy inner: -3.312592 + Policy it 23..Sim ep: 352 +Log Std policy inner: -3.31551 +Iterations: 672 +Simulated test: ** -2.5180614742822947 ** -2.3746672137163114 ** -2.0070423695025967 ** -2.529471227163449 ** -2.357474053273327 ** -2.2670082429445757 ** -2.1506391534180147 ** -2.246271066975605 ** -2.1968841856881043 ** + Policy it 24..Sim ep: 339 +Log Std policy inner: -3.3163097 + Policy it 25..Sim ep: 337 +Log Std policy inner: -3.3216715 + Policy it 26..Sim ep: 350 +Log Std policy inner: -3.319253 +Iterations: 675 +Simulated test: ** -2.9409270739392377 ** -2.1796288739470766 ** -2.4780479453346924 ** -2.522285793061601 ** -2.3816448515676893 ** -2.2152379108339666 ** -2.548996354484698 ** -2.317117004597094 ** -2.3621537724137305 ** + Policy it 27..Sim ep: 338 +Log Std policy inner: -3.3255515 + Policy it 28..Sim ep: 342 +Log Std policy inner: -3.3317976 + Policy it 29..Sim ep: 333 +Log Std policy inner: -3.3374186 +Iterations: 678 +Simulated test: ** -2.736311787131708 ** -2.913280643215403 ** -2.0966949561377985 ** -2.774900709389476 ** -2.6374595219244656 ** -2.513193862341577 ** -2.234686649693176 ** -2.190704317020718 ** -2.377378230043687 ** + Policy it 30..Sim ep: 358 +Log Std policy inner: -3.342052 + Policy it 31..Sim ep: 340 +Log Std policy inner: -3.3421755 + Policy it 32..Sim ep: 323 +Log Std policy inner: -3.3387268 +Iterations: 681 +Simulated test: ** -2.8534698508126892 ** -2.254390237198677 ** -2.367297112031374 ** -2.5439099863916637 ** -2.4281158989993856 ** -2.1187426080508156 ** -2.549126916890964 ** -2.4169686446152627 ** -2.5439072671951726 ** + Policy it 33..Sim ep: 337 +Log Std policy inner: -3.3408384 + Policy it 34..Sim ep: 336 +Log Std policy inner: -3.3427331 + Policy it 35..Sim ep: 338 +Log Std policy inner: -3.341651 +Iterations: 684 +Simulated test: ** -2.8197850695927626 ** -2.2586299894348487 ** -2.6616532374016244 ** -1.9773842471186072 ** -2.27748075487063 ** -2.446856690152781 ** -2.151150006574462 ** -2.4384543907648184 ** -2.7978065126297587 ** + Policy it 36..Sim ep: 343 +Log Std policy inner: -3.3397658 + Policy it 37..Sim ep: 337 +Log Std policy inner: -3.3418941 + Policy it 38..Sim ep: 343 +Log Std policy inner: -3.3364978 +Iterations: 687 +Simulated test: ** -2.6809711933297513 ** -2.484384750416502 ** -2.756119981438387 ** -2.9480442945951655 ** -2.64412051795749 ** -1.9640331029333173 ** -2.0877215396356767 ** -1.9053899088356412 ** -1.949021392634604 ** + Policy it 39..Sim ep: 322 +Log Std policy inner: -3.3425155 + Policy it 40..Sim ep: 331 +Log Std policy inner: -3.3476117 + Policy it 41..Sim ep: 327 +Log Std policy inner: -3.349002 +Iterations: 690 +Simulated test: ** -3.2694234514445997 ** -2.398152067969786 ** -2.2670113248704 ** -1.9991955242858968 ** -2.315974964861525 ** -2.4644925802390207 ** -2.0074292592611163 ** -2.2803002169623507 ** -2.0539663677943465 ** + Policy it 42..Sim ep: 332 +Log Std policy 
inner: -3.3472474 + Policy it 43..Sim ep: 323 +Log Std policy inner: -3.344641 + Policy it 44..Sim ep: 331 +Log Std policy inner: -3.3427932 +Iterations: 693 +Simulated test: ** -2.4900397171010265 ** -2.4426076782122252 ** -2.147744785291725 ** -2.6054605828318746 ** -2.5817459548683837 ** -2.3266759337854457 ** -2.6002697007055393 ** -2.4122533380688402 ** -2.706374979832908 ** + Policy it 45..Sim ep: 341 +Log Std policy inner: -3.342421 + Policy it 46..Sim ep: 343 +Log Std policy inner: -3.3414435 + Policy it 47..Sim ep: 352 +Log Std policy inner: -3.3473897 +Iterations: 696 +Simulated test: ** -2.3452046689251436 ** -2.071634972519241 ** -2.4357257541106083 ** -2.553872936577536 ** -2.4057751039922004 ** -2.1698329858854413 ** -2.341406799503602 ** -2.4485283357894514 ** -2.1050876319257075 ** + Policy it 48..Sim ep: 342 +Log Std policy inner: -3.3503902 + Policy it 49..Sim ep: 336 +Log Std policy inner: -3.34886 +============================ 20 ============================ +step 1 state [0.40666667 0.655 0.66583333 0.79383333] a [-0.08333333 -0.05103077 -0.05623851 0.04551736] r -0.3801109832153411 +step 2 state [0.32333333 0.73833333 0.5825 0.74466667] a [-0.08333333 0.08333333 -0.08333333 -0.04911641] r -0.17720815307390092 +step 3 state [0.32166667 0.82166667 0.52583333 0.66133333] a [-0.00155784 0.08333333 -0.05654266 -0.08333333] r -0.19123112948338805 +step 4 state [0.23833333 0.905 0.4425 0.70383333] a [-0.08333333 0.08333333 -0.08333333 0.04252507] r -0.23270164730893794 +step 5 state [0.19933333 0.98833333 0.4985 0.72083333] a [-0.03891695 0.08333333 0.05611703 0.01706441] r -0.17369213886769774 +step 6 state [0.116 1. 0.4855 0.754 ] a [-0.08333333 0.08333333 -0.01289137 0.03328225] r -0.17908524793649916 +step 7 state [0.03266667 1. 0.40216667 0.731 ] a [-0.08333333 0.08333333 -0.08333333 -0.02285321] r -0.2524962464790931 +step 8 state [0. 1. 0.4855 0.81433333] a [-0.06854791 0.08333333 0.08333333 0.08333333] r -0.13804871921498718 +step 9 state [0. 1. 0.44516667 0.731 ] a [-0.08333333 0.04511188 -0.04026251 -0.08333333] r -0.2275515680323581 +step 10 state [0. 1. 0.47683333 0.7285 ] a [-0.07444013 0.03383382 0.03173964 -0.00239954] r -0.12448370793075314 +step 1 state [0.242 0.29933333 0.7135 0.22616667] a [-0.05025469 0.08333333 -0.08333333 0.08333333] r -0.9237210199454223 +step 2 state [0.15866667 0.38266667 0.6715 0.3095 ] a [-0.08333333 0.08333333 -0.0418955 0.08333333] r -0.63624171226187 +step 3 state [0.07533333 0.466 0.63033333 0.39283333] a [-0.08333333 0.08333333 -0.04102555 0.08333333] r -0.30162770867996547 +step 4 state [0. 0.54933333 0.547 0.47616667] a [-0.08333333 0.08333333 -0.08333333 0.08333333] r -0.0642073224426486 +step 5 state [0. 
0.627 0.46366667 0.5455 ] a [-0.08333333 0.07782735 -0.08333333 0.06945008] r -0.13462290312498093 +Saved current buffer default +Ep:20 Rew:5.85 -- Step:345 +Train set: 276 Valid set: 69 +Log Std policy: [-3.6406493 -3.9029372 -3.1495035 -2.70235 ] -3.34886 +maximum: 1.1540497887255392 +mb_lr: 0.001 +Model:0, iter:71 -- Old Val loss:0.003377 New Val loss:0.009535 -- New Train loss:0.000765 +Model:1, iter:108 -- Old Val loss:0.003370 New Val loss:0.007301 -- New Train loss:0.000402 +Model:2, iter:44 -- Old Val loss:0.009759 New Val loss:0.013760 -- New Train loss:0.001452 +Model:3, iter:142 -- Old Val loss:0.004856 New Val loss:0.008609 -- New Train loss:0.000295 +Model:4, iter:105 -- Old Val loss:0.005042 New Val loss:0.008950 -- New Train loss:0.000405 +Model:5, iter:43 -- Old Val loss:0.003898 New Val loss:0.008066 -- New Train loss:0.001557 +Model:6, iter:81 -- Old Val loss:0.004072 New Val loss:0.007035 -- New Train loss:0.000654 +Model:7, iter:110 -- Old Val loss:0.003771 New Val loss:0.008443 -- New Train loss:0.000319 +Model:8, iter:62 -- Old Val loss:0.007409 New Val loss:0.008478 -- New Train loss:0.001074 + Policy it 0..Sim ep: 302 +Log Std policy inner: -3.3502178 + Policy it 1..Sim ep: 295 +Log Std policy inner: -3.3485563 + Policy it 2..Sim ep: 299 +Log Std policy inner: -3.3539045 +Iterations: 701 +Simulated test: ** -2.6243613117438507 ** -2.647718599640066 ** -1.8384140597563237 ** -2.3461297220690174 ** -3.003902761628851 ** -2.045807214113884 ** -2.2481774815163225 ** -2.5572252481855684 ** -2.219176571946591 ** + Policy it 3..Sim ep: 296 +Log Std policy inner: -3.3538237 + Policy it 4..Sim ep: 287 +Log Std policy inner: -3.3497682 + Policy it 5..Sim ep: 276 +Log Std policy inner: -3.3480637 +Iterations: 704 +Simulated test: ** -3.0404748481231216 ** -2.4221820550388657 ** -2.5046578419953587 ** -1.7970284648242523 ** -2.248355027996004 ** -1.8620493667278788 ** -2.579328487133607 ** -2.2511802183091643 ** -2.5043743501580322 ** + Policy it 6..Sim ep: 292 +Log Std policy inner: -3.348047 + Policy it 7..Sim ep: 296 +Log Std policy inner: -3.3396719 + Policy it 8..Sim ep: 281 +Log Std policy inner: -3.3414772 +Iterations: 707 +Simulated test: ** -2.639285827850399 ** -2.897656650512945 ** -1.9317367552546785 ** -2.4829258096416016 ** -2.684143108311109 ** -2.6772229807940313 ** -2.004532688073814 ** -2.7023462770599873 ** -2.384596514859877 ** + Policy it 9..Sim ep: 292 +Log Std policy inner: -3.3430166 + Policy it 10..Sim ep: 296 +Log Std policy inner: -3.3502817 + Policy it 11..Sim ep: 313 +Log Std policy inner: -3.3553727 +Iterations: 710 +Simulated test: ** -2.4144614837085827 ** -2.5215881071810147 ** -2.5380501731298866 ** -2.3992507346021013 ** -2.302005228609196 ** -2.28544062667992 ** -1.9689672570046968 ** -2.4481616917735662 ** -2.1975318607420196 ** + Policy it 12..Sim ep: 294 +Log Std policy inner: -3.3579226 + Policy it 13..Sim ep: 294 +Log Std policy inner: -3.3585396 + Policy it 14..Sim ep: 289 +Log Std policy inner: -3.3585649 +Iterations: 713 +Simulated test: ** -2.34405517494597 ** -2.3736334798135794 ** -1.8633030855329706 ** -2.102954781264998 ** -2.2700885009591003 ** -2.372418496552855 ** -2.7578034349367955 ** -2.7336800918047084 ** -2.3781087997811845 ** + Policy it 15..Sim ep: 293 +Log Std policy inner: -3.3560736 + Policy it 16..Sim ep: 287 +Log Std policy inner: -3.3605483 + Policy it 17..Sim ep: 300 +Log Std policy inner: -3.3633916 +Iterations: 716 +Simulated test: ** -2.421822618737933 ** -2.6936721298261546 ** -1.8695126945432277 ** 
-2.4850320572336204 ** -2.6459696359891676 ** -2.6353838421736144 ** -2.371873870312702 ** -2.120445565984119 ** -2.345745945323724 ** + Policy it 18..Sim ep: 273 +Log Std policy inner: -3.3594708 + Policy it 19..Sim ep: 292 +Log Std policy inner: -3.3617463 + Policy it 20..Sim ep: 288 +Log Std policy inner: -3.3611574 +Iterations: 719 +Simulated test: ** -2.4032110879081303 ** -2.4512845042627305 ** -2.294538823789917 ** -2.6267636344209313 ** -2.4651334162196146 ** -2.3244621373247356 ** -2.4645880804909392 ** -2.021697217999026 ** -2.144812134569511 ** + Policy it 21..Sim ep: 302 +Log Std policy inner: -3.366058 + Policy it 22..Sim ep: 298 +Log Std policy inner: -3.3675823 + Policy it 23..Sim ep: 295 +Log Std policy inner: -3.369791 +Iterations: 722 +Simulated test: ** -2.4152021680941105 ** -2.4043405618320683 ** -2.3715700984327124 ** -1.9993275437690317 ** -2.224296545139514 ** -2.3643162878230215 ** -2.59250006809365 ** -2.1878122125045047 ** -2.068224191692425 ** + Policy it 24..Sim ep: 297 +Log Std policy inner: -3.3688421 + Policy it 25..Sim ep: 290 +Log Std policy inner: -3.3691745 + Policy it 26..Sim ep: 308 +Log Std policy inner: -3.3711493 +Iterations: 725 +Simulated test: ** -2.2654043335746974 ** -2.392518855512608 ** -2.188416635299218 ** -2.0282662933156828 ** -2.3659037309838458 ** -2.2280257762712425 ** -2.081524095053319 ** -2.313634065265069 ** -2.2144728740147546 ** + Policy it 27..Sim ep: 306 +Log Std policy inner: -3.374185 + Policy it 28..Sim ep: 293 +Log Std policy inner: -3.3703074 + Policy it 29..Sim ep: 329 +Log Std policy inner: -3.374783 +Iterations: 728 +Simulated test: ** -2.6020687473053115 ** -2.5982696088100785 ** -2.1559396015200765 ** -2.2537683863937854 ** -2.162027216160204 ** -2.2837393198686184 ** -2.4372376174572854 ** -2.161659500536043 ** -2.2851301821891683 ** + Policy it 30..Sim ep: 313 +Log Std policy inner: -3.380734 + Policy it 31..Sim ep: 309 +Log Std policy inner: -3.3781192 + Policy it 32..Sim ep: 308 +Log Std policy inner: -3.3847394 +Iterations: 731 +Simulated test: ** -2.229772786144167 ** -2.128028354785638 ** -1.8328430824307724 ** -2.065040103509091 ** -2.131699916147627 ** -2.3599774152738973 ** -2.2885543580446392 ** -2.3846544786926827 ** -2.1008762788563033 ** + Policy it 33..Sim ep: 311 +Log Std policy inner: -3.3877244 + Policy it 34..Sim ep: 320 +Log Std policy inner: -3.38688 + Policy it 35..Sim ep: 309 +Log Std policy inner: -3.3852437 +Iterations: 734 +Simulated test: ** -2.258401955080917 ** -2.1112704775156454 ** -2.070664282795042 ** -2.1076731627201664 ** -2.2417611601577665 ** -2.1226506155892277 ** -2.432252213305328 ** -2.334820464523509 ** -2.456169810947031 ** + Policy it 36..Sim ep: 293 +Log Std policy inner: -3.3855045 + Policy it 37..Sim ep: 303 +Log Std policy inner: -3.3844934 + Policy it 38..Sim ep: 328 +Log Std policy inner: -3.384987 +Iterations: 737 +Simulated test: ** -2.3471041898895053 ** -2.2352446198699183 ** -2.1278349281987174 ** -2.4865694609488127 ** -2.2646916553378107 ** -2.139519471651292 ** -1.7918938814569265 ** -2.112193585252389 ** -2.1928953795693813 ** + Policy it 39..Sim ep: 300 +Log Std policy inner: -3.385194 + Policy it 40..Sim ep: 298 +Log Std policy inner: -3.387145 + Policy it 41..Sim ep: 315 +Log Std policy inner: -3.3892756 +Iterations: 740 +Simulated test: ** -1.8487028801999985 ** -2.4030310085532256 ** -1.8658259675372393 ** -2.0303744357579854 ** -2.220309269166319 ** -1.9469892230490222 ** -2.5425576159800403 ** -2.162975719720125 ** -1.9698865318024763 ** + Policy it 
42..Sim ep: 306 +Log Std policy inner: -3.3897014 + Policy it 43..Sim ep: 282 +Log Std policy inner: -3.3998995 + Policy it 44..Sim ep: 318 +Log Std policy inner: -3.3993742 +Iterations: 743 +Simulated test: ** -2.6132168227469084 ** -1.9192142856167629 ** -1.6684276926144959 ** -2.2592201872915028 ** -2.637301762253046 ** -2.144716558570508 ** -2.260747806216823 ** -2.337292862291215 ** -2.0156898834137245 ** + Policy it 45..Sim ep: 295 +Log Std policy inner: -3.4031692 + Policy it 46..Sim ep: 317 +Log Std policy inner: -3.4076123 + Policy it 47..Sim ep: 306 +Log Std policy inner: -3.403131 +Iterations: 746 +Simulated test: ** -2.4422960537462495 ** -2.526187767330848 ** -2.5860652675060556 ** -2.073024415106047 ** -2.71665294466191 ** -2.3881705402338413 ** -2.465450993562117 ** -2.015080983210355 ** -2.1481939517403954 ** + Policy it 48..Sim ep: 313 +Log Std policy inner: -3.4001093 + Policy it 49..Sim ep: 321 +Log Std policy inner: -3.395151 +============================ 21 ============================ +step 1 state [0.92216667 0.37966667 0.44566667 0.852 ] a [-0.01125182 0.06466632 0.0580795 0.08333333] r -0.8035638248797453 +step 2 state [0.867 0.45466667 0.529 0.93533333] a [-0.05507681 0.07514712 0.08333333 0.08333333] r -0.959727906426004 +step 3 state [0.78366667 0.5355 0.57033333 1. ] a [-0.08333333 0.08088509 0.04143725 0.08333333] r -0.9821214656625703 +step 4 state [0.70033333 0.61883333 0.629 1. ] a [-0.08333333 0.08333333 0.05881612 0.00272976] r -0.9774744776201988 +step 5 state [0.617 0.6555 0.71233333 0.99733333] a [-0.08333333 0.03672509 0.08333333 -0.00257827] r -0.9900025523918542 +step 6 state [0.53616667 0.73883333 0.79533333 0.91833333] a [-0.08072245 0.08333333 0.0830594 -0.07896492] r -0.9753961812799394 +step 7 state [0.45283333 0.82216667 0.712 0.86533333] a [-0.08333333 0.08333333 -0.08333333 -0.05298867] r -0.6444916959964044 +step 8 state [0.3695 0.89866667 0.62866667 0.782 ] a [-0.08333333 0.07658423 -0.08333333 -0.08333333] r -0.10519239410392245 +step 9 state [0.3585 0.982 0.58966667 0.766 ] a [-0.01084105 0.08333333 -0.03887033 -0.01584494] r -0.01572181426288688 +step 1 state [0.6765 0.94166667 0.3145 0.48383333] a [-0.08333333 0.07749071 0.0785878 0.07499978] r -0.35428642134064536 +step 2 state [0.59316667 1. 0.39783333 0.56716667] a [-0.08333333 0.08089872 0.08333333 0.08333333] r -0.15721184456230564 +step 3 state [0.51216667 1. 0.48116667 0.6505 ] a [-0.08086745 0.08333333 0.08333333 0.08333333] r -0.05925586392591875 +step 4 state [0.42883333 1. 0.5645 0.73083333] a [-0.08333333 0.08333333 0.08333333 0.08039729] r -0.09770844847383486 +step 5 state [0.35266667 1. 0.572 0.65016667] a [-0.07603392 0.08333333 0.00763319 -0.08058336] r -0.008965439889604188 +step 1 state [0.779 1. 
0.88166667 0.58733333] a [-0.08333333 0.08333333 -0.07437073 -0.08333333] r -0.9117024975191339 +Saved current buffer default +Ep:21 Rew:-0.13 -- Step:360 +Train set: 288 Valid set: 72 +Log Std policy: [-3.722281 -3.9432414 -3.2131782 -2.7019036] -3.395151 +maximum: 1.1540497887255392 +mb_lr: 0.001 +Model:0, iter:59 -- Old Val loss:0.005085 New Val loss:0.009515 -- New Train loss:0.000778 +Model:1, iter:57 -- Old Val loss:0.003130 New Val loss:0.008305 -- New Train loss:0.000871 +Model:2, iter:56 -- Old Val loss:0.010880 New Val loss:0.009588 -- New Train loss:0.001010 +Model:3, iter:31 -- Old Val loss:0.002519 New Val loss:0.010187 -- New Train loss:0.001735 +Model:4, iter:41 -- Old Val loss:0.003493 New Val loss:0.012202 -- New Train loss:0.001446 +Model:5, iter:40 -- Old Val loss:0.005741 New Val loss:0.010402 -- New Train loss:0.001855 +Model:6, iter:52 -- Old Val loss:0.004238 New Val loss:0.012391 -- New Train loss:0.001008 +Model:7, iter:39 -- Old Val loss:0.003849 New Val loss:0.011295 -- New Train loss:0.001459 +Model:8, iter:33 -- Old Val loss:0.005135 New Val loss:0.013136 -- New Train loss:0.001867 + Policy it 0..Sim ep: 343 +Log Std policy inner: -3.3931553 + Policy it 1..Sim ep: 331 +Log Std policy inner: -3.396313 + Policy it 2..Sim ep: 336 +Log Std policy inner: -3.3973424 +Iterations: 751 +Simulated test: ** -2.2175952958920973 ** -1.822916887057945 ** -2.118875615047291 ** -2.260268873386085 ** -2.332698178421706 ** -1.9749505234183744 ** -1.822292162820231 ** -2.191102231889963 ** -2.104731400106102 ** + Policy it 3..Sim ep: 311 +Log Std policy inner: -3.397681 + Policy it 4..Sim ep: 340 +Log Std policy inner: -3.3930638 + Policy it 5..Sim ep: 327 +Log Std policy inner: -3.394469 +Iterations: 754 +Simulated test: ** -2.4207440432102887 ** -2.1964495508850086 ** -2.2401888233749196 ** -2.356437743571587 ** -1.969913485532161 ** -2.331128437872976 ** -2.168158063304145 ** -2.094313998406287 ** -1.8857133379977313 ** + Policy it 6..Sim ep: 332 +Log Std policy inner: -3.3920765 + Policy it 7..Sim ep: 336 +Log Std policy inner: -3.4012704 + Policy it 8..Sim ep: 333 +Log Std policy inner: -3.4007218 +Iterations: 757 +Simulated test: ** -2.4042683667037634 ** -1.9984634730918334 ** -1.7392884169964236 ** -2.4417755963513628 ** -2.331865753084421 ** -2.0662759877729697 ** -2.22829123315867 ** -2.064059069855139 ** -1.9467860807944088 ** + Policy it 9..Sim ep: 324 +Log Std policy inner: -3.409685 + Policy it 10..Sim ep: 337 +Log Std policy inner: -3.4073653 + Policy it 11..Sim ep: 337 +Log Std policy inner: -3.4139972 +Iterations: 760 +Simulated test: ** -2.330342940221308 ** -2.3210308326945235 ** -2.436210789080942 ** -2.144845584272407 ** -2.115544383263914 ** -1.8108296097046697 ** -2.329937685667537 ** -2.1364475633809343 ** -2.348420703737065 ** + Policy it 12..Sim ep: 344 +Log Std policy inner: -3.4066005 + Policy it 13..Sim ep: 337 +Log Std policy inner: -3.4061685 + Policy it 14..Sim ep: 335 +Log Std policy inner: -3.4187987 +Iterations: 763 +Simulated test: ** -2.003478010240942 ** -1.895730625470169 ** -2.412180485725403 ** -1.643204764709808 ** -2.120502407513559 ** -1.7011597394569253 ** -2.114623009703355 ** -2.292414981722832 ** -2.2739845356484873 ** + Policy it 15..Sim ep: 341 +Log Std policy inner: -3.4177074 + Policy it 16..Sim ep: 349 +Log Std policy inner: -3.4316134 + Policy it 17..Sim ep: 344 +Log Std policy inner: -3.432977 +Iterations: 766 +Simulated test: ** -1.9730099440756022 ** -2.5360127778176684 ** -1.9931293769041076 ** -2.2172613492188975 ** 
-2.2786572622461243 ** -2.1295466630114244 ** -1.9273329021886456 ** -2.001998034190619 ** -2.138273598673404 ** + Policy it 18..Sim ep: 348 +Log Std policy inner: -3.430513 + Policy it 19..Sim ep: 351 +Log Std policy inner: -3.4259245 + Policy it 20..Sim ep: 332 +Log Std policy inner: -3.4304893 +Iterations: 769 +Simulated test: ** -2.181866058378946 ** -1.8726242688298225 ** -2.1930605536606165 ** -2.2638300247691223 ** -2.3148307770304384 ** -1.7951689223386347 ** -1.4714595610578545 ** -2.017896966636181 ** -1.8646227004399407 ** + Policy it 21..Sim ep: 372 +Log Std policy inner: -3.4242501 + Policy it 22..Sim ep: 338 +Log Std policy inner: -3.418947 + Policy it 23..Sim ep: 356 +Log Std policy inner: -3.4185522 +Iterations: 772 +Simulated test: ** -2.3323920224350876 ** -2.0381051122385543 ** -2.182280695772497 ** -2.118994173894171 ** -1.7888369976390095 ** -2.0813646672596224 ** -2.438806123677641 ** -1.856298132049851 ** -1.9419351191748866 ** + Policy it 24..Sim ep: 332 +Log Std policy inner: -3.4191158 + Policy it 25..Sim ep: 352 +Log Std policy inner: -3.4160905 + Policy it 26..Sim ep: 362 +Log Std policy inner: -3.4139981 +Iterations: 775 +Simulated test: ** -2.0961877836612985 ** -2.0256244319083634 ** -1.966058079278446 ** -1.9309865749115125 ** -2.275079028906766 ** -2.0029221248356044 ** -1.9970677250972948 ** -2.23528712939471 ** -2.2697571958275513 ** + Policy it 27..Sim ep: 351 +Log Std policy inner: -3.4103174 + Policy it 28..Sim ep: 343 +Log Std policy inner: -3.4165106 + Policy it 29..Sim ep: 333 +Log Std policy inner: -3.4209597 +Iterations: 778 +Simulated test: ** -2.3501630927994848 ** -2.23537959093228 ** -1.924993156605633 ** -2.2073864201828837 ** -1.438649620367214 ** -1.7616507234959864 ** -1.8599120635539292 ** -1.8547173063002993 ** -2.181012771311216 ** + Policy it 30..Sim ep: 374 +Log Std policy inner: -3.4265015 + Policy it 31..Sim ep: 346 +Log Std policy inner: -3.4256928 + Policy it 32..Sim ep: 340 +Log Std policy inner: -3.432366 +Iterations: 781 +Simulated test: ** -1.871951794065535 ** -2.092048993819626 ** -1.9670521529851248 ** -2.4908268849132584 ** -1.77624535652576 ** -1.8344185394677333 ** -1.973086235622177 ** -1.8671134974458254 ** -2.169012986519374 ** + Policy it 33..Sim ep: 353 +Log Std policy inner: -3.453247 + Policy it 34..Sim ep: 345 +Log Std policy inner: -3.46283 + Policy it 35..Sim ep: 372 +Log Std policy inner: -3.4652514 +Iterations: 784 +Simulated test: ** -2.3176418815180657 ** -2.164533255196293 ** -2.350897616161965 ** -2.251019838349894 ** -2.23990583216073 ** -1.917163073787815 ** -1.9811282091622706 ** -2.1753861528774725 ** -1.7554497340630042 ** +break +============================ 22 ============================ +step 1 state [0.552 0.82466667 0.47033333 0.3775 ] a [-0.08333333 0.08333333 -0.06812899 0.08333333] r -0.2554044278236184 +step 2 state [0.47016667 0.9045 0.55283333 0.46083333] a [-0.08179574 0.07984232 0.08264307 0.08333333] r -0.07133753542730192 +step 3 state [0.38933333 0.98783333 0.53 0.5435 ] a [-0.08073365 0.08333333 -0.02278632 0.08272374] r -0.07041385484082263 +step 4 state [0.306 1. 0.50516667 0.62033333] a [-0.08333333 0.08110067 -0.02473743 0.07696338] r 0.05004216586896271 +step 1 state [0.744 0.283 0.1355 0.97883333] a [-0.06157878 0.03185571 -0.00081599 0.08333333] r -0.9359196189430143 +step 2 state [0.707 0.30416667 0.12666667 1. ] a [-0.03685654 0.02124366 -0.00867263 0.08333333] r -0.9541183111866951 +step 3 state [0.62366667 0.3875 0.15083333 1. 
] a [-0.08333333 0.08333333 0.0242855 0.08333333] r -0.9402754300335321 +step 4 state [0.54833333 0.47083333 0.19016667 1. ] a [-0.07531482 0.08333333 0.03945266 0.06090005] r -0.8974489269347158 +step 5 state [0.465 0.49933333 0.2735 1. ] a [-0.08333333 0.02863302 0.08333333 0.04078023] r -0.8474911941666248 +step 6 state [0.48066667 0.58266667 0.30083333 0.99316667] a [ 0.01575784 0.08333333 0.02749783 -0.00678011] r -0.7959331911584725 +step 7 state [0.41883333 0.6585 0.38416667 0.9105 ] a [-0.06181754 0.07594071 0.08333333 -0.08252388] r -0.3686668192767487 +step 8 state [0.3355 0.74183333 0.4675 0.82716667] a [-0.08333333 0.08333333 0.08333333 -0.08333333] r -0.12122119291084821 +step 9 state [0.25216667 0.82516667 0.5085 0.815 ] a [-0.08333333 0.08333333 0.04101478 -0.01205847] r -0.09667998027909475 +step 10 state [0.16883333 0.9085 0.46383333 0.78866667] a [-0.08333333 0.08333333 -0.04455416 -0.02618141] r -0.01658643562297024 +step 1 state [0.821 0.61783333 0.50066667 0.64916667] a [-0.08333333 0.08333333 0.08333333 0.08333333] r 0.09334019662858828 +Saved current buffer default +Ep:22 Rew:0.85 -- Step:375 +Train set: 300 Valid set: 75 +Log Std policy: [-3.819299 -4.093855 -3.2198148 -2.7280366] -3.4652514 +maximum: 1.1866803932571766 +mb_lr: 0.001 +Model:0, iter:29 -- Old Val loss:0.005420 New Val loss:0.014993 -- New Train loss:0.001891 +Model:1, iter:98 -- Old Val loss:0.005064 New Val loss:0.007133 -- New Train loss:0.000387 +Model:2, iter:50 -- Old Val loss:0.005524 New Val loss:0.012022 -- New Train loss:0.001159 +Model:3, iter:105 -- Old Val loss:0.006880 New Val loss:0.009864 -- New Train loss:0.000329 +Model:4, iter:47 -- Old Val loss:0.007086 New Val loss:0.016842 -- New Train loss:0.001392 +Model:5, iter:57 -- Old Val loss:0.006080 New Val loss:0.010223 -- New Train loss:0.001238 +Model:6, iter:33 -- Old Val loss:0.006443 New Val loss:0.010189 -- New Train loss:0.001639 +Model:7, iter:41 -- Old Val loss:0.004715 New Val loss:0.012703 -- New Train loss:0.001497 +Model:8, iter:47 -- Old Val loss:0.008124 New Val loss:0.009930 -- New Train loss:0.001351 + Policy it 0..Sim ep: 343 +Log Std policy inner: -3.4701517 + Policy it 1..Sim ep: 330 +Log Std policy inner: -3.473372 + Policy it 2..Sim ep: 359 +Log Std policy inner: -3.472413 +Iterations: 787 +Simulated test: ** -2.4765170146245508 ** -1.8031920487899333 ** -2.2398602969525383 ** -1.996797879706719 ** -2.346823542620987 ** -2.072129084619228 ** -1.998194243277103 ** -1.8627374274411705 ** -1.60225420916453 ** + Policy it 3..Sim ep: 336 +Log Std policy inner: -3.4744349 + Policy it 4..Sim ep: 335 +Log Std policy inner: -3.4745336 + Policy it 5..Sim ep: 333 +Log Std policy inner: -3.4763737 +Iterations: 790 +Simulated test: ** -2.2964696102961897 ** -2.0809478130517527 ** -1.7753428535116837 ** -2.0622108936926815 ** -1.8253234704304486 ** -2.0683174218423663 ** -1.9362463956442661 ** -2.0343796252436004 ** -1.9542783742863685 ** + Policy it 6..Sim ep: 352 +Log Std policy inner: -3.4728153 + Policy it 7..Sim ep: 342 +Log Std policy inner: -3.4719057 + Policy it 8..Sim ep: 355 +Log Std policy inner: -3.4729314 +Iterations: 793 +Simulated test: ** -2.4121257715765387 ** -1.910875143322628 ** -2.460219347323291 ** -2.0696758493129166 ** -2.0606535654561595 ** -1.9060401761671528 ** -1.9107098896731622 ** -1.7817268804414197 ** -1.7176941058738158 ** + Policy it 9..Sim ep: 337 +Log Std policy inner: -3.4754286 + Policy it 10..Sim ep: 362 +Log Std policy inner: -3.4903917 + Policy it 11..Sim ep: 340 +Log Std policy inner: 
-3.488121 +Iterations: 796 +Simulated test: ** -2.4722994556836784 ** -2.162744005457498 ** -2.354458818123676 ** -1.7157129650111893 ** -2.147878789161332 ** -1.8190507514262573 ** -2.18118562168529 ** -1.9138876638934017 ** -1.7791767387736763 ** + Policy it 12..Sim ep: 345 +Log Std policy inner: -3.4895895 + Policy it 13..Sim ep: 333 +Log Std policy inner: -3.4870849 + Policy it 14..Sim ep: 325 +Log Std policy inner: -3.487531 +Iterations: 799 +Simulated test: ** -2.0590786327648676 ** -1.9672088659089058 ** -2.0236708485824058 ** -1.7707423649099656 ** -1.7676231826998992 ** -2.1899190805875697 ** -2.415541490521573 ** -1.8853074718359857 ** -2.0254131152434276 ** + Policy it 15..Sim ep: 343 +Log Std policy inner: -3.4882133 + Policy it 16..Sim ep: 326 +Log Std policy inner: -3.4932723 + Policy it 17..Sim ep: 350 +Log Std policy inner: -3.4953792 +Iterations: 802 +Simulated test: ** -2.308896940575796 ** -2.199863959605864 ** -2.3596004142423044 ** -2.023559906648006 ** -2.219761345035513 ** -1.998951265406795 ** -1.9037765726144427 ** -2.0172236763790714 ** -1.8106755005591548 ** + Policy it 18..Sim ep: 351 +Log Std policy inner: -3.4987695 + Policy it 19..Sim ep: 320 +Log Std policy inner: -3.5015693 + Policy it 20..Sim ep: 343 +Log Std policy inner: -3.5059483 +Iterations: 805 +Simulated test: ** -2.0257023103459506 ** -2.249983078885125 ** -2.20959590976825 ** -1.7265839482098817 ** -2.4320102805271744 ** -1.6514174576004734 ** -2.4432443716959096 ** -1.8034362844843417 ** -2.1718574883486146 ** + Policy it 21..Sim ep: 334 +Log Std policy inner: -3.5077834 + Policy it 22..Sim ep: 355 +Log Std policy inner: -3.5230095 + Policy it 23..Sim ep: 340 +Log Std policy inner: -3.5231442 +Iterations: 808 +Simulated test: ** -1.9673371831455733 ** -1.8279428695328535 ** -2.199580041144509 ** -1.9924203589605167 ** -2.305025169858709 ** -2.1934559274360073 ** -2.477182025592774 ** -2.0682799260714093 ** -2.1554129544101306 ** + Policy it 24..Sim ep: 340 +Log Std policy inner: -3.5232112 + Policy it 25..Sim ep: 350 +Log Std policy inner: -3.52482 + Policy it 26..Sim ep: 338 +Log Std policy inner: -3.5206661 +Iterations: 811 +Simulated test: ** -2.382323235818185 ** -1.8393158049648628 ** -1.8198642913997174 ** -2.2485593857895583 ** -1.8900497018813622 ** -1.7586029492039232 ** -2.1003931611194275 ** -2.1583662034687587 ** -2.1822542842073016 ** + Policy it 27..Sim ep: 357 +Log Std policy inner: -3.520115 + Policy it 28..Sim ep: 334 +Log Std policy inner: -3.524532 + Policy it 29..Sim ep: 352 +Log Std policy inner: -3.52918 +Iterations: 814 +Simulated test: ** -1.6926667015097336 ** -2.086483159295749 ** -2.441854540714994 ** -1.8836088620510418 ** -2.014198721498251 ** -2.098750443388708 ** -1.8995626142434776 ** -1.8797384120128118 ** -1.6565448149770965 ** + Policy it 30..Sim ep: 334 +Log Std policy inner: -3.5317893 + Policy it 31..Sim ep: 354 +Log Std policy inner: -3.535062 + Policy it 32..Sim ep: 342 +Log Std policy inner: -3.5374348 +Iterations: 817 +Simulated test: ** -2.378401393634267 ** -2.1051057447990753 ** -1.9010275324201211 ** -1.9199790982806006 ** -2.084830770297558 ** -1.8284922303329223 ** -2.4416666846303268 ** -2.152342973499908 ** -2.210665801894793 ** +break +============================ 23 ============================ +step 1 state [0.84466667 0.8545 0.65466667 0.56766667] a [-0.05804541 0.03418334 0.04069772 -0.08333333] r -0.03315807860138276 +step 1 state [0.261 1. 
0.584 0.2505] a [-0.08333333 0.05813727 -0.08333333 0.07094454] r -0.4920171758785711 +step 2 state [0.17766667 1. 0.66733333 0.323 ] a [-0.08333333 0.07619014 0.08333333 0.07258138] r -0.29153525715217044 +step 3 state [0.09433333 1. 0.61133333 0.40516667] a [-0.08333333 0.08201205 -0.05586062 0.08232299] r -0.15098730392086068 +step 4 state [0.011 1. 0.58566667 0.4885 ] a [-0.08333333 0.08333333 -0.02559145 0.08333333] r -0.07384794983019227 +step 5 state [0. 1. 0.58983333 0.525 ] a [-0.0586755 0.07664322 0.00424175 0.0365108 ] r -0.000532682217845637 +step 1 state [0.288 0.321 0.32866667 0.76783333] a [-0.08333333 0.08333333 0.08333333 0.0557518 ] r -0.08263558872739629 +step 2 state [0.20466667 0.40033333 0.3865 0.84283333] a [-0.08333333 0.07935985 0.05796151 0.07502414] r -0.11089796438639843 +step 3 state [0.12133333 0.48366667 0.46683333 0.92616667] a [-0.08333333 0.08333333 0.0804226 0.08333333] r -0.27559888295250445 +step 4 state [0.038 0.52766667 0.55016667 0.95033333] a [-0.08333333 0.04410198 0.08333333 0.0241962 ] r -0.412093262163037 +step 5 state [0. 0.589 0.5085 0.867 ] a [-0.08333333 0.0614239 -0.04158461 -0.08333333] r -0.09513804362558154 +step 6 state [0. 0.67233333 0.475 0.87333333] a [-0.06352679 0.08333333 -0.03333341 0.00649124] r -0.050418216443505415 +step 7 state [0. 0.75566667 0.47866667 0.84416667] a [-0.05747043 0.08333333 0.00371664 -0.02910867] r 0.006666232057681132 +step 1 state [0. 0.44416667 0.6105 0.89516667] a [-0.0810637 0.08333333 -0.08333333 -0.02405605] r -0.3955205225509727 +step 2 state [0. 0.51216667 0.52716667 0.81183333] a [-0.06510459 0.06812075 -0.08333333 -0.08333333] r -0.009723143472492812 +Saved current buffer default +Ep:23 Rew:2.52 -- Step:390 +Train set: 312 Valid set: 78 +Log Std policy: [-3.8901732 -4.162655 -3.2921891 -2.8047225] -3.5374348 +maximum: 1.1866803932571766 +mb_lr: 0.001 +Model:0, iter:53 -- Old Val loss:0.010305 New Val loss:0.010276 -- New Train loss:0.001012 +Model:1, iter:74 -- Old Val loss:0.002860 New Val loss:0.009121 -- New Train loss:0.000705 +Model:2, iter:66 -- Old Val loss:0.005268 New Val loss:0.010023 -- New Train loss:0.000802 +Model:3, iter:98 -- Old Val loss:0.002509 New Val loss:0.010711 -- New Train loss:0.000479 +Model:4, iter:68 -- Old Val loss:0.005283 New Val loss:0.008764 -- New Train loss:0.000795 +Model:5, iter:64 -- Old Val loss:0.006530 New Val loss:0.008740 -- New Train loss:0.000965 +Model:6, iter:77 -- Old Val loss:0.006864 New Val loss:0.007274 -- New Train loss:0.000848 +Model:7, iter:68 -- Old Val loss:0.007162 New Val loss:0.010577 -- New Train loss:0.000802 +Model:8, iter:99 -- Old Val loss:0.004598 New Val loss:0.006045 -- New Train loss:0.000463 + Policy it 0..Sim ep: 390 +Log Std policy inner: -3.5359936 + Policy it 1..Sim ep: 372 +Log Std policy inner: -3.539153 + Policy it 2..Sim ep: 374 +Log Std policy inner: -3.541706 +Iterations: 820 +Simulated test: ** -2.401341226277873 ** -1.8507894178142306 ** -2.4173368920898066 ** -2.03462296310463 ** -2.0417291042022407 ** -1.938471612300491 ** -1.7256692193541676 ** -2.0638720162294337 ** -1.958126072382438 ** + Policy it 3..Sim ep: 366 +Log Std policy inner: -3.545449 + Policy it 4..Sim ep: 383 +Log Std policy inner: -3.5464776 + Policy it 5..Sim ep: 386 +Log Std policy inner: -3.5478246 +Iterations: 823 +Simulated test: ** -2.447251870308537 ** -1.8765580837056042 ** -1.9609606084763072 ** -1.7454461916466244 ** -2.1292081065289676 ** -1.9935169993311865 ** -1.9583381157112307 ** -2.2676754965865986 ** -1.8964417662401685 ** + 
Policy it 6..Sim ep: 398 +Log Std policy inner: -3.5494177 + Policy it 7..Sim ep: 374 +Log Std policy inner: -3.554758 + Policy it 8..Sim ep: 382 +Log Std policy inner: -3.5560656 +Iterations: 826 +Simulated test: ** -2.274909118587384 ** -1.9292947906558402 ** -2.0782252433244137 ** -2.1378875872679055 ** -2.1201022182352607 ** -2.8698934771877247 ** -1.9751667275070213 ** -2.2805027468089247 ** -1.71532833740348 ** + Policy it 9..Sim ep: 362 +Log Std policy inner: -3.5475526 + Policy it 10..Sim ep: 370 +Log Std policy inner: -3.5491433 + Policy it 11..Sim ep: 358 +Log Std policy inner: -3.552775 +Iterations: 829 +Simulated test: ** -2.1202561987795345 ** -1.7346103011444212 ** -1.6919955528667197 ** -1.7205084775923751 ** -2.006962534241029 ** -1.6095804066024721 ** -2.3236970893252873 ** -1.9018921183526982 ** -2.06579679518356 ** + Policy it 12..Sim ep: 371 +Log Std policy inner: -3.5535598 + Policy it 13..Sim ep: 384 +Log Std policy inner: -3.5593607 + Policy it 14..Sim ep: 388 +Log Std policy inner: -3.559777 +Iterations: 832 +Simulated test: ** -2.295891202057246 ** -2.119236351554282 ** -2.01840844811406 ** -1.8567588252434506 ** -1.7573643329087645 ** -1.6887695295456797 ** -2.028712381371297 ** -1.873365133581683 ** -1.9702024699375034 ** + Policy it 15..Sim ep: 375 +Log Std policy inner: -3.5664272 + Policy it 16..Sim ep: 394 +Log Std policy inner: -3.5681167 + Policy it 17..Sim ep: 397 +Log Std policy inner: -3.5638642 +Iterations: 835 +Simulated test: ** -2.1905673086928434 ** -1.8729036197601818 ** -2.182492839228362 ** -1.5110037271957844 ** -1.963815587698482 ** -1.773835860849358 ** -2.1141658550314606 ** -1.9898709842399693 ** -2.096238740138942 ** + Policy it 18..Sim ep: 379 +Log Std policy inner: -3.5703897 + Policy it 19..Sim ep: 373 +Log Std policy inner: -3.5715497 + Policy it 20..Sim ep: 411 +Log Std policy inner: -3.577131 +Iterations: 838 +Simulated test: ** -2.18186167165637 ** -2.1108416521688924 ** -2.0570295054186136 ** -1.900428411718458 ** -1.8263892699137796 ** -1.8208383640786634 ** -1.9073852620553224 ** -1.696751033472974 ** -1.9734919008240104 ** + Policy it 21..Sim ep: 394 +Log Std policy inner: -3.5823193 + Policy it 22..Sim ep: 374 +Log Std policy inner: -3.585868 + Policy it 23..Sim ep: 373 +Log Std policy inner: -3.586699 +Iterations: 841 +Simulated test: ** -2.4087189263966864 ** -1.9975680280948291 ** -2.185842767483555 ** -1.719061746528605 ** -1.6950870571612904 ** -1.8187504512025043 ** -2.168879432238173 ** -1.7244370877183974 ** -2.099347461640136 ** + Policy it 24..Sim ep: 368 +Log Std policy inner: -3.5909672 + Policy it 25..Sim ep: 387 +Log Std policy inner: -3.594377 + Policy it 26..Sim ep: 399 +Log Std policy inner: -3.594333 +Iterations: 844 +Simulated test: ** -2.1922223778208716 ** -1.633596255388111 ** -1.9546177581883968 ** -1.725481158909388 ** -1.9580853372847196 ** -1.8247201716713608 ** -1.6933322558924555 ** -1.9526953913690521 ** -1.851918885638006 ** + Policy it 27..Sim ep: 381 +Log Std policy inner: -3.5968924 + Policy it 28..Sim ep: 388 +Log Std policy inner: -3.6012483 + Policy it 29..Sim ep: 369 +Log Std policy inner: -3.6056957 +Iterations: 847 +Simulated test: ** -2.4414683614717796 ** -1.6384382575566996 ** -2.277035132884048 ** -1.9702001113363077 ** -2.0545113545504865 ** -1.6814942992071156 ** -2.1488313026283867 ** -1.858701617133338 ** -1.9577262998884544 ** +break +============================ 24 ============================ +step 1 state [0.7995 0.9445 0.419 0.8705] a [-0.08333333 0.08333333 0.08333333 
-0.08141202] r -0.4752472091226603 +step 2 state [0.71616667 1. 0.50233333 0.819 ] a [-0.08333333 0.08223193 0.08333333 -0.05149064] r -0.20747987974302207 +step 3 state [0.63283333 1. 0.56566667 0.75933333] a [-0.08333333 0.07270976 0.06342538 -0.05964991] r -0.028524152823180993 +step 1 state [0.01566667 0.13766667 0.736 0.16133333] a [-0.07464031 0.08333333 -0.08333333 0.07902459] r -0.9938359306564318 +step 2 state [0. 0.221 0.65266667 0.24466667] a [-0.08333333 0.08333333 -0.08333333 0.08333333] r -0.955299375850421 +step 3 state [0. 0.2995 0.56933333 0.3175 ] a [-0.08249166 0.07852924 -0.08333333 0.07287315] r -0.6044112488804836 +step 4 state [0. 0.37516667 0.49216667 0.40083333] a [-0.07581441 0.0757571 -0.07715036 0.08333333] r -0.21607516565089957 +step 5 state [0. 0.4585 0.48066667 0.4765 ] a [-0.08333333 0.08333333 -0.0113537 0.07582202] r -0.02963076822326116 +step 1 state [0.495 1. 0.68433333 0.56866667] a [-0.0790673 0.07842021 -0.07243956 0.08333333] r 0.006799646058516684 +step 1 state [0.30616667 0.34383333 0.4565 0.4475 ] a [-0.08333333 0.08333333 0.03403067 0.08333333] r -0.01421367010404162 +step 1 state [0.87883333 0.72366667 0.099 0.46233333] a [-0.08333333 0.0711944 0.07588102 0.08333333] r -0.9435126417798866 +step 2 state [0.7955 0.80416667 0.18233333 0.54083333] a [-0.08333333 0.08052293 0.08333333 0.07850695] r -0.7606605500277932 +step 3 state [0.71233333 0.8875 0.26566667 0.62066667] a [-0.08316571 0.08333333 0.08333333 0.07985233] r -0.523614742450959 +step 4 state [0.629 0.97083333 0.349 0.69966667] a [-0.08333333 0.08333333 0.08333333 0.07902481] r -0.3267221910199135 +step 5 state [0.5545 1. 0.42683333 0.76716667] a [-0.07446019 0.08333333 0.07789443 0.06751206] r -0.20408531657970908 +Saved current buffer default +Ep:24 Rew:0.74 -- Step:405 +Train set: 324 Valid set: 81 +Log Std policy: [-3.9810405 -4.2045193 -3.3411458 -2.8960767] -3.6056957 +maximum: 1.1866803932571766 +mb_lr: 0.001 +Model:0, iter:49 -- Old Val loss:0.004518 New Val loss:0.009641 -- New Train loss:0.001221 +Model:1, iter:41 -- Old Val loss:0.003119 New Val loss:0.011617 -- New Train loss:0.001674 +Model:2, iter:57 -- Old Val loss:0.003030 New Val loss:0.008963 -- New Train loss:0.000752 +Model:3, iter:32 -- Old Val loss:0.003049 New Val loss:0.011067 -- New Train loss:0.001994 +Model:4, iter:53 -- Old Val loss:0.002526 New Val loss:0.011179 -- New Train loss:0.000917 +Model:5, iter:43 -- Old Val loss:0.004090 New Val loss:0.009816 -- New Train loss:0.001568 +Model:6, iter:54 -- Old Val loss:0.003164 New Val loss:0.009086 -- New Train loss:0.001118 +Model:7, iter:74 -- Old Val loss:0.002936 New Val loss:0.009492 -- New Train loss:0.000486 +Model:8, iter:62 -- Old Val loss:0.001943 New Val loss:0.009914 -- New Train loss:0.000727 + Policy it 0..Sim ep: 376 +Log Std policy inner: -3.6051204 + Policy it 1..Sim ep: 378 +Log Std policy inner: -3.6004872 + Policy it 2..Sim ep: 380 +Log Std policy inner: -3.6053195 +Iterations: 850 +Simulated test: ** -1.73676407371182 ** -1.6973709643480834 ** -1.6594003933412023 ** -1.8604719252965878 ** -2.1598840994632336 ** -1.9472996974131092 ** -1.9817284165136515 ** -2.161504234734457 ** -1.9572259301599115 ** + Policy it 3..Sim ep: 378 +Log Std policy inner: -3.6125393 + Policy it 4..Sim ep: 393 +Log Std policy inner: -3.6126153 + Policy it 5..Sim ep: 364 +Log Std policy inner: -3.6166842 +Iterations: 853 +Simulated test: ** -1.7511046056263149 ** -1.912434743369231 ** -1.6622979986807331 ** -1.2715222836541944 ** -1.9852912102965639 ** 
-2.123178929418209 ** -2.0766256010090003 ** -1.8468064118089387 ** -1.8350307507580146 ** + Policy it 6..Sim ep: 388 +Log Std policy inner: -3.6136167 + Policy it 7..Sim ep: 376 +Log Std policy inner: -3.6117077 + Policy it 8..Sim ep: 383 +Log Std policy inner: -3.6023436 +Iterations: 856 +Simulated test: ** -1.7190710166294594 ** -1.8646129808913974 ** -1.7381784827262163 ** -1.6770044098235666 ** -1.5738868285552599 ** -1.8932024154812097 ** -1.72084919589106 ** -2.009683965635486 ** -1.6120280062686652 ** + Policy it 9..Sim ep: 405 +Log Std policy inner: -3.6059568 + Policy it 10..Sim ep: 385 +Log Std policy inner: -3.6156402 + Policy it 11..Sim ep: 393 +Log Std policy inner: -3.6165411 +Iterations: 859 +Simulated test: ** -2.0505041265365436 ** -1.7624730877904222 ** -1.6330024194414727 ** -2.2581699266203215 ** -1.5102115553617477 ** -1.9575173439551146 ** -2.114836989362957 ** -1.9968297623937543 ** -1.5903680397151039 ** + Policy it 12..Sim ep: 372 +Log Std policy inner: -3.6169186 + Policy it 13..Sim ep: 404 +Log Std policy inner: -3.6168315 + Policy it 14..Sim ep: 396 +Log Std policy inner: -3.6146235 +Iterations: 862 +Simulated test: ** -2.465060344808735 ** -2.1259087086838555 ** -1.904657136349706 ** -2.0398522015311755 ** -1.5874327218550024 ** -1.843727105554426 ** -2.059273741736251 ** -1.6789311730861665 ** -1.6133068160782569 ** + Policy it 15..Sim ep: 380 +Log Std policy inner: -3.6169722 + Policy it 16..Sim ep: 384 +Log Std policy inner: -3.621307 + Policy it 17..Sim ep: 418 +Log Std policy inner: -3.6200466 +Iterations: 865 +Simulated test: ** -2.6828238370502366 ** -1.8278429168276489 ** -1.9537202491331846 ** -1.9505399031471462 ** -2.0223567933030426 ** -1.9228367516025902 ** -2.007599127367139 ** -1.7571092873765155 ** -1.9334119182662106 ** + Policy it 18..Sim ep: 379 +Log Std policy inner: -3.6198373 + Policy it 19..Sim ep: 379 +Log Std policy inner: -3.6185675 + Policy it 20..Sim ep: 388 +Log Std policy inner: -3.6189985 +Iterations: 868 +Simulated test: ** -2.757377444736194 ** -1.8483911330380942 ** -2.2928294159518554 ** -2.138404347333126 ** -1.862158287903294 ** -1.821789741134271 ** -1.7275417482084594 ** -1.8592746331682428 ** -2.0165160409826783 ** + Policy it 21..Sim ep: 373 +Log Std policy inner: -3.6211138 + Policy it 22..Sim ep: 361 +Log Std policy inner: -3.623718 + Policy it 23..Sim ep: 374 +Log Std policy inner: -3.6235983 +Iterations: 871 +Simulated test: ** -2.1673050223803147 ** -1.937029195050709 ** -2.090074542632792 ** -2.0021079707751053 ** -2.0470684048719705 ** -2.1296544279484078 ** -2.0394509368855505 ** -2.0349667529040016 ** -1.7894210971333087 ** + Policy it 24..Sim ep: 396 +Log Std policy inner: -3.6222997 + Policy it 25..Sim ep: 368 +Log Std policy inner: -3.6260836 + Policy it 26..Sim ep: 378 +Log Std policy inner: -3.6257267 +Iterations: 874 +Simulated test: ** -2.520406721031759 ** -2.2349275585403667 ** -2.0235335385613142 ** -1.7819866472111607 ** -2.160605412092409 ** -1.7623635022807866 ** -2.3992364500672556 ** -2.437139759566635 ** -1.7322734184586444 ** + Policy it 27..Sim ep: 377 +Log Std policy inner: -3.6258864 + Policy it 28..Sim ep: 370 +Log Std policy inner: -3.626684 + Policy it 29..Sim ep: 380 +Log Std policy inner: -3.6269464 +Iterations: 877 +Simulated test: ** -1.673729557292536 ** -1.9475475894892589 ** -1.9574889183044433 ** -1.9146737198508346 ** -1.7364210385130718 ** -2.046972129156347 ** -1.6382369339256548 ** -1.944750321614556 ** -1.8552544141025282 ** + Policy it 30..Sim ep: 368 +Log Std policy inner: 
-3.6297443 + Policy it 31..Sim ep: 388 +Log Std policy inner: -3.6274838 + Policy it 32..Sim ep: 386 +Log Std policy inner: -3.6270807 +Iterations: 880 +Simulated test: ** -2.0340186327067205 ** -1.9648554828087799 ** -1.71462358916644 ** -1.962434924133122 ** -2.1813363238750028 ** -2.022155905768741 ** -2.121253116361331 ** -2.075785540996585 ** -2.164873461157549 ** +break +============================ 25 ============================ +step 1 state [0.42366667 0.89766667 0.27183333 0.70233333] a [-0.08333333 0.08205603 0.07605954 0.08333333] r -0.477988053229467 +step 2 state [0.34033333 0.96716667 0.35516667 0.778 ] a [-0.08333333 0.06952847 0.08333333 0.07579209] r -0.3051215643559241 +step 3 state [0.26283333 1. 0.404 0.83266667] a [-0.0774033 0.08333333 0.04890896 0.05472151] r -0.2578211920801131 +step 4 state [0.19616667 1. 0.48283333 0.76 ] a [-0.06664539 0.05213043 0.07892544 -0.07261532] r -0.04541331737365373 +step 1 state [0.35516667 0.80083333 0.486 0.66016667] a [-0.08251333 0.03072109 -0.07001088 0.06504428] r 0.07423492022226696 +step 1 state [0.8425 0.4695 0.10016667 0.42416667] a [ 0.01957242 0.08333333 0.08333333 -0.08333333] r -0.8819140440496573 +step 2 state [0.8085 0.4725 0.114 0.5075] a [-0.03385638 0.00305147 0.01385112 0.08333333] r -0.7762033165688617 +step 3 state [0.7465 0.55583333 0.19733333 0.59083333] a [-0.0619392 0.08333333 0.08333333 0.08333333] r -0.6218238747214957 +step 4 state [0.66316667 0.63916667 0.24166667 0.67416667] a [-0.08333333 0.08333333 0.04434947 0.08333333] r -0.4732542041901725 +step 5 state [0.65166667 0.72 0.2185 0.732 ] a [-0.01133451 0.08089658 -0.02307561 0.05783819] r -0.7654566596907069 +step 6 state [0.56833333 0.77416667 0.226 0.81533333] a [-0.08333333 0.05421851 0.00752255 0.08333333] r -0.8951537508676921 +step 7 state [0.55833333 0.79683333 0.30933333 0.76283333] a [-0.00983679 0.02281305 0.08333333 -0.05242934] r -0.3712683729689652 +step 8 state [0.5035 0.88016667 0.39266667 0.79583333] a [-0.05472605 0.08333333 0.08333333 0.0331018 ] r -0.26338624139304323 +step 9 state [0.50266667 0.93133333 0.476 0.79983333] a [-0.00081622 0.05121593 0.08333333 0.00408882] r -0.19574867156849607 +step 10 state [0.47266667 0.948 0.55933333 0.835 ] a [-0.02991862 0.01679739 0.08333333 0.03533141] r -0.3059899249035972 +Saved current buffer default +Ep:25 Rew:0.63 -- Step:420 +Train set: 336 Valid set: 84 +Log Std policy: [-4.011304 -4.2431583 -3.3997793 -2.8540814] -3.6270807 +maximum: 1.1866803932571766 +mb_lr: 0.001 +Model:0, iter:33 -- Old Val loss:0.005114 New Val loss:0.008965 -- New Train loss:0.002277 +Model:1, iter:89 -- Old Val loss:0.005551 New Val loss:0.004549 -- New Train loss:0.000463 +Model:2, iter:92 -- Old Val loss:0.003637 New Val loss:0.004628 -- New Train loss:0.000497 +Model:3, iter:50 -- Old Val loss:0.004708 New Val loss:0.005640 -- New Train loss:0.001300 +Model:4, iter:112 -- Old Val loss:0.005387 New Val loss:0.004836 -- New Train loss:0.000200 +Model:5, iter:63 -- Old Val loss:0.005698 New Val loss:0.005154 -- New Train loss:0.000826 +Model:6, iter:50 -- Old Val loss:0.004025 New Val loss:0.009753 -- New Train loss:0.001468 +Model:7, iter:41 -- Old Val loss:0.003487 New Val loss:0.010242 -- New Train loss:0.001426 +Model:8, iter:102 -- Old Val loss:0.004054 New Val loss:0.007565 -- New Train loss:0.000386 + Policy it 0..Sim ep: 356 +Log Std policy inner: -3.6282208 + Policy it 1..Sim ep: 368 +Log Std policy inner: -3.6327698 + Policy it 2..Sim ep: 378 +Log Std policy inner: -3.6348934 +Iterations: 883 
+Simulated test: ** -2.6216174676595254 ** -2.2720676310546697 ** -2.5507096952036954 ** -2.0024032570281998 ** -2.684047848271439 ** -2.000334957151208 ** -1.9444095425191334 ** -2.65079426504788 ** -1.7493276094202883 ** + Policy it 3..Sim ep: 370 +Log Std policy inner: -3.6370375 + Policy it 4..Sim ep: 371 +Log Std policy inner: -3.6377146 + Policy it 5..Sim ep: 375 +Log Std policy inner: -3.6391227 +Iterations: 886 +Simulated test: ** -2.5323580980626867 ** -2.3274556784029117 ** -2.2870422544464235 ** -2.4161419619282243 ** -2.126003421089263 ** -1.8926771032367833 ** -2.1072406332474203 ** -2.0282367860921657 ** -2.204728960134089 ** + Policy it 6..Sim ep: 369 +Log Std policy inner: -3.6399896 + Policy it 7..Sim ep: 377 +Log Std policy inner: -3.6406376 + Policy it 8..Sim ep: 365 +Log Std policy inner: -3.637768 +Iterations: 889 +Simulated test: ** -2.8083635846921244 ** -2.066797376937466 ** -2.5791956474340987 ** -1.8977695326914545 ** -2.0621988303458783 ** -2.1800667385477572 ** -1.8969144094869261 ** -2.249157623080537 ** -1.855825293494854 ** + Policy it 9..Sim ep: 369 +Log Std policy inner: -3.6379883 + Policy it 10..Sim ep: 359 +Log Std policy inner: -3.6401916 + Policy it 11..Sim ep: 365 +Log Std policy inner: -3.6372147 +Iterations: 892 +Simulated test: ** -3.4074893446710486 ** -2.3185315593285485 ** -1.9940210301622574 ** -2.259684220622294 ** -1.9206863453693221 ** -1.966519365357235 ** -1.847774904211983 ** -2.0035669257957487 ** -2.2932401397498325 ** + Policy it 12..Sim ep: 376 +Log Std policy inner: -3.6373866 + Policy it 13..Sim ep: 369 +Log Std policy inner: -3.635632 + Policy it 14..Sim ep: 384 +Log Std policy inner: -3.6390007 +Iterations: 895 +Simulated test: ** -3.001503103557043 ** -1.938612445009494 ** -2.049472407756839 ** -2.2473689596215265 ** -1.9456448989640922 ** -1.8888122944626957 ** -1.8187095682113432 ** -1.8824874012617512 ** -2.158317864080891 ** + Policy it 15..Sim ep: 382 +Log Std policy inner: -3.640999 + Policy it 16..Sim ep: 357 +Log Std policy inner: -3.6433926 + Policy it 17..Sim ep: 368 +Log Std policy inner: -3.6530743 +Iterations: 898 +Simulated test: ** -3.022313779280521 ** -2.5309619059722173 ** -2.182887373845733 ** -1.7300351615532419 ** -1.8065204355958848 ** -2.360378218498081 ** -1.7654711108386982 ** -2.054494947772473 ** -2.016068473807536 ** + Policy it 18..Sim ep: 379 +Log Std policy inner: -3.650301 + Policy it 19..Sim ep: 363 +Log Std policy inner: -3.652017 + Policy it 20..Sim ep: 386 +Log Std policy inner: -3.6499062 +Iterations: 901 +Simulated test: ** -2.463231001459062 ** -1.6973127873102203 ** -2.3809747895092004 ** -2.075924329934642 ** -1.6500587336486205 ** -2.143628571140871 ** -2.114036205098964 ** -1.7920770366699434 ** -1.85760730211623 ** + Policy it 21..Sim ep: 392 +Log Std policy inner: -3.6508932 + Policy it 22..Sim ep: 381 +Log Std policy inner: -3.6500216 + Policy it 23..Sim ep: 379 +Log Std policy inner: -3.6483698 +Iterations: 904 +Simulated test: ** -2.7512581549456807 ** -2.1796784151074826 ** -2.0963836886896754 ** -1.7801483360980637 ** -1.8812876934441738 ** -1.983922576798359 ** -1.8272611530451104 ** -1.6700898667797446 ** -1.9211091115151067 ** + Policy it 24..Sim ep: 370 +Log Std policy inner: -3.6535525 + Policy it 25..Sim ep: 375 +Log Std policy inner: -3.654209 + Policy it 26..Sim ep: 377 +Log Std policy inner: -3.6521926 +Iterations: 907 +Simulated test: ** -3.025152186942869 ** -2.2484873986127787 ** -2.2829358793387655 ** -1.9631744648705354 ** -1.9701919250329956 ** -1.667400250416249 ** 
-1.9188692386262118 ** -1.927883552549174 ** -1.8113993311091325 ** +break +============================ 26 ============================ +step 1 state [0.06583333 0.53283333 0.545 0.22516667] a [-0.08333333 0.08333333 -0.08333333 0.08333333] r -0.5481859596198758 +step 2 state [0. 0.61183333 0.48016667 0.30566667] a [-0.08333333 0.07904896 -0.06467421 0.08061914] r -0.35544539602908765 +step 3 state [0. 0.69516667 0.48766667 0.389 ] a [-0.07084798 0.08333333 0.00760469 0.08333333] r -0.20440089700614572 +step 4 state [0. 0.7755 0.50666667 0.47233333] a [-0.05467883 0.08043646 0.01910644 0.08333333] r -0.05356596634936939 +step 5 state [0. 0.85166667 0.495 0.55566667] a [-0.04407871 0.07621138 -0.01150803 0.08333333] r -0.03370316563582254 +step 1 state [0.27383333 0.8305 0.18583333 0.1515 ] a [-0.08333333 0.08333333 0.08308511 0.08333333] r -0.9969534213542462 +step 2 state [0.1905 0.89983333 0.26916667 0.23483333] a [-0.08333333 0.06946172 0.08333333 0.08333333] r -0.9760123438898243 +step 3 state [0.19266667 0.98316667 0.3245 0.29583333] a [0.00225558 0.08333333 0.05533515 0.0610821 ] r -0.8785570483366424 +step 4 state [0.16866667 1. 0.40783333 0.32716667] a [-0.02392877 0.08333333 0.08333333 0.0314464 ] r -0.5190145870746246 +step 5 state [0.1625 1. 0.49116667 0.325 ] a [-0.00604462 0.08333333 0.08333333 -0.00211722] r -0.34160445026185793 +step 6 state [0.1425 1. 0.5745 0.40833333] a [-0.01994331 0.08232419 0.08333333 0.08333333] r -0.12816874035677028 +step 7 state [0.06983333 1. 0.65116667 0.49166667] a [-0.07250776 0.05160906 0.07669485 0.08333333] r -0.02484262783735558 +step 1 state [0.184 0.125 0.43783333 0.50816667] a [-0.08333333 0.08333333 -0.08333333 0.08333333] r 0.0024884691310669282 +step 1 state [0.37783333 0.67366667 0.69533333 0.22266667] a [-0.06963227 0.04727885 -0.08333333 0.08333333] r -0.6971532225314346 +step 2 state [0.31533333 0.7245 0.61516667 0.306 ] a [-0.06240156 0.0508706 -0.08004039 0.08333333] r -0.3245680286565925 +Saved current buffer default +Ep:26 Rew:0.96 -- Step:435 +Train set: 348 Valid set: 87 +Log Std policy: [-4.0281825 -4.287364 -3.4102275 -2.882996 ] -3.6521926 +maximum: 1.1866803932571766 +mb_lr: 0.001 +Model:0, iter:49 -- Old Val loss:0.006423 New Val loss:0.012956 -- New Train loss:0.001214 +Model:1, iter:65 -- Old Val loss:0.002114 New Val loss:0.011536 -- New Train loss:0.000583 +Model:2, iter:44 -- Old Val loss:0.002994 New Val loss:0.007918 -- New Train loss:0.001450 +Model:3, iter:37 -- Old Val loss:0.005451 New Val loss:0.009092 -- New Train loss:0.001556 +Model:4, iter:58 -- Old Val loss:0.002212 New Val loss:0.008069 -- New Train loss:0.000783 +Model:5, iter:73 -- Old Val loss:0.003097 New Val loss:0.009225 -- New Train loss:0.000629 +Model:6, iter:51 -- Old Val loss:0.004977 New Val loss:0.005515 -- New Train loss:0.000986 +Model:7, iter:48 -- Old Val loss:0.005711 New Val loss:0.008036 -- New Train loss:0.001126 +Model:8, iter:51 -- Old Val loss:0.003401 New Val loss:0.014419 -- New Train loss:0.000942 + Policy it 0..Sim ep: 385 +Log Std policy inner: -3.6583588 + Policy it 1..Sim ep: 378 +Log Std policy inner: -3.6634874 + Policy it 2..Sim ep: 393 +Log Std policy inner: -3.6601808 +Iterations: 910 +Simulated test: ** -1.774499691114761 ** -1.6834014692182244 ** -2.066882220340194 ** -1.7686797890230082 ** -1.8477986425952986 ** -1.7589812687016093 ** -1.9218897005729376 ** -1.8608485555555672 ** -1.7355650672735647 ** + Policy it 3..Sim ep: 399 +Log Std policy inner: -3.660236 + Policy it 4..Sim ep: 396 +Log Std policy inner: 
-3.660814 + Policy it 5..Sim ep: 397 +Log Std policy inner: -3.6607966 +Iterations: 913 +Simulated test: ** -1.9174835823266767 ** -1.514293027158128 ** -1.9219845181773416 ** -2.0962756713014095 ** -1.8129439942154568 ** -1.9775448511401192 ** -1.6333154171612114 ** -2.0972858999716117 ** -1.5889713713148377 ** + Policy it 6..Sim ep: 390 +Log Std policy inner: -3.6561966 + Policy it 7..Sim ep: 388 +Log Std policy inner: -3.6587467 + Policy it 8..Sim ep: 390 +Log Std policy inner: -3.6618822 +Iterations: 916 +Simulated test: ** -2.0509413405309895 ** -1.6255645376164467 ** -1.8229183936689515 ** -1.8018242039112373 ** -1.8545674751605838 ** -1.6347359629100537 ** -1.6011271252762527 ** -1.4089987331512384 ** -1.9149118988309055 ** + Policy it 9..Sim ep: 373 +Log Std policy inner: -3.660555 + Policy it 10..Sim ep: 387 +Log Std policy inner: -3.664074 + Policy it 11..Sim ep: 397 +Log Std policy inner: -3.6634667 +Iterations: 919 +Simulated test: ** -2.107854311265983 ** -1.8038555185636505 ** -1.6861450288817286 ** -1.7400290584779576 ** -1.9002931060234551 ** -1.774418179692002 ** -1.9929533868684666 ** -1.812616406364832 ** -2.0222024671453984 ** +break +============================ 27 ============================ +step 1 state [0.32316667 0.82083333 0.0995 0.06366667] a [-0.05249134 0.08333333 0.06094556 0.00148845] r -1.0001208393086825 +step 2 state [0.23983333 0.83583333 0.1715 0.12716667] a [-0.08333333 0.01512097 0.072124 0.06359083] r -0.9993158962862493 +step 3 state [0.1745 0.91916667 0.09416667 0.2105 ] a [-0.06522954 0.08333333 -0.0772766 0.08333333] r -0.9999539285395972 +step 4 state [0.09116667 0.90133333 0.154 0.29383333] a [-0.08333333 -0.01781696 0.05999048 0.08333333] r -0.9986670918528379 +step 5 state [0.00783333 0.98466667 0.23733333 0.37716667] a [-0.08333333 0.08333333 0.08333333 0.08333333] r -0.9899997500173647 +step 6 state [0. 1. 0.2295 0.39816667] a [-0.08333333 0.05453416 -0.00778208 0.02114561] r -0.9802826352953474 +step 7 state [0. 1. 0.24933333 0.4815 ] a [-0.08333333 0.00746612 0.01985451 0.08333333] r -0.9166800421559311 +step 8 state [0. 1. 0.33266667 0.48266667] a [-0.05937026 0.03534173 0.08333333 0.00127228] r -0.608288483800068 +step 9 state [0. 0.93833333 0.416 0.4605 ] a [-0.04725446 -0.06155404 0.08333333 -0.0220111 ] r -0.2878396776553117 +step 10 state [0.0515 1. 0.49933333 0.509 ] a [0.05164123 0.08333333 0.08333333 0.04866383] r -0.10682772206555713 +step 1 state [0.07016667 0.8735 0.57633333 0.38133333] a [-0.08333333 0.08333333 0.08333333 0.07341241] r -0.17846649052641284 +step 2 state [0. 0.95683333 0.62833333 0.45383333] a [-0.08333333 0.08333333 0.052153 0.0725675 ] r -0.06187287249711226 +step 3 state [0. 1. 
0.586 0.53716667] a [-0.0794578 0.07690335 -0.04222742 0.08333333] r 0.021071705239154914 +step 1 state [0.57566667 0.76566667 0.10616667 0.27166667] a [-0.04395705 0.07770921 0.08333333 0.08333333] r -0.9977247111748473 +step 2 state [0.50483333 0.8205 0.1895 0.33783333] a [-0.0707332 0.05486006 0.08333333 0.06622623] r -0.9748676317606434 +Saved current buffer default +Ep:27 Rew:-1.61 -- Step:450 +Train set: 360 Valid set: 90 +Log Std policy: [-4.046185 -4.2908835 -3.4226353 -2.894163 ] -3.6634667 +maximum: 1.1866803932571766 +mb_lr: 0.001 +Model:0, iter:32 -- Old Val loss:0.005364 New Val loss:0.007907 -- New Train loss:0.001869 +Model:1, iter:75 -- Old Val loss:0.002438 New Val loss:0.009078 -- New Train loss:0.000507 +Model:2, iter:48 -- Old Val loss:0.004015 New Val loss:0.006354 -- New Train loss:0.001029 +Model:3, iter:46 -- Old Val loss:0.004668 New Val loss:0.007923 -- New Train loss:0.001267 +Model:4, iter:112 -- Old Val loss:0.003712 New Val loss:0.008654 -- New Train loss:0.000244 +Model:5, iter:54 -- Old Val loss:0.002634 New Val loss:0.010385 -- New Train loss:0.000981 +Model:6, iter:43 -- Old Val loss:0.003164 New Val loss:0.007439 -- New Train loss:0.001326 +Model:7, iter:55 -- Old Val loss:0.003595 New Val loss:0.010407 -- New Train loss:0.000850 +Model:8, iter:44 -- Old Val loss:0.004418 New Val loss:0.009515 -- New Train loss:0.001243 + Policy it 0..Sim ep: 383 +Log Std policy inner: -3.6655028 + Policy it 1..Sim ep: 393 +Log Std policy inner: -3.6647477 + Policy it 2..Sim ep: 386 +Log Std policy inner: -3.6651185 +Iterations: 922 +Simulated test: ** -1.5508255729684606 ** -1.5468122677667997 ** -2.465939759039902 ** -1.7857299688248895 ** -1.8008576728054322 ** -1.9239437461225315 ** -1.8135660455585458 ** -1.9682528454624117 ** -1.8304037135222462 ** + Policy it 3..Sim ep: 409 +Log Std policy inner: -3.6681147 + Policy it 4..Sim ep: 406 +Log Std policy inner: -3.6659029 + Policy it 5..Sim ep: 420 +Log Std policy inner: -3.665904 +Iterations: 925 +Simulated test: ** -1.4921718231332488 ** -1.6959829244564752 ** -1.7909503634320572 ** -1.819156021857343 ** -1.776229562230874 ** -1.9606309520400829 ** -1.6958649023179897 ** -1.5787063266069163 ** -1.9402560371067374 ** + Policy it 6..Sim ep: 406 +Log Std policy inner: -3.665955 + Policy it 7..Sim ep: 403 +Log Std policy inner: -3.6682587 + Policy it 8..Sim ep: 399 +Log Std policy inner: -3.6693952 +Iterations: 928 +Simulated test: ** -1.6228906329208985 ** -1.8063832323998212 ** -1.9454554525087588 ** -1.7343211175024045 ** -1.3974480388872326 ** -1.5866546367865522 ** -1.8161315012129489 ** -2.054454194280552 ** -1.6167554441391259 ** + Policy it 9..Sim ep: 407 +Log Std policy inner: -3.6622305 + Policy it 10..Sim ep: 400 +Log Std policy inner: -3.66222 + Policy it 11..Sim ep: 402 +Log Std policy inner: -3.660259 +Iterations: 931 +Simulated test: ** -1.8657274668524042 ** -2.2773700824915433 ** -1.5250140714040026 ** -1.7361899405915755 ** -2.16079599670833 ** -1.8900290812691674 ** -1.7017726097302512 ** -1.6397335488488898 ** -2.0109371428360463 ** + Policy it 12..Sim ep: 415 +Log Std policy inner: -3.6629014 + Policy it 13..Sim ep: 404 +Log Std policy inner: -3.6651547 + Policy it 14..Sim ep: 400 +Log Std policy inner: -3.6644752 +Iterations: 934 +Simulated test: ** -2.3502194832009264 ** -2.655026133948704 ** -1.716331874281168 ** -1.8945331000210717 ** -1.8855408254091162 ** -2.1632029833103297 ** -1.634553713412024 ** -1.7954310884838924 ** -1.8176582570932807 ** + Policy it 15..Sim ep: 389 +Log Std policy inner: 
-3.6646886 + Policy it 16..Sim ep: 413 +Log Std policy inner: -3.6640742 + Policy it 17..Sim ep: 411 +Log Std policy inner: -3.6665986 +Iterations: 937 +Simulated test: ** -1.9791513838409447 ** -1.9196260284120217 ** -1.9900227825902403 ** -1.8629437162459361 ** -1.8561144199315458 ** -1.797934806975536 ** -1.9989638175800792 ** -1.916650764469523 ** -2.1335523779760113 ** + Policy it 18..Sim ep: 406 +Log Std policy inner: -3.669407 + Policy it 19..Sim ep: 405 +Log Std policy inner: -3.6736104 + Policy it 20..Sim ep: 401 +Log Std policy inner: -3.6721892 +Iterations: 940 +Simulated test: ** -1.8990496575576252 ** -1.7660978822305333 ** -1.5980800891621039 ** -1.7748630277079065 ** -1.9127354065119289 ** -1.9668344577343668 ** -1.423478685421869 ** -1.586537529604975 ** -1.537272749451222 ** + Policy it 21..Sim ep: 398 +Log Std policy inner: -3.6746716 + Policy it 22..Sim ep: 426 +Log Std policy inner: -3.6764178 + Policy it 23..Sim ep: 393 +Log Std policy inner: -3.6765184 +Iterations: 943 +Simulated test: ** -1.910944609809667 ** -1.8129918668884784 ** -1.916696209819056 ** -1.512804224230349 ** -1.7128150102007202 ** -2.053087446762947 ** -2.0075322011949903 ** -1.5335318882763387 ** -1.720833405717276 ** + Policy it 24..Sim ep: 402 +Log Std policy inner: -3.6763954 + Policy it 25..Sim ep: 395 +Log Std policy inner: -3.6768112 + Policy it 26..Sim ep: 387 +Log Std policy inner: -3.6780205 +Iterations: 946 +Simulated test: ** -1.8813065952656325 ** -1.9146436808723957 ** -1.6654469355009496 ** -2.106413031592965 ** -1.7122640944621526 ** -2.200642728379316 ** -1.9405465937498958 ** -1.6163003303925507 ** -2.277071475321427 ** + Policy it 27..Sim ep: 416 +Log Std policy inner: -3.6783319 + Policy it 28..Sim ep: 424 +Log Std policy inner: -3.6782634 + Policy it 29..Sim ep: 378 +Log Std policy inner: -3.6778486 +Iterations: 949 +Simulated test: ** -1.5495983423385769 ** -2.0866971743525937 ** -1.82402654546313 ** -2.218705264184391 ** -1.9536571776599159 ** -1.8521377824014054 ** -1.6218123278324492 ** -1.7483654921629932 ** -1.8225586780149023 ** + Policy it 30..Sim ep: 410 +Log Std policy inner: -3.680456 + Policy it 31..Sim ep: 410 +Log Std policy inner: -3.6791487 + Policy it 32..Sim ep: 384 +Log Std policy inner: -3.678959 +Iterations: 952 +Simulated test: ** -1.844051764300093 ** -1.6037140109483152 ** -1.9244524211110547 ** -1.5071727865934372 ** -1.8029171655187384 ** -2.0575706359092147 ** -1.6105878433398901 ** -1.7278397270105779 ** -1.899806204047054 ** + Policy it 33..Sim ep: 402 +Log Std policy inner: -3.679697 + Policy it 34..Sim ep: 423 +Log Std policy inner: -3.6832366 + Policy it 35..Sim ep: 390 +Log Std policy inner: -3.678832 +Iterations: 955 +Simulated test: ** -1.925438730965834 ** -1.7906979119800963 ** -1.882637138494174 ** -1.9673412131992518 ** -1.9353694846772123 ** -1.8267081440100446 ** -1.8095222982985433 ** -1.6663994473253843 ** -2.0009544567385458 ** + Policy it 36..Sim ep: 425 +Log Std policy inner: -3.6779273 + Policy it 37..Sim ep: 403 +Log Std policy inner: -3.6777196 + Policy it 38..Sim ep: 397 +Log Std policy inner: -3.6780999 +Iterations: 958 +Simulated test: ** -1.9672330280113965 ** -2.3219323246134445 ** -1.8482488815672695 ** -1.6654204018600285 ** -1.7998902111081407 ** -1.7502471003739628 ** -1.7336761108227075 ** -1.4953383958665654 ** -2.026938010237063 ** + Policy it 39..Sim ep: 406 +Log Std policy inner: -3.6786418 + Policy it 40..Sim ep: 406 +Log Std policy inner: -3.6776853 + Policy it 41..Sim ep: 399 +Log Std policy inner: -3.6798987 
+Iterations: 961 +Simulated test: ** -1.38134461993468 ** -1.7434138022753176 ** -1.6776132385677192 ** -2.16104920335114 ** -1.8990380262269173 ** -1.4936525132181122 ** -1.626586181767052 ** -1.7768907504179514 ** -2.066321583264507 ** + Policy it 42..Sim ep: 413 +Log Std policy inner: -3.6833324 + Policy it 43..Sim ep: 413 +Log Std policy inner: -3.6857717 + Policy it 44..Sim ep: 396 +Log Std policy inner: -3.6963305 +Iterations: 964 +Simulated test: ** -1.4376985349511961 ** -1.533277785461396 ** -1.6055238168686627 ** -1.5942276833613869 ** -1.7111879899702034 ** -1.6178764420282095 ** -1.91025729957968 ** -1.4501139888574834 ** -1.6194261936983094 ** + Policy it 45..Sim ep: 394 +Log Std policy inner: -3.695979 + Policy it 46..Sim ep: 400 +Log Std policy inner: -3.6998901 + Policy it 47..Sim ep: 393 +Log Std policy inner: -3.697664 +Iterations: 967 +Simulated test: ** -1.7171428254526109 ** -1.6239451345452107 ** -1.8714754409971648 ** -1.385922475710977 ** -1.2433528839051724 ** -1.7014146371488459 ** -1.9425212661758997 ** -1.5617975740786643 ** -1.9287543278327213 ** + Policy it 48..Sim ep: 420 +Log Std policy inner: -3.69843 + Policy it 49..Sim ep: 435 +Log Std policy inner: -3.6980557 +step 1 state [0.10883333 1. 0.36783333 0.2355 ] a [-0.0711749 0.08256018 0.08325612 0.08277228] r -0.9437714191788303 +step 2 state [0.05133333 1. 0.4505 0.31616667] a [-0.05739273 0.08322804 0.08268031 0.08066684] r -0.5000571789239435 +step 3 state [0. 1. 0.52266667 0.396 ] a [-0.06581837 0.08331826 0.07229252 0.07985877] r -0.1966152859814756 +step 4 state [0. 1. 0.555 0.4745] a [-0.07049004 0.08333087 0.03245361 0.07858809] r -0.0824458203179137 +step 5 state [0. 1. 0.5745 0.54433333] a [-0.07496429 0.08333287 0.01951814 0.06992157] r -0.010845348434637359 +step 1 state [0.56583333 0.82083333 0.396 0.84416667] a [-0.08333333 0.08333333 0.08333274 -0.08125578] r -0.33991475414131567 +step 2 state [0.4825 0.90416667 0.47916667 0.76666667] a [-0.08333333 0.08333333 0.08333289 -0.07746832] r -0.09911681524559135 +step 3 state [0.39916667 0.9875 0.56233333 0.70266667] a [-0.08333333 0.08333333 0.08332903 -0.06389842] r -0.02099529116852006 +step 1 state [0.0855 0.82533333 0.11166667 0.50566667] a [-0.08328949 0.08333333 0.08333238 0.08332921] r -0.9906972240914793 +step 2 state [0.00366667 0.90866667 0.19483333 0.58883333] a [-0.08167093 0.08333333 0.0833276 0.0831773 ] r -0.949566516741504 +step 3 state [0. 0.992 0.278 0.66516667] a [-0.06170635 0.08333333 0.08328201 0.07637037] r -0.7789467313660836 +step 4 state [0. 1. 0.361 0.677] a [-0.02752447 0.08333333 0.08315283 0.0119058 ] r -0.3498172310032901 +step 5 state [0. 1. 0.4425 0.646 ] a [-0.00658303 0.08333331 0.08157408 -0.03094853] r -0.13233938451827776 +step 6 state [0. 1. 0.50933333 0.61916667] a [-0.03747712 0.08333327 0.06689891 -0.02674913] r 0.02790104580064723 +step 1 state [0. 0.75783333 0.56083333 0.15033333] a [-0.08314482 0.06840512 -0.08292833 0.08333182] r -0.9139908293009875 +step 2 state [0. 0.84083333 0.53833333 0.2335 ] a [-0.0762892 0.08311828 -0.02238029 0.08333063] r -0.7700547410970243 +step 3 state [0. 0.924 0.57883333 0.31666667] a [-0.06580177 0.08328732 0.04050183 0.08332673] r -0.35922957668433286 +step 4 state [0. 1. 0.6025 0.39983333] a [-0.06948312 0.08331704 0.02377287 0.08327678] r -0.2075554053268842 +step 5 state [0. 1. 0.59033333 0.48033333] a [-0.07642241 0.08333057 -0.01209143 0.08056788] r -0.07726576964864929 +step 6 state [0. 1. 
0.5865 0.54783333] a [-0.07735071 0.08333296 -0.00380938 0.06752649] r 0.039276852032427145 +step 1 state [0.07033333 0.6125 0.55033333 0.744 ] a [-0.0833068 0.08333333 -0.0833279 -0.0818173 ] r 0.004627745509671488 +step 1 state [0.86933333 0.46416667 0.3315 0.6655 ] a [-0.08333333 0.08333333 0.08333296 0.07821339] r -0.14527243610219753 +step 2 state [0.786 0.5475 0.41466667 0.7065 ] a [-0.08333333 0.08333333 0.08333293 0.04100521] r -0.10545324457054794 +step 3 state [0.70266667 0.63083333 0.49783333 0.661 ] a [-0.08333333 0.08333333 0.08333063 -0.04540513] r 0.01795354343276845 +step 1 state [0.40883333 0.22016667 0.18516667 0.82716667] a [-0.08333333 0.08333333 0.08333324 0.08333318] r -0.35621214662958034 +step 2 state [0.3255 0.3035 0.26833333 0.91033333] a [-0.08333333 0.08333333 0.08333317 0.08332456] r -0.2794575610340698 +step 3 state [0.24216667 0.38683333 0.3515 0.99183333] a [-0.08333333 0.08333333 0.08333279 0.08163213] r -0.35424289528285735 +step 4 state [0.15883333 0.47016667 0.43466667 0.96133333] a [-0.08333333 0.08333333 0.08332988 -0.03041966] r -0.25889629941551706 +step 5 state [0.0755 0.5535 0.51783333 0.89033333] a [-0.08333329 0.08333333 0.08329652 -0.07099841] r -0.13426857332251285 +step 6 state [0. 0.63683333 0.47333333 0.81716667] a [-0.08331337 0.08333333 -0.04449939 -0.07304739] r 0.03276580840935406 +step 1 state [0.829 0.53416667 0.79983333 0.2435 ] a [-0.08333333 -0.04852201 -0.08333333 0.08333307] r -0.9300435658690013 +step 2 state [0.74566667 0.61316667 0.7165 0.32666667] a [-0.08333333 0.07910927 -0.08333333 0.0833332 ] r -0.4107354509628429 +step 3 state [0.66233333 0.69633333 0.63316667 0.40983333] a [-0.08333333 0.08333272 -0.08333333 0.08333098] r -0.0737092118921453 +step 4 state [0.579 0.77966667 0.54983333 0.493 ] a [-0.08333333 0.08333333 -0.08333329 0.08333266] r 0.02993017631476569 +step 1 state [0.65666667 0.40033333 0.88766667 0.377 ] a [-0.08333326 0.08116775 -0.08333295 0.08333333] r -0.9866786003230533 +step 2 state [0.57333333 0.4785 0.80433333 0.46016667] a [-0.0833333 0.07823168 -0.08333331 0.08333237] r -0.8198838892064884 +step 3 state [0.49 0.56166667 0.721 0.54333333] a [-0.08333333 0.0832736 -0.08333333 0.083322 ] r -0.25070230361874035 +step 4 state [0.40666667 0.645 0.63766667 0.6265 ] a [-0.08333333 0.08333333 -0.08333333 0.08331747] r 0.00648835873339948 +step 1 state [0.46133333 0.28016667 0.15933333 1. ] a [-0.08333333 0.08333333 0.08333332 0.08087797] r -0.9344020492554099 +step 2 state [0.378 0.3635 0.2425 1. ] a [-0.08333333 0.08333333 0.08333326 0.0729677 ] r -0.8696236815898262 +step 3 state [0.29466667 0.44683333 0.32566667 1. ] a [-0.08333333 0.08333333 0.08333289 0.03926032] r -0.7503331411521352 +step 4 state [0.21133333 0.53016667 0.40883333 0.9565 ] a [-0.08333333 0.08333333 0.08333244 -0.04337496] r -0.47602357454581745 +step 5 state [0.128 0.6135 0.492 0.88583333] a [-0.08333331 0.08333333 0.08332738 -0.07057685] r -0.2664393144665852 +step 6 state [0.04466667 0.69683333 0.54216667 0.81833333] a [-0.08333192 0.08333333 0.05029261 -0.06742841] r -0.07457865128392271 +step 7 state [0. 0.78016667 0.47266667 0.7585 ] a [-0.08280262 0.08333333 -0.06936748 -0.05979564] r 0.009724892553468756 +step 1 state [0.841 0.81316667 0.48683333 0.45983333] a [-0.08333333 0.08333333 0.05961201 0.08329707] r 0.03334153123526584 +step 1 state [0. 0.79633333 0.64166667 0.10433333] a [-0.08297692 -0.07394744 -0.08330291 0.0833188 ] r -0.9626818871195612 +step 2 state [0. 
0.86783333 0.56116667 0.1875 ] a [-0.08069726 0.07165471 -0.08039963 0.08331943] r -0.8912543540345899 +step 3 state [0. 0.95066667 0.561 0.27066667] a [-6.16028607e-02 8.29617083e-02 -1.90623105e-05 8.33220432e-02] r -0.5335433413182421 +step 4 state [0. 1. 0.58583333 0.35383333] a [-0.05855318 0.0832523 0.02499208 0.0832638 ] r -0.30752470597444004 +step 5 state [0. 1. 0.5855 0.436 ] a [-0.07210295 0.08332283 -0.00019497 0.0822607 ] r -0.16489393178885003 +step 6 state [0. 1. 0.58166667 0.51266667] a [-0.07633815 0.0833323 -0.00367637 0.07671898] r -0.058754543338155996 +step 7 state [0. 1. 0.58666667 0.5675 ] a [-0.07750724 0.08333313 0.00509134 0.05494936] r 0.03973721697250521 +step 1 state [0.10433333 0.81816667 0.86766667 0.68866667] a [-0.0833306 0.08333247 -0.08333333 0.08212262] r -0.7154779302847262 +step 2 state [0.02133333 0.90133333 0.78433333 0.61016667] a [-0.08291226 0.08333332 -0.08333309 -0.07844834] r -0.17909789558065525 +step 3 state [0. 0.9845 0.7045 0.54066667] a [-0.08235813 0.08333332 -0.07969749 -0.06945757] r -0.032130231741978266 +step 1 state [0.53016667 0.518 0.87883333 0.762 ] a [-0.08332647 0.0833333 0.01976905 0.08333333] r -0.9914283707334818 +step 2 state [0.447 0.60116667 0.797 0.84516667] a [-0.08302498 0.08333315 -0.08179089 0.08333322] r -0.9705143530833548 +step 3 state [0.36366667 0.6845 0.71366667 0.78866667] a [-0.0833273 0.08333333 -0.08331883 -0.05648411] r -0.6191240187455025 +step 4 state [0.28033333 0.76783333 0.63033333 0.70533333] a [-0.08333327 0.08333333 -0.08333333 -0.08326318] r -0.07017077464070398 +step 5 state [0.197 0.85116667 0.547 0.64883333] a [-0.08333307 0.08333333 -0.08331934 -0.05648957] r 0.06367434450217369 +step 1 state [0.79133333 0.567 0.83866667 0.60533333] a [-0.08333333 0.08333328 -0.0221193 0.08333319] r -0.9494267865471141 +step 2 state [0.708 0.65016667 0.77483333 0.6885 ] a [-0.08333333 0.0833333 -0.06377833 0.08330686] r -0.7874599765136896 +step 3 state [0.62466667 0.7335 0.70916667 0.72866667] a [-0.08333333 0.08333333 -0.0656395 0.04018831] r -0.4629672749683742 +step 4 state [0.54133333 0.81683333 0.62916667 0.64533333] a [-0.08333333 0.08333333 -0.07995811 -0.08329527] r -0.011391867686360024 +step 1 state [0.444 0.745 0.153 0.342] a [-0.08333333 0.08333333 0.08333327 0.08333324] r -0.9498538321037279 +step 2 state [0.36066667 0.82833333 0.23616667 0.42516667] a [-0.08333333 0.08333333 0.08333201 0.08333269] r -0.7597459153953285 +step 3 state [0.27733333 0.91166667 0.31933333 0.50833333] a [-0.0833332 0.08333333 0.08331739 0.08332881] r -0.3362734993364024 +step 4 state [0.194 0.995 0.4025 0.59116667] a [-0.08329643 0.08333333 0.08320236 0.08299236] r -0.1786267155240998 +step 5 state [0.11216667 1. 
0.48333333 0.6545 ] a [-0.08179164 0.08333333 0.08084974 0.06340783] r 0.004589879826319532 +step 1 state [0.8255 0.37133333 0.69433333 0.49816667] a [-0.08333333 0.08333333 0.0792662 0.08333272] r -0.3547931006025049 +step 2 state [0.74216667 0.45466667 0.7745 0.58133333] a [-0.08333333 0.08333333 0.08016799 0.083333 ] r -0.6761155014461094 +step 3 state [0.65883333 0.538 0.83016667 0.6645 ] a [-0.08333333 0.08333333 0.05570912 0.083333 ] r -0.915289917581808 +step 4 state [0.5755 0.62116667 0.76533333 0.74766667] a [-0.08333314 0.08333332 -0.06472413 0.08333113] r -0.6919367411001636 +step 5 state [0.49216667 0.7045 0.68383333 0.73966667] a [-0.08333331 0.08333333 -0.08148673 -0.00783347] r -0.24631570857402685 +step 6 state [0.40883333 0.78783333 0.6005 0.65683333] a [-0.08333333 0.08333333 -0.08332372 -0.08278413] r 0.05150985609783998 +step 1 state [0.46916667 0.44916667 0.55316667 0.518 ] a [-0.08333333 0.08333333 -0.08260267 0.08333309] r 0.015167333298480745 +step 1 state [0.57483333 0.44766667 0.6005 0.53783333] a [-0.08333333 0.08333333 0.05656471 0.08332377] r -0.08501833186044194 +step 2 state [0.4915 0.531 0.56633333 0.621 ] a [-0.08333333 0.08333333 -0.03412911 0.08326178] r -0.030934679644777585 +step 1 state [0.59983333 1. 0.59683333 0.58416667] a [-0.08333333 0.08333333 0.08074763 0.08170312] r 0.06494977427131454 +step 1 state [0.81883333 0.83466667 0.23516667 0.55633333] a [-0.08333333 0.08333333 0.08332383 0.07969472] r -0.7171041784295409 +step 2 state [0.7355 0.918 0.31833333 0.63283333] a [-0.08333333 0.08333333 0.08333181 0.07660664] r -0.3320669990836135 +step 3 state [0.65216667 1. 0.4015 0.70583333] a [-0.08333333 0.08333333 0.08333293 0.07312504] r -0.23308329455043542 +step 4 state [0.56883333 1. 0.48466667 0.74966667] a [-0.08333333 0.08333333 0.08333307 0.04389179] r -0.15072441007335635 +step 5 state [0.4855 1. 0.56783333 0.68566667] a [-0.08333333 0.08333333 0.08332992 -0.06386244] r 0.05308376826377348 +step 1 state [0.1575 0.37066667 0.4125 0.47183333] a [-0.0833315 0.08333333 0.08313429 0.08333333] r -0.02364589769270975 +step 1 state [0.55483333 0.75616667 0.2995 0.65716667] a [-0.08333333 0.08333333 0.08333322 0.08306241] r -0.2652427954992922 +step 2 state [0.4715 0.8395 0.38266667 0.739 ] a [-0.08333333 0.08333333 0.08333325 0.0819567 ] r -0.14218062002892473 +step 3 state [0.38816667 0.92283333 0.46583333 0.79833333] a [-0.08333333 0.08333333 0.08333321 0.0593693 ] r -0.1256639853240391 +step 4 state [0.30483333 1. 0.549 0.72566667] a [-0.08333333 0.08333333 0.08332039 -0.07259961] r 0.043820022595401964 +step 1 state [0.00916667 0.79833333 0.21033333 0.43366667] a [-0.0780465 0.08333333 0.08332759 0.08333253] r -0.8886841575833011 +step 2 state [0. 0.88166667 0.29333333 0.51683333] a [-0.05002781 0.08333333 0.08308843 0.08317084] r -0.4420070630730367 +step 3 state [0. 0.96483333 0.37533333 0.5965 ] a [-0.01301526 0.08333333 0.08215107 0.07967623] r -0.2327673629583199 +step 4 state [0.00266667 1. 0.45383333 0.63433333] a [0.00279518 0.08333327 0.07850794 0.03790488] r -0.05252964074547817 +step 5 state [0. 1. 
0.51716667 0.61616667] a [-0.04683709 0.08333326 0.06333772 -0.01816443] r 0.03798429689352889 +step 1 state [0.5525 0.6305 0.85383333 0.88116667] a [-0.08333108 0.08333333 0.04181153 0.08333255] r -0.9937825007152018 +step 2 state [0.46916667 0.71383333 0.77116667 0.964 ] a [-0.08331395 0.08333333 -0.08264671 0.08297077] r -0.9852488999988627 +step 3 state [0.38583333 0.79716667 0.68783333 0.88066667] a [-0.08333326 0.08333333 -0.08330521 -0.08333217] r -0.7016303695788577 +step 4 state [0.3025 0.8805 0.6045 0.79733333] a [-0.08333333 0.08333333 -0.08330137 -0.08333304] r -0.11598559944860498 +step 5 state [0.21916667 0.96383333 0.522 0.71416667] a [-0.08333325 0.08333333 -0.08241011 -0.0830227 ] r 0.0015696080757940933 +step 1 state [0.24416667 0.7885 0.22816667 0.12016667] a [-0.0825229 0.08330895 0.08268541 0.08333331] r -0.9966737788927947 +step 2 state [0.16466667 0.87166667 0.31116667 0.20333333] a [-0.07943744 0.08332443 0.08314676 0.08333329] r -0.978855066670618 +step 3 state [0.09466667 0.95483333 0.39416667 0.2865 ] a [-0.06990086 0.08332781 0.08313699 0.08332922] r -0.8744800638305623 +step 4 state [0.03 1. 0.476 0.369] a [-0.06461297 0.08332012 0.08196658 0.08252166] r -0.4465112283444068 +step 5 state [0. 1. 0.53716667 0.447 ] a [-0.07043976 0.08332928 0.06127908 0.07804857] r -0.15252407340846352 +step 6 state [0. 1. 0.56516667 0.52116667] a [-0.07307414 0.08333251 0.02814153 0.07430787] r -0.03818916134055583 +step 1 state [0. 0.999 0.68366667 0.5535 ] a [-0.08312335 0.08333303 -0.076049 0.08094095] r -0.03381159580825255 +step 1 state [0.31883333 1. 0.4655 0.21916667] a [-0.08286605 0.08255141 0.08133668 0.08333158] r -0.8038590903989866 +step 2 state [0.237 1. 0.546 0.30233333] a [-0.08171536 0.08283182 0.08058754 0.08330697] r -0.3429952010432007 +step 3 state [0.15466667 1. 0.60616667 0.3855 ] a [-0.08227913 0.08324859 0.06025955 0.08325669] r -0.16559583775475772 +step 4 state [0.07183333 1. 0.60583333 0.46816667] a [-0.08271632 0.08332691 -0.00031616 0.08268385] r -0.04977744545141105 +step 1 state [0.07083333 0.82266667 0.501 0.53416667] a [-0.06661459 0.08333333 0.07244825 0.08333185] r 0.056199481602665635 +step 1 state [0.86416667 0.415 0.55316667 0.8985 ] a [-0.08333239 0.08333333 0.08333297 0.08321413] r -0.9389238019330012 +step 2 state [0.78083333 0.49833333 0.63633333 0.98133333] a [-0.0833306 0.08333333 0.0833319 0.0828665 ] r -0.9875293960902569 +step 3 state [0.6975 0.58166667 0.7195 1. ] a [-0.08333158 0.08333333 0.08332457 0.08240556] r -0.9955821222181573 +step 4 state [0.61416667 0.665 0.8025 1. ] a [-0.08333306 0.08333333 0.08301249 0.08175435] r -0.9976127329238639 +step 5 state [0.53083333 0.74833333 0.76933333 1. ] a [-0.08333289 0.08333333 -0.03315442 0.06657834] r -0.9949039219783997 +step 6 state [0.4475 0.83166667 0.68633333 0.91666667] a [-0.0833333 0.08333333 -0.08289204 -0.08333277] r -0.8862445361831434 +step 7 state [0.36416667 0.915 0.60433333 0.83333333] a [-0.08333333 0.08333333 -0.081995 -0.08333328] r -0.32653869257062684 +step 8 state [0.28083333 0.99833333 0.531 0.75 ] a [-0.08333333 0.08333333 -0.07320462 -0.08330747] r -0.0937397380406172 +step 9 state [0.1975 1. 
0.5185 0.66966667] a [-0.0833317 0.08333333 -0.01244592 -0.08023321] r 0.02041799772268371 +step 1 state [0.532 0.39933333 0.50616667 0.89733333] a [-0.08333333 0.08333333 0.08333295 0.06533066] r -0.6130715600359786 +step 2 state [0.44866667 0.48266667 0.58933333 0.84033333] a [-0.08333331 0.08333333 0.08331941 -0.05684311] r -0.5272239087497488 +step 3 state [0.36533333 0.566 0.66416667 0.759 ] a [-0.08333333 0.08333333 0.07499863 -0.0812912 ] r -0.46522545582855956 +step 4 state [0.282 0.64933333 0.58083333 0.698 ] a [-0.08333333 0.08333333 -0.08333206 -0.0609801 ] r -0.02447523347078362 +step 1 state [0.10766667 0.53233333 0.421 0.511 ] a [-0.08288528 0.08333333 0.08332048 0.08333333] r 0.018710668454344992 +step 1 state [0.1365 0.43833333 0.35283333 1. ] a [-0.08333333 0.08333333 0.08333322 0.07934557] r -0.4061980778172307 +step 2 state [0.05316667 0.52166667 0.436 0.95133333] a [-0.08333322 0.08333333 0.08333169 -0.04856731] r -0.2596126096484286 +step 3 state [0. 0.605 0.51866667 0.8875 ] a [-0.0833306 0.08333333 0.08267187 -0.06369239] r -0.10774496777304254 +step 4 state [0. 0.68833333 0.47716667 0.81383333] a [-0.08259542 0.08333333 -0.04136716 -0.07353415] r 0.06486065429613563 +step 1 state [0. 0.4045 0.44566667 0.14633333] a [-0.08328721 0.0833332 -0.01810518 0.08333333] r -0.9290206991024075 +step 2 state [0. 0.48766667 0.45066667 0.2295 ] a [-0.08239166 0.08333332 0.00516132 0.08333333] r -0.6992442230376238 +step 3 state [0. 0.57083333 0.46066667 0.31266667] a [-0.07805703 0.08333333 0.01007906 0.08333331] r -0.39871879126357346 +step 4 state [0. 0.654 0.47583333 0.39583333] a [-0.06168877 0.08333333 0.0153228 0.08333326] r -0.20108641858508047 +step 5 state [0. 0.73716667 0.49 0.479 ] a [-0.02961848 0.08333332 0.01430894 0.08333288] r -0.03689134222567192 +step 1 state [0.686 0.087 0.166 0.79783333] a [-0.08333333 0.08333333 0.08333333 0.08333222] r -0.27756503252761267 +step 2 state [0.60266667 0.17033333 0.24933333 0.881 ] a [-0.08333333 0.08333333 0.08333333 0.08332702] r -0.31663548747345616 +step 3 state [0.51933333 0.25366667 0.3325 0.96416667] a [-0.08333333 0.08333333 0.08333333 0.0832552 ] r -0.5173338104199182 +step 4 state [0.436 0.337 0.41566667 1. ] a [-0.08333333 0.08333333 0.08333324 0.07753873] r -0.66507805029028 +step 5 state [0.35266667 0.42033333 0.49883333 0.944 ] a [-0.08333333 0.08333333 0.08333046 -0.05590358] r -0.5614016431281003 +step 6 state [0.26933333 0.50366667 0.58166667 0.86166667] a [-0.08333333 0.08333333 0.08291438 -0.08225141] r -0.33835291405235945 +step 7 state [0.186 0.587 0.51016667 0.78133333] a [-0.08333333 0.08333333 -0.07149864 -0.0802149 ] r -0.0067518731641315055 +step 1 state [0.51483333 0.183 0.75166667 0.72416667] a [-0.08333086 0.08333333 0.0832609 0.08333333] r -0.9744783252391808 +step 2 state [0.4315 0.26633333 0.83466667 0.8075 ] a [-0.083325 0.08333333 0.08300001 0.08333333] r -0.9968308418623132 +step 3 state [0.34833333 0.3495 0.90433333 0.89083333] a [-0.08304806 0.08333333 0.06975784 0.08333333] r -0.9996592807170076 +step 4 state [0.32433333 0.43266667 0.826 0.974 ] a [-0.02387992 0.08333299 -0.07826536 0.08333331] r -0.998181131836617 +step 5 state [0.243 0.516 0.7445 1. 
] a [-0.08132999 0.08333333 -0.0813882 0.08332417] r -0.9868528580928684 +step 6 state [0.15966667 0.59933333 0.66116667 0.91833333] a [-0.08323941 0.08333333 -0.08331851 -0.08156617] r -0.695855025573914 +step 7 state [0.07633333 0.68266667 0.57783333 0.835 ] a [-0.08329917 0.08333333 -0.08333146 -0.08332785] r -0.07925157311534314 +step 8 state [0. 0.766 0.49783333 0.75833333] a [-0.08295402 0.08333333 -0.07998537 -0.07662797] r 0.09597197605533836 +step 1 state [0.88916667 0.5055 0.23683333 0.68933333] a [-0.08333333 0.08333333 0.08333284 0.04103056] r -0.3187741044168032 +step 2 state [0.80583333 0.58883333 0.32 0.658 ] a [-0.08333333 0.08333333 0.08333201 -0.03122377] r -0.13957022059854896 +step 3 state [0.7225 0.67216667 0.40316667 0.68033333] a [-0.08333333 0.08333333 0.0833082 0.0224219 ] r -0.02963292718455912 +step 1 state [0.69916667 0.55466667 0.10833333 0.1095 ] a [-0.08333333 0.08333333 0.08333217 0.08333333] r -0.9968948063780161 +step 2 state [0.61583333 0.638 0.1915 0.19283333] a [-0.08333333 0.08333333 0.08333234 0.08333333] r -0.9575044382280397 +step 3 state [0.5325 0.72133333 0.27466667 0.27616667] a [-0.08333333 0.08333333 0.083324 0.08333333] r -0.7157860327700207 +step 4 state [0.44916667 0.80466667 0.35766667 0.35933333] a [-0.08333333 0.08333333 0.08309291 0.08333332] r -0.3450868967972759 +step 5 state [0.36583333 0.888 0.44066667 0.4425 ] a [-0.08333329 0.08333333 0.08315103 0.08333324] r -0.12229924117193813 +step 6 state [0.2825 0.97133333 0.52166667 0.52566667] a [-0.08332562 0.08333333 0.08110993 0.08329011] r -0.010142563754592437 +step 1 state [0.393 0.81316667 0.24016667 0.41816667] a [-0.08333333 0.08333333 0.08333292 0.08333296] r -0.8253241506919908 +step 2 state [0.30966667 0.8965 0.32333333 0.50133333] a [-0.08333331 0.08333333 0.08331814 0.08333047] r -0.3973510193113736 +step 3 state [0.22633333 0.97983333 0.4065 0.5845 ] a [-0.08332645 0.08333333 0.08322362 0.08319599] r -0.15438046778036119 +step 4 state [0.14333333 1. 0.4875 0.65483333] a [-0.08286579 0.08333333 0.08101854 0.07037082] r 0.002304390115411392 +step 1 state [0.49616667 0.24083333 0.897 0.629 ] a [-0.08332805 0.08333333 0.08176339 0.08333333] r -0.9972155363891134 +step 2 state [0.41416667 0.324 0.90833333 0.71233333] a [-0.08195553 0.08333287 0.01146956 0.08333333] r -0.9974559767202241 +step 3 state [0.44783333 0.40716667 0.8255 0.79566667] a [ 0.03367184 0.08329893 -0.08276146 0.08333333] r -0.9831202754117258 +step 4 state [0.36466667 0.49033333 0.77866667 0.87883333] a [-0.08312953 0.08333333 -0.04673545 0.0833333 ] r -0.953880666972175 +step 5 state [0.28133333 0.57366667 0.6955 0.95916667] a [-0.08321985 0.08333333 -0.08303752 0.08043912] r -0.8665091890828723 +step 6 state [0.198 0.657 0.61216667 0.87583333] a [-0.08333296 0.08333333 -0.08331691 -0.08331759] r -0.20848799502061655 +step 7 state [0.11466667 0.74033333 0.52883333 0.7925 ] a [-0.08333175 0.08333333 -0.08317682 -0.08328132] r 0.03863174514718137 +step 1 state [0.27116667 0.3 0.39833333 0.69183333] a [-0.08333333 0.08333333 0.0833268 0.08333333] r 0.0025621828229853705 +step 1 state [0.20533333 0.11266667 0.50116667 0.176 ] a [-0.08333333 0.08333333 -0.08333323 0.08333333] r -0.9089333529942051 +step 2 state [0.122 0.196 0.42516667 0.25933333] a [-0.08333327 0.08333333 -0.07588215 0.08333333] r -0.5239471782049469 +step 3 state [0.03866667 0.27933333 0.50766667 0.34266667] a [-0.08331112 0.08333333 0.08265907 0.08333333] r -0.35429135593455596 +step 4 state [0. 
0.36266667 0.5235 0.426 ] a [-0.08326495 0.08333333 0.01587401 0.08333333] r -0.17060076120135614 +step 5 state [0. 0.446 0.5 0.50916667] a [-0.08283222 0.08333333 -0.02344792 0.08333326] r -0.022582988374137747 +step 1 state [0.415 0.31283333 0.6475 0.98783333] a [-0.08332983 0.08333333 0.0833209 0.083316 ] r -0.980703849002451 +step 2 state [0.33166667 0.39616667 0.7305 1. ] a [-0.0833267 0.08333333 0.08304078 0.0833094 ] r -0.9936392495095264 +step 3 state [0.24833333 0.4795 0.70666667 1. ] a [-0.08325179 0.08333333 -0.02370358 0.08311234] r -0.9789586014790665 +step 4 state [0.165 0.56283333 0.6235 0.92 ] a [-0.08331931 0.08333333 -0.08313 -0.07985366] r -0.5035649387061529 +step 5 state [0.08166667 0.64616667 0.54016667 0.83666667] a [-0.08333101 0.08333333 -0.0832949 -0.08330772] r -0.11704253823890742 +step 6 state [0. 0.7295 0.47166667 0.77266667] a [-0.08323589 0.08333333 -0.06835283 -0.06397425] r -0.08704526899980469 +step 7 state [0. 0.81283333 0.47366667 0.777 ] a [-0.08192113 0.08333333 0.00206228 0.00441385] r -0.12731954802985923 +step 8 state [0. 0.89616667 0.4805 0.7295 ] a [-0.08034887 0.08333333 0.00699478 -0.04738316] r -0.007407107367048971 +step 1 state [0.53833333 0.36033333 0.54783333 1. ] a [-0.08332971 0.08333333 0.08333277 0.08249827] r -0.9405199419565046 +step 2 state [0.455 0.44366667 0.631 1. ] a [-0.08333157 0.08333333 0.08332584 0.08091965] r -0.9742326363742081 +step 3 state [0.37166667 0.527 0.713 1. ] a [-0.0833323 0.08333333 0.08207477 0.06051603] r -0.9819853035991216 +step 4 state [0.28833333 0.61033333 0.63316667 0.92283333] a [-0.08333013 0.08333333 -0.07975464 -0.07712944] r -0.586612861835542 +step 5 state [0.205 0.69366667 0.5505 0.8395 ] a [-0.08333333 0.08333333 -0.08266386 -0.08331698] r -0.07132672058822997 +step 6 state [0.12166667 0.777 0.4815 0.7615 ] a [-0.08333294 0.08333333 -0.06896077 -0.07797243] r 0.13156918871142098 +step 1 state [0.79383333 0.11566667 0.20916667 0.9105 ] a [-0.08333333 0.08333333 0.08333333 0.08333269] r -0.5095798861530464 +step 2 state [0.7105 0.199 0.2925 0.99366667] a [-0.08333332 0.08333333 0.08333333 0.08332437] r -0.756394246614001 +step 3 state [0.62716667 0.28233333 0.37566667 1. ] a [-0.08333326 0.08333333 0.08333332 0.08302211] r -0.9318591221290674 +step 4 state [0.54383333 0.36566667 0.45883333 1. ] a [-0.08333325 0.08333333 0.08333323 0.07927557] r -0.9372525234029727 +step 5 state [0.4605 0.449 0.542 1. ] a [-0.08333328 0.08333333 0.08333161 0.01457069] r -0.944439704563117 +step 6 state [0.37716667 0.53233333 0.625 0.9195 ] a [-0.08333331 0.08333333 0.08315511 -0.08037142] r -0.8558992788912105 +step 7 state [0.29383333 0.61566667 0.57716667 0.83616667] a [-0.08333333 0.08333333 -0.04769921 -0.08328245] r -0.2355368168146984 +step 8 state [0.2105 0.699 0.5115 0.75383333] a [-0.08333333 0.08333333 -0.06562027 -0.08228767] r 0.00012944333634301586 +step 1 state [0.54583333 0.25266667 0.31033333 0.993 ] a [-0.08333333 0.08333333 0.08333333 0.0832771 ] r -0.7243833754031913 +step 2 state [0.4625 0.336 0.3935 1. 
] a [-0.08333333 0.08333333 0.08333328 0.07879668] r -0.6778547767426255 +step 3 state [0.37916667 0.41933333 0.47666667 0.94783333] a [-0.08333333 0.08333333 0.08333188 -0.05207754] r -0.45465087906983204 +step 4 state [0.29583333 0.50266667 0.55983333 0.8655 ] a [-0.08333333 0.08333333 0.08322394 -0.08230089] r -0.30902957564831923 +step 5 state [0.2125 0.586 0.55583333 0.7855 ] a [-0.08333333 0.08333333 -0.0038813 -0.07998047] r -0.9428976271981788 +step 6 state [0.12916667 0.66933333 0.47516667 0.78116667] a [-0.08333324 0.08333333 -0.08056532 -0.00422702] r 0.04130591456225541 +step 1 state [0.02483333 0.587 0.29483333 0.54933333] a [-0.08171807 0.08333333 0.08333204 0.08333333] r -0.15792298179138298 +step 2 state [0. 0.67033333 0.378 0.6325 ] a [-0.06875887 0.08333333 0.08326248 0.08332843] r 0.027337807074728326 +step 1 state [0.4735 0.71333333 0.2815 0.09583333] a [-0.08333202 0.08332247 -0.05314953 0.08333332] r -0.9889060912631713 +step 2 state [0.39016667 0.7965 0.35966667 0.179 ] a [-0.08332042 0.08333056 0.07817479 0.08333333] r -0.9275422700361694 +step 3 state [0.30716667 0.87966667 0.44216667 0.26216667] a [-0.08299111 0.08332907 0.08258415 0.08333332] r -0.5908781336390578 +step 4 state [0.22566667 0.96283333 0.5225 0.34533333] a [-0.08141705 0.08333162 0.08040787 0.08333307] r -0.24395954269391673 +step 5 state [0.1435 1. 0.58133333 0.4285 ] a [-0.08204269 0.0833249 0.0589366 0.08326126] r -0.13669905551284767 +step 6 state [0.06066667 1. 0.5885 0.509 ] a [-0.08268672 0.08333147 0.00722355 0.08060633] r -0.027066584992431975 + +wait... + +FEL is coming back! +Wait 1 minute more... + +step 1 state [0.829 0.42733333 1. 0.774 ] a [-0.08333321 0.08333333 0.08276999 0.08333333] r -1.0000817009422074 +step 2 state [0.74566667 0.5105 1. 0.85733333] a [-0.08333217 0.08333332 0.08157826 0.08333333] r -1.0001554385395413 +step 3 state [0.66233333 0.59366667 1. 0.94066667] a [-0.08332049 0.0833333 0.07691277 0.08333333] r -1.0000571217430962 +step 4 state [0.57916667 0.67683333 1. 1. ] a [-0.08313001 0.08333321 0.01550999 0.08333333] r -0.9999735524661179 +step 5 state [0.49916667 0.76 0.91733333 1. ] a [-0.07997581 0.08333307 -0.08255008 0.0833317 ] r -0.999614696159093 +step 6 state [0.41583333 0.84333333 0.834 0.9415 ] a [-0.08323039 0.08333333 -0.08333188 -0.05835474] r -0.9874339093213866 +step 7 state [0.3325 0.92666667 0.75066667 0.85816667] a [-0.08333138 0.08333333 -0.08333333 -0.08333324] r -0.7360126865557193 +step 8 state [0.24916667 1. 0.66733333 0.77483333] a [-0.08333228 0.08333333 -0.08333306 -0.08333331] r -0.1413641850806462 +step 9 state [0.16583333 1. 0.5845 0.6915 ] a [-0.0833133 0.08333333 -0.08275019 -0.0832178 ] r 0.11324847344230093 +step 1 state [0. 0.68816667 0.53533333 0.914 ] a [-0.07707935 0.08333333 -0.08214005 -0.08332588] r -0.25898396456162265 +step 2 state [0. 0.7715 0.4875 0.8325] a [-0.0820182 0.08333333 -0.04769822 -0.08146752] r -0.026260474926463884 +now plotting... 
+QXcbConnection: XCB error: 3 (BadWindow), sequence: 1068, resource id: 21195780, major code: 40 (TranslateCoords), minor code: 0 +QXcbConnection: XCB error: 3 (BadWindow), sequence: 1610, resource id: 21195999, major code: 40 (TranslateCoords), minor code: 0 +QXcbConnection: XCB error: 3 (BadWindow), sequence: 1879, resource id: 21196218, major code: 40 (TranslateCoords), minor code: 0 +Saved current buffer final + diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/ME_TRPO.py b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/ME_TRPO.py new file mode 100644 index 0000000..b361e28 --- /dev/null +++ b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/ME_TRPO.py @@ -0,0 +1,1432 @@ +import os +import pickle + +import numpy as np +import tensorflow as tf +import gym +from datetime import datetime +import pandas as pd +import matplotlib.pyplot as plt + +from laser_trajectory_control_env import LaserTrajectoryControlEnv +from tango_connection import TangoConnection + +# set random seed +random_seed = 111 +np.random.seed(random_seed) + +conf_file = '/home/niky/FERMI/2020_10_06/configuration/conf_fel2.json' +tango = TangoConnection(conf_file=conf_file) +real_env = LaserTrajectoryControlEnv(tango=tango) + +# Hyper parameters +steps_per_env = 20 +init_random_steps = 100 +num_epochs = int((500-init_random_steps)/(steps_per_env)) # increase 500 to 700 +# num_epochs = int((125-init_random_steps)/(steps_per_env)) +print('number of epochs', num_epochs) + + +hidden_sizes = [100, 100] +cr_lr = 1e-3 +gamma = 0.9999 +lam = 0.95 + +max_training_iterations = 100 +# max_training_iterations = 10 +delay_before_convergence_check = 2 + +number_envs = 1 +critic_iter = 80 +delta = 0.05 +algorithm = 'TRPO' +conj_iters = 10 +minibatch_size = 500 +simulated_steps = 2000 + +#mb_lr = 5e-4 +mb_lr = 1e-3 +model_batch_size = 100 +num_ensemble_models = 5 +model_iter = 15 + +# How often to check the progress of the network training +# e.g.
lambda it, episode: (it + 1) % max(3, (ep+1)*2) == 0 +# dynamic_wait_time = lambda it, ep: (it + 1) % 5 == 0 # +dynamic_wait_time = lambda it, ep: (it + 1) % 3 == 0 # +# dynamic_wait_time = lambda it, ep: (it + 1) % 1 == 0 # + +# Create the logging directory: +project_directory = 'Data_logging/ME_TRPO/mt_1/' + + +hyp_str_all = '-nr_steps_' + str(steps_per_env) + '-cr_lr' + str(cr_lr) + '-crit_it_' + str( + critic_iter) + '-d_' + str(delta) + '-conj_iters_' + str(conj_iters) + '-n_ep_' + str(num_epochs) + \ + '-mini_bs_' + str(minibatch_size) + '-m_bs_' + str(model_batch_size) + \ + '-mb_lr_' + str(mb_lr) + \ + '-sim_steps_' + str(simulated_steps) + \ + '-m_iter_' + str(model_iter) + '-ensnr_' + str(num_ensemble_models) + '-init_' + str( + init_random_steps) + '/' +project_directory = project_directory + hyp_str_all + +# To label the plots: +hyp_str_all = '-nr_steps_' + str(steps_per_env) + '-cr_lr' + str(cr_lr) + '-crit_it_' + str( + critic_iter) + '-d_' + str(delta) + '-conj_iters_' + str(conj_iters) + '-n_ep_' + str(num_epochs) + \ + '\n-mini_bs_' + str(minibatch_size) + '-m_bs_' + str(model_batch_size) + \ + '-mb_lr_' + str(mb_lr) + \ + '-sim_steps_' + str(simulated_steps) + \ + '-m_iter_' + str(model_iter) + \ + '\n-ensnr_' + str(num_ensemble_models) +if not os.path.isdir(project_directory): + os.makedirs(project_directory) + print("created folder : ", project_directory) + + +# Class for data storage during the tests +class TrajectoryBuffer(): + '''Class for data storage during the tests''' + + def __init__(self, name, directory): + self.save_frequency = 100000 + self.directory = directory + self.name = name + self.rews = [] + self.obss = [] + self.acts = [] + self.dones = [] + self.info = "" + self.idx = -1 + + def new_trajectory(self, obs): + self.idx += 1 + self.rews.append([]) + self.acts.append([]) + self.obss.append([]) + self.dones.append([]) + self.store_step(obs=obs) + + def store_step(self, obs=None, act=None, rew=None, done=None): + self.rews[self.idx].append(rew) + self.obss[self.idx].append(obs) + self.acts[self.idx].append(act) + self.dones[self.idx].append(done) + + if self.__len__() % self.save_frequency == 0: + self.save_buffer() + + def __len__(self): + assert (len(self.rews) == len(self.obss) == len(self.acts) == len(self.dones)) + return len(self.obss) + + def save_buffer(self, **kwargs): + if 'info' in kwargs: + self.info = kwargs.get('info') + now = datetime.now() + # clock_time = "{}_{}_{}_{}_".format(now.day, now.hour, now.minute, now.second) + clock_time = f'{now.month:0>2}_{now.day:0>2}_{now.hour:0>2}_{now.minute:0>2}_{now.second:0>2}_' + data = dict(obss=self.obss, + acts=self.acts, + rews=self.rews, + dones=self.dones, + info=self.info) + # print('saving...', data) + out_put_writer = open(self.directory + clock_time + self.name, 'wb') + pickle.dump(data, out_put_writer, -1) + # pickle.dump(self.actions, out_put_writer, -1) + out_put_writer.close() + + def get_data(self): + return dict(obss=self.obss, + acts=self.acts, + rews=self.rews, + dones=self.dones, + info=self.info) + + +class MonitoringEnv(gym.Wrapper): + ''' + Gym Wrapper to store information for scaling to correct scpace and for post analysis. 
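+    Observations handed to the agent are rescaled to [-1, 1] (scale_state_env), agent actions
+    are mapped back to the machine range (descale_action_env), and training rewards are
+    rescaled from [-1, 0] to [-1, 1] (rew_scale; e.g. a raw reward of -0.25 becomes -0.25 * 2 + 1 = 0.5).
+    Every raw transition is also logged to a TrajectoryBuffer for offline analysis.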
+ ''' + + def __init__(self, env, **kwargs): + gym.Wrapper.__init__(self, env) + self.data_dict = dict() + self.environment_usage = 'default' + self.directory = project_directory + self.data_dict[self.environment_usage] = TrajectoryBuffer(name=self.environment_usage, + directory=self.directory) + self.current_buffer = self.data_dict.get(self.environment_usage) + + self.test_env_flag = False + if 'test_env' in kwargs: + self.test_env_flag = True + + + def reset(self, **kwargs): + init_obs = self.env.reset(**kwargs) + self.current_buffer.new_trajectory(init_obs) + init_obs = self.scale_state_env(init_obs) + # print('Menv: ', init_obs) + return init_obs + + def step(self, action): + # print('a', action) + action = self.descale_action_env(action) + # print('as', action) + ob, reward, done, info = self.env.step(action) + self.current_buffer.store_step(obs=ob, act=action, rew=reward, done=done) + ob = self.scale_state_env(ob) + reward = self.rew_scale(reward) + # print('Menv: ', ob, reward, done, info) + return ob, reward, done, info + + def set_usage(self, usage): + self.environment_usage = usage + if usage in self.data_dict: + self.current_buffer = self.data_dict.get(usage) + else: + self.data_dict[self.environment_usage] = TrajectoryBuffer(name=self.environment_usage, + directory=self.directory) + self.current_buffer = self.data_dict.get(usage) + + def close_usage(self, usage): + # Todo: Implement to save complete data + self.current_buffer = self.data_dict.get(usage) + self.current_buffer.save_buffer() + + def scale_state_env(self, ob): + scale = (self.env.observation_space.high - self.env.observation_space.low) + return (2 * ob - (self.env.observation_space.high + self.env.observation_space.low)) / scale + + def descale_action_env(self, act): + scale = (self.env.action_space.high - self.env.action_space.low) + return (scale * act + self.env.action_space.high + self.env.action_space.low) / 2 + + def rew_scale(self, rew): + + if not(self.test_env_flag): + '''Rescale reward from [-1,0] to [-1,1] for the training of the network''' + rew = rew * 2 + 1 + return rew + + def save_current_buffer(self, info=''): + self.current_buffer = self.data_dict.get(self.environment_usage) + self.current_buffer.save_buffer(info=info) + print('Saved current buffer', self.environment_usage) + + def set_directory(self, directory): + self.directory = directory + + +env_monitored = MonitoringEnv(env=real_env) + + +def make_env(**kwargs): + '''Create the environement''' + return MonitoringEnv(env=real_env, **kwargs) + + +def mlp(x, hidden_layers, output_layer, activation=tf.tanh, last_activation=None): + ''' + Multi-layer perceptron + ''' + for l in hidden_layers: + x = tf.layers.dense(x, units=l, activation=activation) + return tf.layers.dense(x, units=output_layer, activation=last_activation) + + +def softmax_entropy(logits): + ''' + Softmax Entropy + ''' + return -tf.reduce_sum(tf.nn.softmax(logits, axis=-1) * tf.nn.log_softmax(logits, axis=-1), axis=-1) + + +def gaussian_log_likelihood(ac, mean, log_std): + ''' + Gaussian Log Likelihood + ''' + log_p = ((ac - mean) ** 2 / (tf.exp(log_std) ** 2 + 1e-9) + 2 * log_std) + np.log(2 * np.pi) + return -0.5 * tf.reduce_sum(log_p, axis=-1) + + +def conjugate_gradient(A, b, x=None, iters=10): + ''' + Conjugate gradient method: approximate the solution of Ax=b + It solve Ax=b without forming the full matrix, just compute the matrix-vector product (The Fisher-vector product) + NB: A is not the full matrix but is a useful matrix-vector product between the averaged Fisher 
information matrix and arbitrary vectors + Descibed in Appendix C.1 of the TRPO paper + ''' + if x is None: + x = np.zeros_like(b) + + r = A(x) - b + p = -r + for _ in range(iters): + a = np.dot(r, r) / (np.dot(p, A(p)) + 1e-8) + x += a * p + r_n = r + a * A(p) + b = np.dot(r_n, r_n) / (np.dot(r, r) + 1e-8) + p = -r_n + b * p + r = r_n + return x + + +def gaussian_DKL(mu_q, log_std_q, mu_p, log_std_p): + ''' + Gaussian KL divergence in case of a diagonal covariance matrix + ''' + return tf.reduce_mean(tf.reduce_sum( + 0.5 * (log_std_p - log_std_q + tf.exp(log_std_q - log_std_p) + (mu_q - mu_p) ** 2 / tf.exp(log_std_p) - 1), + axis=1)) + + +def backtracking_line_search(Dkl, delta, old_loss, p=0.8): + ''' + Backtracking line searc. It look for a coefficient s.t. the constraint on the DKL is satisfied + It has both to + - improve the non-linear objective + - satisfy the constraint + ''' + ## Explained in Appendix C of the TRPO paper + a = 1 + it = 0 + + new_dkl, new_loss = Dkl(a) + while (new_dkl > delta) or (new_loss > old_loss): + a *= p + it += 1 + new_dkl, new_loss = Dkl(a) + + return a + + +def GAE(rews, v, v_last, gamma=0.99, lam=0.95): + ''' + Generalized Advantage Estimation + ''' + assert len(rews) == len(v) + vs = np.append(v, v_last) + d = np.array(rews) + gamma * vs[1:] - vs[:-1] + gae_advantage = discounted_rewards(d, 0, gamma * lam) + return gae_advantage + + +def discounted_rewards(rews, last_sv, gamma): + ''' + Discounted reward to go + Parameters: + ---------- + rews: list of rewards + last_sv: value of the last state + gamma: discount value + ''' + rtg = np.zeros_like(rews, dtype=np.float32) + rtg[-1] = rews[-1] + gamma * last_sv + for i in reversed(range(len(rews) - 1)): + rtg[i] = rews[i] + gamma * rtg[i + 1] + return rtg + + +def flatten_list(tensor_list): + ''' + Flatten a list of tensors + ''' + return tf.concat([flatten(t) for t in tensor_list], axis=0) + + +def flatten(tensor): + ''' + Flatten a tensor + ''' + return tf.reshape(tensor, shape=(-1,)) + + +def test_agent(env_test, agent_op, num_games=10): + ''' + Test an agent 'agent_op', 'num_games' times + Return mean and std + ''' + games_r = [] + games_length = [] + for _ in range(num_games): + d = False + game_r = 0 + o = env_test.reset() + game_length = 0 + while not d: + a_s, _ = agent_op([o]) + o, r, d, _ = env_test.step(a_s[0]) + game_r += r + game_length += 1 + + games_r.append(game_r) + games_length.append(game_length) + return np.mean(games_r), np.std(games_r), np.mean(games_length) + + +class Buffer(): + ''' + Class to store the experience from a unique policy + ''' + + def __init__(self, gamma=0.99, lam=0.95): + self.gamma = gamma + self.lam = lam + self.adv = [] + self.ob = [] + self.ac = [] + self.rtg = [] + + def store(self, temp_traj, last_sv): + ''' + Add temp_traj values to the buffers and compute the advantage and reward to go + Parameters: + ----------- + temp_traj: list where each element is a list that contains: observation, reward, action, state-value + last_sv: value of the last state (Used to Bootstrap) + ''' + # store only if there are temporary trajectories + if len(temp_traj) > 0: + self.ob.extend(temp_traj[:, 0]) + rtg = discounted_rewards(temp_traj[:, 1], last_sv, self.gamma) + self.adv.extend(GAE(temp_traj[:, 1], temp_traj[:, 3], last_sv, self.gamma, self.lam)) + self.rtg.extend(rtg) + self.ac.extend(temp_traj[:, 2]) + + def get_batch(self): + # standardize the advantage values + norm_adv = (self.adv - np.mean(self.adv)) / (np.std(self.adv) + 1e-10) + return np.array(self.ob), 
np.array(np.expand_dims(self.ac, -1)), np.array(norm_adv), np.array(self.rtg) + + def __len__(self): + assert (len(self.adv) == len(self.ob) == len(self.ac) == len(self.rtg)) + return len(self.ob) + + +class FullBuffer(): + def __init__(self): + self.rew = [] + self.obs = [] + self.act = [] + self.nxt_obs = [] + self.done = [] + + self.train_idx = [] + self.valid_idx = [] + self.idx = 0 + + def store(self, obs, act, rew, nxt_obs, done): + self.rew.append(rew) + self.obs.append(obs) + self.act.append(act) + self.nxt_obs.append(nxt_obs) + self.done.append(done) + + self.idx += 1 + + def generate_random_dataset(self): + rnd = np.arange(len(self.obs)) + np.random.shuffle(rnd) + self.valid_idx = rnd[: int(len(self.obs) / 5)] + self.train_idx = rnd[int(len(self.obs) / 5):] + print('Train set:', len(self.train_idx), 'Valid set:', len(self.valid_idx)) + + def get_training_batch(self): + return np.array(self.obs)[self.train_idx], np.array(np.expand_dims(self.act, -1))[self.train_idx], \ + np.array(self.rew)[self.train_idx], np.array(self.nxt_obs)[self.train_idx], np.array(self.done)[ + self.train_idx] + + def get_valid_batch(self): + return np.array(self.obs)[self.valid_idx], np.array(np.expand_dims(self.act, -1))[self.valid_idx], \ + np.array(self.rew)[self.valid_idx], np.array(self.nxt_obs)[self.valid_idx], np.array(self.done)[ + self.valid_idx] + + def __len__(self): + assert (len(self.rew) == len(self.obs) == len(self.act) == len(self.nxt_obs) == len(self.done)) + return len(self.obs) + + +def simulate_environment(env, policy, simulated_steps): + '''Lists to store rewards and length of the trajectories completed''' + buffer = Buffer(0.99, 0.95) + steps = 0 + number_episodes = 0 + + while steps < simulated_steps: + temp_buf = [] + obs = env.reset() + number_episodes += 1 + done = False + + while not done: + act, val = policy([obs]) + + obs2, rew, done, _ = env.step([act]) + + temp_buf.append([obs.copy(), rew, np.squeeze(act), np.squeeze(val)]) + + obs = obs2.copy() + steps += 1 + + if done: + buffer.store(np.array(temp_buf), 0) + temp_buf = [] + + if steps == simulated_steps: + break + + buffer.store(np.array(temp_buf), np.squeeze(policy([obs])[1])) + + print('Sim ep:', number_episodes, end=' \n') + + return buffer.get_batch(), number_episodes + + +class NetworkEnv(gym.Wrapper): + ''' + Wrapper to handle the network interaction + ''' + + def __init__(self, env, model_func=None, done_func=None, number_models=1): + gym.Wrapper.__init__(self, env) + + self.model_func = model_func + self.done_func = done_func + self.number_models = number_models + self.len_episode = 0 + self.threshold = 0 + self.max_steps = env.max_steps + + def reset(self, **kwargs): + self.threshold = -0.05 * 2 + 1 # rescaled [-1,1] + self.len_episode = 0 + # kwargs['simulation'] = True + # action = self.env.reset(**kwargs) + if self.model_func is not None: + obs = np.random.uniform(-1, 1, self.env.observation_space.shape) + # print('reset', obs) + # Todo: remove + # obs = self.env.reset() + else: + # obs = self.env.reset(**kwargs) + pass + # Does this work? 
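+        # The simulated episode starts from an observation drawn uniformly in [-1, 1]
+        # (the scaled state space used by the policy); the clip below keeps the
+        # ensemble input inside the range the dynamics models were trained on.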
+ self.obs = np.clip(obs, -1.0, 1.0) + self.obs_init = self.obs.copy() + # if self.test_phase: + # print('test reset', self.obs) + # print('init: ',self.obs) + return self.obs + + def step(self, action): + if self.model_func is not None: + # predict the next state on a random model + obs, rew = self.model_func(self.obs, [np.squeeze(action)], np.random.randint(0, self.number_models)) + # # Todo: remove + # self.env.state = self.obs + # done = rew > self.threshold + + self.len_episode += 1 + done = self.len_episode >= self.max_steps + self.obs = np.clip(obs, -1.0, 1.0) + # # Todo: remove + # obs_true, rew_true, done_true, _ = self.env.step(action) + # print(np.linalg.norm(self.obs-obs_true),np.linalg.norm(rew-rew_true), done, done_true) + # print(self.len_episode, 'state ', obs, 'a ', np.squeeze(action), 'r ', rew) + # print('step:', self.obs, rew, done, "") + rew = (rew - 1) / 2 + if rew > -0.05: + done = True + return self.obs, rew, done, "" + else: + # self.obs, rew, done, _ = real_env.step(action) + # return self.obs, rew, done, "" + pass + # return env.step(action) + + def set_test(self, test_flag): + real_env.test = test_flag + + +class StructEnv(gym.Wrapper): + ''' + Gym Wrapper to store information like number of steps and total reward of the last espisode. + ''' + + def __init__(self, env): + gym.Wrapper.__init__(self, env) + self.n_obs = self.env.reset() + self.total_rew = 0 + self.len_episode = 0 + + def reset(self, **kwargs): + self.n_obs = self.env.reset(**kwargs) + self.total_rew = 0 + self.len_episode = 0 + return self.n_obs.copy() + + def step(self, action): + ob, reward, done, info = self.env.step(action) + # print('reward in struct', reward) + self.total_rew += reward + self.len_episode += 1 + return ob, reward, done, info + + def get_episode_reward(self): + return self.total_rew + + def get_episode_length(self): + return self.len_episode + + +# def episode_done(rew): +# # TODO: new +# threshold = -0.1 +# # if rew> threshold: +# # print('done---'*10, rew) +# # return np.abs(np.arcsin(np.squeeze(ob[3]))) > .2 +# return rew > threshold + + +# def final_reward(ob, ac): +# # TODO: new +# # return -np.sqrt(np.mean(np.square(ob))) +# pass + + +def restore_model(old_model_variables, m_variables): + # variable used as index for restoring the actor's parameters + it_v2 = tf.Variable(0, trainable=False) + restore_m_params = [] + + for m_v in m_variables: + upd_m_rsh = tf.reshape(old_model_variables[it_v2: it_v2 + tf.reduce_prod(m_v.shape)], shape=m_v.shape) + restore_m_params.append(m_v.assign(upd_m_rsh)) + it_v2 += tf.reduce_prod(m_v.shape) + + return tf.group(*restore_m_params) + + +def METRPO(env_name, hidden_sizes=[32, 32], cr_lr=5e-3, num_epochs=50, gamma=0.99, lam=0.95, number_envs=1, + critic_iter=10, steps_per_env=100, delta=0.05, algorithm='TRPO', conj_iters=10, minibatch_size=1000, + mb_lr_start=0.0001, model_batch_size=512, simulated_steps=1000, num_ensemble_models=2, model_iter=15, + init_random_steps=steps_per_env): + ''' + Model Ensemble Trust Region Policy Optimization + Parameters: + ----------- + env_name: Name of the environment + hidden_sizes: list of the number of hidden units for each layer + cr_lr: critic learning rate + num_epochs: number of training epochs + gamma: discount factor + lam: lambda parameter for computing the GAE + number_envs: number of "parallel" synchronous environments + # NB: it isn't distributed across multiple CPUs + critic_iter: Number of SGD iterations on the critic per epoch + steps_per_env: number of steps per environment + # NB: the 
total number of steps per epoch will be: steps_per_env*number_envs + delta: Maximum KL divergence between two policies. Scalar value + algorithm: type of algorithm. Either 'TRPO' or 'NPO' + conj_iters: number of conjugate gradient iterations + minibatch_size: Batch size used to train the critic + mb_lr: learning rate of the environment model + model_batch_size: batch size of the environment model + simulated_steps: number of simulated steps for each policy update + num_ensemble_models: number of models + model_iter: number of iterations without improvement before stopping training the model + ''' + # TODO: add ME-TRPO hyperparameters + + tf.reset_default_graph() + + # Create a few environments to collect the trajectories + + # envs = [StructEnv(gym.make(env_name)) for _ in range(number_envs)] + envs = [StructEnv(make_env()) for _ in range(number_envs)] + env_test = StructEnv(make_env(test_env = True)) + # env_test = gym.make(env_name) + print('env_test' * 4) + + # env_test = make_env(test=True) + # env_test = gym.wrappers.Monitor(env_test, "VIDEOS/", force=True, video_callable=lambda x: x%10 == 0) + # to be changed in real test + # env_test = FelLocalEnv(tango=tango) + # env_test.test = True + # env_test_1 = FelLocalEnv(tango=tango) + # env_test_1.test = True + + # If the scaling is not perfomed this has to be changed + low_action_space = -1 # envs[0].action_space.low + high_action_space = 1 # envs[0].action_space.high + + obs_dim = envs[0].observation_space.shape + + act_dim = envs[0].action_space.shape[0] + + # print(envs[0].action_space, envs[0].observation_space, low_action_space, + # high_action_space) + + # Placeholders + act_ph = tf.placeholder(shape=(None, act_dim), dtype=tf.float32, name='act') + obs_ph = tf.placeholder(shape=(None, obs_dim[0]), dtype=tf.float32, name='obs') + # NEW + nobs_ph = tf.placeholder(shape=(None, obs_dim[0]), dtype=tf.float32, name='nobs') + rew_ph = tf.placeholder(shape=(None, 1), dtype=tf.float32, name='rew') + + ret_ph = tf.placeholder(shape=(None,), dtype=tf.float32, name='ret') + adv_ph = tf.placeholder(shape=(None,), dtype=tf.float32, name='adv') + old_p_log_ph = tf.placeholder(shape=(None,), dtype=tf.float32, name='old_p_log') + old_mu_ph = tf.placeholder(shape=(None, act_dim), dtype=tf.float32, name='old_mu') + old_log_std_ph = tf.placeholder(shape=(act_dim), dtype=tf.float32, name='old_log_std') + p_ph = tf.placeholder(shape=(None,), dtype=tf.float32, name='p_ph') + + mb_lr_ = tf.placeholder("float", None)#, name='mb_lr') + + + # result of the conjugate gradient algorithm + cg_ph = tf.placeholder(shape=(None,), dtype=tf.float32, name='cg') + + ######################################################### + ######################## POLICY ######################### + ######################################################### + + old_model_variables = tf.placeholder(shape=(None,), dtype=tf.float32, name='old_model_variables') + + # Neural network that represent the policy + with tf.variable_scope('actor_nn'): + p_means = mlp(obs_ph, hidden_sizes, act_dim, tf.tanh, last_activation=tf.tanh) + p_means = tf.clip_by_value(p_means, low_action_space, high_action_space) + log_std = tf.get_variable(name='log_std', initializer=np.ones(act_dim, dtype=np.float32)) + + # Neural network that represent the value function + with tf.variable_scope('critic_nn'): + s_values = mlp(obs_ph, hidden_sizes, 1, tf.tanh, last_activation=None) + s_values = tf.squeeze(s_values) + + # Add "noise" to the predicted mean following the Gaussian distribution with standard deviation 
e^(log_std) + p_noisy = p_means + tf.random_normal(tf.shape(p_means), 0, 1) * tf.exp(log_std) + # Clip the noisy actions + a_sampl = tf.clip_by_value(p_noisy, low_action_space, high_action_space) + + # Compute the gaussian log likelihood + p_log = gaussian_log_likelihood(act_ph, p_means, log_std) + + # Measure the divergence + diverg = tf.reduce_mean(tf.exp(old_p_log_ph - p_log)) + + # ratio + ratio_new_old = tf.exp(p_log - old_p_log_ph) + # TRPO surrogate loss function + p_loss = - tf.reduce_mean(ratio_new_old * adv_ph) + + # MSE loss function + v_loss = tf.reduce_mean((ret_ph - s_values) ** 2) + # Critic optimization + v_opt = tf.train.AdamOptimizer(cr_lr).minimize(v_loss) + + def variables_in_scope(scope): + # get all trainable variables in 'scope' + return tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope) + + # Gather and flatten the actor parameters + p_variables = variables_in_scope('actor_nn') + p_var_flatten = flatten_list(p_variables) + + # Gradient of the policy loss with respect to the actor parameters + p_grads = tf.gradients(p_loss, p_variables) + p_grads_flatten = flatten_list(p_grads) + + ########### RESTORE ACTOR PARAMETERS ########### + p_old_variables = tf.placeholder(shape=(None,), dtype=tf.float32, name='p_old_variables') + # variable used as index for restoring the actor's parameters + it_v1 = tf.Variable(0, trainable=False) + restore_params = [] + + for p_v in p_variables: + upd_rsh = tf.reshape(p_old_variables[it_v1: it_v1 + tf.reduce_prod(p_v.shape)], shape=p_v.shape) + restore_params.append(p_v.assign(upd_rsh)) + it_v1 += tf.reduce_prod(p_v.shape) + + restore_params = tf.group(*restore_params) + + # gaussian KL divergence of the two policies + dkl_diverg = gaussian_DKL(old_mu_ph, old_log_std_ph, p_means, log_std) + + # Jacobian of the KL divergence (Needed for the Fisher matrix-vector product) + dkl_diverg_grad = tf.gradients(dkl_diverg, p_variables) + + dkl_matrix_product = tf.reduce_sum(flatten_list(dkl_diverg_grad) * p_ph) + print('dkl_matrix_product', dkl_matrix_product.shape) + # Fisher vector product + # The Fisher-vector product is a way to compute the A matrix without the need of the full A + Fx = flatten_list(tf.gradients(dkl_matrix_product, p_variables)) + + ## Step length + beta_ph = tf.placeholder(shape=(), dtype=tf.float32, name='beta') + # NPG update + npg_update = beta_ph * cg_ph + + ## alpha is found through line search + alpha = tf.Variable(1., trainable=False) + # TRPO update + trpo_update = alpha * npg_update + + #################### POLICY UPDATE ################### + # variable used as an index + it_v = tf.Variable(0, trainable=False) + p_opt = [] + # Apply the updates to the policy + for p_v in p_variables: + print(p_v) + upd_rsh = tf.reshape(trpo_update[it_v: it_v + tf.reduce_prod(p_v.shape)], shape=p_v.shape) + p_opt.append(p_v.assign_sub(upd_rsh)) + it_v += tf.reduce_prod(p_v.shape) + + p_opt = tf.group(*p_opt) + + ######################################################### + ######################### MODEL ######################### + ######################################################### + + m_opts = [] + m_losses = [] + + nobs_pred_m = [] + act_obs = tf.concat([obs_ph, act_ph], 1) + # TODO: Variable learning rate injected + + + # computational graph of N models + for i in range(num_ensemble_models): + with tf.variable_scope('model_' + str(i) + '_nn'): + # TODO: Add variable size of network + hidden_sizes = 100 + nobs_pred = mlp(act_obs, [100, 100], obs_dim[0] + 1, tf.nn.tanh, last_activation=None) + nobs_pred_m.append(nobs_pred) 
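+        # Each ensemble member is an MLP mapping (obs, act) to obs_dim + 1 outputs: the
+        # predicted next observation concatenated with the predicted (scaled) reward, so
+        # the MSE loss below is taken against tf.concat([nobs_ph, rew_ph], 1).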
+ + m_loss = tf.reduce_mean((tf.concat([nobs_ph, rew_ph], 1) - nobs_pred) ** 2) + m_losses.append(m_loss) + + m_opts.append(tf.train.AdamOptimizer(learning_rate=mb_lr_).minimize(m_loss)) + + ##################### RESTORE MODEL ###################### + initialize_models = [] + models_variables = [] + for i in range(num_ensemble_models): + m_variables = variables_in_scope('model_' + str(i) + '_nn') + initialize_models.append(restore_model(old_model_variables, m_variables)) + models_variables.append(flatten_list(m_variables)) + + # Time + now = datetime.now() + clock_time = "{}_{}_{}_{}".format(now.day, now.hour, now.minute, now.second) + print('Time:', clock_time) + + # Set scalars and hisograms for TensorBoard + tf.summary.scalar('p_loss', p_loss, collections=['train']) + tf.summary.scalar('v_loss', v_loss, collections=['train']) + tf.summary.scalar('p_divergence', diverg, collections=['train']) + tf.summary.scalar('ratio_new_old', tf.reduce_mean(ratio_new_old), collections=['train']) + tf.summary.scalar('dkl_diverg', dkl_diverg, collections=['train']) + tf.summary.scalar('alpha', alpha, collections=['train']) + tf.summary.scalar('beta', beta_ph, collections=['train']) + tf.summary.scalar('p_std_mn', tf.reduce_mean(tf.exp(log_std)), collections=['train']) + tf.summary.scalar('s_values_mn', tf.reduce_mean(s_values), collections=['train']) + tf.summary.histogram('p_log', p_log, collections=['train']) + tf.summary.histogram('p_means', p_means, collections=['train']) + tf.summary.histogram('s_values', s_values, collections=['train']) + tf.summary.histogram('adv_ph', adv_ph, collections=['train']) + tf.summary.histogram('log_std', log_std, collections=['train']) + scalar_summary = tf.summary.merge_all('train') + + tf.summary.scalar('old_v_loss', v_loss, collections=['pre_train']) + tf.summary.scalar('old_p_loss', p_loss, collections=['pre_train']) + pre_scalar_summary = tf.summary.merge_all('pre_train') + + hyp_str = '-spe_' + str(steps_per_env) + '-envs_' + str(number_envs) + '-cr_lr' + str(cr_lr) + '-crit_it_' + str( + critic_iter) + '-delta_' + str(delta) + '-conj_iters_' + str(conj_iters) + + file_writer = tf.summary.FileWriter('log_dir/' + env_name + '/' + algorithm + '_' + clock_time + '_' + hyp_str, + tf.get_default_graph()) + + # create a session + sess = tf.Session() + # initialize the variables + sess.run(tf.global_variables_initializer()) + + def action_op(o): + return sess.run([p_means, s_values], feed_dict={obs_ph: o}) + + def action_op_noise(o): + return sess.run([a_sampl, s_values], feed_dict={obs_ph: o}) + + def model_op(o, a, md_idx): + # TODO: Modified code by Simon + mo = sess.run(nobs_pred_m[md_idx], feed_dict={obs_ph: [o], act_ph: [a[0]]}) + return np.squeeze(mo[:, :-1]), np.squeeze(mo[:, -1]) + + def run_model_loss(model_idx, r_obs, r_act, r_nxt_obs, r_rew): + # print({'obs_ph': r_obs.shape, 'act_ph': r_act.shape, 'nobs_ph': r_nxt_obs.shape}) + # TODO: Modified code by Simon + r_act = np.squeeze(r_act, axis=2) + # print(r_act.shape) + r_rew = np.reshape(r_rew, (-1, 1)) + # print(r_rew.shape) + return_val = sess.run(m_losses[model_idx], + feed_dict={obs_ph: r_obs, act_ph: r_act, nobs_ph: r_nxt_obs, rew_ph: r_rew}) + return return_val + + def run_model_opt_loss(model_idx, r_obs, r_act, r_nxt_obs, r_rew, mb_lr): + # TODO: Modified code by Simon + r_act = np.squeeze(r_act, axis=2) + r_rew = np.reshape(r_rew, (-1, 1)) + return sess.run([m_opts[model_idx], m_losses[model_idx]], + feed_dict={obs_ph: r_obs, act_ph: r_act, nobs_ph: r_nxt_obs, rew_ph: r_rew, mb_lr_: mb_lr }) + def 
model_assign(i, model_variables_to_assign): + ''' + Update the i-th model's parameters + ''' + return sess.run(initialize_models[i], feed_dict={old_model_variables: model_variables_to_assign}) + + def policy_update(obs_batch, act_batch, adv_batch, rtg_batch, it): + # log probabilities, logits and log std of the "old" policy + # "old" policy refer to the policy to optimize and that has been used to sample from the environment + # TODO: Modified code by Simon + act_batch = np.squeeze(act_batch, axis=2) + old_p_log, old_p_means, old_log_std = sess.run([p_log, p_means, log_std], + feed_dict={obs_ph: obs_batch, act_ph: act_batch, + adv_ph: adv_batch, ret_ph: rtg_batch}) + # get also the "old" parameters + old_actor_params = sess.run(p_var_flatten) + if it < 1: + std_vals = sess.run([log_std], feed_dict={log_std: np.ones(act_dim)}) + # print(std_vals) + # old_p_loss is later used in the line search + # run pre_scalar_summary for a summary before the optimization + old_p_loss, summary = sess.run([p_loss, pre_scalar_summary], + feed_dict={obs_ph: obs_batch, act_ph: act_batch, adv_ph: adv_batch, + ret_ph: rtg_batch, old_p_log_ph: old_p_log}) + file_writer.add_summary(summary, step_count) + + file_writer.add_summary(summary, step_count) + file_writer.flush() + + def H_f(p): + ''' + Run the Fisher-Vector product on 'p' to approximate the Hessian of the DKL + ''' + return sess.run(Fx, + feed_dict={old_mu_ph: old_p_means, old_log_std_ph: old_log_std, p_ph: p, obs_ph: obs_batch, + act_ph: act_batch, adv_ph: adv_batch, ret_ph: rtg_batch}) + + g_f = sess.run(p_grads_flatten, + feed_dict={old_mu_ph: old_p_means, obs_ph: obs_batch, act_ph: act_batch, adv_ph: adv_batch, + ret_ph: rtg_batch, old_p_log_ph: old_p_log}) + ## Compute the Conjugate Gradient so to obtain an approximation of H^(-1)*g + # Where H in reality isn't the true Hessian of the KL divergence but an approximation of it computed via Fisher-Vector Product (F) + conj_grad = conjugate_gradient(H_f, g_f, iters=conj_iters) + + # Compute the step length + beta_np = np.sqrt(2 * delta / (1e-10 + np.sum(conj_grad * H_f(conj_grad)))) + + def DKL(alpha_v): + ''' + Compute the KL divergence. + It optimize the function to compute the DKL. Afterwards it restore the old parameters. + ''' + sess.run(p_opt, feed_dict={beta_ph: beta_np, alpha: alpha_v, cg_ph: conj_grad, obs_ph: obs_batch, + act_ph: act_batch, adv_ph: adv_batch, old_p_log_ph: old_p_log}) + a_res = sess.run([dkl_diverg, p_loss], + feed_dict={old_mu_ph: old_p_means, old_log_std_ph: old_log_std, obs_ph: obs_batch, + act_ph: act_batch, adv_ph: adv_batch, ret_ph: rtg_batch, + old_p_log_ph: old_p_log}) + sess.run(restore_params, feed_dict={p_old_variables: old_actor_params}) + return a_res + + # Actor optimization step + # Different for TRPO or NPG + # Backtracing line search to find the maximum alpha coefficient s.t. 
the constraint is valid + best_alpha = backtracking_line_search(DKL, delta, old_p_loss, p=0.8) + sess.run(p_opt, feed_dict={beta_ph: beta_np, alpha: best_alpha, + cg_ph: conj_grad, obs_ph: obs_batch, act_ph: act_batch, + adv_ph: adv_batch, old_p_log_ph: old_p_log}) + + lb = len(obs_batch) + shuffled_batch = np.arange(lb) + np.random.shuffle(shuffled_batch) + + # Value function optimization steps + for _ in range(critic_iter): + # shuffle the batch on every iteration + np.random.shuffle(shuffled_batch) + for idx in range(0, lb, minibatch_size): + minib = shuffled_batch[idx:min(idx + minibatch_size, lb)] + sess.run(v_opt, feed_dict={obs_ph: obs_batch[minib], ret_ph: rtg_batch[minib]}) + + def train_model(tr_obs, tr_act, tr_nxt_obs, tr_rew, v_obs, v_act, v_nxt_obs, v_rew, step_count, model_idx, mb_lr): + + # Get validation loss on the old model + mb_valid_loss1 = run_model_loss(model_idx, v_obs, v_act, v_nxt_obs, v_rew) + + # Restore the random weights to have a new, clean neural network + model_assign(model_idx, initial_variables_models[model_idx]) + + mb_valid_loss = run_model_loss(model_idx, v_obs, v_act, v_nxt_obs, v_rew) + + acc_m_losses = [] + last_m_losses = [] + md_params = sess.run(models_variables[model_idx]) + best_mb = {'iter': 0, 'loss': mb_valid_loss, 'params': md_params} + it = 0 + + lb = len(tr_obs) + shuffled_batch = np.arange(lb) + np.random.shuffle(shuffled_batch) + + while best_mb['iter'] > it - model_iter: + + # update the model on each mini-batch + last_m_losses = [] + for idx in range(0, lb, model_batch_size): + minib = shuffled_batch[idx:min(idx + minibatch_size, lb)] + + if len(minib) != minibatch_size: + _, ml = run_model_opt_loss(model_idx, tr_obs[minib], tr_act[minib], tr_nxt_obs[minib], + tr_rew[minib], mb_lr=mb_lr) + acc_m_losses.append(ml) + last_m_losses.append(ml) + else: + pass + # print('Warning!') + + # Check if the loss on the validation set has improved + mb_valid_loss = run_model_loss(model_idx, v_obs, v_act, v_nxt_obs, v_rew) + if mb_valid_loss < best_mb['loss']: + best_mb['loss'] = mb_valid_loss + best_mb['iter'] = it + best_mb['params'] = sess.run(models_variables[model_idx]) + + it += 1 + # if it>=10000: + # break + # print('iteration: ', it) + + # Restore the model with the lower validation loss + model_assign(model_idx, best_mb['params']) + + print('Model:{}, iter:{} -- Old Val loss:{:.6f} New Val loss:{:.6f} -- New Train loss:{:.6f}'.format(model_idx, + it, + mb_valid_loss1, + best_mb[ + 'loss'], + np.mean( + last_m_losses))) + summary = tf.Summary() + summary.value.add(tag='supplementary/m_loss', simple_value=np.mean(acc_m_losses)) + summary.value.add(tag='supplementary/iterations', simple_value=it) + file_writer.add_summary(summary, step_count) + file_writer.flush() + + def plot_results(env_wrapper, label, **kwargs): + # plotting + print('now plotting...') + rewards = env_wrapper.env.current_buffer.get_data()['rews'] + + # initial_states = env.initial_conditions + + iterations = [] + finals = [] + means = [] + stds = [] + + # init_states = pd.read_pickle('/Users/shirlaen/PycharmProjects/DeepLearning/spinningup/Environments/initData') + + for i in range(len(rewards)): + if (len(rewards[i]) > 1): + # finals.append(rewards[i][len(rewards[i]) - 1]) + finals.append(rewards[i][-1]) + means.append(np.mean(rewards[i][1:])) + stds.append(np.std(rewards[i][1:])) + iterations.append(len(rewards[i])) + # print(iterations) + x = range(len(iterations)) + iterations = np.array(iterations) + finals = np.array(finals) + means = np.array(means) + stds = 
np.array(stds) + + plot_suffix = label # , Fermi time: {env.TOTAL_COUNTER / 600:.1f} h' + + fig, axs = plt.subplots(2, 1, sharex=True) + + ax = axs[0] + ax.plot(x, iterations) + ax.set_ylabel('Iterations (1)') + ax.set_title(plot_suffix) + # fig.suptitle(label, fontsize=12) + if 'data_number' in kwargs: + ax1 = plt.twinx(ax) + color = 'lime' + ax1.set_ylabel('Mean reward', color=color) # we already handled the x-label with ax1 + ax1.tick_params(axis='y', labelcolor=color) + ax1.plot(x, kwargs.get('data_number'), color=color) + + ax = axs[1] + color = 'blue' + ax.set_ylabel('Final reward', color=color) # we already handled the x-label with ax1 + ax.tick_params(axis='y', labelcolor=color) + ax.plot(x, finals, color=color) + + ax.set_title('Final reward per episode') # + plot_suffix) + ax.set_xlabel('Episodes (1)') + + ax1 = plt.twinx(ax) + color = 'lime' + ax1.set_ylabel('Mean reward', color=color) # we already handled the x-label with ax1 + ax1.tick_params(axis='y', labelcolor=color) + ax1.fill_between(x, means - stds, means + stds, + alpha=0.5, edgecolor=color, facecolor='#FF9848') + ax1.plot(x, means, color=color) + + # ax.set_ylim(ax1.get_ylim()) + if 'save_name' in kwargs: + plt.savefig(kwargs.get('save_name') + '.pdf') + # fig.tight_layout() + plt.show() + + def plot_observables(data, label, **kwargs): + """plot observables during the test""" + + sim_rewards_all = np.array(data.get('sim_rewards_all')) + step_counts_all = np.array(data.get('step_counts_all')) + batch_rews_all = np.array(data.get('batch_rews_all')) + tests_all = np.array(data.get('tests_all')) + + fig, axs = plt.subplots(2, 1, sharex=True) + x = np.arange(len(batch_rews_all[0])) + ax = axs[0] + ax.step(x, batch_rews_all[0]) + ax.fill_between(x, batch_rews_all[0] - batch_rews_all[1], batch_rews_all[0] + batch_rews_all[1], + alpha=0.5) + ax.set_ylabel('rews per batch') + + ax.set_title(label) + + # plt.tw + ax2 = ax.twinx() + + color = 'lime' + ax2.set_ylabel('data points', color=color) # we already handled the x-label with ax1 + ax2.tick_params(axis='y', labelcolor=color) + ax2.step(x, step_counts_all, color=color) + + ax = axs[1] + ax.plot(sim_rewards_all[0], ls=':') + ax.fill_between(x, sim_rewards_all[0] - sim_rewards_all[1], sim_rewards_all[0] + sim_rewards_all[1], + alpha=0.5) + ax.plot(tests_all[0]) + ax.fill_between(x, tests_all[0] - tests_all[1], tests_all[0] + tests_all[1], + alpha=0.5) + ax.set_ylabel('rewards tests vs. 
ground truth') + # plt.tw + ax2 = ax.twinx() + + color = 'lime' + ax2.set_ylabel('entropy', color=color) # we already handled the x-label with ax1 + ax2.tick_params(axis='y', labelcolor=color) + ax2.plot(entropy_all, color=color) + # fig.show() + plt.show() + + def save_data(data, **kwargs): + '''logging functon''' + # if 'directory_name' in kwargs: + # project_directory = kwargs.get('directory_name') + now = datetime.now() + clock_time = f'{now.month:0>2}_{now.day:0>2}_{now.hour:0>2}_{now.minute:0>2}_{now.second:0>2}' + out_put_writer = open(project_directory + clock_time + '_training_observables', 'wb') + pickle.dump(data, out_put_writer, -1) + out_put_writer.close() + + # variable to store the total number of steps + step_count = 0 + model_buffer = FullBuffer() + print('Env batch size:', steps_per_env, ' Batch size:', steps_per_env * number_envs) + + # Create a simulated environment + sim_env = NetworkEnv(make_env(), model_op, None, num_ensemble_models) + + # Get the initial parameters of each model + # These are used in later epochs when we aim to re-train the models anew with the new dataset + initial_variables_models = [] + for model_var in models_variables: + initial_variables_models.append(sess.run(model_var)) + total_iterations = 0 + + converged_flag = False + # save_data = save_data(clock_time) + sim_rewards_all = [] + sim_rewards_std_all = [] + entropy_all = [] + tests_all = [] + tests_std_all = [] + batch_rews_all = [] + batch_rews_std_all = [] + step_counts_all = [] + + for ep in range(num_epochs): + if (converged_flag): + print('Converged!!!!') + break + # lists to store rewards and length of the trajectories completed + batch_rew = [] + batch_len = [] + print('============================', ep, '============================') + # Execute in serial the environment, storing temporarily the trajectories. + for env in envs: + # Todo: Test randomization stronger if reward lower...we need a good scheme + # target_threshold ????? 
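+        # A fresh exploration scale is drawn for every data-collection episode: log_std is
+        # set to log(u) with u ~ U(0, 1), so some episodes act almost deterministically
+        # while others add Gaussian noise with std close to 1 to the policy mean.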
+ init_log_std = np.ones(act_dim) * np.log(np.random.rand() * 1) + env.reset() + + # iterate over a fixed number of steps + steps_train = init_random_steps if ep == 0 else steps_per_env + # steps_train = steps_per_env + for _ in range(steps_train): + # found = False + # while not(found): + # run the policy + + if ep == 0: + # Sample random action during the first epoch + act = np.random.uniform(-1, 1, size=env.action_space.shape[-1]) + + else: + + act = sess.run(a_sampl, feed_dict={obs_ph: [env.n_obs], log_std: init_log_std}) + act = np.clip(act + np.random.randn(act.shape[0], act.shape[1]) * 0.1, -1, 1) + + act = np.squeeze(act) + # print('act', act*12) + # take a step in the environment + obs2, rew, done, _ = env.step(np.array(act)) + + # add the new transition to the temporary buffer + model_buffer.store(env.n_obs.copy(), act, rew.copy(), obs2.copy(), done) + + env.n_obs = obs2.copy() + step_count += 1 + + if done: + batch_rew.append(env.get_episode_reward()) + batch_len.append(env.get_episode_length()) + + env.reset() + init_log_std = np.ones(act_dim) * np.log(np.random.rand() * 1) + + # if ep == 0: + # # try: + # # Initialize randomly a training and validation set + # model_buffer.generate_random_dataset() + # # get both datasets + # train_obs, train_act, train_rew, train_nxt_obs, _ = model_buffer.get_training_batch() + # valid_obs, valid_act, valid_rew, valid_nxt_obs, _ = model_buffer.get_valid_batch() + # target_threshold = max(max(valid_rew), max(train_rew)) + # # print('-- '*38, target_threshold) + # found = target_threshold>=-0.1 and step_count>=191 + # # except: + # # pass + + # save the data for plotting the collected data for the model + env.save_current_buffer() + + print('Ep:%d Rew:%.2f -- Step:%d' % (ep, np.mean(batch_rew), step_count)) + + # env_test.env.set_usage('default') + # plot_results(env_test, f'Total {total_iterations}, ' + # f'data points: {len(model_buffer.train_idx) + len(model_buffer.valid_idx)}, ' + # f'modelit: {ep}') + ############################################################ + ###################### MODEL LEARNING ###################### + ############################################################ + + # Initialize randomly a training and validation set + model_buffer.generate_random_dataset() + + # get both datasets + train_obs, train_act, train_rew, train_nxt_obs, _ = model_buffer.get_training_batch() + valid_obs, valid_act, valid_rew, valid_nxt_obs, _ = model_buffer.get_valid_batch() + std_vals = sess.run(log_std) + print('Log Std policy:', std_vals, np.mean(std_vals)) + + target_threshold = max(max(valid_rew), max(train_rew)) + sim_env.threshold = target_threshold # min(target_threshold, -0.05) + print('maximum: ', sim_env.threshold) + + # Learning rate as function of ep + lr = lambda ep: 1e-3-ep/num_epochs*(1e-3-5e-4) + mb_lr = 1e-3 #if ep<10 else 5e-5 + # simulated_steps = simulated_steps if ep<10 else 10000 + print('mb_lr: ', mb_lr) + for i in range(num_ensemble_models): + # train the dynamic model on the datasets just sampled + train_model(train_obs, train_act, train_nxt_obs, train_rew, valid_obs, valid_act, valid_nxt_obs, valid_rew, + step_count, i, mb_lr=mb_lr) + + ############################################################ + ###################### POLICY LEARNING ###################### + ############################################################ + + best_sim_test = -1e16 * np.ones(num_ensemble_models) + + # plot_results(env_test, f'Total {total_iterations}, ' + # f'data points: {len(model_buffer.train_idx) + len(model_buffer.valid_idx)}, ' 
+ # f'modelit: {ep}') + + for it in range(max_training_iterations): + if converged_flag: + break + total_iterations += 1 + print('\t Policy it', it, end='..') + + ##################### MODEL SIMLUATION ##################### + # obs_batch, act_batch, adv_batch, rtg_batch = simulate_environment(sim_env, action_op_noise, simulated_steps) + batch, ep_length = simulate_environment(sim_env, action_op_noise, simulated_steps) + # verification_simulate_environment(sim_env, env_test, action_op_noise, 50) + obs_batch, act_batch, adv_batch, rtg_batch = batch + + ################# TRPO UPDATE ################ + policy_update(obs_batch, act_batch, adv_batch, rtg_batch, it) + std_vals = sess.run(log_std) + print('Log Std policy inner:', np.mean(std_vals)) + if np.mean(std_vals) < -5: + converged_flag = True + # Testing the policy on a real environment + # mn_test, mn_test_std, mn_length = test_agent(env_test, action_op, num_games=1) + # plot_results(env_test, 'ME-TRPO') + # print(' Test score: ', np.round(mn_test, 2), np.round(mn_test_std, 2), np.round(mn_length, 2)) + # mn_test, mn_test_std, mn_length = test_agent(env_test, action_op, num_games=1) + # summary = tf.Summary() + # summary.value.add(tag='test/performance', simple_value=mn_test) + # file_writer.add_summary(summary, step_count) + # file_writer.flush() + + # Test the policy on simulated environment. + # dynamic_wait_time_count = dynamic_wait_time(ep) + if dynamic_wait_time(it, ep): + print('Iterations: ', total_iterations) + + + # perform test! ----------------------------- + # env_test.env.set_usage('test') + # mn_test, mn_test_std, mn_length = test_agent(env_test, action_op, num_games=25) + # perform test! ----------------------------- + label = f'Total {total_iterations}, ' + \ + f'data points: {len(model_buffer.train_idx) + len(model_buffer.valid_idx)}, ' + \ + f'ep: {ep}, it: {it}\n' + hyp_str_all + + # plot results of test ----------------------------- + # plot_results(env_test, label=label) + + # env_test.save_current_buffer(info=label) + # print(' Test score: ', np.round(mn_test, 2), np.round(mn_test_std, 2), np.round(mn_length, 2)) + + # save the data for plotting the tests + # tests_all.append(mn_test) + # tests_std_all.append(mn_test_std) + + # perform test end! 
----------------------------- + env_test.env.set_usage('default') + + + print('Simulated test:', end=' ** ') + + sim_rewards = [] + for i in range(num_ensemble_models): + sim_m_env = NetworkEnv(make_env(), model_op, None, i + 1) + mn_sim_rew, _, _ = test_agent(sim_m_env, action_op, num_games=10) + sim_rewards.append(mn_sim_rew) + print(mn_sim_rew, end=' ** ') + + print("") + + entropy_all.append(np.mean(std_vals)) + step_counts_all.append(step_count) + + sim_rewards = np.array(sim_rewards) + sim_rewards_all.append(np.mean(sim_rewards)) + sim_rewards_std_all.append(np.std(sim_rewards)) + + batch_rews_all.append(np.mean(batch_rew)) + batch_rews_std_all.append(np.std(batch_rew)) + + data = dict(sim_rewards_all=[sim_rewards_all, sim_rewards_std_all], + entropy_all=entropy_all, + step_counts_all=step_counts_all, + batch_rews_all=[batch_rews_all, batch_rews_std_all], + tests_all=[tests_all, tests_std_all], + info=label) + + # save the data for plotting the progress ------------------- + save_data(data=data) + + # plotting the progress ------------------- + # plot_observables(data=data, label=label) + + # stop training if the policy hasn't improved + if (np.sum(best_sim_test >= sim_rewards) > int(num_ensemble_models * 0.7)): + # or (len(sim_rewards[sim_rewards >= 990]) > int(num_ensemble_models * 0.7)): + if it > delay_before_convergence_check and ep 1): + # finals.append(rewards[i][len(rewards[i]) - 1]) + finals.append(rewards[i][-1]) + means.append(np.mean(rewards[i][1:])) + iterations.append(len(rewards[i])) + print(finals) + plot_suffix = label # , Fermi time: {env.TOTAL_COUNTER / 600:.1f} h' + + fig, axs = plt.subplots(2, 1, sharex=True) + + ax = axs[0] + ax.plot(iterations) + ax.set_title(plot_suffix) + # fig.suptitle(label, fontsize=12) + + ax = axs[1] + color = 'blue' + ax.set_ylabel('Final reward', color=color) # we already handled the x-label with ax1 + ax.tick_params(axis='y', labelcolor=color) + ax.plot(finals, color=color) + + ax.set_title('Final reward per episode') # + plot_suffix) + ax.set_xlabel('Episodes (1)') + + ax1 = plt.twinx(ax) + color = 'lime' + ax1.set_ylabel('Mean reward', color=color) # we already handled the x-label with ax1 + ax1.tick_params(axis='y', labelcolor=color) + ax1.plot(means, color=color) + + # ax.set_ylim(ax1.get_ylim()) + if 'save_name' in kwargs: + plt.savefig(kwargs.get('save_name') + '.pdf') + # fig.tight_layout() + plt.show() + +# plot_results(data=object['rews']) + + +filenames = [] +for file in os.listdir(project_directory): + if 'training_observables' in file: + filenames.append(file) + +filenames.sort() + +# filename = '09_25_19_18_04_training_observables' +filename = filenames[-1] +print(filename) + +filehandler = open(project_directory + filename, 'rb') +object = pickle.load(filehandler) + +sim_rewards_all = object['sim_rewards_all'][0] +entropy_all=object['entropy_all'] +step_counts_all=object['step_counts_all'] +batch_rews_all=object['batch_rews_all'][0] +tests_all = object['tests_all'][0] + +fig, axs = plt.subplots(2, 1, sharex=True) +x = np.arange(len(batch_rews_all)) +ax = axs[0] +ax.step(x, batch_rews_all) +ax.set_ylabel('rews per batch') +# plt.tw +ax2 = ax.twinx() + +color = 'lime' +ax2.set_ylabel('data points', color=color) # we already handled the x-label with ax1 +ax2.tick_params(axis='y', labelcolor=color) +ax2.step(x, step_counts_all, color=color) + +ax = axs[1] +ax.plot(sim_rewards_all, ls=':') +ax.plot(tests_all) +ax.set_ylabel('rewards tests vs. 
ground truth') +# plt.tw +ax2 = ax.twinx() + +color = 'lime' +ax2.set_ylabel('entropy', color=color) # we already handled the x-label with ax1 +ax2.tick_params(axis='y', labelcolor=color) +ax2.plot(entropy_all, color=color) +fig.show() diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/ME_TRPO_stable.py b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/ME_TRPO_stable.py new file mode 100644 index 0000000..8e1ce4d --- /dev/null +++ b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/ME_TRPO_stable.py @@ -0,0 +1,1573 @@ +import os +import pickle +from datetime import datetime + +import gym +import matplotlib.pyplot as plt +import numpy as np +import pandas as pd +import tensorflow as tf + +# from local_fel_simulated_env import FelLocalEnv +# from simulated_tango import SimTangoConnection +from laser_trajectory_control_env import LaserTrajectoryControlEnv +from tango_connection import TangoConnection + +# set random seed +random_seed = 111 +np.random.seed(random_seed) + +conf_file = '/home/niky/FERMI/2020_10_06/configuration/conf_fel2.json' +tango = TangoConnection(conf_file=conf_file) +real_env = LaserTrajectoryControlEnv(tango=tango) + +# Hyper papameters +steps_per_env = 15 +init_random_steps = 45 # 50 # 15 +# For niky: You can change the number of steps ------------------------------------------------------------------- +total_steps = 450 # 400 # 300 # +num_epochs = int((total_steps - init_random_steps) / (steps_per_env)) + 1 + +print('Number of epoches: ', num_epochs) + +hidden_sizes = [100, 100] +cr_lr = 1e-4 # try 1e-3 +gamma = 0.9999 +lam = 0.95 + +max_training_iterations = 50 # 25 # 100 +# max_training_iterations = 10 +delay_before_convergence_check = 2 + +number_envs = 1 +critic_iter = 15 +delta = 0.05 +algorithm = 'TRPO' +conj_iters = 15 +minibatch_size = 500 +simulated_steps = 2000 # 2500 # 2000 # 5000 # in case to increase (Niky) + +mb_lr = 1e-3 # 1e-2 # 1e-4 # 1e-3 +model_batch_size = 5 # in case change to 15 or 25 (Niky) +# For niky: You can change the number of models here ------------------------------------------------------------------- +num_ensemble_models = 9 # 1 # 5 +# ---------------------------------------------------------------------------------------------------------------------- +model_iter = 15 + +# How often to check the progress of the network training +# e.g. 
lambda it, episode: (it + 1) % max(3, (ep+1)*2) == 0 +dynamic_wait_time = lambda it, ep: (it + 1) % 3 == 0 # +# dynamic_wait_time = lambda it, ep: (it + 1) % 1 == 0 # + +# Create the logging directory: +project_directory = 'Data_logging/ME_TRPO_stable/2020_10_06_ME_TRPO_stable@FERMI/' + +hyp_str_all = '-nr_steps_' + str(steps_per_env) + '-cr_lr' + str(cr_lr) + '-crit_it_' + str( + critic_iter) + '-d_' + str(delta) + '-conj_iters_' + str(conj_iters) + '-n_ep_' + str(num_epochs) + \ + '-mini_bs_' + str(minibatch_size) + '-m_bs_' + str(model_batch_size) + \ + '-mb_lr_' + str(mb_lr) + \ + '-sim_steps_' + str(simulated_steps) + \ + '-m_iter_' + str(model_iter) + '-ensnr_' + str(num_ensemble_models) + '-init_' + str( + init_random_steps) + '/' +project_directory = project_directory + hyp_str_all + +# To label the plots: +hyp_str_all = '-nr_steps_' + str(steps_per_env) + '-cr_lr' + str(cr_lr) + '-crit_it_' + str( + critic_iter) + '-d_' + str(delta) + '-conj_iters_' + str(conj_iters) + '-n_ep_' + str(num_epochs) + \ + '\n-mini_bs_' + str(minibatch_size) + '-m_bs_' + str(model_batch_size) + \ + '-mb_lr_' + str(mb_lr) + \ + '-sim_steps_' + str(simulated_steps) + \ + '-m_iter_' + str(model_iter) + \ + '\n-ensnr_' + str(num_ensemble_models) +if not os.path.isdir(project_directory): + os.makedirs(project_directory) + print("created folder : ", project_directory) + + +# Class for data storage during the tests +class TrajectoryBuffer(): + '''Class for data storage during the tests''' + + def __init__(self, name, directory): + self.save_frequency = 100000 + self.directory = directory + self.name = name + self.rews = [] + self.obss = [] + self.acts = [] + self.dones = [] + self.info = "" + self.idx = -1 + + def new_trajectory(self, obs): + self.idx += 1 + self.rews.append([]) + self.acts.append([]) + self.obss.append([]) + self.dones.append([]) + self.store_step(obs=obs) + + def store_step(self, obs=None, act=None, rew=None, done=None): + self.rews[self.idx].append(rew) + self.obss[self.idx].append(obs) + self.acts[self.idx].append(act) + self.dones[self.idx].append(done) + + if self.__len__() % self.save_frequency == 0: + self.save_buffer() + + def __len__(self): + assert (len(self.rews) == len(self.obss) == len(self.acts) == len(self.dones)) + return len(self.obss) + + def save_buffer(self, **kwargs): + if 'info' in kwargs: + self.info = kwargs.get('info') + now = datetime.now() + # clock_time = "{}_{}_{}_{}_".format(now.day, now.hour, now.minute, now.second) + clock_time = f'{now.month:0>2}_{now.day:0>2}_{now.hour:0>2}_{now.minute:0>2}_{now.second:0>2}_' + data = dict(obss=self.obss, + acts=self.acts, + rews=self.rews, + dones=self.dones, + info=self.info) + # print('saving...', data) + out_put_writer = open(self.directory + clock_time + self.name, 'wb') + pickle.dump(data, out_put_writer, -1) + # pickle.dump(self.actions, out_put_writer, -1) + out_put_writer.close() + + def get_data(self): + return dict(obss=self.obss, + acts=self.acts, + rews=self.rews, + dones=self.dones, + info=self.info) + + +class MonitoringEnv(gym.Wrapper): + ''' + Gym Wrapper to store information for scaling to correct scpace and for post analysis. 
+ ''' + + def __init__(self, env, **kwargs): + gym.Wrapper.__init__(self, env) + self.data_dict = dict() + self.environment_usage = 'default' + self.directory = project_directory + self.data_dict[self.environment_usage] = TrajectoryBuffer(name=self.environment_usage, + directory=self.directory) + self.current_buffer = self.data_dict.get(self.environment_usage) + + self.test_env_flag = False + if 'test_env' in kwargs: + self.test_env_flag = True + + def reset(self, **kwargs): + init_obs = self.env.reset(**kwargs) + # print('Reset Env: ', (init_obs),10*'-- ') + self.current_buffer.new_trajectory(init_obs) + init_obs = self.scale_state_env(init_obs) + # print('Reset Menv: ', (init_obs)) + return init_obs + + def step(self, action): + # print('a', action) + action = self.descale_action_env(action) + # print('as', action) + ob, reward, done, info = self.env.step(np.squeeze(action)) + # print('Env: ', reward) + # print('Env: ', ob, 'r:', reward, done) + self.current_buffer.store_step(obs=ob, act=action, rew=reward, done=done) + ob = self.scale_state_env(ob) + reward = self.rew_scale(reward) + # print('Menv: ', ob, 'r:', reward, done) + # print('Menv: ', reward) + return ob, reward, done, info + + def set_usage(self, usage): + self.environment_usage = usage + if usage in self.data_dict: + self.current_buffer = self.data_dict.get(usage) + else: + self.data_dict[self.environment_usage] = TrajectoryBuffer(name=self.environment_usage, + directory=self.directory) + self.current_buffer = self.data_dict.get(usage) + + def close_usage(self, usage): + # Todo: Implement to save complete data + self.current_buffer = self.data_dict.get(usage) + self.current_buffer.save_buffer() + + def scale_state_env(self, ob): + scale = (self.env.observation_space.high - self.env.observation_space.low) + return (2 * ob - (self.env.observation_space.high + self.env.observation_space.low)) / scale + + def descale_action_env(self, act): + scale = (self.env.action_space.high - self.env.action_space.low) + return (scale * act + self.env.action_space.high + self.env.action_space.low) / 2 + + def rew_scale(self, rew): + # we only scale for the network training: + if not self.test_env_flag: + rew = rew * 2 + 1 + # if self.test_env_flag: + # '''Rescale reward from [-1,0] to [-1,1] for the training of the network in case of tests''' + # rew = rew * 2 + 1 + # if rew < -1: + # print('Hallo was geht: ', rew) + # else: + # print('Okay...', rew) + return rew + + def save_current_buffer(self, info=''): + self.current_buffer = self.data_dict.get(self.environment_usage) + self.current_buffer.save_buffer(info=info) + print('Saved current buffer', self.environment_usage) + + def set_directory(self, directory): + self.directory = directory + + +env_monitored = MonitoringEnv(env=real_env) + + +def make_env(**kwargs): + '''Create the environement''' + return MonitoringEnv(env=real_env, **kwargs) + + +def mlp(x, hidden_layers, output_layer, activation, last_activation=None): + ''' + Multi-layer perceptron with init conditions for anchor method + ''' + + for l in hidden_layers: + x = tf.layers.dense(x, units=l, activation=activation) + return tf.layers.dense(x, units=output_layer, activation=last_activation) + + +def softmax_entropy(logits): + ''' + Softmax Entropy + ''' + return -tf.reduce_sum(tf.nn.softmax(logits, axis=-1) * tf.nn.log_softmax(logits, axis=-1), axis=-1) + + +def gaussian_log_likelihood(ac, mean, log_std): + ''' + Gaussian Log Likelihood + ''' + log_p = ((ac - mean) ** 2 / (tf.exp(log_std) ** 2 + 1e-9) + 2 * log_std) + np.log(2 * 
np.pi) + return -0.5 * tf.reduce_sum(log_p, axis=-1) + + +def conjugate_gradient(A, b, x=None, iters=10): + ''' + Conjugate gradient method: approximate the solution of Ax=b + It solve Ax=b without forming the full matrix, just compute the matrix-vector product (The Fisher-vector product) + NB: A is not the full matrix but is a useful matrix-vector product between the averaged Fisher information matrix and arbitrary vectors + Descibed in Appendix C.1 of the TRPO paper + ''' + if x is None: + x = np.zeros_like(b) + + r = A(x) - b + p = -r + for _ in range(iters): + a = np.dot(r, r) / (np.dot(p, A(p)) + 1e-8) + x += a * p + r_n = r + a * A(p) + b = np.dot(r_n, r_n) / (np.dot(r, r) + 1e-8) + p = -r_n + b * p + r = r_n + return x + + +def gaussian_DKL(mu_q, log_std_q, mu_p, log_std_p): + ''' + Gaussian KL divergence in case of a diagonal covariance matrix + ''' + return tf.reduce_mean(tf.reduce_sum( + 0.5 * (log_std_p - log_std_q + tf.exp(log_std_q - log_std_p) + (mu_q - mu_p) ** 2 / tf.exp(log_std_p) - 1), + axis=1)) + + +def backtracking_line_search(Dkl, delta, old_loss, p=0.8): + ''' + Backtracking line searc. It look for a coefficient s.t. the constraint on the DKL is satisfied + It has both to + - improve the non-linear objective + - satisfy the constraint + ''' + ## Explained in Appendix C of the TRPO paper + a = 1 + it = 0 + + new_dkl, new_loss = Dkl(a) + while (new_dkl > delta) or (new_loss > old_loss): + a *= p + it += 1 + new_dkl, new_loss = Dkl(a) + + return a + + +def GAE(rews, v, v_last, gamma=0.99, lam=0.95): + ''' + Generalized Advantage Estimation + ''' + assert len(rews) == len(v) + vs = np.append(v, v_last) + d = np.array(rews) + gamma * vs[1:] - vs[:-1] + gae_advantage = discounted_rewards(d, 0, gamma * lam) + return gae_advantage + + +def discounted_rewards(rews, last_sv, gamma): + ''' + Discounted reward to go + Parameters: + ---------- + rews: list of rewards + last_sv: value of the last state + gamma: discount value + ''' + rtg = np.zeros_like(rews, dtype=np.float32) + rtg[-1] = rews[-1] + gamma * last_sv + for i in reversed(range(len(rews) - 1)): + rtg[i] = rews[i] + gamma * rtg[i + 1] + return rtg + + +def flatten_list(tensor_list): + ''' + Flatten a list of tensors + ''' + return tf.concat([flatten(t) for t in tensor_list], axis=0) + + +def flatten(tensor): + ''' + Flatten a tensor + ''' + return tf.reshape(tensor, shape=(-1,)) + + +def test_agent(env_test, agent_op, num_games=10): + ''' + Test an agent 'agent_op', 'num_games' times + Return mean and std + ''' + games_r = [] + games_length = [] + games_dones = [] + for _ in range(num_games): + d = False + game_r = 0 + o = env_test.reset() + game_length = 0 + while not d: + a_s, _ = agent_op([o]) + o, r, d, _ = env_test.step(a_s[0]) + game_r += r + game_length += 1 + + games_r.append(game_r) + games_length.append(game_length) + games_dones.append(d) + return np.mean(games_r), np.std(games_r), np.mean(games_length), np.mean(games_dones) + + +class Buffer(): + ''' + Class to store the experience from a unique policy + ''' + + def __init__(self, gamma=0.99, lam=0.95): + self.gamma = gamma + self.lam = lam + self.adv = [] + self.ob = [] + self.ac = [] + self.rtg = [] + + def store(self, temp_traj, last_sv): + ''' + Add temp_traj values to the buffers and compute the advantage and reward to go + Parameters: + ----------- + temp_traj: list where each element is a list that contains: observation, reward, action, state-value + last_sv: value of the last state (Used to Bootstrap) + ''' + # store only if there are temporary 
trajectories + if len(temp_traj) > 0: + self.ob.extend(temp_traj[:, 0]) + rtg = discounted_rewards(temp_traj[:, 1], last_sv, self.gamma) + self.adv.extend(GAE(temp_traj[:, 1], temp_traj[:, 3], last_sv, self.gamma, self.lam)) + self.rtg.extend(rtg) + self.ac.extend(temp_traj[:, 2]) + + def get_batch(self): + # standardize the advantage values + norm_adv = (self.adv - np.mean(self.adv)) / (np.std(self.adv) + 1e-10) + return np.array(self.ob), np.array(np.expand_dims(self.ac, -1)), np.array(norm_adv), np.array(self.rtg) + + def __len__(self): + assert (len(self.adv) == len(self.ob) == len(self.ac) == len(self.rtg)) + return len(self.ob) + + +class FullBuffer(): + def __init__(self): + self.rew = [] + self.obs = [] + self.act = [] + self.nxt_obs = [] + self.done = [] + + self.train_idx = [] + self.valid_idx = [] + self.idx = 0 + + def store(self, obs, act, rew, nxt_obs, done): + self.rew.append(rew) + self.obs.append(obs) + self.act.append(act) + self.nxt_obs.append(nxt_obs) + self.done.append(done) + + self.idx += 1 + + def generate_random_dataset(self): + rnd = np.arange(len(self.obs)) + np.random.shuffle(rnd) + self.valid_idx = rnd[: int(len(self.obs) / 5)] + self.train_idx = rnd[int(len(self.obs) / 5):] + print('Train set:', len(self.train_idx), 'Valid set:', len(self.valid_idx)) + + def get_training_batch(self): + return np.array(self.obs)[self.train_idx], np.array(np.expand_dims(self.act, -1))[self.train_idx], \ + np.array(self.rew)[self.train_idx], np.array(self.nxt_obs)[self.train_idx], np.array(self.done)[ + self.train_idx] + + def get_valid_batch(self): + return np.array(self.obs)[self.valid_idx], np.array(np.expand_dims(self.act, -1))[self.valid_idx], \ + np.array(self.rew)[self.valid_idx], np.array(self.nxt_obs)[self.valid_idx], np.array(self.done)[ + self.valid_idx] + + def __len__(self): + assert (len(self.rew) == len(self.obs) == len(self.act) == len(self.nxt_obs) == len(self.done)) + return len(self.obs) + + +def simulate_environment(env, policy, simulated_steps): + '''Lists to store rewards and length of the trajectories completed''' + buffer = Buffer(0.99, 0.95) + steps = 0 + number_episodes = 0 + + while steps < simulated_steps: + temp_buf = [] + obs = env.reset() + number_episodes += 1 + done = False + + while not done: + act, val = policy([obs]) + # act = act[0] + obs2, rew, done, _ = env.step([act]) + + temp_buf.append([obs.copy(), rew, np.squeeze(act), np.squeeze(val)]) + + obs = obs2.copy() + steps += 1 + + if done: + buffer.store(np.array(temp_buf), 0) + temp_buf = [] + + if steps == simulated_steps: + break + + buffer.store(np.array(temp_buf), np.squeeze(policy([obs])[1])) + + print('Sim ep:', number_episodes, end=' \n') + + return buffer.get_batch(), number_episodes + + +class NetworkEnv(gym.Wrapper): + ''' + Wrapper to handle the network interaction + ''' + + def __init__(self, env, model_func=None, done_func=None, number_models=1): + gym.Wrapper.__init__(self, env) + + self.model_func = model_func + self.done_func = done_func + self.number_models = number_models + self.len_episode = 0 + # self.threshold = self.env.threshold + # print('the threshold is: ', self.threshold) + self.max_steps = env.max_steps + + def reset(self, **kwargs): + + # self.threshold = -0.05 * 2 + 1 # rescaled [-1,1] + self.len_episode = 0 + self.done = False + # kwargs['simulation'] = True + # action = self.env.reset(**kwargs) + if self.model_func is not None: + #Niky + obs = np.random.uniform(-1, 1, self.env.observation_space.shape) + # print('reset', obs) + # Todo: remove + # NIKY + #obs = 
self.env.reset() + # obs = self.env.reset() + else: + # obs = self.env.reset(**kwargs) + pass + # Does this work? + # self.obs = np.clip(obs, -1.0, 1.0) + self.obs = obs.copy() + # if self.test_phase: + # print('test reset', self.obs) + # print('Reset : ',self.obs) + return self.obs + + def step(self, action): + if self.model_func is not None: + # predict the next state on a random model + # obs, rew = self.model_func(self.obs, [np.squeeze(action)], np.random.randint(0, self.number_models)) + obs, rew = self.model_func(self.obs, [np.squeeze(action)], np.random.randint(0, self.number_models)) + self.obs = obs.copy() + # NIKY + #obs_real, rew_real, _, _ = self.env.step(action) + # obs, rew, self.done, _ = self.env.step(action) + # print('Diff: ', np.linalg.norm(obs - obs_real), np.linalg.norm(rew - rew_real)) + # print('MEnv: ', np.linalg.norm(obs ), np.linalg.norm(rew )) + # obs += np.random.randn(obs.shape[-1]) + # # Todo: remove + # self.env.state = self.obs + # done = rew > self.threshold + + self.len_episode += 1 + # print('threshold at:', self.threshold) +# For niky hardcoded reward threshold in [-1,1] space from [0,1] -0.05 => 0.9------------------------------------------------------------ + if rew > 0.9: # self.threshold: TODO: to be changed +# ---------------------------------------------------------------------------------------------------------------------- + self.done = True + # print("Done", rew) + if self.len_episode >= self.max_steps: + self.done = True + return self.obs, (rew - 1) / 2, self.done, "" + else: + # self.obs, rew, done, _ = real_env.step(action) + # return self.obs, rew, done, "" + pass + # return env.step(action) + + +class StructEnv(gym.Wrapper): + ''' + Gym Wrapper to store information like number of steps and total reward of the last espisode. + ''' + + def __init__(self, env): + gym.Wrapper.__init__(self, env) + self.n_obs = self.env.reset() + self.total_rew = 0 + self.len_episode = 0 + + def reset(self, **kwargs): + self.n_obs = self.env.reset(**kwargs) + self.total_rew = 0 + self.len_episode = 0 + return self.n_obs.copy() + + def step(self, action): + ob, reward, done, info = self.env.step(action) + # print('reward in struct', reward) + self.total_rew += reward + self.len_episode += 1 + return ob, reward, done, info + + def get_episode_reward(self): + return self.total_rew + + def get_episode_length(self): + return self.len_episode + + +def restore_model(old_model_variables, m_variables): + # variable used as index for restoring the actor's parameters + it_v2 = tf.Variable(0, trainable=False) + + restore_m_params = [] + for m_v in m_variables: + upd_m_rsh = tf.reshape(old_model_variables[it_v2: it_v2 + tf.reduce_prod(m_v.shape)], shape=m_v.shape) + restore_m_params.append(m_v.assign(upd_m_rsh)) + it_v2 += tf.reduce_prod(m_v.shape) + + return tf.group(*restore_m_params) + + +def METRPO(env_name, hidden_sizes=[32, 32], cr_lr=5e-3, num_epochs=50, gamma=0.99, lam=0.95, number_envs=1, + critic_iter=10, steps_per_env=100, delta=0.05, algorithm='TRPO', conj_iters=10, minibatch_size=1000, + mb_lr_start=0.0001, model_batch_size=512, simulated_steps=1000, num_ensemble_models=2, model_iter=15, + init_random_steps=steps_per_env): + ''' + Model Ensemble Trust Region Policy Optimization + The states and actions are provided by the gym environement with the correct boxs. + The reward has to be between [-1,0]. 
+ Parameters: + ----------- + env_name: Name of the environment + hidden_sizes: list of the number of hidden units for each layer + cr_lr: critic learning rate + num_epochs: number of training epochs + gamma: discount factor + lam: lambda parameter for computing the GAE + number_envs: number of "parallel" synchronous environments + # NB: it isn't distributed across multiple CPUs + critic_iter: Number of SGD iterations on the critic per epoch + steps_per_env: number of steps per environment + # NB: the total number of steps per epoch will be: steps_per_env*number_envs + delta: Maximum KL divergence between two policies. Scalar value + algorithm: type of algorithm. Either 'TRPO' or 'NPO' + conj_iters: number of conjugate gradient iterations + minibatch_size: Batch size used to train the critic + mb_lr: learning rate of the environment model + model_batch_size: batch size of the environment model + simulated_steps: number of simulated steps for each policy update + num_ensemble_models: number of models + model_iter: number of iterations without improvement before stopping training the model + ''' + # TODO: add ME-TRPO hyperparameters + + tf.reset_default_graph() + + # Create a few environments to collect the trajectories + + # envs = [StructEnv(gym.make(env_name)) for _ in range(number_envs)] + envs = [StructEnv(make_env()) for _ in range(number_envs)] + env_test = StructEnv(make_env(test_env=True)) + # env_test = gym.make(env_name) + print('env_test' * 4) + + # env_test = make_env(test=True) + # env_test = gym.wrappers.Monitor(env_test, "VIDEOS/", force=True, video_callable=lambda x: x%10 == 0) + # to be changed in real test + # env_test = FelLocalEnv(tango=tango) + # env_test.test = True + # env_test_1 = FelLocalEnv(tango=tango) + # env_test_1.test = True + + # If the scaling is not perfomed this has to be changed + low_action_space = -1 # envs[0].action_space.low + high_action_space = 1 # envs[0].action_space.high + + obs_dim = envs[0].observation_space.shape + act_dim = envs[0].action_space.shape[0] + + # print(envs[0].action_space, envs[0].observation_space, low_action_space, + # high_action_space) + + # Placeholders for model + act_ph = tf.placeholder(shape=(None, act_dim), dtype=tf.float32, name='act') + obs_ph = tf.placeholder(shape=(None, obs_dim[0]), dtype=tf.float32, name='obs') + # NEW + nobs_ph = tf.placeholder(shape=(None, obs_dim[0]), dtype=tf.float32, name='nobs') + rew_ph = tf.placeholder(shape=(None, 1), dtype=tf.float32, name='rew') + + ret_ph = tf.placeholder(shape=(None,), dtype=tf.float32, name='ret') + adv_ph = tf.placeholder(shape=(None,), dtype=tf.float32, name='adv') + old_p_log_ph = tf.placeholder(shape=(None,), dtype=tf.float32, name='old_p_log') + old_mu_ph = tf.placeholder(shape=(None, act_dim), dtype=tf.float32, name='old_mu') + old_log_std_ph = tf.placeholder(shape=(act_dim), dtype=tf.float32, name='old_log_std') + p_ph = tf.placeholder(shape=(None,), dtype=tf.float32, name='p_ph') + + # Placeholder for learning rate + mb_lr_ = tf.placeholder("float", None) # , name='mb_lr') + + # result of the conjugate gradient algorithm + cg_ph = tf.placeholder(shape=(None,), dtype=tf.float32, name='cg') + + ######################################################### + ######################## POLICY ######################### + ######################################################### + + old_model_variables = tf.placeholder(shape=(None,), dtype=tf.float32, name='old_model_variables') + + # Neural network that represent the policy + with tf.variable_scope('actor_nn'): + 
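+        # [Editor's note -- hedged sketch, not part of the original experiment script.]
+        # The actor built in this scope is a Gaussian policy: a tanh MLP produces the
+        # (clipped) mean p_means and a separate trainable vector log_std holds the log
+        # standard deviation, so actions are sampled roughly as
+        #
+        #   a = clip(mean + N(0, 1) * exp(log_std), low_action_space, high_action_space)
+        #
+        # Its parameters are later updated in policy_update() with the usual TRPO
+        # recipe: solve F x = g with conjugate_gradient() (F is the Fisher matrix,
+        # accessed only through the Fisher-vector product Fx), take the largest step
+        # allowed by the KL constraint,
+        #
+        #   beta = sqrt(2 * delta / (x^T F x)),   theta <- theta - alpha * beta * x,
+        #
+        # with alpha found by backtracking_line_search().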
p_means = mlp(obs_ph, hidden_sizes, act_dim, tf.tanh, last_activation=tf.tanh) + p_means = tf.clip_by_value(p_means, low_action_space, high_action_space) + log_std = tf.get_variable(name='log_std', initializer=np.ones(act_dim, dtype=np.float32)) + + # Neural network that represent the value function + with tf.variable_scope('critic_nn'): + s_values = mlp(obs_ph, hidden_sizes, 1, tf.tanh, last_activation=None) + s_values = tf.squeeze(s_values) + + # Add "noise" to the predicted mean following the Gaussian distribution with standard deviation e^(log_std) + p_noisy = p_means + tf.random_normal(tf.shape(p_means), 0, 1) * tf.exp(log_std) + # Clip the noisy actions + a_sampl = tf.clip_by_value(p_noisy, low_action_space, high_action_space) + + # Compute the gaussian log likelihood + p_log = gaussian_log_likelihood(act_ph, p_means, log_std) + + # Measure the divergence + diverg = tf.reduce_mean(tf.exp(old_p_log_ph - p_log)) + + # ratio + ratio_new_old = tf.exp(p_log - old_p_log_ph) + # TRPO surrogate loss function + p_loss = - tf.reduce_mean(ratio_new_old * adv_ph) + + # MSE loss function + v_loss = tf.reduce_mean((ret_ph - s_values) ** 2) + # Critic optimization + v_opt = tf.train.AdamOptimizer(cr_lr).minimize(v_loss) + + def variables_in_scope(scope): + # get all trainable variables in 'scope' + return tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope) + + # Gather and flatten the actor parameters + p_variables = variables_in_scope('actor_nn') + p_var_flatten = flatten_list(p_variables) + + # Gradient of the policy loss with respect to the actor parameters + p_grads = tf.gradients(p_loss, p_variables) + p_grads_flatten = flatten_list(p_grads) + + ########### RESTORE ACTOR PARAMETERS ########### + p_old_variables = tf.placeholder(shape=(None,), dtype=tf.float32, name='p_old_variables') + # variable used as index for restoring the actor's parameters + it_v1 = tf.Variable(0, trainable=False) + restore_params = [] + + for p_v in p_variables: + upd_rsh = tf.reshape(p_old_variables[it_v1: it_v1 + tf.reduce_prod(p_v.shape)], shape=p_v.shape) + restore_params.append(p_v.assign(upd_rsh)) + it_v1 += tf.reduce_prod(p_v.shape) + + restore_params = tf.group(*restore_params) + + # gaussian KL divergence of the two policies + dkl_diverg = gaussian_DKL(old_mu_ph, old_log_std_ph, p_means, log_std) + + # Jacobian of the KL divergence (Needed for the Fisher matrix-vector product) + dkl_diverg_grad = tf.gradients(dkl_diverg, p_variables) + + dkl_matrix_product = tf.reduce_sum(flatten_list(dkl_diverg_grad) * p_ph) + print('dkl_matrix_product', dkl_matrix_product.shape) + # Fisher vector product + # The Fisher-vector product is a way to compute the A matrix without the need of the full A + Fx = flatten_list(tf.gradients(dkl_matrix_product, p_variables)) + + ## Step length + beta_ph = tf.placeholder(shape=(), dtype=tf.float32, name='beta') + # NPG update + npg_update = beta_ph * cg_ph + + ## alpha is found through line search + alpha = tf.Variable(1., trainable=False) + # TRPO update + trpo_update = alpha * npg_update + + #################### POLICY UPDATE ################### + # variable used as an index + it_v = tf.Variable(0, trainable=False) + p_opt = [] + # Apply the updates to the policy + for p_v in p_variables: + print(p_v) + upd_rsh = tf.reshape(trpo_update[it_v: it_v + tf.reduce_prod(p_v.shape)], shape=p_v.shape) + p_opt.append(p_v.assign_sub(upd_rsh)) + it_v += tf.reduce_prod(p_v.shape) + + p_opt = tf.group(*p_opt) + + ######################################################### + 
######################### MODEL ######################### + ######################################################### + + # Create a new class for the model: + # NN class + class NN: + def __init__(self, x, y, y_dim, hidden_size, n, learning_rate, init_params): + self.init_params = init_params + + # set up NN + self.inputs = x + self.y_target = y + + self.layer_1_w = tf.layers.Dense(hidden_size, + activation=tf.nn.tanh, # try using tf.nn.relu, tf.erf, tf.nn.tanh etc. + kernel_initializer=tf.random_normal_initializer(mean=0., + stddev=self.init_params.get( + 'init_stddev_1_w')), + bias_initializer=tf.random_normal_initializer(mean=0., + stddev=self.init_params.get( + 'init_stddev_1_b'))) + + self.layer_1 = self.layer_1_w.apply(self.inputs) + + self.layer_2_w = tf.layers.Dense(hidden_size, + activation=tf.nn.tanh, # try using tf.nn.relu, tf.erf, tf.nn.tanh etc. + kernel_initializer=tf.random_normal_initializer(mean=0., + stddev=self.init_params.get( + 'init_stddev_1_w')), + bias_initializer=tf.random_normal_initializer(mean=0., + stddev=self.init_params.get( + 'init_stddev_1_b'))) + + self.layer_2 = self.layer_2_w.apply(self.layer_1) + + self.output_w = tf.layers.Dense(y_dim, + activation=None, use_bias=False, + kernel_initializer=tf.random_normal_initializer(mean=0., + stddev=self.init_params.get( + 'init_stddev_2_w'))) + + self.output = self.output_w.apply(self.layer_2) + + # set up loss and optimiser - we'll modify this later with anchoring regularisation + self.opt_method = tf.train.AdamOptimizer(learning_rate) + self.mse_ = tf.cast(1 / tf.shape(self.inputs, out_type=tf.int64)[0], tf.float32) * \ + tf.reduce_sum(tf.square(self.y_target - self.output)) + self.loss_ = tf.cast(1 / tf.shape(self.inputs, out_type=tf.int64)[0], tf.float32) * \ + tf.reduce_sum(tf.square(self.y_target - self.output)) + self.optimizer = self.opt_method.minimize(self.loss_) + self.optimizer_mse = self.opt_method.minimize(self.mse_) + # return self.mse_ + + def get_weights(self): + '''method to return current params''' + + ops = [self.layer_1_w.kernel, self.layer_1_w.bias, + self.layer_2_w.kernel, self.layer_2_w.bias, + self.output_w.kernel] + w1, b1, w2, b2, w = sess.run(ops) + + return w1, b1, w2, b2, w + + def anchor(self, lambda_anchor): + '''regularise around initialised parameters''' + + w1, b1, w2, b2, w = self.get_weights() + + # get initial params + self.w1_init, self.b1_init, self.w2_init, self.b2_init, self.w_out_init = w1, b1, w2, b2, w + + loss_anchor = lambda_anchor[0] * tf.reduce_sum(tf.square(self.w1_init - self.layer_1_w.kernel)) + loss_anchor += lambda_anchor[1] * tf.reduce_sum(tf.square(self.b1_init - self.layer_1_w.bias)) + + loss_anchor = lambda_anchor[0] * tf.reduce_sum(tf.square(self.w2_init - self.layer_2_w.kernel)) + loss_anchor += lambda_anchor[1] * tf.reduce_sum(tf.square(self.b2_init - self.layer_2_w.bias)) + + loss_anchor += lambda_anchor[2] * tf.reduce_sum(tf.square(self.w_out_init - self.output_w.kernel)) + + # combine with original loss + # norm_val = 1/tf.shape(self.inputs)[0] + self.loss_ = self.loss_ + tf.scalar_mul(tf.cast(1 / tf.shape(self.inputs)[0], tf.float32), loss_anchor) + # self.loss_ = self.loss_ + tf.scalar_mul(1 / 1000, loss_anchor) + self.optimizer = self.opt_method.minimize(self.loss_) + return self.optimizer, self.loss_ + + m_opts = [] + m_losses = [] + + nobs_pred_m = [] + act_obs = tf.concat([obs_ph, act_ph], 1) + target = tf.concat([nobs_ph, rew_ph], 1) + + # Set the priors for the anchor method: + # TODO: How to set these correctly? 
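+    # [Editor's note -- hedged explanation of the anchor priors, not original code.]
+    # For anchored ensembles the per-layer regularisation strength is
+    # lambda = data_noise / prior_variance, where the prior variance is the square of
+    # the initialisation stddev of that layer. With the values used below this gives,
+    # for instance:
+    #
+    #   lambda_layer  = 0.01 / (np.sqrt(100) ** 2)        ~= 1e-4
+    #   lambda_output = 0.01 / ((1.0 / np.sqrt(100)) ** 2) ~= 1.0
+    #
+    # which is what the lambda_anchor expression below computes.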
+ init_params = dict(init_stddev_1_w=np.sqrt(100), + init_stddev_1_b=np.sqrt(100), + init_stddev_2_w=1.0 / np.sqrt(100)) + + data_noise = 0.01 # estimated noise variance + lambda_anchor = data_noise / (np.array([init_params['init_stddev_1_w'], + init_params['init_stddev_1_b'], + init_params['init_stddev_2_w']]) ** 2) + # computational graph of N models and the correct losses for the anchor method + m_classes = [] + for i in range(num_ensemble_models): + with tf.variable_scope('model_' + str(i) + '_nn'): + # TODO: Add variable size of network + hidden_sizes = [100, 100] + nobs_pred = mlp(x=act_obs, hidden_layers=hidden_sizes, output_layer=obs_dim[0] + 1, + activation=tf.tanh, last_activation=tf.tanh) + # m_class = NN(x=act_obs, y=target, y_dim=obs_dim[0] + 1, + # learning_rate=1e-4, n=i, + # hidden_size=100, init_params=init_params) + + # nobs_pred = m_class.output + + nobs_pred_m.append(nobs_pred) + + m_loss = tf.reduce_mean((tf.concat([nobs_ph, rew_ph], 1) - nobs_pred) ** 2) + m_opts.append(tf.train.AdamOptimizer(learning_rate=mb_lr_).minimize(m_loss)) + m_losses.append(m_loss) + + # m_classes.append(m_class) + # m_losses.append(m_class.mse_) + # m_opts.append(m_class.optimizer_mse) + + ##################### RESTORE MODEL ###################### + initialize_models = [] + models_variables = [] + for i in range(num_ensemble_models): + m_variables = variables_in_scope('model_' + str(i) + '_nn') + initialize_models.append(restore_model(old_model_variables, m_variables)) + # List of weights as numpy + models_variables.append(flatten_list(m_variables)) + + ######################################################### + ##################### END MODEL ######################### + ######################################################### + # Time + now = datetime.now() + clock_time = "{}_{}_{}_{}".format(now.day, now.hour, now.minute, now.second) + print('Time:', clock_time) + + # Set scalars and hisograms for TensorBoard + tf.summary.scalar('p_loss', p_loss, collections=['train']) + tf.summary.scalar('v_loss', v_loss, collections=['train']) + tf.summary.scalar('p_divergence', diverg, collections=['train']) + tf.summary.scalar('ratio_new_old', tf.reduce_mean(ratio_new_old), collections=['train']) + tf.summary.scalar('dkl_diverg', dkl_diverg, collections=['train']) + tf.summary.scalar('alpha', alpha, collections=['train']) + tf.summary.scalar('beta', beta_ph, collections=['train']) + tf.summary.scalar('p_std_mn', tf.reduce_mean(tf.exp(log_std)), collections=['train']) + tf.summary.scalar('s_values_mn', tf.reduce_mean(s_values), collections=['train']) + tf.summary.histogram('p_log', p_log, collections=['train']) + tf.summary.histogram('p_means', p_means, collections=['train']) + tf.summary.histogram('s_values', s_values, collections=['train']) + tf.summary.histogram('adv_ph', adv_ph, collections=['train']) + tf.summary.histogram('log_std', log_std, collections=['train']) + scalar_summary = tf.summary.merge_all('train') + + tf.summary.scalar('old_v_loss', v_loss, collections=['pre_train']) + tf.summary.scalar('old_p_loss', p_loss, collections=['pre_train']) + pre_scalar_summary = tf.summary.merge_all('pre_train') + + hyp_str = '-spe_' + str(steps_per_env) + '-envs_' + str(number_envs) + '-cr_lr' + str(cr_lr) + '-crit_it_' + str( + critic_iter) + '-delta_' + str(delta) + '-conj_iters_' + str(conj_iters) + + file_writer = tf.summary.FileWriter('log_dir/' + env_name + '/' + algorithm + '_' + clock_time + '_' + hyp_str, + tf.get_default_graph()) + + 
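+    # [Editor's note] The summaries written by file_writer above can usually be
+    # inspected with TensorBoard, e.g. from the working directory:
+    #
+    #   tensorboard --logdir log_dir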
################################################################################################# + # Session start!!!!!!!! + # create a session + sess = tf.Session() + # initialize the variables + sess.run(tf.global_variables_initializer()) + + def action_op(o): + return sess.run([p_means, s_values], feed_dict={obs_ph: o}) + + def action_op_noise(o): + return sess.run([a_sampl, s_values], feed_dict={obs_ph: o}) + + def model_op(o, a, md_idx): + mo = sess.run(nobs_pred_m[md_idx], feed_dict={obs_ph: [o], act_ph: [a[0]]}) + return np.squeeze(mo[:, :-1]), np.squeeze(mo[:, -1]) + + def run_model_loss(model_idx, r_obs, r_act, r_nxt_obs, r_rew): + # print({'obs_ph': r_obs.shape, 'act_ph': r_act.shape, 'nobs_ph': r_nxt_obs.shape}) + r_act = np.squeeze(r_act, axis=2) + # print(r_act.shape) + r_rew = np.reshape(r_rew, (-1, 1)) + # print(r_rew.shape) + return_val = sess.run(m_losses[model_idx], + feed_dict={obs_ph: r_obs, act_ph: r_act, nobs_ph: r_nxt_obs, rew_ph: r_rew}) + return return_val + + def run_model_opt_loss(model_idx, r_obs, r_act, r_nxt_obs, r_rew, mb_lr): + r_act = np.squeeze(r_act, axis=2) + r_rew = np.reshape(r_rew, (-1, 1)) + return sess.run([m_opts[model_idx], m_losses[model_idx]], + feed_dict={obs_ph: r_obs, act_ph: r_act, nobs_ph: r_nxt_obs, rew_ph: r_rew, mb_lr_: mb_lr}) + # return sess.run([m_opts_anchor[model_idx], m_loss_anchor[model_idx]], + # feed_dict={obs_ph: r_obs, act_ph: r_act, nobs_ph: r_nxt_obs, rew_ph: r_rew, mb_lr_: mb_lr}) + + def model_assign(i, model_variables_to_assign): + ''' + Update the i-th model's parameters + ''' + return sess.run(initialize_models[i], feed_dict={old_model_variables: model_variables_to_assign}) + + # def anchor(model_idx): + # m_classes[model_idx].anchor(lambda_anchor=lambda_anchor) + + def policy_update(obs_batch, act_batch, adv_batch, rtg_batch, it): + # log probabilities, logits and log std of the "old" policy + # "old" policy refer to the policy to optimize and that has been used to sample from the environment + act_batch = np.squeeze(act_batch, axis=2) + old_p_log, old_p_means, old_log_std = sess.run([p_log, p_means, log_std], + feed_dict={obs_ph: obs_batch, act_ph: act_batch, + adv_ph: adv_batch, ret_ph: rtg_batch}) + # get also the "old" parameters + old_actor_params = sess.run(p_var_flatten) + if it < 1: + std_vals = sess.run([log_std], feed_dict={log_std: np.ones(act_dim)}) + # print(std_vals) + # old_p_loss is later used in the line search + # run pre_scalar_summary for a summary before the optimization + old_p_loss, summary = sess.run([p_loss, pre_scalar_summary], + feed_dict={obs_ph: obs_batch, act_ph: act_batch, adv_ph: adv_batch, + ret_ph: rtg_batch, old_p_log_ph: old_p_log}) + file_writer.add_summary(summary, step_count) + + file_writer.add_summary(summary, step_count) + file_writer.flush() + + def H_f(p): + ''' + Run the Fisher-Vector product on 'p' to approximate the Hessian of the DKL + ''' + return sess.run(Fx, + feed_dict={old_mu_ph: old_p_means, old_log_std_ph: old_log_std, p_ph: p, obs_ph: obs_batch, + act_ph: act_batch, adv_ph: adv_batch, ret_ph: rtg_batch}) + + g_f = sess.run(p_grads_flatten, + feed_dict={old_mu_ph: old_p_means, obs_ph: obs_batch, act_ph: act_batch, adv_ph: adv_batch, + ret_ph: rtg_batch, old_p_log_ph: old_p_log}) + ## Compute the Conjugate Gradient so to obtain an approximation of H^(-1)*g + # Where H in reality isn't the true Hessian of the KL divergence but an approximation of it computed via Fisher-Vector Product (F) + conj_grad = conjugate_gradient(H_f, g_f, iters=conj_iters) + + # Compute the 
step length + beta_np = np.sqrt(2 * delta / (1e-10 + np.sum(conj_grad * H_f(conj_grad)))) + + def DKL(alpha_v): + ''' + Compute the KL divergence. + It optimize the function to compute the DKL. Afterwards it restore the old parameters. + ''' + sess.run(p_opt, feed_dict={beta_ph: beta_np, alpha: alpha_v, cg_ph: conj_grad, obs_ph: obs_batch, + act_ph: act_batch, adv_ph: adv_batch, old_p_log_ph: old_p_log}) + a_res = sess.run([dkl_diverg, p_loss], + feed_dict={old_mu_ph: old_p_means, old_log_std_ph: old_log_std, obs_ph: obs_batch, + act_ph: act_batch, adv_ph: adv_batch, ret_ph: rtg_batch, + old_p_log_ph: old_p_log}) + sess.run(restore_params, feed_dict={p_old_variables: old_actor_params}) + return a_res + + # Actor optimization step + # Different for TRPO or NPG + # Backtracing line search to find the maximum alpha coefficient s.t. the constraint is valid + best_alpha = backtracking_line_search(DKL, delta, old_p_loss, p=0.8) + sess.run(p_opt, feed_dict={beta_ph: beta_np, alpha: best_alpha, + cg_ph: conj_grad, obs_ph: obs_batch, act_ph: act_batch, + adv_ph: adv_batch, old_p_log_ph: old_p_log}) + + lb = len(obs_batch) + shuffled_batch = np.arange(lb) + np.random.shuffle(shuffled_batch) + + # Value function optimization steps + for _ in range(critic_iter): + # shuffle the batch on every iteration + np.random.shuffle(shuffled_batch) + for idx in range(0, lb, minibatch_size): + minib = shuffled_batch[idx:min(idx + minibatch_size, lb)] + sess.run(v_opt, feed_dict={obs_ph: obs_batch[minib], ret_ph: rtg_batch[minib]}) + + def train_model(tr_obs, tr_act, tr_nxt_obs, tr_rew, v_obs, v_act, v_nxt_obs, v_rew, step_count, model_idx, mb_lr): + + # Get validation loss on the old model only used for monitoring + mb_valid_loss1 = run_model_loss(model_idx, v_obs, v_act, v_nxt_obs, v_rew) + + # Restore the initial random weights to have a new, clean neural network + # initial_variables_models - list stored before already in the code below - + # important for the anchor method + model_assign(model_idx, initial_variables_models[model_idx]) + + # Get validation loss on the now initialized model + mb_valid_loss = run_model_loss(model_idx, v_obs, v_act, v_nxt_obs, v_rew) + + acc_m_losses = [] + last_m_losses = [] + md_params = sess.run(models_variables[model_idx]) + best_mb = {'iter': 0, 'loss': mb_valid_loss, 'params': md_params} + it = 0 + + # Create mini-batch for training + lb = len(tr_obs) + shuffled_batch = np.arange(lb) + np.random.shuffle(shuffled_batch) + + # Run until the number of model_iter has passed from the best val loss at it on... 
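+        # [Editor's note -- illustrative summary of the loop below, not original code.]
+        # This is a plain early-stopping ("patience") loop: keep training on shuffled
+        # mini-batches, remember the parameters with the lowest validation loss in
+        # best_mb, and stop once model_iter iterations pass without improvement, i.e.
+        #
+        #   while it - best_mb['iter'] < model_iter:
+        #       train one pass over the mini-batches
+        #       if valid_loss < best_mb['loss']:
+        #           best_mb = {'iter': it, 'loss': valid_loss, 'params': current_params}
+        #       it += 1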
+ while best_mb['iter'] > it - model_iter: + + # update the model on each mini-batch + last_m_losses = [] + for idx in range(0, lb, model_batch_size): + minib = shuffled_batch[idx:min(idx + minibatch_size, lb)] + + if len(minib) != minibatch_size: + _, ml = run_model_opt_loss(model_idx, tr_obs[minib], tr_act[minib], tr_nxt_obs[minib], + tr_rew[minib], mb_lr=mb_lr) + acc_m_losses.append(ml) + last_m_losses.append(ml) + else: + pass + # print('Warning!') + + # Check if the loss on the validation set has improved + mb_valid_loss = run_model_loss(model_idx, v_obs, v_act, v_nxt_obs, v_rew) + if mb_valid_loss < best_mb['loss']: + best_mb['loss'] = mb_valid_loss + best_mb['iter'] = it + # store the parameters to the array + best_mb['params'] = sess.run(models_variables[model_idx]) + + it += 1 + # if it>=10000: + # break + # print('iteration: ', it) + + # Restore the model with the lower validation loss + model_assign(model_idx, best_mb['params']) + + print('Model:{}, iter:{} -- Old Val loss:{:.6f} New Val loss:{:.6f} -- New Train loss:{:.6f}'.format(model_idx, + it, + mb_valid_loss1, + best_mb[ + 'loss'], + np.mean( + last_m_losses))) + summary = tf.Summary() + summary.value.add(tag='supplementary/m_loss', simple_value=np.mean(acc_m_losses)) + summary.value.add(tag='supplementary/iterations', simple_value=it) + file_writer.add_summary(summary, step_count) + file_writer.flush() + + def plot_results(env_wrapper, label, **kwargs): + # plotting + print('now plotting...') + rewards = env_wrapper.env.current_buffer.get_data()['rews'] + + # initial_states = env.initial_conditions + + iterations = [] + finals = [] + means = [] + stds = [] + + # init_states = pd.read_pickle('/Users/shirlaen/PycharmProjects/DeepLearning/spinningup/Environments/initData') + + for i in range(len(rewards)): + if (len(rewards[i]) > 1): + # finals.append(rewards[i][len(rewards[i]) - 1]) + finals.append(rewards[i][-1]) + means.append(np.mean(rewards[i][1:])) + stds.append(np.std(rewards[i][1:])) + iterations.append(len(rewards[i])) + # print(iterations) + x = range(len(iterations)) + iterations = np.array(iterations) + finals = np.array(finals) + means = np.array(means) + stds = np.array(stds) + + plot_suffix = label # , Fermi time: {env.TOTAL_COUNTER / 600:.1f} h' + + fig, axs = plt.subplots(2, 1, sharex=True) + + ax = axs[0] + ax.plot(x, iterations) + ax.set_ylabel('Iterations (1)') + ax.set_title(plot_suffix) + # fig.suptitle(label, fontsize=12) + if 'data_number' in kwargs: + ax1 = plt.twinx(ax) + color = 'lime' + ax1.set_ylabel('Mean reward', color=color) # we already handled the x-label with ax1 + ax1.tick_params(axis='y', labelcolor=color) + ax1.plot(x, kwargs.get('data_number'), color=color) + + ax = axs[1] + color = 'blue' + ax.set_ylabel('Final reward', color=color) # we already handled the x-label with ax1 + ax.tick_params(axis='y', labelcolor=color) + ax.plot(x, finals, color=color) + + ax.set_title('Final reward per episode') # + plot_suffix) + ax.set_xlabel('Episodes (1)') + + ax1 = plt.twinx(ax) + color = 'lime' + ax1.set_ylabel('Mean reward', color=color) # we already handled the x-label with ax1 + ax1.tick_params(axis='y', labelcolor=color) + ax1.fill_between(x, means - stds, means + stds, + alpha=0.5, edgecolor=color, facecolor='#FF9848') + ax1.plot(x, means, color=color) + + # ax.set_ylim(ax1.get_ylim()) + if 'save_name' in kwargs: + plt.savefig(kwargs.get('save_name') + '.pdf') + # fig.tight_layout() + plt.show() + + def plot_observables(data, label, **kwargs): + """plot observables during the test""" + + 
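+        # [Editor's note] `data` is assumed to have the layout built in the training
+        # loop below and passed to save_data(), where the reward-like entries are
+        # [means, stds] pairs, e.g.
+        #
+        #   data = dict(sim_rewards_all=[sim_rewards_all, sim_rewards_std_all],
+        #               batch_rews_all=[batch_rews_all, batch_rews_std_all],
+        #               tests_all=[tests_all, tests_std_all],
+        #               entropy_all=entropy_all,
+        #               step_counts_all=step_counts_all,
+        #               info=label)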
sim_rewards_all = np.array(data.get('sim_rewards_all')) + step_counts_all = np.array(data.get('step_counts_all')) + batch_rews_all = np.array(data.get('batch_rews_all')) + tests_all = np.array(data.get('tests_all')) + + fig, axs = plt.subplots(2, 1, sharex=True) + x = np.arange(len(batch_rews_all[0])) + ax = axs[0] + ax.step(x, batch_rews_all[0]) + ax.fill_between(x, batch_rews_all[0] - batch_rews_all[1], batch_rews_all[0] + batch_rews_all[1], + alpha=0.5) + ax.set_ylabel('rews per batch') + + ax.set_title(label) + + # plt.tw + ax2 = ax.twinx() + + color = 'lime' + ax2.set_ylabel('data points', color=color) # we already handled the x-label with ax1 + ax2.tick_params(axis='y', labelcolor=color) + ax2.step(x, step_counts_all, color=color) + + ax = axs[1] + ax.plot(sim_rewards_all[0], ls=':') + ax.fill_between(x, sim_rewards_all[0] - sim_rewards_all[1], sim_rewards_all[0] + sim_rewards_all[1], + alpha=0.5) + try: + ax.plot(tests_all[0]) + ax.fill_between(x, tests_all[0] - tests_all[1], tests_all[0] + tests_all[1], + alpha=0.5) + except: + pass + ax.set_ylabel('rewards tests') + # plt.tw + ax2 = ax.twinx() + + color = 'lime' + ax2.set_ylabel('entropy', color=color) # we already handled the x-label with ax1 + ax2.tick_params(axis='y', labelcolor=color) + ax2.plot(entropy_all, color=color) + plt.show() + + def save_data(data, **kwargs): + '''logging functon''' + # if 'directory_name' in kwargs: + # project_directory = kwargs.get('directory_name') + now = datetime.now() + clock_time = f'{now.month:0>2}_{now.day:0>2}_{now.hour:0>2}_{now.minute:0>2}_{now.second:0>2}' + out_put_writer = open(project_directory + clock_time + '_training_observables', 'wb') + pickle.dump(data, out_put_writer, -1) + out_put_writer.close() + + # variable to store the total number of steps + step_count = 0 + model_buffer = FullBuffer() + print('Env batch size:', steps_per_env, ' Batch size:', steps_per_env * number_envs) + + # Create a simulated environment + sim_env = NetworkEnv(make_env(), model_op, None, num_ensemble_models) + + # ------------------------------------------------------------------------------------------------------ + # -------------------------------------Try to set correct anchors--------------------------------------- + # Get the initial parameters of each model + # These are used in later epochs when we aim to re-train the models anew with the new dataset + initial_variables_models = [] + for model_var in models_variables: + initial_variables_models.append(sess.run(model_var)) + + # update the anchor model losses: + # m_opts_anchor = [] + # m_loss_anchor = [] + # for i in range(num_ensemble_models): + # opt, loss = m_classes[i].anchor(lambda_anchor=lambda_anchor) + # m_opts_anchor.append(opt) + # m_loss_anchor.append(loss) + + # ------------------------------------------------------------------------------------------------------ + # -------------------------------------Try to set correct anchors--------------------------------------- + + total_iterations = 0 + + converged_flag = False + # save_data = save_data(clock_time) + sim_rewards_all = [] + sim_rewards_std_all = [] + entropy_all = [] + tests_all = [] + tests_std_all = [] + batch_rews_all = [] + batch_rews_std_all = [] + step_counts_all = [] + for ep in range(num_epochs): + if (converged_flag): + print('Converged!!!!') + break + # lists to store rewards and length of the trajectories completed + batch_rew = [] + batch_len = [] + print('============================', ep, '============================') + # Execute in serial the environment, 
storing temporarily the trajectories. + for env in envs: + # Todo: Test randomization stronger if reward lower...we need a good scheme + # target_threshold ????? + init_log_std = np.ones(act_dim) * np.log(np.random.rand() * 1) + env.reset() + + # iterate over a fixed number of steps + steps_train = init_random_steps if ep == 0 else steps_per_env + # steps_train = steps_per_env + for _ in range(steps_train): + # found = False + # while not(found): + # run the policy + + if ep == 0: + # Sample random action during the first epoch + act = np.random.uniform(-1, 1, size=env.action_space.shape[-1]) + + else: + + act = sess.run(a_sampl, feed_dict={obs_ph: [env.n_obs], log_std: init_log_std}) + # act = np.clip(act + np.random.randn(act.shape[0], act.shape[1]) * 0.1, -1, 1) + + act = np.squeeze(act) + # print('act', act*12) + # take a step in the environment + obs2, rew, done, _ = env.step(np.array(act)) + + # add the new transition to the temporary buffer + model_buffer.store(env.n_obs.copy(), act, rew.copy(), obs2.copy(), done) + + env.n_obs = obs2.copy() + step_count += 1 + + if done: + batch_rew.append(env.get_episode_reward()) + batch_len.append(env.get_episode_length()) + + env.reset() + init_log_std = np.ones(act_dim) * np.log(np.random.rand() * 1) + + # if ep == 0: + # # try: + # # Initialize randomly a training and validation set + # model_buffer.generate_random_dataset() + # # get both datasets + # train_obs, train_act, train_rew, train_nxt_obs, _ = model_buffer.get_training_batch() + # valid_obs, valid_act, valid_rew, valid_nxt_obs, _ = model_buffer.get_valid_batch() + # target_threshold = max(max(valid_rew), max(train_rew)) + # # print('-- '*38, target_threshold) + # found = target_threshold>=-0.1 and step_count>=191 + # # except: + # # pass + + # save the data for plotting the collected data for the model + env.save_current_buffer() + + print('Ep:%d Rew:%.2f -- Step:%d' % (ep, np.mean(batch_rew), step_count)) + + # env_test.env.set_usage('default') + # plot_results(env_test, f'Total {total_iterations}, ' + # f'data points: {len(model_buffer.train_idx) + len(model_buffer.valid_idx)}, ' + # f'modelit: {ep}') + ############################################################ + ###################### MODEL LEARNING ###################### + ############################################################ + + # Initialize randomly a training and validation set + model_buffer.generate_random_dataset() + + # get both datasets + train_obs, train_act, train_rew, train_nxt_obs, _ = model_buffer.get_training_batch() + valid_obs, valid_act, valid_rew, valid_nxt_obs, _ = model_buffer.get_valid_batch() + std_vals = sess.run(log_std) + print('Log Std policy:', std_vals, np.mean(std_vals)) + + target_threshold = max(max(valid_rew), max(train_rew)) + sim_env.threshold = target_threshold # min(target_threshold, -0.05) + print('maximum: ', sim_env.threshold) + + # Learning rate as function of ep + lr = lambda ep: 1e-3 - ep / num_epochs * (1e-3 - 5e-4) + # for niky learning rate ----------------- + mb_lr = 1e-3 # 1e-2 # 1e-4 # 1e-3 # if ep < 1 else 1e-3 + # simulated_steps = simulated_steps if ep<10 else 10000 + print('mb_lr: ', mb_lr) + for i in range(num_ensemble_models): + # train the dynamic model on the datasets just sampled + train_model(train_obs, train_act, train_nxt_obs, train_rew, valid_obs, valid_act, valid_nxt_obs, valid_rew, + step_count, i, mb_lr=mb_lr) + + ############################################################ + ###################### POLICY LEARNING ###################### + 
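+    # [Editor's note -- hedged outline of this section, not original code.]
+    # The inner loop below trains the policy purely on the learned ensemble, roughly:
+    #
+    #   for it in range(max_training_iterations):
+    #       batch = simulate_environment(sim_env, action_op_noise, simulated_steps)
+    #       policy_update(*batch, it)            # TRPO step on simulated rollouts
+    #       if dynamic_wait_time(it, ep):        # every few iterations
+    #           evaluate the policy on each single-model NetworkEnv with test_agent()
+    #           stop once most (more than ~70%) of the models show no improvement
+    #           over best_sim_test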
############################################################ + + best_sim_test = -1e16 * np.ones(num_ensemble_models) + + # plot_results(env_test, f'Total {total_iterations}, ' + # f'data points: {len(model_buffer.train_idx) + len(model_buffer.valid_idx)}, ' + # f'modelit: {ep}') + + for it in range(max_training_iterations): + if converged_flag: + break + total_iterations += 1 + print('\t Policy it', it, end='..') + + ##################### MODEL SIMLUATION ##################### + # obs_batch, act_batch, adv_batch, rtg_batch = simulate_environment(sim_env, action_op_noise, simulated_steps) + batch, ep_length = simulate_environment(sim_env, action_op_noise, simulated_steps) + # verification_simulate_environment(sim_env, env_test, action_op_noise, 50) + obs_batch, act_batch, adv_batch, rtg_batch = batch + + ################# TRPO UPDATE ################ + policy_update(obs_batch, act_batch, adv_batch, rtg_batch, it) + std_vals = sess.run(log_std) + print('Log Std policy inner:', np.mean(std_vals)) + if np.mean(std_vals) < -5: + converged_flag = True + # Testing the policy on a real environment + # mn_test, mn_test_std, mn_length = test_agent(env_test, action_op, num_games=1) + # plot_results(env_test, 'ME-TRPO') + # print(' Test score: ', np.round(mn_test, 2), np.round(mn_test_std, 2), np.round(mn_length, 2)) + # mn_test, mn_test_std, mn_length = test_agent(env_test, action_op, num_games=1) + # summary = tf.Summary() + # summary.value.add(tag='test/performance', simple_value=mn_test) + # file_writer.add_summary(summary, step_count) + # file_writer.flush() + + # Test the policy on simulated environment. + # dynamic_wait_time_count = dynamic_wait_time(ep) + if dynamic_wait_time(it, ep): + print('Iterations: ', total_iterations) + + # for niky perform test! ----------------------------- + # env_test.env.set_usage('test') + # mn_test, mn_test_std, mn_length, mn_success = test_agent(env_test, action_op, num_games=50) + # perform test! ----------------------------- + label = f'Total {total_iterations}, ' + \ + f'data points: {len(model_buffer.train_idx) + len(model_buffer.valid_idx)}, ' + \ + f'ep: {ep}, it: {it}\n' + hyp_str_all + + # for niky plot results of test ----------------------------- + # plot_results(env_test, label=label) + + # env_test.save_current_buffer(info=label) + # print(' Test score: ', np.round(mn_test, 2), np.round(mn_test_std, 2), + # np.round(mn_length, 2), np.round(mn_success, 2)) + # + # # save the data for plotting the tests + # tests_all.append(mn_test) + # tests_std_all.append(mn_test_std) + + # perform test end! 
----------------------------- + env_test.env.set_usage('default') + + print('Simulated test:', end=' ** ') + + sim_rewards = [] + for i in range(num_ensemble_models): + sim_m_env = NetworkEnv(make_env(), model_op, None, i + 1) + mn_sim_rew, _, _, _ = test_agent(sim_m_env, action_op, num_games=100) + sim_rewards.append(mn_sim_rew) + print(mn_sim_rew, end=' ** ') + + print("") + + entropy_all.append(np.mean(std_vals)) + step_counts_all.append(step_count) + + sim_rewards = np.array(sim_rewards) + sim_rewards_all.append(np.mean(sim_rewards)) + sim_rewards_std_all.append(np.std(sim_rewards)) + + batch_rews_all.append(np.mean(batch_rew)) + batch_rews_std_all.append(np.std(batch_rew)) + + data = dict(sim_rewards_all=[sim_rewards_all, sim_rewards_std_all], + entropy_all=entropy_all, + step_counts_all=step_counts_all, + batch_rews_all=[batch_rews_all, batch_rews_std_all], + tests_all=[tests_all, tests_std_all], + info=label) + + # save the data for plotting the progress ------------------- + save_data(data=data) + + # plotting the progress ------------------- + # plot_observables(data=data, label=label) + + # stop training if the policy hasn't improved + if (np.sum(best_sim_test >= sim_rewards) > int(num_ensemble_models * 0.7)): + # or (len(sim_rewards[sim_rewards >= 990]) > int(num_ensemble_models * 0.7)): + if it > delay_before_convergence_check and ep < num_epochs - 1: + # Test the entropy measure as convergence criterion + # if np.diff(entropy_all)[-1] < 0: + # print('break') + # break + print('break') + break + else: + best_sim_test = sim_rewards + # Final verification: + # env_final = FelLocalEnv(tango=tango) + # env_final.test = True + # env.TOTAL_COUNTER = len(model_buffer.train_idx) + len(model_buffer.valid_idx) + # mn_test, mn_test_std, mn_length = test_agent(env_final, action_op, num_games=100) + # plot_results(env_final, 'ME-TRPO', save_name='Fermi') + + env_test.env.set_usage('final') + mn_test, mn_test_std, mn_length, _ = test_agent(env_test, action_op, num_games=50) + + label = f'Verification : total {total_iterations}, ' + \ + f'data points: {len(model_buffer.train_idx) + len(model_buffer.valid_idx)}, ' + \ + f'ep: {ep}, it: {it}\n' + \ + f'rew: {mn_test}, std: {mn_test_std}' + plot_results(env_test, label=label) + + env_test.save_current_buffer(info=label) + + # env_test.env.set_usage('default') + + # closing environments.. 
+ for env in envs: + env.close() + file_writer.close() + + +if __name__ == '__main__': + METRPO('', hidden_sizes=hidden_sizes, cr_lr=cr_lr, gamma=gamma, lam=lam, num_epochs=num_epochs, + steps_per_env=steps_per_env, + number_envs=number_envs, critic_iter=critic_iter, delta=delta, algorithm='TRPO', conj_iters=conj_iters, + minibatch_size=minibatch_size, + mb_lr_start=mb_lr, model_batch_size=model_batch_size, simulated_steps=simulated_steps, + num_ensemble_models=num_ensemble_models, model_iter=model_iter, init_random_steps=init_random_steps) + # plot the results + +# important notes: +# Storage +# Hyperparameters +# Scaling + +# Changes: +# No init steps and less step per env 31 instead of 51 and the number of iterations is dynamic + diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/laser_trajectory_control_env.py b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/laser_trajectory_control_env.py new file mode 100644 index 0000000..ea7fe4c --- /dev/null +++ b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/laser_trajectory_control_env.py @@ -0,0 +1,240 @@ +import numpy as np +import gym + +# from tango_connection import TangoConnection + +class LaserTrajectoryControlEnv(gym.Env): + + def __init__(self, tango, **kwargs): + self.init_rewards = [] + self.done = False + self.current_length = 0 + self.__name__ = 'LaserTrajectoryControlEnv' + + self.curr_episode = -1 + self.TOTAL_COUNTER = -1 + self.rewards = [] + self.actions = [] + self.states = [] + self.dones = [] + self.initial_conditions = [] + + self.max_length = 25 + self.max_steps = 10 + + # + self.tango = tango + + # some information from tango + self.system = self.tango.system + + self.state_size = self.tango.state_size + self.action_size = self.tango.action_size + + self.init_state = self.tango.init_state + self.init_intensity = self.tango.init_intensity + + # scaling factor definition + if 'half_range' in kwargs: + self.half_range = kwargs.get('half_range') + else: + self.half_range = 3000 + if self.system == 'eos': + self.half_range = 30000 # 30000 + + self.state_range = self.get_range() + self.state_scale = 2 * self.half_range + + # target intensity + if 'target_intensity' in kwargs: + self.target_intensity = kwargs.get('target_intensity') + else: + self.target_intensity = self.init_intensity + + # state, intensity and reward definition + self.init_state_norm = self.scale(self.init_state) + self.init_intensity_norm = self.get_intensity() + self.state = self.init_state_norm.copy() + self.intensity = self.init_intensity_norm.copy() + self.reward = self.get_reward() + + ## max action allowed + if 'max_action' in kwargs: + max_action = kwargs.get('max_action') + else: + max_action = 500 + # bigger max_action... evalueate if the size is correct! 
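+        # [Editor's note] max_action is given in raw hardware units and is divided by
+        # state_scale = 2 * half_range below, so the gym action bound is a fraction of
+        # the scaled [0, 1] state range. With the defaults above, for example:
+        #
+        #   half_range = 3000  -> state_scale = 6000
+        #   max_action = 500   -> self.max_action = 500 / 6000 ~= 0.083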
+ # max_action = 6000 # 3000 + if self.system == 'eos': + max_action = 5000 # 2500 # 5000 + + self.max_action = max_action/self.state_scale + + # observation space definition + self.observation_space = gym.spaces.Box(low=0.0, #+ self.max_action, + high=1.0, #- self.max_action, + shape=(self.state_size,), + dtype=np.float64) + + # action spacec definition + self.action_space = gym.spaces.Box(low=-self.max_action, + high=self.max_action, + shape=(self.action_size,), + dtype=np.float64) + + self.test = False + + def get_range(self): + # define the available state space + state_range = np.c_[self.init_state - self.half_range, self.init_state + self.half_range] + return state_range + + def scale(self, state): + # scales the state from state_range values to [0, 1] + state_scaled = (state - self.state_range[:, 0]) / self.state_scale + return state_scaled + + def descale(self, state): + # descales the state from [0, 1] to state_range values + state_descaled = state * self.state_scale + self.state_range[:, 0] + return state_descaled + + def set_state(self, state): + # writes descaled state + state_descaled = self.descale(state) + self.tango.set_state(state_descaled) + + def get_state(self): + # read scaled state + state = self.tango.get_state() + state_scaled = self.scale(state) + return state_scaled + + def norm_intensity(self, intensity): + # normalize the intensity with respect to target_intensity + intensity_norm = intensity/self.target_intensity + return intensity_norm + + def get_intensity(self): + # read normalized intensity + intensity = self.tango.get_intensity() + intensity_norm = self.norm_intensity(intensity) + return intensity_norm + + def step(self, action): + # step method + self.current_length += 1 + state, reward = self.take_action(action) + + intensity = self.get_intensity() + if intensity > 0.95: + self.done = True + + #elif self.current_length >= self.max_length: + elif self.current_length >= self.max_steps: + self.done = True + self.add_trajectory_data(state=state, action=action, reward=reward, done=self.done) + + print('step', self.current_length,'state ', state, 'a ', action, 'r ', reward) + # self.rewards[self.curr_episode].append(reward) + + return state, reward, self.done, {} + + def take_action(self, action): + # initial value: action /= 12 (maybe too small) + # action /= 12 + # take action method + new_state = self.state + action + + # state must remain in [0, 1] + if any(np.squeeze(new_state) < 0.0) or any(np.squeeze(new_state) > 1.0): + new_state = np.clip(new_state, 0.0, 1.0) + # print('WARNING: state boundaries!') + + # set new state to the machine + self.set_state(new_state) + state = self.get_state() + self.state = state + + # get new intensity from the machine + intensity = self.get_intensity() + self.intensity = intensity + + # reward calculation + reward = self.get_reward() + self.reward = reward + + return state, reward + + def get_reward(self): + # You can change reward function, but it should depend on intensity + # e.g. 
next line + # reward = -(1 - self.intensity / self.target_intensity) + reward = -(1 - self.intensity / 1.0) + + # reward = self.intensity + return reward + + def reset(self): + # reset method + + self.done = False + self.current_length = 0 + + # self.curr_episode += 1 + # self.rewards.append([]) + + bad_init = True + while bad_init: + new_state = self.observation_space.sample() + + self.set_state(new_state) + state = self.get_state() + self.state = state + + intensity = self.get_intensity() + self.intensity = intensity + self.init_rewards.append(-(1 - self.intensity / 1.0)) + + bad_init = False + + self.curr_episode += 1 + self.rewards.append([]) + self.actions.append([]) + self.states.append([]) + self.dones.append([]) + # self.add_trajectory_data(state=state, action=action, reward=reward, done=done) + self.states[self.curr_episode].append(state) + + return state + + def add_trajectory_data(self, state, action, reward, done): + self.rewards[self.curr_episode].append(reward) + self.actions[self.curr_episode].append(action) + self.states[self.curr_episode].append(state) + self.dones[self.curr_episode].append(done) + + def seed(self, seed=None): + # seed method + np.random.seed(seed) + + def render(self, mode='human'): + # render method + print('ERROR\nnot yet implemented!') + pass + + +if __name__ == '__main__': + + # fel + ''' + # system = 'eos' + system = 'fel2' + path = '/home/niky/FERMI/2020_10_06/configuration/' + conf_file = 'conf_'+system+'.json' + + filename = path+conf_file + tng = TangoConnection(conf_file=filename) + env = LaserTrajectoryControlEnv(tng) + #''' + diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/new_ME_TRPO_read_data_presi_gsi.py b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/new_ME_TRPO_read_data_presi_gsi.py new file mode 100644 index 0000000..68b7f18 --- /dev/null +++ b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/new_ME_TRPO_read_data_presi_gsi.py @@ -0,0 +1,132 @@ +import os +import pickle +import matplotlib.pyplot as plt +import numpy as np +project_directory = 'Data_logging/run_test/-nr_steps_25-cr_lr0.001-crit_it_80-d_0.05-conj_iters_10-n_ep_1-mini_bs_500' \ + '-m_bs_100-mb_lr_0.0005-sim_steps_2000-m_iter_15-ensnr_5-init_100/' +project_directory = 'Data_logging/ME_TRPO_stable/2020_10_06_ME_TRPO_stable@FERMI/run2/' +# for file in os.listdir(project_directory): +# filename = file +# print(filename) +# # filename = '18_17_49_35_default' +# filehandler = open(project_directory + filename, 'rb') +# object = pickle.load(filehandler) + +filenames = [] +for file in os.listdir(project_directory): + if 'final' in file: + filenames.append(file) + +filenames.sort() + +# filename = '09_25_19_18_04_training_observables' +filename = filenames[-1] +print(filename) + +filehandler = open(project_directory + filename, 'rb') +object = pickle.load(filehandler) + +def plot_results(data, label='Verification', **kwargs): + # plotting + print('now plotting') + rewards = data + print(rewards) + + # initial_states = env.initial_conditions + + iterations = [] + finals = [] + means = [] + + # init_states = pd.read_pickle('/Users/shirlaen/PycharmProjects/DeepLearning/spinningup/Environments/initData') + + for i in range(len(rewards)): + if (len(rewards[i]) > 1): + # finals.append(rewards[i][len(rewards[i]) - 1]) + finals.append(rewards[i][-1]) + means.append(np.sum(rewards[i][1:])) + iterations.append(len(rewards[i])) + print(finals) + plot_suffix = label # , Fermi time: {env.TOTAL_COUNTER / 600:.1f} h' + + fig, axs = plt.subplots(2, 1, sharex=True) + + ax = axs[0] 
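(Aside on the environment defined above: `LaserTrajectoryControlEnv` maps raw actuator settings into the unit interval via `scale`/`descale`, built from `init_state` and `half_range`. The sketch below reproduces that round trip outside the class; the numeric values of `init_state` and `half_range` are made-up placeholders, not taken from the FERMI configuration.)

```python
import numpy as np

# Placeholder machine settings (hypothetical values, not from the Tango config)
init_state = np.array([12000.0, 15000.0, 9000.0, 11000.0])
half_range = 3000.0

# Same construction as get_range()/scale()/descale() in LaserTrajectoryControlEnv
state_range = np.c_[init_state - half_range, init_state + half_range]
state_scale = 2 * half_range

def scale(state):
    # raw machine units -> [0, 1]
    return (state - state_range[:, 0]) / state_scale

def descale(state):
    # [0, 1] -> raw machine units
    return state * state_scale + state_range[:, 0]

# Round trip recovers the original settings; the reference point sits mid-range at 0.5
assert np.allclose(descale(scale(init_state)), init_state)
print(scale(init_state))  # -> [0.5 0.5 0.5 0.5]
```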
+ ax.plot(iterations) + ax.set_ylabel('steps') + ax.set_title(plot_suffix) + # fig.suptitle(label, fontsize=12) + + ax = axs[1] + color = 'blue' + ax.set_ylabel('Final reward', color=color) # we already handled the x-label with ax1 + ax.tick_params(axis='y', labelcolor=color) + ax.plot(finals, color=color) + + ax.set_title('Final reward per episode') # + plot_suffix) + ax.axhline(y=-0.05, c='blue', ls=':') + ax.set_xlabel('Episodes (1)') + + ax1 = plt.twinx(ax) + color = 'lime' + ax1.set_ylabel('Cumulative reward', color=color) # we already handled the x-label with ax1 + ax1.tick_params(axis='y', labelcolor=color) + ax1.plot(means, color=color) + + # ax.set_ylim(ax1.get_ylim()) + if 'save_name' in kwargs: + plt.savefig(kwargs.get('save_name') + '.pdf') + # fig.tight_layout() + fig.align_labels() + fig.tight_layout() + plt.show() + +plot_results(data=object['rews']) +filenames = [] +for file in os.listdir(project_directory): + if 'training_observables' in file: + filenames.append(file) + +filenames.sort() + +# filename = '09_25_19_18_04_training_observables' +filename = filenames[-1] +print(filename) + +filehandler = open(project_directory + filename, 'rb') +object = pickle.load(filehandler) + +sim_rewards_all = object['sim_rewards_all'][0] +entropy_all=object['entropy_all'] +step_counts_all=object['step_counts_all'] +batch_rews_all=object['batch_rews_all'][0] +tests_all = object['tests_all'][0] + +fig, axs = plt.subplots(2, 1, sharex=True) +x = np.arange(len(batch_rews_all)) +ax = axs[0] +ax.step(x, batch_rews_all) +ax.set_ylabel('rews per batch') +# plt.tw +ax2 = ax.twinx() + +color = 'lime' +ax2.set_ylabel('data points', color=color) # we already handled the x-label with ax1 +ax2.tick_params(axis='y', labelcolor=color) +ax2.step(x, step_counts_all, color=color) + +ax = axs[1] +ax.plot(sim_rewards_all, ls=':') +ax.plot(tests_all) +ax.set_ylabel('rewards model') +# plt.tw +ax2 = ax.twinx() + +color = 'lime' +ax2.set_ylabel(r'- log(std($p_\pi$))', color=color) # we already handled the x-label with ax1 +ax2.tick_params(axis='y', labelcolor=color) +ax2.plot(entropy_all, color=color) + +fig.align_labels() +fig.tight_layout() +fig.show() diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_07_53_32_default b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_07_53_32_default new file mode 100644 index 0000000..704ceeb Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_07_53_32_default differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_07_53_47_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_07_53_47_training_observables new file mode 100644 index 0000000..b03793b Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_07_53_47_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_07_53_57_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_07_53_57_training_observables new file mode 100644 index 0000000..374644d Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_07_53_57_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_07_54_06_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_07_54_06_training_observables new file mode 100644 index 0000000..bedd91e Binary files /dev/null and 
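(The reader script above selects the most recent `*_final` / `*_training_observables` pickle by filtering and sorting the timestamped filenames before unpickling. A small sketch of that pattern, assuming the same timestamp-prefixed naming; `load_latest` and the example path are placeholders, not part of the committed code.)

```python
import os
import pickle

def load_latest(project_directory, tag):
    """Load the newest pickle in project_directory whose name contains `tag`
    (e.g. 'final' or 'training_observables'); relies on the timestamped
    filename prefixes sorting chronologically, as in the script above."""
    names = sorted(f for f in os.listdir(project_directory) if tag in f)
    if not names:
        raise FileNotFoundError(f'no files matching {tag!r} in {project_directory}')
    with open(os.path.join(project_directory, names[-1]), 'rb') as fh:
        return pickle.load(fh)

# Hypothetical usage mirroring the plotting script (path is a placeholder):
# data = load_latest('Data_logging/ME_TRPO_stable/run2/', 'training_observables')
# sim_rewards_all, sim_rewards_std_all = data['sim_rewards_all']
```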
b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_07_54_06_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_07_54_15_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_07_54_15_training_observables new file mode 100644 index 0000000..c9b1db4 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_07_54_15_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_07_54_25_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_07_54_25_training_observables new file mode 100644 index 0000000..c8a39d1 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_07_54_25_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_07_54_34_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_07_54_34_training_observables new file mode 100644 index 0000000..961e1fa Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_07_54_34_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_07_54_43_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_07_54_43_training_observables new file mode 100644 index 0000000..4885ddf Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_07_54_43_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_07_54_53_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_07_54_53_training_observables new file mode 100644 index 0000000..76d5e6c Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_07_54_53_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_07_55_02_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_07_55_02_training_observables new file mode 100644 index 0000000..3e29d9a Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_07_55_02_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_07_55_11_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_07_55_11_training_observables new file mode 100644 index 0000000..3547aad Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_07_55_11_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_07_55_20_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_07_55_20_training_observables new file mode 100644 index 0000000..ab23a4b Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_07_55_20_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_07_55_39_default b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_07_55_39_default new file mode 100644 index 0000000..396fe73 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_07_55_39_default differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_07_55_54_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_07_55_54_training_observables new file mode 
100644 index 0000000..a5f0391 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_07_55_54_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_07_56_04_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_07_56_04_training_observables new file mode 100644 index 0000000..b3a42fd Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_07_56_04_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_07_56_14_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_07_56_14_training_observables new file mode 100644 index 0000000..18232e7 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_07_56_14_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_07_56_24_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_07_56_24_training_observables new file mode 100644 index 0000000..ce818da Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_07_56_24_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_07_56_34_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_07_56_34_training_observables new file mode 100644 index 0000000..550746c Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_07_56_34_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_07_56_53_default b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_07_56_53_default new file mode 100644 index 0000000..b9797b5 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_07_56_53_default differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_07_57_07_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_07_57_07_training_observables new file mode 100644 index 0000000..22fb582 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_07_57_07_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_07_57_16_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_07_57_16_training_observables new file mode 100644 index 0000000..c6c8a31 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_07_57_16_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_07_57_26_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_07_57_26_training_observables new file mode 100644 index 0000000..4b891d7 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_07_57_26_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_07_57_35_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_07_57_35_training_observables new file mode 100644 index 0000000..c5cd900 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_07_57_35_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_07_57_45_training_observables 
b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_07_57_45_training_observables new file mode 100644 index 0000000..d55757c Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_07_57_45_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_07_57_55_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_07_57_55_training_observables new file mode 100644 index 0000000..98a541d Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_07_57_55_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_07_58_04_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_07_58_04_training_observables new file mode 100644 index 0000000..e262a65 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_07_58_04_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_07_58_14_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_07_58_14_training_observables new file mode 100644 index 0000000..31513d9 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_07_58_14_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_07_58_23_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_07_58_23_training_observables new file mode 100644 index 0000000..233ee3f Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_07_58_23_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_07_58_33_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_07_58_33_training_observables new file mode 100644 index 0000000..9c65841 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_07_58_33_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_07_58_42_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_07_58_42_training_observables new file mode 100644 index 0000000..da41e1e Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_07_58_42_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_07_58_51_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_07_58_51_training_observables new file mode 100644 index 0000000..e6de528 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_07_58_51_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_07_59_01_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_07_59_01_training_observables new file mode 100644 index 0000000..d0a244f Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_07_59_01_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_07_59_10_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_07_59_10_training_observables new file mode 100644 index 0000000..ffc12e2 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_07_59_10_training_observables differ diff 
--git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_07_59_19_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_07_59_19_training_observables new file mode 100644 index 0000000..9f29f2d Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_07_59_19_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_07_59_29_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_07_59_29_training_observables new file mode 100644 index 0000000..2d44b06 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_07_59_29_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_07_59_53_default b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_07_59_53_default new file mode 100644 index 0000000..1c2acdc Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_07_59_53_default differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_00_06_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_00_06_training_observables new file mode 100644 index 0000000..5800aa7 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_00_06_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_00_16_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_00_16_training_observables new file mode 100644 index 0000000..8b353e8 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_00_16_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_00_26_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_00_26_training_observables new file mode 100644 index 0000000..c330faf Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_00_26_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_00_35_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_00_35_training_observables new file mode 100644 index 0000000..8707bdc Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_00_35_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_00_45_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_00_45_training_observables new file mode 100644 index 0000000..8167448 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_00_45_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_00_55_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_00_55_training_observables new file mode 100644 index 0000000..db4dc2a Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_00_55_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_01_05_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_01_05_training_observables new file mode 100644 index 0000000..f38248d Binary files /dev/null and 
b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_01_05_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_01_15_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_01_15_training_observables new file mode 100644 index 0000000..10bc67f Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_01_15_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_01_24_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_01_24_training_observables new file mode 100644 index 0000000..c68b3c0 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_01_24_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_01_34_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_01_34_training_observables new file mode 100644 index 0000000..9f37359 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_01_34_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_01_44_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_01_44_training_observables new file mode 100644 index 0000000..8ab94af Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_01_44_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_01_53_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_01_53_training_observables new file mode 100644 index 0000000..b646d29 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_01_53_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_02_03_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_02_03_training_observables new file mode 100644 index 0000000..b6fd94c Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_02_03_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_02_12_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_02_12_training_observables new file mode 100644 index 0000000..2f769c6 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_02_12_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_02_22_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_02_22_training_observables new file mode 100644 index 0000000..3ac6217 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_02_22_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_02_32_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_02_32_training_observables new file mode 100644 index 0000000..f1ac1fa Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_02_32_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_02_51_default b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_02_51_default 
new file mode 100644 index 0000000..d4e06a8 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_02_51_default differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_03_05_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_03_05_training_observables new file mode 100644 index 0000000..53e8c0d Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_03_05_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_03_15_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_03_15_training_observables new file mode 100644 index 0000000..3110c87 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_03_15_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_03_24_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_03_24_training_observables new file mode 100644 index 0000000..4167413 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_03_24_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_03_34_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_03_34_training_observables new file mode 100644 index 0000000..1eaba7e Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_03_34_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_03_44_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_03_44_training_observables new file mode 100644 index 0000000..bd063ef Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_03_44_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_03_54_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_03_54_training_observables new file mode 100644 index 0000000..09024f1 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_03_54_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_04_04_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_04_04_training_observables new file mode 100644 index 0000000..c2af163 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_04_04_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_04_14_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_04_14_training_observables new file mode 100644 index 0000000..add80d0 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_04_14_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_04_33_default b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_04_33_default new file mode 100644 index 0000000..64de959 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_04_33_default differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_04_52_training_observables 
b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_04_52_training_observables new file mode 100644 index 0000000..560d1b4 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_04_52_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_05_02_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_05_02_training_observables new file mode 100644 index 0000000..019df5d Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_05_02_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_05_12_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_05_12_training_observables new file mode 100644 index 0000000..42aa770 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_05_12_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_05_22_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_05_22_training_observables new file mode 100644 index 0000000..cac56c3 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_05_22_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_05_40_default b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_05_40_default new file mode 100644 index 0000000..4a17f99 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_05_40_default differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_05_58_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_05_58_training_observables new file mode 100644 index 0000000..77f3194 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_05_58_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_06_08_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_06_08_training_observables new file mode 100644 index 0000000..25cecf4 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_06_08_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_06_18_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_06_18_training_observables new file mode 100644 index 0000000..7272549 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_06_18_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_06_37_default b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_06_37_default new file mode 100644 index 0000000..ef8bad3 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_06_37_default differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_07_01_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_07_01_training_observables new file mode 100644 index 0000000..79e29f8 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_07_01_training_observables differ diff --git 
a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_07_11_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_07_11_training_observables new file mode 100644 index 0000000..cf5cce7 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_07_11_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_07_30_default b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_07_30_default new file mode 100644 index 0000000..6c8af45 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_07_30_default differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_07_49_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_07_49_training_observables new file mode 100644 index 0000000..1c06c84 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_07_49_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_07_59_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_07_59_training_observables new file mode 100644 index 0000000..6f91d60 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_07_59_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_08_09_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_08_09_training_observables new file mode 100644 index 0000000..413f4c3 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_08_09_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_08_28_default b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_08_28_default new file mode 100644 index 0000000..880750c Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_08_28_default differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_08_48_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_08_48_training_observables new file mode 100644 index 0000000..9ddb40f Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_08_48_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_08_58_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_08_58_training_observables new file mode 100644 index 0000000..ef9e933 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_08_58_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_09_08_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_09_08_training_observables new file mode 100644 index 0000000..b34f266 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_09_08_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_09_18_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_09_18_training_observables new file mode 100644 index 0000000..72023fd Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_09_18_training_observables 
differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_09_28_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_09_28_training_observables new file mode 100644 index 0000000..c70a365 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_09_28_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_09_48_default b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_09_48_default new file mode 100644 index 0000000..172ed5f Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_09_48_default differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_10_18_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_10_18_training_observables new file mode 100644 index 0000000..1e7b07c Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_10_18_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_10_27_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_10_27_training_observables new file mode 100644 index 0000000..0392644 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_10_27_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_10_46_default b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_10_46_default new file mode 100644 index 0000000..f7089d0 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_10_46_default differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_11_12_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_11_12_training_observables new file mode 100644 index 0000000..aded8b9 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_11_12_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_11_22_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_11_22_training_observables new file mode 100644 index 0000000..daabb58 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_11_22_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_11_32_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_11_32_training_observables new file mode 100644 index 0000000..bd270ed Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_11_32_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_11_42_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_11_42_training_observables new file mode 100644 index 0000000..2bda854 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_11_42_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_11_51_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_11_51_training_observables new file mode 100644 index 0000000..34c3b34 Binary files /dev/null and 
b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_11_51_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_12_01_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_12_01_training_observables new file mode 100644 index 0000000..f9eb132 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_12_01_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_12_20_default b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_12_20_default new file mode 100644 index 0000000..752d68a Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_12_20_default differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_12_42_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_12_42_training_observables new file mode 100644 index 0000000..0b1f86f Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_12_42_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_12_52_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_12_52_training_observables new file mode 100644 index 0000000..55b669f Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_12_52_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_13_02_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_13_02_training_observables new file mode 100644 index 0000000..ffbf84c Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_13_02_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_13_12_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_13_12_training_observables new file mode 100644 index 0000000..c44c369 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_13_12_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_13_22_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_13_22_training_observables new file mode 100644 index 0000000..4d32c47 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_13_22_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_13_31_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_13_31_training_observables new file mode 100644 index 0000000..c2a6a4a Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_13_31_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_13_41_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_13_41_training_observables new file mode 100644 index 0000000..e18383c Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_13_41_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_13_51_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_13_51_training_observables new file mode 
100644 index 0000000..5bdfb65 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_13_51_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_14_01_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_14_01_training_observables new file mode 100644 index 0000000..b5d02df Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_14_01_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_14_20_default b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_14_20_default new file mode 100644 index 0000000..2ce118e Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_14_20_default differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_14_43_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_14_43_training_observables new file mode 100644 index 0000000..37fcddd Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_14_43_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_14_52_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_14_52_training_observables new file mode 100644 index 0000000..da9b3e9 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_14_52_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_15_02_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_15_02_training_observables new file mode 100644 index 0000000..abcb9fa Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_15_02_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_15_12_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_15_12_training_observables new file mode 100644 index 0000000..6df4a30 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_15_12_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_15_31_default b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_15_31_default new file mode 100644 index 0000000..6fc9f97 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_15_31_default differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_15_56_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_15_56_training_observables new file mode 100644 index 0000000..818f7d8 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_15_56_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_16_05_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_16_05_training_observables new file mode 100644 index 0000000..448a646 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_16_05_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_16_15_training_observables 
b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_16_15_training_observables new file mode 100644 index 0000000..9e3c9a3 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_16_15_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_16_25_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_16_25_training_observables new file mode 100644 index 0000000..c744476 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_16_25_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_16_35_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_16_35_training_observables new file mode 100644 index 0000000..1c6622e Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_16_35_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_16_54_default b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_16_54_default new file mode 100644 index 0000000..99a55b5 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_16_54_default differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_17_22_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_17_22_training_observables new file mode 100644 index 0000000..0ca93eb Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_17_22_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_17_31_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_17_31_training_observables new file mode 100644 index 0000000..5b964a0 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_17_31_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_17_41_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_17_41_training_observables new file mode 100644 index 0000000..b3b8b6d Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_17_41_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_17_50_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_17_50_training_observables new file mode 100644 index 0000000..4baf7f0 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_17_50_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_18_12_default b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_18_12_default new file mode 100644 index 0000000..ce24524 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_18_12_default differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_18_44_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_18_44_training_observables new file mode 100644 index 0000000..748a4f1 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_18_44_training_observables differ diff --git 
a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_18_53_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_18_53_training_observables new file mode 100644 index 0000000..2f4ddce Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_18_53_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_19_03_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_19_03_training_observables new file mode 100644 index 0000000..7672dc9 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_19_03_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_19_12_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_19_12_training_observables new file mode 100644 index 0000000..32b3819 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_19_12_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_19_22_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_19_22_training_observables new file mode 100644 index 0000000..e4d2181 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_19_22_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_19_31_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_19_31_training_observables new file mode 100644 index 0000000..ef30151 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_19_31_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_19_40_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_19_40_training_observables new file mode 100644 index 0000000..46eff79 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_19_40_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_19_49_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_19_49_training_observables new file mode 100644 index 0000000..c98db95 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_19_49_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_19_59_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_19_59_training_observables new file mode 100644 index 0000000..9490701 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_19_59_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_20_18_default b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_20_18_default new file mode 100644 index 0000000..fae52d7 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_20_18_default differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_20_44_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_20_44_training_observables new file mode 100644 index 0000000..a666825 Binary files /dev/null and 
b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_20_44_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_20_53_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_20_53_training_observables new file mode 100644 index 0000000..11b383d Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_20_53_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_21_03_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_21_03_training_observables new file mode 100644 index 0000000..c43f632 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_21_03_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_21_12_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_21_12_training_observables new file mode 100644 index 0000000..f109526 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_21_12_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_21_21_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_21_21_training_observables new file mode 100644 index 0000000..e87b485 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_21_21_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_21_31_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_21_31_training_observables new file mode 100644 index 0000000..7df9500 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_21_31_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_21_40_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_21_40_training_observables new file mode 100644 index 0000000..7a2145e Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_21_40_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_21_50_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_21_50_training_observables new file mode 100644 index 0000000..336674a Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_21_50_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_21_59_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_21_59_training_observables new file mode 100644 index 0000000..81ae48e Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_21_59_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_22_09_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_22_09_training_observables new file mode 100644 index 0000000..daae4e2 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_22_09_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_22_18_training_observables 
b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_22_18_training_observables new file mode 100644 index 0000000..ab2744d Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_22_18_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_22_27_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_22_27_training_observables new file mode 100644 index 0000000..aa94486 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_22_27_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_22_37_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_22_37_training_observables new file mode 100644 index 0000000..fa29118 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_22_37_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_22_46_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_22_46_training_observables new file mode 100644 index 0000000..065f389 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_22_46_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_22_56_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_22_56_training_observables new file mode 100644 index 0000000..15eb462 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_22_56_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_23_05_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_23_05_training_observables new file mode 100644 index 0000000..fd9bddb Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_23_05_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_35_10_final b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_35_10_final new file mode 100644 index 0000000..7c7bc9d Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/10_07_08_35_10_final differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/details b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/details new file mode 100644 index 0000000..547dc3e --- /dev/null +++ b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run1/details @@ -0,0 +1 @@ +-nr_steps_15-cr_lr0.0001-crit_it_15-d_0.05-conj_iters_15-n_ep_18-mini_bs_500-m_bs_5-mb_lr_0.001-sim_steps_2000-m_iter_15-ensnr_5-init_45 \ No newline at end of file diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_37_05_default b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_37_05_default new file mode 100644 index 0000000..7fd55fc Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_37_05_default differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_37_23_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_37_23_training_observables new file mode 100644 index 0000000..22f3de3 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_37_23_training_observables differ diff --git 
a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_37_36_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_37_36_training_observables new file mode 100644 index 0000000..0f52776 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_37_36_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_37_49_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_37_49_training_observables new file mode 100644 index 0000000..dfbc240 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_37_49_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_38_01_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_38_01_training_observables new file mode 100644 index 0000000..3b1a8a6 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_38_01_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_38_14_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_38_14_training_observables new file mode 100644 index 0000000..ee1d5e2 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_38_14_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_38_27_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_38_27_training_observables new file mode 100644 index 0000000..9a41840 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_38_27_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_38_39_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_38_39_training_observables new file mode 100644 index 0000000..1718d10 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_38_39_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_38_51_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_38_51_training_observables new file mode 100644 index 0000000..551d4d9 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_38_51_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_39_04_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_39_04_training_observables new file mode 100644 index 0000000..e42c6f6 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_39_04_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_39_16_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_39_16_training_observables new file mode 100644 index 0000000..e55e3e5 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_39_16_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_39_28_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_39_28_training_observables new file mode 100644 index 0000000..67959c0 Binary files /dev/null and 
b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_39_28_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_39_41_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_39_41_training_observables new file mode 100644 index 0000000..8165586 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_39_41_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_39_53_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_39_53_training_observables new file mode 100644 index 0000000..7b72b6e Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_39_53_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_40_05_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_40_05_training_observables new file mode 100644 index 0000000..30405ec Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_40_05_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_40_17_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_40_17_training_observables new file mode 100644 index 0000000..124f0d0 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_40_17_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_40_29_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_40_29_training_observables new file mode 100644 index 0000000..9d80de1 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_40_29_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_40_55_default b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_40_55_default new file mode 100644 index 0000000..2b87ff5 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_40_55_default differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_41_08_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_41_08_training_observables new file mode 100644 index 0000000..fbaf55a Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_41_08_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_41_19_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_41_19_training_observables new file mode 100644 index 0000000..7990485 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_41_19_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_41_29_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_41_29_training_observables new file mode 100644 index 0000000..ad9f6f9 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_41_29_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_41_39_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_41_39_training_observables new file mode 
100644 index 0000000..9386e0c Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_41_39_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_41_50_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_41_50_training_observables new file mode 100644 index 0000000..423331a Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_41_50_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_42_00_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_42_00_training_observables new file mode 100644 index 0000000..5141c2e Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_42_00_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_42_21_default b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_42_21_default new file mode 100644 index 0000000..0fd1ee5 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_42_21_default differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_42_44_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_42_44_training_observables new file mode 100644 index 0000000..297f8f5 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_42_44_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_42_57_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_42_57_training_observables new file mode 100644 index 0000000..2d5a0d6 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_42_57_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_43_09_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_43_09_training_observables new file mode 100644 index 0000000..a972143 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_43_09_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_43_22_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_43_22_training_observables new file mode 100644 index 0000000..3475de8 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_43_22_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_43_35_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_43_35_training_observables new file mode 100644 index 0000000..f2ffdb7 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_43_35_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_43_48_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_43_48_training_observables new file mode 100644 index 0000000..9944b62 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_43_48_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_44_01_training_observables 
b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_44_01_training_observables new file mode 100644 index 0000000..294ed60 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_44_01_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_44_14_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_44_14_training_observables new file mode 100644 index 0000000..a3a0991 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_44_14_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_44_26_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_44_26_training_observables new file mode 100644 index 0000000..c35e602 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_44_26_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_44_39_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_44_39_training_observables new file mode 100644 index 0000000..96f7225 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_44_39_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_44_51_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_44_51_training_observables new file mode 100644 index 0000000..334d800 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_44_51_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_45_04_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_45_04_training_observables new file mode 100644 index 0000000..7ad1d21 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_45_04_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_45_16_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_45_16_training_observables new file mode 100644 index 0000000..0dfd120 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_45_16_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_45_28_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_45_28_training_observables new file mode 100644 index 0000000..37a9704 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_45_28_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_45_41_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_45_41_training_observables new file mode 100644 index 0000000..bedbccf Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_45_41_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_45_53_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_45_53_training_observables new file mode 100644 index 0000000..f009fa4 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_45_53_training_observables differ diff 
--git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_46_20_default b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_46_20_default new file mode 100644 index 0000000..019dab6 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_46_20_default differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_46_40_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_46_40_training_observables new file mode 100644 index 0000000..d9b31ce Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_46_40_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_46_52_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_46_52_training_observables new file mode 100644 index 0000000..7610f1d Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_46_52_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_47_05_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_47_05_training_observables new file mode 100644 index 0000000..89ae8aa Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_47_05_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_47_18_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_47_18_training_observables new file mode 100644 index 0000000..4be819f Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_47_18_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_47_30_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_47_30_training_observables new file mode 100644 index 0000000..5bd23a0 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_47_30_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_47_43_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_47_43_training_observables new file mode 100644 index 0000000..afd640d Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_47_43_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_47_55_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_47_55_training_observables new file mode 100644 index 0000000..423ee81 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_47_55_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_48_07_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_48_07_training_observables new file mode 100644 index 0000000..387f5d8 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_48_07_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_48_19_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_48_19_training_observables new file mode 100644 index 0000000..38d2e0b Binary files /dev/null and 
b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_48_19_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_48_32_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_48_32_training_observables new file mode 100644 index 0000000..4d801bb Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_48_32_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_48_44_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_48_44_training_observables new file mode 100644 index 0000000..49da7f6 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_48_44_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_48_56_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_48_56_training_observables new file mode 100644 index 0000000..f488675 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_48_56_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_49_08_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_49_08_training_observables new file mode 100644 index 0000000..8a34306 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_49_08_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_49_20_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_49_20_training_observables new file mode 100644 index 0000000..88ba65a Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_49_20_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_49_32_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_49_32_training_observables new file mode 100644 index 0000000..b202b03 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_49_32_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_49_44_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_49_44_training_observables new file mode 100644 index 0000000..ca6456a Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_49_44_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_50_12_default b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_50_12_default new file mode 100644 index 0000000..f16fd10 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_50_12_default differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_50_34_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_50_34_training_observables new file mode 100644 index 0000000..0d3d20f Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_50_34_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_50_47_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_50_47_training_observables new file mode 
100644 index 0000000..7a8a0e1 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_50_47_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_51_00_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_51_00_training_observables new file mode 100644 index 0000000..38231b3 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_51_00_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_51_13_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_51_13_training_observables new file mode 100644 index 0000000..10f14be Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_51_13_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_51_26_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_51_26_training_observables new file mode 100644 index 0000000..67be02d Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_51_26_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_51_39_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_51_39_training_observables new file mode 100644 index 0000000..e5941f4 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_51_39_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_52_00_default b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_52_00_default new file mode 100644 index 0000000..d110b46 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_52_00_default differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_52_32_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_52_32_training_observables new file mode 100644 index 0000000..394ce56 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_52_32_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_52_45_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_52_45_training_observables new file mode 100644 index 0000000..3949a30 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_52_45_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_52_57_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_52_57_training_observables new file mode 100644 index 0000000..67a67b6 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_52_57_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_53_09_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_53_09_training_observables new file mode 100644 index 0000000..bde2ba0 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_53_09_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_53_21_training_observables 
b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_53_21_training_observables new file mode 100644 index 0000000..292b9a5 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_53_21_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_53_34_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_53_34_training_observables new file mode 100644 index 0000000..2f1401a Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_53_34_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_53_46_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_53_46_training_observables new file mode 100644 index 0000000..dbf8c7d Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_53_46_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_53_58_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_53_58_training_observables new file mode 100644 index 0000000..7b98f62 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_53_58_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_54_10_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_54_10_training_observables new file mode 100644 index 0000000..62b68d7 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_54_10_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_54_22_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_54_22_training_observables new file mode 100644 index 0000000..359c758 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_54_22_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_54_44_default b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_54_44_default new file mode 100644 index 0000000..2a0451a Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_54_44_default differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_55_09_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_55_09_training_observables new file mode 100644 index 0000000..7c02faf Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_55_09_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_55_21_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_55_21_training_observables new file mode 100644 index 0000000..6060c70 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_55_21_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_55_34_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_55_34_training_observables new file mode 100644 index 0000000..0a7eb74 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_55_34_training_observables differ diff --git 
a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_55_46_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_55_46_training_observables new file mode 100644 index 0000000..fcc1fc9 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_55_46_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_56_08_default b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_56_08_default new file mode 100644 index 0000000..e283719 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_56_08_default differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_56_32_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_56_32_training_observables new file mode 100644 index 0000000..5137687 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_56_32_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_56_44_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_56_44_training_observables new file mode 100644 index 0000000..f2ba08c Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_56_44_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_56_56_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_56_56_training_observables new file mode 100644 index 0000000..99ddd09 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_56_56_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_57_08_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_57_08_training_observables new file mode 100644 index 0000000..38b5c40 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_57_08_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_57_20_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_57_20_training_observables new file mode 100644 index 0000000..bf568ec Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_57_20_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_57_32_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_57_32_training_observables new file mode 100644 index 0000000..a4b61ae Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_57_32_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_57_44_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_57_44_training_observables new file mode 100644 index 0000000..ae7158f Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_57_44_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_57_56_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_57_56_training_observables new file mode 100644 index 0000000..5c11ef0 Binary files /dev/null and 
b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_57_56_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_58_08_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_58_08_training_observables new file mode 100644 index 0000000..fe24859 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_58_08_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_58_20_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_58_20_training_observables new file mode 100644 index 0000000..0240d28 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_58_20_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_58_32_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_58_32_training_observables new file mode 100644 index 0000000..9768fe5 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_58_32_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_58_44_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_58_44_training_observables new file mode 100644 index 0000000..5639b17 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_58_44_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_58_56_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_58_56_training_observables new file mode 100644 index 0000000..5ed7090 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_58_56_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_59_08_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_59_08_training_observables new file mode 100644 index 0000000..2850b40 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_59_08_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_59_20_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_59_20_training_observables new file mode 100644 index 0000000..404afc1 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_59_20_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_59_31_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_59_31_training_observables new file mode 100644 index 0000000..fcde931 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_59_31_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_59_58_default b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_59_58_default new file mode 100644 index 0000000..dde2031 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_08_59_58_default differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_00_21_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_00_21_training_observables new file mode 
100644 index 0000000..57140af Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_00_21_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_00_34_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_00_34_training_observables new file mode 100644 index 0000000..854f148 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_00_34_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_00_46_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_00_46_training_observables new file mode 100644 index 0000000..74502cd Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_00_46_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_00_59_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_00_59_training_observables new file mode 100644 index 0000000..e9c9b0c Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_00_59_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_01_11_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_01_11_training_observables new file mode 100644 index 0000000..dea621d Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_01_11_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_01_23_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_01_23_training_observables new file mode 100644 index 0000000..7682b6e Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_01_23_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_01_35_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_01_35_training_observables new file mode 100644 index 0000000..21d1477 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_01_35_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_01_48_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_01_48_training_observables new file mode 100644 index 0000000..b70a100 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_01_48_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_02_00_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_02_00_training_observables new file mode 100644 index 0000000..6d7feec Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_02_00_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_02_12_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_02_12_training_observables new file mode 100644 index 0000000..1bb57cb Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_02_12_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_02_24_training_observables 
b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_02_24_training_observables new file mode 100644 index 0000000..8322c93 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_02_24_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_02_47_default b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_02_47_default new file mode 100644 index 0000000..c8be7b3 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_02_47_default differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_03_12_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_03_12_training_observables new file mode 100644 index 0000000..f1b31c8 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_03_12_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_03_24_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_03_24_training_observables new file mode 100644 index 0000000..889a8a1 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_03_24_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_03_36_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_03_36_training_observables new file mode 100644 index 0000000..6f4b3cb Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_03_36_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_03_48_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_03_48_training_observables new file mode 100644 index 0000000..9e317bb Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_03_48_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_04_00_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_04_00_training_observables new file mode 100644 index 0000000..a71fb8a Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_04_00_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_04_12_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_04_12_training_observables new file mode 100644 index 0000000..d1d03ce Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_04_12_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_04_24_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_04_24_training_observables new file mode 100644 index 0000000..185f960 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_04_24_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_04_36_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_04_36_training_observables new file mode 100644 index 0000000..09cd40a Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_04_36_training_observables differ diff --git 
a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_04_48_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_04_48_training_observables new file mode 100644 index 0000000..f1a8ef1 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_04_48_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_05_00_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_05_00_training_observables new file mode 100644 index 0000000..01da143 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_05_00_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_05_21_default b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_05_21_default new file mode 100644 index 0000000..e952105 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_05_21_default differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_05_57_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_05_57_training_observables new file mode 100644 index 0000000..054aef5 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_05_57_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_06_09_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_06_09_training_observables new file mode 100644 index 0000000..7d978a4 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_06_09_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_06_21_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_06_21_training_observables new file mode 100644 index 0000000..a66df78 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_06_21_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_06_32_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_06_32_training_observables new file mode 100644 index 0000000..c84cfc6 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_06_32_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_06_44_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_06_44_training_observables new file mode 100644 index 0000000..128cef6 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_06_44_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_06_56_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_06_56_training_observables new file mode 100644 index 0000000..03b200e Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_06_56_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_07_08_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_07_08_training_observables new file mode 100644 index 0000000..84ddf75 Binary files /dev/null and 
b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_07_08_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_07_19_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_07_19_training_observables new file mode 100644 index 0000000..d414e9b Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_07_19_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_07_31_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_07_31_training_observables new file mode 100644 index 0000000..e577c92 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_07_31_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_07_43_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_07_43_training_observables new file mode 100644 index 0000000..d92680f Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_07_43_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_07_54_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_07_54_training_observables new file mode 100644 index 0000000..f4d1cff Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_07_54_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_08_05_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_08_05_training_observables new file mode 100644 index 0000000..aed6d8d Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_08_05_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_08_17_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_08_17_training_observables new file mode 100644 index 0000000..4aa6bc7 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_08_17_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_08_40_default b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_08_40_default new file mode 100644 index 0000000..071aca2 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_08_40_default differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_09_16_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_09_16_training_observables new file mode 100644 index 0000000..d9fad40 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_09_16_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_09_28_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_09_28_training_observables new file mode 100644 index 0000000..ffbf467 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_09_28_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_09_40_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_09_40_training_observables new file mode 
100644 index 0000000..9cd8f1b Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_09_40_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_09_52_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_09_52_training_observables new file mode 100644 index 0000000..460e002 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_09_52_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_10_04_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_10_04_training_observables new file mode 100644 index 0000000..6cf1946 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_10_04_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_10_16_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_10_16_training_observables new file mode 100644 index 0000000..634786b Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_10_16_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_10_27_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_10_27_training_observables new file mode 100644 index 0000000..7e1c293 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_10_27_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_10_39_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_10_39_training_observables new file mode 100644 index 0000000..7ed48d7 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_10_39_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_10_51_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_10_51_training_observables new file mode 100644 index 0000000..b6eefc5 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_10_51_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_11_03_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_11_03_training_observables new file mode 100644 index 0000000..3fb519c Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_11_03_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_11_14_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_11_14_training_observables new file mode 100644 index 0000000..ea99121 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_11_14_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_11_26_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_11_26_training_observables new file mode 100644 index 0000000..da21949 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_11_26_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_11_49_default 
b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_11_49_default new file mode 100644 index 0000000..ef56525 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_11_49_default differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_12_18_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_12_18_training_observables new file mode 100644 index 0000000..8ac7895 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_12_18_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_12_30_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_12_30_training_observables new file mode 100644 index 0000000..2f8c574 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_12_30_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_12_42_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_12_42_training_observables new file mode 100644 index 0000000..4398e45 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_12_42_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_12_54_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_12_54_training_observables new file mode 100644 index 0000000..41fe0f6 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_12_54_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_13_05_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_13_05_training_observables new file mode 100644 index 0000000..c889d37 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_13_05_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_13_17_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_13_17_training_observables new file mode 100644 index 0000000..7460ede Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_13_17_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_13_29_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_13_29_training_observables new file mode 100644 index 0000000..88ec96d Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_13_29_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_13_41_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_13_41_training_observables new file mode 100644 index 0000000..829aef7 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_13_41_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_13_52_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_13_52_training_observables new file mode 100644 index 0000000..3121091 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_13_52_training_observables differ diff --git 
a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_14_04_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_14_04_training_observables new file mode 100644 index 0000000..689e904 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_14_04_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_14_15_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_14_15_training_observables new file mode 100644 index 0000000..a836b84 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_14_15_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_14_26_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_14_26_training_observables new file mode 100644 index 0000000..9f158f0 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_14_26_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_14_38_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_14_38_training_observables new file mode 100644 index 0000000..f08dd14 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_14_38_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_14_50_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_14_50_training_observables new file mode 100644 index 0000000..4393e2c Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_14_50_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_15_01_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_15_01_training_observables new file mode 100644 index 0000000..ecf3e7a Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_15_01_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_15_13_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_15_13_training_observables new file mode 100644 index 0000000..7bbdb20 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_15_13_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_15_38_default b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_15_38_default new file mode 100644 index 0000000..eee5610 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_15_38_default differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_16_17_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_16_17_training_observables new file mode 100644 index 0000000..75e0fc3 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_16_17_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_16_28_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_16_28_training_observables new file mode 100644 index 0000000..4aba02d Binary files /dev/null and 
b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_16_28_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_16_39_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_16_39_training_observables new file mode 100644 index 0000000..fdcb49f Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_16_39_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_16_50_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_16_50_training_observables new file mode 100644 index 0000000..24a4785 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_16_50_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_17_01_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_17_01_training_observables new file mode 100644 index 0000000..efc1921 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_17_01_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_17_12_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_17_12_training_observables new file mode 100644 index 0000000..4faae03 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_17_12_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_17_23_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_17_23_training_observables new file mode 100644 index 0000000..5640c0e Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_17_23_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_17_34_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_17_34_training_observables new file mode 100644 index 0000000..e54d544 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_17_34_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_17_45_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_17_45_training_observables new file mode 100644 index 0000000..482572a Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_17_45_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_17_56_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_17_56_training_observables new file mode 100644 index 0000000..2c1a182 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_17_56_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_18_07_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_18_07_training_observables new file mode 100644 index 0000000..f031b07 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_18_07_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_18_17_training_observables 
b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_18_17_training_observables new file mode 100644 index 0000000..3abe247 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_18_17_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_18_28_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_18_28_training_observables new file mode 100644 index 0000000..7c63199 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_18_28_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_18_39_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_18_39_training_observables new file mode 100644 index 0000000..d0ef91b Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_18_39_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_18_50_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_18_50_training_observables new file mode 100644 index 0000000..d3ef454 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_18_50_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_19_00_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_19_00_training_observables new file mode 100644 index 0000000..638541a Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_19_00_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_19_27_default b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_19_27_default new file mode 100644 index 0000000..0c19933 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_19_27_default differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_20_05_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_20_05_training_observables new file mode 100644 index 0000000..56b45e6 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_20_05_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_20_16_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_20_16_training_observables new file mode 100644 index 0000000..ffda916 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_20_16_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_20_27_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_20_27_training_observables new file mode 100644 index 0000000..2f1e4d4 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_20_27_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_20_38_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_20_38_training_observables new file mode 100644 index 0000000..21a560a Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_20_38_training_observables differ diff --git 
a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_20_49_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_20_49_training_observables new file mode 100644 index 0000000..a765428 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_20_49_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_21_00_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_21_00_training_observables new file mode 100644 index 0000000..7996b7e Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_21_00_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_21_11_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_21_11_training_observables new file mode 100644 index 0000000..3800972 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_21_11_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_21_32_default b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_21_32_default new file mode 100644 index 0000000..1b95eb8 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_21_32_default differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_22_15_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_22_15_training_observables new file mode 100644 index 0000000..f327b8f Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_22_15_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_22_26_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_22_26_training_observables new file mode 100644 index 0000000..070ab63 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_22_26_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_22_37_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_22_37_training_observables new file mode 100644 index 0000000..be9bd92 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_22_37_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_22_48_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_22_48_training_observables new file mode 100644 index 0000000..af80324 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_22_48_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_22_59_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_22_59_training_observables new file mode 100644 index 0000000..ecddf0e Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_22_59_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_23_09_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_23_09_training_observables new file mode 100644 index 0000000..8610ad2 Binary files /dev/null and 
b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_23_09_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_23_20_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_23_20_training_observables new file mode 100644 index 0000000..46dec80 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_23_20_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_23_31_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_23_31_training_observables new file mode 100644 index 0000000..a1bb829 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_23_31_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_23_42_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_23_42_training_observables new file mode 100644 index 0000000..0171468 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_23_42_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_23_52_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_23_52_training_observables new file mode 100644 index 0000000..9db253a Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_23_52_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_24_03_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_24_03_training_observables new file mode 100644 index 0000000..ae19146 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_24_03_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_24_14_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_24_14_training_observables new file mode 100644 index 0000000..651e3c0 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_24_14_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_24_24_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_24_24_training_observables new file mode 100644 index 0000000..0a93749 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_24_24_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_24_34_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_24_34_training_observables new file mode 100644 index 0000000..7a5483f Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_24_34_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_24_56_default b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_24_56_default new file mode 100644 index 0000000..bfe1441 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_24_56_default differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_25_25_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_25_25_training_observables new file mode 
100644 index 0000000..6c050df Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_25_25_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_25_36_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_25_36_training_observables new file mode 100644 index 0000000..fdf5d8e Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_25_36_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_25_47_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_25_47_training_observables new file mode 100644 index 0000000..d74a637 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_25_47_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_25_58_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_25_58_training_observables new file mode 100644 index 0000000..56d8ed8 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_25_58_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_26_09_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_26_09_training_observables new file mode 100644 index 0000000..240e1bd Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_26_09_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_26_20_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_26_20_training_observables new file mode 100644 index 0000000..a04e428 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_26_20_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_26_31_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_26_31_training_observables new file mode 100644 index 0000000..23521e7 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_26_31_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_26_41_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_26_41_training_observables new file mode 100644 index 0000000..82d1b17 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_26_41_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_26_52_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_26_52_training_observables new file mode 100644 index 0000000..4358289 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_26_52_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_27_03_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_27_03_training_observables new file mode 100644 index 0000000..d0db2eb Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_27_03_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_27_28_default 
b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_27_28_default new file mode 100644 index 0000000..72226de Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_27_28_default differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_28_07_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_28_07_training_observables new file mode 100644 index 0000000..d63e6bb Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_28_07_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_28_17_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_28_17_training_observables new file mode 100644 index 0000000..c569602 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_28_17_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_28_28_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_28_28_training_observables new file mode 100644 index 0000000..b64a4bf Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_28_28_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_28_39_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_28_39_training_observables new file mode 100644 index 0000000..89cfecc Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_28_39_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_28_49_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_28_49_training_observables new file mode 100644 index 0000000..74b9eb9 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_28_49_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_29_00_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_29_00_training_observables new file mode 100644 index 0000000..27c12fc Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_29_00_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_29_10_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_29_10_training_observables new file mode 100644 index 0000000..308c2dc Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_29_10_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_29_21_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_29_21_training_observables new file mode 100644 index 0000000..cd5a98d Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_29_21_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_29_31_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_29_31_training_observables new file mode 100644 index 0000000..fe5682f Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_29_31_training_observables differ diff --git 
a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_29_55_default b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_29_55_default new file mode 100644 index 0000000..feece30 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_29_55_default differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_30_39_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_30_39_training_observables new file mode 100644 index 0000000..6607c4e Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_30_39_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_30_49_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_30_49_training_observables new file mode 100644 index 0000000..3e66d8b Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_30_49_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_31_00_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_31_00_training_observables new file mode 100644 index 0000000..d498fc1 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_31_00_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_31_11_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_31_11_training_observables new file mode 100644 index 0000000..9e88d45 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_31_11_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_31_33_default b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_31_33_default new file mode 100644 index 0000000..835d411 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_31_33_default differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_32_05_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_32_05_training_observables new file mode 100644 index 0000000..f8a134a Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_32_05_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_32_16_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_32_16_training_observables new file mode 100644 index 0000000..9b2277a Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_32_16_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_32_27_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_32_27_training_observables new file mode 100644 index 0000000..07236dd Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_32_27_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_32_38_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_32_38_training_observables new file mode 100644 index 0000000..60c87a5 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_32_38_training_observables 
differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_32_48_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_32_48_training_observables new file mode 100644 index 0000000..0772c25 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_32_48_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_32_59_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_32_59_training_observables new file mode 100644 index 0000000..6cf0aa0 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_32_59_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_33_09_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_33_09_training_observables new file mode 100644 index 0000000..6821ab3 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_33_09_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_33_20_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_33_20_training_observables new file mode 100644 index 0000000..7965d41 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_33_20_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_33_30_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_33_30_training_observables new file mode 100644 index 0000000..642a5b2 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_33_30_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_33_41_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_33_41_training_observables new file mode 100644 index 0000000..e79c752 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_33_41_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_33_52_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_33_52_training_observables new file mode 100644 index 0000000..5f4eeb3 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_33_52_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_34_02_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_34_02_training_observables new file mode 100644 index 0000000..bd922a6 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_34_02_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_34_13_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_34_13_training_observables new file mode 100644 index 0000000..bc9e3d4 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_34_13_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_34_23_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_34_23_training_observables new file mode 100644 index 0000000..384e4f5 Binary files /dev/null 
and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_34_23_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_34_34_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_34_34_training_observables new file mode 100644 index 0000000..ef0ba77 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_34_34_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_34_44_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_34_44_training_observables new file mode 100644 index 0000000..bcfc18a Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_34_44_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_35_10_default b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_35_10_default new file mode 100644 index 0000000..9567e0f Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_35_10_default differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_36_02_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_36_02_training_observables new file mode 100644 index 0000000..34670f2 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_36_02_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_36_13_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_36_13_training_observables new file mode 100644 index 0000000..5b66f58 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_36_13_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_36_24_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_36_24_training_observables new file mode 100644 index 0000000..f92c054 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_36_24_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_36_35_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_36_35_training_observables new file mode 100644 index 0000000..8924be4 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_36_35_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_36_46_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_36_46_training_observables new file mode 100644 index 0000000..ee19d32 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_36_46_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_36_57_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_36_57_training_observables new file mode 100644 index 0000000..ca12064 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_36_57_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_37_08_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_37_08_training_observables new file 
mode 100644 index 0000000..dbd53cf Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_37_08_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_37_19_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_37_19_training_observables new file mode 100644 index 0000000..f1d43e0 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_37_19_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_37_29_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_37_29_training_observables new file mode 100644 index 0000000..9ad5549 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_37_29_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_37_40_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_37_40_training_observables new file mode 100644 index 0000000..d44a2b5 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_37_40_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_37_51_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_37_51_training_observables new file mode 100644 index 0000000..dab50e6 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_37_51_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_38_01_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_38_01_training_observables new file mode 100644 index 0000000..021073e Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_38_01_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_38_12_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_38_12_training_observables new file mode 100644 index 0000000..9062e90 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_38_12_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_38_23_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_38_23_training_observables new file mode 100644 index 0000000..d007d80 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_38_23_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_38_33_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_38_33_training_observables new file mode 100644 index 0000000..e85200e Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_38_33_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_38_44_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_38_44_training_observables new file mode 100644 index 0000000..d1d502e Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_38_44_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_39_11_default 
b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_39_11_default new file mode 100644 index 0000000..80d0916 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_39_11_default differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_39_44_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_39_44_training_observables new file mode 100644 index 0000000..0edf1f3 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_39_44_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_39_55_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_39_55_training_observables new file mode 100644 index 0000000..0f953b5 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_39_55_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_40_05_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_40_05_training_observables new file mode 100644 index 0000000..48557e8 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_40_05_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_40_15_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_40_15_training_observables new file mode 100644 index 0000000..a889cfc Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_40_15_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_40_25_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_40_25_training_observables new file mode 100644 index 0000000..210616a Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_40_25_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_40_36_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_40_36_training_observables new file mode 100644 index 0000000..5f31d91 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_40_36_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_40_46_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_40_46_training_observables new file mode 100644 index 0000000..0ea3b8f Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_40_46_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_40_56_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_40_56_training_observables new file mode 100644 index 0000000..6cc0fbe Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_40_56_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_41_06_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_41_06_training_observables new file mode 100644 index 0000000..be9f4c7 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_41_06_training_observables differ diff --git 
a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_41_16_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_41_16_training_observables new file mode 100644 index 0000000..2ae933a Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_41_16_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_41_26_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_41_26_training_observables new file mode 100644 index 0000000..dee9a3d Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_41_26_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_41_36_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_41_36_training_observables new file mode 100644 index 0000000..ffb431b Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_41_36_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_42_00_default b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_42_00_default new file mode 100644 index 0000000..22119ac Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_42_00_default differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_42_41_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_42_41_training_observables new file mode 100644 index 0000000..e0f8edc Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_42_41_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_42_51_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_42_51_training_observables new file mode 100644 index 0000000..6ba9a90 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_42_51_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_43_02_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_43_02_training_observables new file mode 100644 index 0000000..eb2e38f Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_43_02_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_43_13_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_43_13_training_observables new file mode 100644 index 0000000..6489d7d Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_43_13_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_43_23_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_43_23_training_observables new file mode 100644 index 0000000..cb8e67b Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_43_23_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_43_34_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_43_34_training_observables new file mode 100644 index 0000000..d2f1448 Binary files /dev/null and 
b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_43_34_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_43_44_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_43_44_training_observables new file mode 100644 index 0000000..c1f1698 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_43_44_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_43_55_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_43_55_training_observables new file mode 100644 index 0000000..7783efe Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_43_55_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_44_05_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_44_05_training_observables new file mode 100644 index 0000000..d2cddf1 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_44_05_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_44_16_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_44_16_training_observables new file mode 100644 index 0000000..95591ba Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_44_16_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_44_26_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_44_26_training_observables new file mode 100644 index 0000000..cadbf58 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_44_26_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_44_52_default b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_44_52_default new file mode 100644 index 0000000..835b53f Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_44_52_default differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_45_43_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_45_43_training_observables new file mode 100644 index 0000000..8db53c4 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_45_43_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_45_53_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_45_53_training_observables new file mode 100644 index 0000000..3fb46f6 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_45_53_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_46_03_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_46_03_training_observables new file mode 100644 index 0000000..df5ef9e Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_46_03_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_46_13_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_46_13_training_observables new file mode 
100644 index 0000000..c366a71 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_46_13_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_46_23_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_46_23_training_observables new file mode 100644 index 0000000..0274459 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_46_23_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_46_33_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_46_33_training_observables new file mode 100644 index 0000000..f53ced2 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_46_33_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_46_43_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_46_43_training_observables new file mode 100644 index 0000000..443509a Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_46_43_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_46_53_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_46_53_training_observables new file mode 100644 index 0000000..5257e5b Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_46_53_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_47_03_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_47_03_training_observables new file mode 100644 index 0000000..f2caf9d Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_47_03_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_47_13_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_47_13_training_observables new file mode 100644 index 0000000..a882f2c Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_47_13_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_47_38_default b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_47_38_default new file mode 100644 index 0000000..d89ce76 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_47_38_default differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_48_18_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_48_18_training_observables new file mode 100644 index 0000000..6894050 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_48_18_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_48_28_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_48_28_training_observables new file mode 100644 index 0000000..25ec271 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_48_28_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_48_38_training_observables 
b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_48_38_training_observables new file mode 100644 index 0000000..a0b590a Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_48_38_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_48_48_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_48_48_training_observables new file mode 100644 index 0000000..220e2b8 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_48_48_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_48_58_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_48_58_training_observables new file mode 100644 index 0000000..734bcb5 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_48_58_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_49_08_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_49_08_training_observables new file mode 100644 index 0000000..2abf4d0 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_49_08_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_49_18_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_49_18_training_observables new file mode 100644 index 0000000..7805751 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_49_18_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_49_28_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_49_28_training_observables new file mode 100644 index 0000000..0d617d6 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_49_28_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_49_39_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_49_39_training_observables new file mode 100644 index 0000000..8a5358e Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_49_39_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_49_49_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_49_49_training_observables new file mode 100644 index 0000000..78106a9 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_49_49_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_49_59_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_49_59_training_observables new file mode 100644 index 0000000..0c772be Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_49_59_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_50_23_default b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_50_23_default new file mode 100644 index 0000000..87bd786 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_50_23_default differ diff --git 
a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_51_15_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_51_15_training_observables new file mode 100644 index 0000000..ad6035a Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_51_15_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_51_26_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_51_26_training_observables new file mode 100644 index 0000000..40dab55 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_51_26_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_51_36_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_51_36_training_observables new file mode 100644 index 0000000..6856294 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_51_36_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_51_46_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_51_46_training_observables new file mode 100644 index 0000000..71c054c Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_51_46_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_51_56_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_51_56_training_observables new file mode 100644 index 0000000..28fa4ae Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_51_56_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_52_07_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_52_07_training_observables new file mode 100644 index 0000000..fc0ad30 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_52_07_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_52_17_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_52_17_training_observables new file mode 100644 index 0000000..cbe2261 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_52_17_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_52_27_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_52_27_training_observables new file mode 100644 index 0000000..b4a81b1 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_52_27_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_52_37_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_52_37_training_observables new file mode 100644 index 0000000..c39c8aa Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_52_37_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_53_01_default b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_53_01_default new file mode 100644 index 0000000..25a93a1 Binary files /dev/null and 
b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_53_01_default differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_53_44_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_53_44_training_observables new file mode 100644 index 0000000..acbded5 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_53_44_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_53_55_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_53_55_training_observables new file mode 100644 index 0000000..f5fc160 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_53_55_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_54_05_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_54_05_training_observables new file mode 100644 index 0000000..3ecb474 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_54_05_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_54_15_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_54_15_training_observables new file mode 100644 index 0000000..99de73d Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_54_15_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_54_38_default b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_54_38_default new file mode 100644 index 0000000..ca52810 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_54_38_default differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_55_25_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_55_25_training_observables new file mode 100644 index 0000000..09d004b Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_55_25_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_55_35_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_55_35_training_observables new file mode 100644 index 0000000..7c91370 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_55_35_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_55_45_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_55_45_training_observables new file mode 100644 index 0000000..754aa8d Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_55_45_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_55_54_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_55_54_training_observables new file mode 100644 index 0000000..d6c4dfc Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_55_54_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_56_04_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_56_04_training_observables new file mode 100644 index 
0000000..ee40722 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_56_04_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_56_14_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_56_14_training_observables new file mode 100644 index 0000000..945d2d9 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_56_14_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_56_24_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_56_24_training_observables new file mode 100644 index 0000000..0401190 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_56_24_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_56_34_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_56_34_training_observables new file mode 100644 index 0000000..b7e44f5 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_56_34_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_56_44_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_56_44_training_observables new file mode 100644 index 0000000..c8c63f8 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_56_44_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_56_54_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_56_54_training_observables new file mode 100644 index 0000000..a1f957e Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_56_54_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_57_04_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_57_04_training_observables new file mode 100644 index 0000000..03baf2f Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_57_04_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_57_13_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_57_13_training_observables new file mode 100644 index 0000000..27d654b Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_57_13_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_57_23_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_57_23_training_observables new file mode 100644 index 0000000..bda0b9e Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_57_23_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_57_33_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_57_33_training_observables new file mode 100644 index 0000000..91fc331 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_57_33_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_57_43_training_observables 
b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_57_43_training_observables new file mode 100644 index 0000000..5b297f0 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_57_43_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_57_52_training_observables b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_57_52_training_observables new file mode 100644 index 0000000..fc3565f Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_09_57_52_training_observables differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_10_06_05_final b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_10_06_05_final new file mode 100644 index 0000000..b932043 Binary files /dev/null and b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/10_07_10_06_05_final differ diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/details b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/details new file mode 100644 index 0000000..8f818f5 --- /dev/null +++ b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/run2/details @@ -0,0 +1 @@ +-nr_steps_15-cr_lr0.0001-crit_it_15-d_0.05-conj_iters_15-n_ep_28-mini_bs_500-m_bs_5-mb_lr_0.001-sim_steps_2000-m_iter_15-ensnr_9-init_45 \ No newline at end of file diff --git a/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/tango_connection.py b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/tango_connection.py new file mode 100644 index 0000000..ad1af35 --- /dev/null +++ b/Data_Experiments/2020_10_06_ME_TRPO_stable@FERMI/tango_connection.py @@ -0,0 +1,253 @@ +import json +import time +import numpy as np +import PyTango as tango + +class TangoConnection: + + def __init__(self, conf_file, **kwargs): + + # load json configuration file + with open(conf_file) as f: + self.conf_data = json.load(f) + + self.system = self.conf_data['system'] + + # get actuators data + conf_actuators = self.conf_data['actuators'] + self.actuators_data= self.get_confdata(conf_actuators) + self.actuators_device_num = self.actuators_data[0] + self.actuators_device_list = self.actuators_data[1] + self.actuators_device_attr_num = self.actuators_data[2] + self.actuators_device_attr_list = self.actuators_data[3] + + self.actuators_size = np.sum(self.actuators_device_attr_num) + self.state_size = self.actuators_size.copy() + self.action_size = self.actuators_size.copy() + self.state = np.zeros(self.state_size) + + # get sensors data + conf_sensors = self.conf_data['sensors'] + self.sensors_data = self.get_confdata(conf_sensors) + self.sensors_device_num = self.sensors_data[0] + self.sensors_device_list = self.sensors_data[1] + self.sensors_device_attr_num = self.sensors_data[2] + self.sensors_device_attr_list = self.sensors_data[3] + + self.sensors_size = np.sum(self.sensors_device_attr_num) + self.intensity = np.zeros(1) + + # get spectrometer data + conf_spectrometer = self.conf_data['spectrometer'] + self.spectrometer_data = self.get_confdata(conf_spectrometer) + self.spectrometer_device_num = self.spectrometer_data[0] + self.spectrometer_device_list = self.spectrometer_data[1] + self.spectrometer_device_attr_num = self.spectrometer_data[2] + self.spectrometer_device_attr_list = self.spectrometer_data[3] + + # get security data + conf_security = self.conf_data['security'] + self.security_data = self.get_confdata(conf_security) + self.security_device_num = self.security_data[0] + self.security_device_list = 
self.security_data[1] + self.security_device_attr_num = self.security_data[2] + self.security_device_attr_list = self.security_data[3] + self.security_threshold = 100. + + if 'num_samples' in kwargs: + self.num_samples = kwargs.get('num_samples') + else: + self.num_samples = 25 # 11 # 25 # 51 # 25 + + # self.pause = 0.5 + 0.02*self.num_samples + self.pause = 0.5 + 0.02*self.num_samples + 0.25 + # self.pause = 0.5 + 0.02*self.num_samples + 1 + + if 'target_state' in kwargs: + self.target_actuators = kwargs.get('target_state') + else: + self.target_actuators = 131072 * np.ones(self.actuators_size) + + if self.system == 'sequencer': + self.set_state(self.target_actuators) + self.target_position = self.get_position() + + # read initial values for actuators and sensors + self.init_state = self.get_state() + self.init_intensity = self.get_intensity() + + self.state = self.init_state.copy() + self.intensity = self.init_intensity.copy() + + + def get_confdata(self, conf_dev): + dev_list, dev_attr_num, dev_attr_list = [], [], [] + dev_num = len(conf_dev) + for j in range(dev_num): + dev_data = conf_dev[j] + dev_name = dev_data['host'] + dev_data['address'] + dev = tango.DeviceProxy(dev_name) + dev_attr = dev_data['attributes'] + + dev_list.append(dev) + dev_attr_num.append(len(dev_attr)) + dev_attr_list.append(dev_attr) + return [dev_num, dev_list, dev_attr_num, dev_attr_list] + + def get_position(self): + position = np.zeros(self.sensors_size) + for i in range(self.sensors_device_num): + dev = self.sensors_device_list[i] + for j in range(self.sensors_device_attr_num[i]): + idx = self.sensors_device_num * i + j + attr_name = self.sensors_device_attr_list[i][j] + position[idx] = dev.read_attribute(attr_name).value + + return position + + def set_state(self, state): + self.check_charge() + self.set_actuators(state) + self.state = state + + + def get_state(self): + self.check_charge() + state = self.get_actuators() + self.state = state + return state + + def set_actuators(self, actuators_val): + + for i in range(self.actuators_device_num): + dev = self.actuators_device_list[i] + for j in range(self.actuators_device_attr_num[i]): + idx = self.actuators_device_num * i + j + attr_name = self.actuators_device_attr_list[i][j] + attr_val = actuators_val[idx] + dev.write_attribute(attr_name, attr_val) + + time.sleep(self.pause) + pass + + def get_actuators(self): + attr_val = np.zeros(self.actuators_size) + for i in range(self.actuators_device_num): + dev = self.actuators_device_list[i] + for j in range(self.actuators_device_attr_num[i]): + idx = self.actuators_device_num * i + j + attr_name = self.actuators_device_attr_list[i][j] + attr_val[idx] = dev.read_attribute(attr_name).value + return attr_val + + def get_sensors(self): + attr_val = [] + + if self.system in ['fel', 'fel1', 'fel2']: + #if self.system == 'fel' or self.system == 'fel1' or self.system == 'fel2': + attr_val = np.zeros(self.sensors_size) + attr_val_seq = np.zeros((self.sensors_size, self.num_samples)) + for i in range(self.sensors_device_num): + dev = self.sensors_device_list[i] + for j in range(self.sensors_device_attr_num[i]): + idx = self.sensors_device_num * i + j + attr_name = self.sensors_device_attr_list[i][j] + attr_val_seq[idx] = dev.command_inout(attr_name, [0, int(self.num_samples)]) + attr_val[idx] = np.median(attr_val_seq[idx]) + + elif self.system == 'sequencer': + position = self.get_position() + screen_intensity = np.zeros(self.sensors_device_num) + for i in range(self.sensors_device_num): + screen_position = 
position[self.sensors_device_num * i:self.sensors_device_num * i + 2] + target_position = self.target_position[self.sensors_device_num * i:self.sensors_device_num * i + 2] + difference = screen_position - target_position + distance = np.sqrt(np.power(difference, 2)) + if any(distance > 0.1): + screen_intensity[i] = 0.0 + else: + den = 2 * np.power(0.04, 2) + screen_intensity[i] = np.exp(-np.sum(np.power(difference, 2)) / den) + attr_val = screen_intensity + #''' + elif self.system == 'eos': + attr_val = np.zeros(self.sensors_size) + attr_val_seq = np.zeros((self.sensors_size, self.num_samples)) + idx = 0 + for i in range(self.sensors_device_num): + dev = self.sensors_device_list[i] + for j in range(self.sensors_device_attr_num[i]): + # idx = self.sensors_device_num * i + j + attr_name = self.sensors_device_attr_list[i][j] + attr_val_seq[idx] = dev.command_inout(attr_name, [0, int(self.num_samples)]) + attr_val[idx] = np.median(attr_val_seq[idx]) + idx += 1 + #''' + return attr_val + + def get_intensity(self): + self.check_charge() + attr_val = self.get_sensors() + intensity = np.prod(attr_val) + self.intensity = intensity + return intensity + + def get_image(self): + self.check_charge() + attr_val = [] + for i in range(self.spectrometer_device_num): + dev = self.spectrometer_device_list[i] + for j in range(self.spectrometer_device_attr_num[i]): + # idx = self.spectrometer_device_num * i + j + attr_name = self.spectrometer_device_attr_list[i][j] + attr_val.append(dev.read_attribute(attr_name).value) + return attr_val[0] + + def get_security_check(self): + attr_val = [] + for i in range(self.security_device_num): + dev = self.security_device_list[i] + for j in range(self.spectrometer_device_attr_num[i]): + # idx = self.security_device_num * i + j + attr_name = self.security_device_attr_list[i][j] + attr_val.append(dev.read_attribute(attr_name).value) + return attr_val[0] + + def check_charge(self): + if self.system in ['fel', 'fel1', 'fel2']: + #if self.system == 'fel' or self.system == 'fel1' or self.system == 'fel2': + #if self.system in ['eos', 'fel']: + # print('\nSECURITY CHECK\n') + flag = 0 + charge = self.get_security_check() + #while charge < 100.: + while charge < self.security_threshold: + flag = 1 + print('\nwait...\n') + time.sleep(5) + charge = self.get_security_check() + + if flag: + print('FEL is coming back!\nWait 1 minute more...\n') + time.sleep(60) + + + +if __name__ == '__main__': + + # sequencer + # system = 'sequencer' + # path = '/home/niky/PycharmProjects/FERMI/devel/sequencer_new/configuration/' + + # fel + ''' + # system = 'eos' + system = 'fel2' + path = '/home/niky/FERMI/2020_10_06/configuration/' + conf_file = 'conf_'+system+'.json' + + filename = path+conf_file + + tng = TangoConnection(conf_file=filename) + ''' + diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_04_24_20_plot_model_0 b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_04_24_20_plot_model_0 new file mode 100644 index 0000000..906d670 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_04_24_20_plot_model_0 differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_04_24_20_plot_model_1 
b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_04_24_20_plot_model_1 new file mode 100644 index 0000000..c50aa14 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_04_24_20_plot_model_1 differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_04_24_21_plot_model_2 b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_04_24_21_plot_model_2 new file mode 100644 index 0000000..d13bf47 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_04_24_21_plot_model_2 differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_04_27_30_plot_model_0 b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_04_27_30_plot_model_0 new file mode 100644 index 0000000..d5e1ab6 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_04_27_30_plot_model_0 differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_04_27_31_plot_model_1 b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_04_27_31_plot_model_1 new file mode 100644 index 0000000..2e93055 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_04_27_31_plot_model_1 differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_04_27_31_plot_model_2 b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_04_27_31_plot_model_2 new file mode 100644 index 0000000..d2d6e5e Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_04_27_31_plot_model_2 differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_04_37_54_plot_model_0 b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_04_37_54_plot_model_0 new file mode 100644 index 0000000..a3a7ba1 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_04_37_54_plot_model_0 differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_04_37_54_plot_model_1 b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_04_37_54_plot_model_1 new file mode 100644 index 0000000..d34457d Binary files /dev/null and 
b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_04_37_54_plot_model_1 differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_04_37_54_plot_model_2 b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_04_37_54_plot_model_2 new file mode 100644 index 0000000..cf6d89e Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_04_37_54_plot_model_2 differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_00_52_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_00_52_training_observables new file mode 100644 index 0000000..d4d68c1 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_00_52_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_01_10_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_01_10_training_observables new file mode 100644 index 0000000..d2b9726 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_01_10_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_01_27_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_01_27_training_observables new file mode 100644 index 0000000..7b5d545 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_01_27_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_01_45_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_01_45_training_observables new file mode 100644 index 0000000..5be9e09 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_01_45_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_02_02_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_02_02_training_observables new file mode 100644 index 0000000..d9d750a Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_02_02_training_observables differ diff --git 
a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_02_20_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_02_20_training_observables new file mode 100644 index 0000000..3f5b1d4 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_02_20_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_02_37_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_02_37_training_observables new file mode 100644 index 0000000..25a45e4 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_02_37_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_02_54_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_02_54_training_observables new file mode 100644 index 0000000..f25af21 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_02_54_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_03_13_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_03_13_training_observables new file mode 100644 index 0000000..ba11d2b Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_03_13_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_03_32_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_03_32_training_observables new file mode 100644 index 0000000..f350bc6 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_03_32_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_03_51_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_03_51_training_observables new file mode 100644 index 0000000..303ee8e Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_03_51_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_05_16_training_observables 
b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_05_16_training_observables new file mode 100644 index 0000000..05ee47d Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_05_16_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_05_37_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_05_37_training_observables new file mode 100644 index 0000000..0b220f2 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_05_37_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_05_56_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_05_56_training_observables new file mode 100644 index 0000000..2941778 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_05_56_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_06_16_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_06_16_training_observables new file mode 100644 index 0000000..e29fe1e Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_06_16_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_06_35_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_06_35_training_observables new file mode 100644 index 0000000..564343a Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_06_35_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_06_57_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_06_57_training_observables new file mode 100644 index 0000000..b4a9d2d Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_06_57_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_07_18_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_07_18_training_observables new file mode 
100644 index 0000000..a662e03 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_07_18_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_07_38_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_07_38_training_observables new file mode 100644 index 0000000..5f2afdf Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_07_38_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_08_55_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_08_55_training_observables new file mode 100644 index 0000000..70e8524 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_08_55_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_09_12_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_09_12_training_observables new file mode 100644 index 0000000..8d04597 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_09_12_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_09_31_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_09_31_training_observables new file mode 100644 index 0000000..642e908 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_09_31_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_09_48_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_09_48_training_observables new file mode 100644 index 0000000..ca5508b Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_09_48_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_10_06_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_10_06_training_observables new file mode 100644 index 0000000..28f58b0 Binary files /dev/null and 
b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_10_06_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_10_23_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_10_23_training_observables new file mode 100644 index 0000000..29d2297 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_10_23_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_10_41_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_10_41_training_observables new file mode 100644 index 0000000..cacc8ad Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_10_41_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_10_59_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_10_59_training_observables new file mode 100644 index 0000000..4ba5f7c Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_10_59_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_11_16_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_11_16_training_observables new file mode 100644 index 0000000..9e85e8b Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_11_16_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_11_34_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_11_34_training_observables new file mode 100644 index 0000000..0cf553e Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_11_34_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_11_52_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_11_52_training_observables new file mode 100644 index 0000000..b32741f Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_11_52_training_observables differ diff --git 
a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_13_14_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_13_14_training_observables new file mode 100644 index 0000000..e4d5ab0 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_13_14_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_13_32_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_13_32_training_observables new file mode 100644 index 0000000..9c9c954 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_13_32_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_13_50_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_13_50_training_observables new file mode 100644 index 0000000..28cf5b9 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_13_50_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_14_08_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_14_08_training_observables new file mode 100644 index 0000000..7689018 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_14_08_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_14_26_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_14_26_training_observables new file mode 100644 index 0000000..0dbddc9 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_14_26_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_14_44_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_14_44_training_observables new file mode 100644 index 0000000..0701c16 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_14_44_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_15_01_training_observables 
b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_15_01_training_observables new file mode 100644 index 0000000..68ab000 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_15_01_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_15_19_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_15_19_training_observables new file mode 100644 index 0000000..a89d349 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_15_19_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_15_37_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_15_37_training_observables new file mode 100644 index 0000000..2deb613 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_15_37_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_17_04_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_17_04_training_observables new file mode 100644 index 0000000..db61367 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_17_04_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_17_22_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_17_22_training_observables new file mode 100644 index 0000000..4395c7b Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_17_22_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_17_40_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_17_40_training_observables new file mode 100644 index 0000000..20b1b3e Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_17_40_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_17_57_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_17_57_training_observables new file mode 
100644 index 0000000..3624ad6 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_17_57_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_18_15_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_18_15_training_observables new file mode 100644 index 0000000..d28a534 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_18_15_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_18_33_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_18_33_training_observables new file mode 100644 index 0000000..7169e91 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_18_33_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_18_51_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_18_51_training_observables new file mode 100644 index 0000000..62d9d5d Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_18_51_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_19_10_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_19_10_training_observables new file mode 100644 index 0000000..cff1b11 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_19_10_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_19_27_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_19_27_training_observables new file mode 100644 index 0000000..55d0446 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_19_27_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_19_45_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_19_45_training_observables new file mode 100644 index 0000000..f3f5196 Binary files /dev/null and 
b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_19_45_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_20_03_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_20_03_training_observables new file mode 100644 index 0000000..a3dcd41 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_20_03_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_20_21_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_20_21_training_observables new file mode 100644 index 0000000..5ce7157 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_20_21_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_20_39_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_20_39_training_observables new file mode 100644 index 0000000..8e18d39 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_20_39_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_20_56_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_20_56_training_observables new file mode 100644 index 0000000..81017d7 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_20_56_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_21_14_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_21_14_training_observables new file mode 100644 index 0000000..a9665fa Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_21_14_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_21_33_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_21_33_training_observables new file mode 100644 index 0000000..2c5f95c Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_21_33_training_observables differ diff --git 
a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_21_52_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_21_52_training_observables new file mode 100644 index 0000000..a5ed6bc Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_21_52_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_22_10_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_22_10_training_observables new file mode 100644 index 0000000..ac4ea1c Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_22_10_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_22_28_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_22_28_training_observables new file mode 100644 index 0000000..6b0ebff Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_22_28_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_22_45_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_22_45_training_observables new file mode 100644 index 0000000..134508d Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_22_45_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_23_03_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_23_03_training_observables new file mode 100644 index 0000000..1f0d8ab Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_23_03_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_23_21_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_23_21_training_observables new file mode 100644 index 0000000..2c1fc5a Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_23_21_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_23_39_training_observables 
b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_23_39_training_observables new file mode 100644 index 0000000..ef24865 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_23_39_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_23_57_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_23_57_training_observables new file mode 100644 index 0000000..20f281b Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_23_57_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_24_15_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_24_15_training_observables new file mode 100644 index 0000000..b6db35f Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_24_15_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_24_33_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_24_33_training_observables new file mode 100644 index 0000000..000d5f7 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_24_33_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_24_51_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_24_51_training_observables new file mode 100644 index 0000000..91a073f Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_24_51_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_25_09_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_25_09_training_observables new file mode 100644 index 0000000..0d07cc0 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_25_09_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_25_27_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_25_27_training_observables new file mode 
100644 index 0000000..6ba48ac Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_25_27_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_25_45_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_25_45_training_observables new file mode 100644 index 0000000..911acdf Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_25_45_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_26_03_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_26_03_training_observables new file mode 100644 index 0000000..b1a2f87 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_26_03_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_26_21_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_26_21_training_observables new file mode 100644 index 0000000..5b67fef Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_26_21_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_26_39_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_26_39_training_observables new file mode 100644 index 0000000..48cdcdc Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_26_39_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_26_57_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_26_57_training_observables new file mode 100644 index 0000000..8e5061a Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_26_57_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_27_15_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_27_15_training_observables new file mode 100644 index 0000000..c82649c Binary files /dev/null and 
b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_27_15_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_27_32_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_27_32_training_observables new file mode 100644 index 0000000..20686fc Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_27_32_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_27_48_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_27_48_training_observables new file mode 100644 index 0000000..8a72bad Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_27_48_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_28_05_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_28_05_training_observables new file mode 100644 index 0000000..9387a14 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_28_05_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_28_23_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_28_23_training_observables new file mode 100644 index 0000000..d5efbe8 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_28_23_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_28_43_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_28_43_training_observables new file mode 100644 index 0000000..32b77e8 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_28_43_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_29_00_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_29_00_training_observables new file mode 100644 index 0000000..1ef7ab6 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_29_00_training_observables differ diff --git 
a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_29_18_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_29_18_training_observables new file mode 100644 index 0000000..f5f7ebe Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_29_18_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_29_35_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_29_35_training_observables new file mode 100644 index 0000000..22bb185 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_29_35_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_29_52_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_29_52_training_observables new file mode 100644 index 0000000..014f98b Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_29_52_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_33_29_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_33_29_training_observables new file mode 100644 index 0000000..374425b Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_33_29_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_33_47_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_33_47_training_observables new file mode 100644 index 0000000..39083e7 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_33_47_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_34_04_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_34_04_training_observables new file mode 100644 index 0000000..72e380e Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_34_04_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_34_21_training_observables 
b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_34_21_training_observables new file mode 100644 index 0000000..a8cb2a0 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_34_21_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_34_39_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_34_39_training_observables new file mode 100644 index 0000000..5725310 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_34_39_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_34_56_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_34_56_training_observables new file mode 100644 index 0000000..d400d9f Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_34_56_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_36_25_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_36_25_training_observables new file mode 100644 index 0000000..b710def Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_36_25_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_36_42_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_36_42_training_observables new file mode 100644 index 0000000..1eccfe1 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_36_42_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_36_59_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_36_59_training_observables new file mode 100644 index 0000000..59cd801 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_36_59_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_37_16_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_37_16_training_observables new file mode 
100644 index 0000000..cfe2751 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_37_16_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_37_33_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_37_33_training_observables new file mode 100644 index 0000000..84c078e Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_37_33_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_37_50_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_37_50_training_observables new file mode 100644 index 0000000..1254cd2 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_37_50_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_38_07_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_38_07_training_observables new file mode 100644 index 0000000..c1e0b88 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_38_07_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_41_40_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_41_40_training_observables new file mode 100644 index 0000000..a083427 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_41_40_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_41_56_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_41_56_training_observables new file mode 100644 index 0000000..3829245 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_41_56_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_42_13_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_42_13_training_observables new file mode 100644 index 0000000..3ddb79f Binary files /dev/null and 
b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_42_13_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_42_30_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_42_30_training_observables new file mode 100644 index 0000000..a7d3651 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_42_30_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_42_46_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_42_46_training_observables new file mode 100644 index 0000000..f04d9ec Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_42_46_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_43_03_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_43_03_training_observables new file mode 100644 index 0000000..d2e49fc Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_43_03_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_43_20_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_43_20_training_observables new file mode 100644 index 0000000..75ae8ce Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_43_20_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_43_37_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_43_37_training_observables new file mode 100644 index 0000000..7cf08b2 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_43_37_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_43_54_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_43_54_training_observables new file mode 100644 index 0000000..928f23b Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_43_54_training_observables differ diff --git 
a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_44_11_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_44_11_training_observables new file mode 100644 index 0000000..c78075a Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_44_11_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_44_28_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_44_28_training_observables new file mode 100644 index 0000000..16b84ff Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_44_28_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_44_45_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_44_45_training_observables new file mode 100644 index 0000000..505a675 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_44_45_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_45_02_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_45_02_training_observables new file mode 100644 index 0000000..dd4fb5f Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_45_02_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_45_19_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_45_19_training_observables new file mode 100644 index 0000000..b305724 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_45_19_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_45_36_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_45_36_training_observables new file mode 100644 index 0000000..c4d4f00 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_45_36_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_45_54_training_observables 
b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_45_54_training_observables new file mode 100644 index 0000000..98b96bd Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_45_54_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_46_11_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_46_11_training_observables new file mode 100644 index 0000000..73bf076 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_46_11_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_54_20_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_54_20_training_observables new file mode 100644 index 0000000..a0f76d9 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_54_20_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_54_36_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_54_36_training_observables new file mode 100644 index 0000000..fc3a496 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_54_36_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_54_53_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_54_53_training_observables new file mode 100644 index 0000000..910ba4f Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_54_53_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_55_10_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_55_10_training_observables new file mode 100644 index 0000000..87e7415 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_55_10_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_55_26_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_55_26_training_observables new file mode 
100644 index 0000000..9766634 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_55_26_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_55_43_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_55_43_training_observables new file mode 100644 index 0000000..9babe29 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_55_43_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_56_00_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_56_00_training_observables new file mode 100644 index 0000000..1e27870 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_56_00_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_56_16_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_56_16_training_observables new file mode 100644 index 0000000..e5520c9 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_56_16_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_56_33_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_56_33_training_observables new file mode 100644 index 0000000..030e5c0 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_56_33_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_56_50_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_56_50_training_observables new file mode 100644 index 0000000..177d8b0 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_56_50_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_57_06_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_57_06_training_observables new file mode 100644 index 0000000..fd449e2 Binary files /dev/null and 
b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_57_06_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_57_23_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_57_23_training_observables new file mode 100644 index 0000000..eb8ecc7 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_05_57_23_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_06_00_11_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_06_00_11_training_observables new file mode 100644 index 0000000..dde5aaa Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_06_00_11_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_06_00_29_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_06_00_29_training_observables new file mode 100644 index 0000000..47d277a Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_06_00_29_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_06_00_46_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_06_00_46_training_observables new file mode 100644 index 0000000..0a4cb84 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_06_00_46_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_06_01_04_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_06_01_04_training_observables new file mode 100644 index 0000000..3f232a3 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_06_01_04_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_06_01_21_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_06_01_21_training_observables new file mode 100644 index 0000000..f29d6aa Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_06_01_21_training_observables differ diff --git 
a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_06_01_38_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_06_01_38_training_observables new file mode 100644 index 0000000..f8dcc57 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_06_01_38_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_06_01_56_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_06_01_56_training_observables new file mode 100644 index 0000000..9a16fa6 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_06_01_56_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_06_02_13_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_06_02_13_training_observables new file mode 100644 index 0000000..a4db3e8 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_06_02_13_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_06_02_30_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_06_02_30_training_observables new file mode 100644 index 0000000..2119d3f Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_06_02_30_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_06_02_48_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_06_02_48_training_observables new file mode 100644 index 0000000..90f2e7f Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_06_02_48_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_06_03_05_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_06_03_05_training_observables new file mode 100644 index 0000000..a97521d Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_06_03_05_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_06_03_22_training_observables 
b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_06_03_22_training_observables new file mode 100644 index 0000000..084e311 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_06_03_22_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_06_03_40_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_06_03_40_training_observables new file mode 100644 index 0000000..0cf38a5 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_06_03_40_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_06_08_09_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_06_08_09_training_observables new file mode 100644 index 0000000..dd2e068 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_06_08_09_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_06_08_26_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_06_08_26_training_observables new file mode 100644 index 0000000..433f837 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_06_08_26_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_06_08_42_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_06_08_42_training_observables new file mode 100644 index 0000000..d9f252c Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_06_08_42_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_06_08_59_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_06_08_59_training_observables new file mode 100644 index 0000000..36a92ca Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_06_08_59_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_06_09_16_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_06_09_16_training_observables new file mode 
100644 index 0000000..45586d2 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_06_09_16_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_06_09_33_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_06_09_33_training_observables new file mode 100644 index 0000000..a1403fc Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_06_09_33_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_06_09_49_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_06_09_49_training_observables new file mode 100644 index 0000000..a436414 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_06_09_49_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_06_10_07_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_06_10_07_training_observables new file mode 100644 index 0000000..7c9532c Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_06_10_07_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_06_10_24_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_06_10_24_training_observables new file mode 100644 index 0000000..b5a114c Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_06_10_24_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_06_10_41_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_06_10_41_training_observables new file mode 100644 index 0000000..87b24ec Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_06_10_41_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_06_10_58_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_06_10_58_training_observables new file mode 100644 index 0000000..2784995 Binary files /dev/null and 
b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_06_10_58_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_06_11_15_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_06_11_15_training_observables new file mode 100644 index 0000000..5b54ab4 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_06_11_15_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_06_11_32_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_06_11_32_training_observables new file mode 100644 index 0000000..aff83d8 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_06_11_32_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_06_11_49_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_06_11_49_training_observables new file mode 100644 index 0000000..a7f4b14 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_06_11_49_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_06_12_06_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_06_12_06_training_observables new file mode 100644 index 0000000..5a64cfd Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_06_12_06_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_06_12_23_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_06_12_23_training_observables new file mode 100644 index 0000000..753f759 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_06_12_23_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_06_12_40_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_06_12_40_training_observables new file mode 100644 index 0000000..35aa973 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_06_12_40_training_observables differ diff --git 
a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_06_12_57_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_06_12_57_training_observables new file mode 100644 index 0000000..3ca451f Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_06_12_57_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_06_13_15_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_06_13_15_training_observables new file mode 100644 index 0000000..92221b3 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_06_13_15_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_06_13_33_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_06_13_33_training_observables new file mode 100644 index 0000000..1c915b3 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_06_13_33_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_06_13_51_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_06_13_51_training_observables new file mode 100644 index 0000000..f979705 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_06_13_51_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_06_14_08_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_06_14_08_training_observables new file mode 100644 index 0000000..aec67d0 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_06_14_08_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_06_14_25_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_06_14_25_training_observables new file mode 100644 index 0000000..b31b054 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_06_14_25_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_06_14_42_training_observables 
b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_06_14_42_training_observables new file mode 100644 index 0000000..18f18ff Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_06_14_42_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_06_14_59_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_06_14_59_training_observables new file mode 100644 index 0000000..06e618b Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_06_14_59_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_06_15_16_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_06_15_16_training_observables new file mode 100644 index 0000000..8c79918 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_06_15_16_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_06_15_33_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_06_15_33_training_observables new file mode 100644 index 0000000..4497c06 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_06_15_33_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_06_15_50_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_06_15_50_training_observables new file mode 100644 index 0000000..76b4e8c Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_06_15_50_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_06_16_08_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_06_16_08_training_observables new file mode 100644 index 0000000..c11eb25 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_06_16_08_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_06_16_25_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_06_16_25_training_observables new file mode 
100644 index 0000000..206d94e Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_06_16_25_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_06_16_42_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_06_16_42_training_observables new file mode 100644 index 0000000..f767456 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_06_16_42_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_06_16_59_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_06_16_59_training_observables new file mode 100644 index 0000000..a169c18 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_06_16_59_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_06_17_16_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_06_17_16_training_observables new file mode 100644 index 0000000..8c9c8d8 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_06_17_16_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_06_17_33_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_06_17_33_training_observables new file mode 100644 index 0000000..a6cd70a Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_06_17_33_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_06_17_50_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_06_17_50_training_observables new file mode 100644 index 0000000..a498527 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_06_17_50_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_06_18_07_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_06_18_07_training_observables new file mode 100644 index 0000000..0039384 Binary files /dev/null and 
b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_06_18_07_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_06_18_25_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_06_18_25_training_observables new file mode 100644 index 0000000..91b12fb Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_06_18_25_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_06_18_42_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_06_18_42_training_observables new file mode 100644 index 0000000..a5c2fa5 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_06_18_42_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_06_18_59_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_06_18_59_training_observables new file mode 100644 index 0000000..515a41c Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_06_18_59_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_06_19_16_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_06_19_16_training_observables new file mode 100644 index 0000000..c6286e2 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_06_19_16_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_06_19_33_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_06_19_33_training_observables new file mode 100644 index 0000000..f949495 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_06_19_33_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_06_19_50_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_06_19_50_training_observables new file mode 100644 index 0000000..0085999 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_06_19_50_training_observables differ diff --git 
a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_06_20_07_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_06_20_07_training_observables new file mode 100644 index 0000000..23b9f98 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_06_20_07_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_06_20_24_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_06_20_24_training_observables new file mode 100644 index 0000000..a296980 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_06_20_24_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_06_20_41_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_06_20_41_training_observables new file mode 100644 index 0000000..d9af10f Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_06_20_41_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_06_20_58_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_06_20_58_training_observables new file mode 100644 index 0000000..b7c3c84 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_06_20_58_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_06_21_15_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_06_21_15_training_observables new file mode 100644 index 0000000..913b754 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_06_21_15_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_06_21_32_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_06_21_32_training_observables new file mode 100644 index 0000000..34c311e Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_06_21_32_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_06_21_49_training_observables 
b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_06_21_49_training_observables new file mode 100644 index 0000000..e6abd16 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_06_21_49_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_06_22_07_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_06_22_07_training_observables new file mode 100644 index 0000000..a480e88 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_11-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_100/11_06_06_22_07_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_26_48_plot_model_0 b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_26_48_plot_model_0 new file mode 100644 index 0000000..2b15824 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_26_48_plot_model_0 differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_26_49_plot_model_1 b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_26_49_plot_model_1 new file mode 100644 index 0000000..9344102 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_26_49_plot_model_1 differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_26_49_plot_model_2 b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_26_49_plot_model_2 new file mode 100644 index 0000000..57687c8 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_26_49_plot_model_2 differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_32_25_plot_model_0 b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_32_25_plot_model_0 new file mode 100644 index 0000000..99f22a2 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_32_25_plot_model_0 differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_32_26_plot_model_1 b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_32_26_plot_model_1 new file mode 100644 index 0000000..5def5c8 Binary files /dev/null and 
b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_32_26_plot_model_1 differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_32_26_plot_model_2 b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_32_26_plot_model_2 new file mode 100644 index 0000000..69a2a77 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_32_26_plot_model_2 differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_32_58_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_32_58_training_observables new file mode 100644 index 0000000..c1201dd Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_32_58_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_33_14_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_33_14_training_observables new file mode 100644 index 0000000..48c6a8a Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_33_14_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_33_31_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_33_31_training_observables new file mode 100644 index 0000000..09ca32e Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_33_31_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_33_48_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_33_48_training_observables new file mode 100644 index 0000000..abaa39f Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_33_48_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_34_05_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_34_05_training_observables new file mode 100644 index 0000000..62fd357 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_34_05_training_observables differ diff --git 
a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_34_22_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_34_22_training_observables new file mode 100644 index 0000000..e0ecce5 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_34_22_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_34_39_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_34_39_training_observables new file mode 100644 index 0000000..946a9b9 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_34_39_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_34_56_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_34_56_training_observables new file mode 100644 index 0000000..ba8b740 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_34_56_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_35_46_plot_model_0 b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_35_46_plot_model_0 new file mode 100644 index 0000000..031150d Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_35_46_plot_model_0 differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_35_46_plot_model_1 b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_35_46_plot_model_1 new file mode 100644 index 0000000..568f87d Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_35_46_plot_model_1 differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_35_47_plot_model_2 b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_35_47_plot_model_2 new file mode 100644 index 0000000..02b33d9 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_35_47_plot_model_2 differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_36_12_training_observables 
b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_36_12_training_observables new file mode 100644 index 0000000..1ebb6d1 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_36_12_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_36_29_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_36_29_training_observables new file mode 100644 index 0000000..056cd8c Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_36_29_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_36_46_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_36_46_training_observables new file mode 100644 index 0000000..6f75dc3 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_36_46_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_37_03_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_37_03_training_observables new file mode 100644 index 0000000..bf34073 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_37_03_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_37_21_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_37_21_training_observables new file mode 100644 index 0000000..8e402f3 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_37_21_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_37_38_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_37_38_training_observables new file mode 100644 index 0000000..449b4c6 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_37_38_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_38_34_plot_model_0 b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_38_34_plot_model_0 new file mode 100644 index 
0000000..4182883 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_38_34_plot_model_0 differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_38_34_plot_model_1 b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_38_34_plot_model_1 new file mode 100644 index 0000000..54e9598 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_38_34_plot_model_1 differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_38_35_plot_model_2 b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_38_35_plot_model_2 new file mode 100644 index 0000000..44f9960 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_38_35_plot_model_2 differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_38_56_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_38_56_training_observables new file mode 100644 index 0000000..552261d Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_38_56_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_39_13_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_39_13_training_observables new file mode 100644 index 0000000..fc70678 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_39_13_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_39_29_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_39_29_training_observables new file mode 100644 index 0000000..a38a3c7 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_39_29_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_39_46_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_39_46_training_observables new file mode 100644 index 0000000..47c18b3 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_39_46_training_observables differ diff --git 
a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_40_03_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_40_03_training_observables new file mode 100644 index 0000000..091ef66 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_40_03_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_40_20_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_40_20_training_observables new file mode 100644 index 0000000..45d0415 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_40_20_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_41_08_plot_model_0 b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_41_08_plot_model_0 new file mode 100644 index 0000000..48b7f8a Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_41_08_plot_model_0 differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_41_09_plot_model_1 b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_41_09_plot_model_1 new file mode 100644 index 0000000..5f9ee21 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_41_09_plot_model_1 differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_41_09_plot_model_2 b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_41_09_plot_model_2 new file mode 100644 index 0000000..9952288 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_41_09_plot_model_2 differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_41_30_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_41_30_training_observables new file mode 100644 index 0000000..f51e5e5 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_41_30_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_41_47_training_observables 
b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_41_47_training_observables new file mode 100644 index 0000000..577a3a9 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_41_47_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_42_05_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_42_05_training_observables new file mode 100644 index 0000000..44691f7 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_42_05_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_42_22_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_42_22_training_observables new file mode 100644 index 0000000..167686b Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_42_22_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_42_43_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_42_43_training_observables new file mode 100644 index 0000000..6b149a1 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_42_43_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_43_04_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_43_04_training_observables new file mode 100644 index 0000000..983cd41 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_43_04_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_43_24_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_43_24_training_observables new file mode 100644 index 0000000..c0be94f Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_43_24_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_43_44_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_43_44_training_observables new file mode 
100644 index 0000000..436ea2f Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_43_44_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_44_04_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_44_04_training_observables new file mode 100644 index 0000000..4e0fba5 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_44_04_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_44_24_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_44_24_training_observables new file mode 100644 index 0000000..a501455 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_44_24_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_44_44_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_44_44_training_observables new file mode 100644 index 0000000..966449c Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_44_44_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_45_03_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_45_03_training_observables new file mode 100644 index 0000000..1a71b5c Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_45_03_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_45_23_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_45_23_training_observables new file mode 100644 index 0000000..dd12139 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_45_23_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_46_22_plot_model_0 b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_46_22_plot_model_0 new file mode 100644 index 0000000..86b177d Binary files /dev/null and 
b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_46_22_plot_model_0 differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_46_22_plot_model_1 b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_46_22_plot_model_1 new file mode 100644 index 0000000..e9d1ab1 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_46_22_plot_model_1 differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_46_22_plot_model_2 b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_46_22_plot_model_2 new file mode 100644 index 0000000..9c74a0a Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_46_22_plot_model_2 differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_46_48_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_46_48_training_observables new file mode 100644 index 0000000..b7bc937 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_46_48_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_47_08_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_47_08_training_observables new file mode 100644 index 0000000..6abaed5 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_47_08_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_47_28_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_47_28_training_observables new file mode 100644 index 0000000..b29fa98 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_47_28_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_47_48_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_47_48_training_observables new file mode 100644 index 0000000..de52766 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_47_48_training_observables differ diff --git 
a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_48_08_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_48_08_training_observables new file mode 100644 index 0000000..de92b28 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_48_08_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_48_28_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_48_28_training_observables new file mode 100644 index 0000000..6a2ed05 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_48_28_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_48_48_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_48_48_training_observables new file mode 100644 index 0000000..70b9cb3 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_48_48_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_49_08_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_49_08_training_observables new file mode 100644 index 0000000..e62a771 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_49_08_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_49_28_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_49_28_training_observables new file mode 100644 index 0000000..dcca402 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_49_28_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_49_48_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_49_48_training_observables new file mode 100644 index 0000000..f1adc92 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_49_48_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_50_08_training_observables 
b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_50_08_training_observables new file mode 100644 index 0000000..88ce1f7 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_50_08_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_50_28_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_50_28_training_observables new file mode 100644 index 0000000..c30da8c Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_50_28_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_50_48_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_50_48_training_observables new file mode 100644 index 0000000..83d802c Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_50_48_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_51_08_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_51_08_training_observables new file mode 100644 index 0000000..3396974 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_51_08_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_52_02_plot_model_0 b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_52_02_plot_model_0 new file mode 100644 index 0000000..edfd508 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_52_02_plot_model_0 differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_52_02_plot_model_1 b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_52_02_plot_model_1 new file mode 100644 index 0000000..1a7dfb6 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_52_02_plot_model_1 differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_52_03_plot_model_2 b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_52_03_plot_model_2 new file mode 100644 index 0000000..acbba56 Binary files /dev/null and 
b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_52_03_plot_model_2 differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_52_31_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_52_31_training_observables new file mode 100644 index 0000000..3dd3bc1 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_52_31_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_52_50_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_52_50_training_observables new file mode 100644 index 0000000..8570faf Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_52_50_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_53_10_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_53_10_training_observables new file mode 100644 index 0000000..93675fb Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_53_10_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_53_30_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_53_30_training_observables new file mode 100644 index 0000000..e4da378 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_53_30_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_53_49_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_53_49_training_observables new file mode 100644 index 0000000..ff3a7a2 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_53_49_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_54_09_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_54_09_training_observables new file mode 100644 index 0000000..b442adb Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_54_09_training_observables differ diff --git 
a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_55_19_plot_model_0 b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_55_19_plot_model_0 new file mode 100644 index 0000000..8216514 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_55_19_plot_model_0 differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_55_19_plot_model_1 b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_55_19_plot_model_1 new file mode 100644 index 0000000..f670f93 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_55_19_plot_model_1 differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_55_20_plot_model_2 b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_55_20_plot_model_2 new file mode 100644 index 0000000..3e79f59 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_55_20_plot_model_2 differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_55_46_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_55_46_training_observables new file mode 100644 index 0000000..792ba6b Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_55_46_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_56_05_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_56_05_training_observables new file mode 100644 index 0000000..fae5c66 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_56_05_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_56_25_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_56_25_training_observables new file mode 100644 index 0000000..2cd0746 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_56_25_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_56_45_training_observables 
b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_56_45_training_observables new file mode 100644 index 0000000..df49e5a Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_56_45_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_57_04_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_57_04_training_observables new file mode 100644 index 0000000..bfbcc6a Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_57_04_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_57_24_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_57_24_training_observables new file mode 100644 index 0000000..8a9db70 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_57_24_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_57_44_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_57_44_training_observables new file mode 100644 index 0000000..8ed3cc7 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_57_44_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_59_12_plot_model_0 b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_59_12_plot_model_0 new file mode 100644 index 0000000..70be60e Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_59_12_plot_model_0 differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_59_12_plot_model_1 b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_59_12_plot_model_1 new file mode 100644 index 0000000..3b9e2bf Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_59_12_plot_model_1 differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_59_13_plot_model_2 b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_59_13_plot_model_2 new file mode 100644 index 0000000..874c535 Binary files /dev/null and 
b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_59_13_plot_model_2 differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_59_40_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_59_40_training_observables new file mode 100644 index 0000000..a3bf9d8 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_06_59_40_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_00_00_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_00_00_training_observables new file mode 100644 index 0000000..120d9cb Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_00_00_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_00_20_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_00_20_training_observables new file mode 100644 index 0000000..4aba805 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_00_20_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_00_39_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_00_39_training_observables new file mode 100644 index 0000000..af966a4 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_00_39_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_00_59_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_00_59_training_observables new file mode 100644 index 0000000..a03fae2 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_00_59_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_01_19_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_01_19_training_observables new file mode 100644 index 0000000..1fd7488 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_01_19_training_observables differ diff --git 
a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_01_39_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_01_39_training_observables new file mode 100644 index 0000000..e4bc30c Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_01_39_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_03_40_plot_model_0 b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_03_40_plot_model_0 new file mode 100644 index 0000000..aeeb762 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_03_40_plot_model_0 differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_03_40_plot_model_1 b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_03_40_plot_model_1 new file mode 100644 index 0000000..8a37435 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_03_40_plot_model_1 differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_03_41_plot_model_2 b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_03_41_plot_model_2 new file mode 100644 index 0000000..c866cc5 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_03_41_plot_model_2 differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_04_06_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_04_06_training_observables new file mode 100644 index 0000000..e1ee469 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_04_06_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_04_25_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_04_25_training_observables new file mode 100644 index 0000000..4c72ab6 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_04_25_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_04_45_training_observables 
b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_04_45_training_observables new file mode 100644 index 0000000..83055b0 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_04_45_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_05_05_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_05_05_training_observables new file mode 100644 index 0000000..b84245e Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_05_05_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_05_24_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_05_24_training_observables new file mode 100644 index 0000000..625dcd4 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_05_24_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_05_44_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_05_44_training_observables new file mode 100644 index 0000000..9999044 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_05_44_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_06_04_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_06_04_training_observables new file mode 100644 index 0000000..85a8a36 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_06_04_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_06_24_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_06_24_training_observables new file mode 100644 index 0000000..970a5e4 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_06_24_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_06_43_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_06_43_training_observables new file mode 
100644 index 0000000..38d2be0 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_06_43_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_07_03_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_07_03_training_observables new file mode 100644 index 0000000..df7603d Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_07_03_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_07_23_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_07_23_training_observables new file mode 100644 index 0000000..658ca2a Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_07_23_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_07_43_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_07_43_training_observables new file mode 100644 index 0000000..ef043cd Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_07_43_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_08_02_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_08_02_training_observables new file mode 100644 index 0000000..b53997f Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_08_02_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_08_22_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_08_22_training_observables new file mode 100644 index 0000000..091fc87 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_08_22_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_08_42_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_08_42_training_observables new file mode 100644 index 0000000..b982d49 Binary files /dev/null and 
b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_08_42_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_09_02_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_09_02_training_observables new file mode 100644 index 0000000..165a841 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_09_02_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_09_22_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_09_22_training_observables new file mode 100644 index 0000000..a3fb616 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_09_22_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_09_42_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_09_42_training_observables new file mode 100644 index 0000000..e3a3a01 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_09_42_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_10_01_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_10_01_training_observables new file mode 100644 index 0000000..ff2149a Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_10_01_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_10_21_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_10_21_training_observables new file mode 100644 index 0000000..73e4019 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_10_21_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_10_41_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_10_41_training_observables new file mode 100644 index 0000000..505b789 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_10_41_training_observables differ diff --git 
a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_11_01_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_11_01_training_observables new file mode 100644 index 0000000..c3736e7 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_11_01_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_12_24_plot_model_0 b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_12_24_plot_model_0 new file mode 100644 index 0000000..a51ce30 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_12_24_plot_model_0 differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_12_24_plot_model_1 b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_12_24_plot_model_1 new file mode 100644 index 0000000..7d3bc8c Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_12_24_plot_model_1 differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_12_24_plot_model_2 b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_12_24_plot_model_2 new file mode 100644 index 0000000..1859d6e Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_12_24_plot_model_2 differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_14_14_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_14_14_training_observables new file mode 100644 index 0000000..26db334 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_14_14_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_14_34_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_14_34_training_observables new file mode 100644 index 0000000..cdb4d24 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_14_34_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_14_54_training_observables 
b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_14_54_training_observables new file mode 100644 index 0000000..a2af686 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_14_54_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_15_14_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_15_14_training_observables new file mode 100644 index 0000000..52d70d8 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_15_14_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_15_34_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_15_34_training_observables new file mode 100644 index 0000000..d03dbb4 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_15_34_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_15_54_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_15_54_training_observables new file mode 100644 index 0000000..bfa3779 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_15_54_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_16_14_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_16_14_training_observables new file mode 100644 index 0000000..1583066 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_16_14_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_16_34_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_16_34_training_observables new file mode 100644 index 0000000..179c100 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_16_34_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_17_36_plot_model_0 b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_17_36_plot_model_0 new file mode 100644 index 
0000000..970f3e9 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_17_36_plot_model_0 differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_17_36_plot_model_1 b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_17_36_plot_model_1 new file mode 100644 index 0000000..c3d6ed4 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_17_36_plot_model_1 differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_17_37_plot_model_2 b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_17_37_plot_model_2 new file mode 100644 index 0000000..0ab5d3c Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_17_37_plot_model_2 differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_18_26_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_18_26_training_observables new file mode 100644 index 0000000..89b852a Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_18_26_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_18_46_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_18_46_training_observables new file mode 100644 index 0000000..a5ea44e Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_18_46_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_19_06_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_19_06_training_observables new file mode 100644 index 0000000..e9a3795 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_19_06_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_19_26_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_19_26_training_observables new file mode 100644 index 0000000..e282c08 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_19_26_training_observables differ diff --git 
a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_19_46_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_19_46_training_observables new file mode 100644 index 0000000..cb0c4fc Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_19_46_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_20_06_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_20_06_training_observables new file mode 100644 index 0000000..480b1bc Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_20_06_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_20_26_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_20_26_training_observables new file mode 100644 index 0000000..d030ecf Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_20_26_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_20_46_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_20_46_training_observables new file mode 100644 index 0000000..110545f Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_20_46_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_21_06_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_21_06_training_observables new file mode 100644 index 0000000..e6afb80 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_21_06_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_21_26_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_21_26_training_observables new file mode 100644 index 0000000..14f122a Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_21_26_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_21_46_training_observables 
b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_21_46_training_observables new file mode 100644 index 0000000..d7d0a53 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_21_46_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_22_06_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_22_06_training_observables new file mode 100644 index 0000000..25dbf97 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_22_06_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_22_26_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_22_26_training_observables new file mode 100644 index 0000000..d57a7e9 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_22_26_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_22_46_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_22_46_training_observables new file mode 100644 index 0000000..5865cd4 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_22_46_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_23_06_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_23_06_training_observables new file mode 100644 index 0000000..4f8516f Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_23_06_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_23_26_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_23_26_training_observables new file mode 100644 index 0000000..49429d8 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_23_26_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_24_21_plot_model_0 b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_24_21_plot_model_0 new file mode 100644 index 
0000000..0c2a3eb Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_24_21_plot_model_0 differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_24_22_plot_model_1 b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_24_22_plot_model_1 new file mode 100644 index 0000000..8c14535 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_24_22_plot_model_1 differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_24_22_plot_model_2 b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_24_22_plot_model_2 new file mode 100644 index 0000000..d9af404 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_24_22_plot_model_2 differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_25_01_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_25_01_training_observables new file mode 100644 index 0000000..16e36c2 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_25_01_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_25_21_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_25_21_training_observables new file mode 100644 index 0000000..78bf384 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_25_21_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_25_40_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_25_40_training_observables new file mode 100644 index 0000000..3820af1 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_25_40_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_26_00_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_26_00_training_observables new file mode 100644 index 0000000..1f57bb0 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_26_00_training_observables differ diff --git 
a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_26_20_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_26_20_training_observables new file mode 100644 index 0000000..9d0aae1 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_26_20_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_26_40_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_26_40_training_observables new file mode 100644 index 0000000..b5b48f4 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_26_40_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_27_00_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_27_00_training_observables new file mode 100644 index 0000000..5b7691d Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_27_00_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_27_19_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_27_19_training_observables new file mode 100644 index 0000000..b1f6fb3 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_27_19_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_27_39_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_27_39_training_observables new file mode 100644 index 0000000..fa2cc1b Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_27_39_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_27_59_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_27_59_training_observables new file mode 100644 index 0000000..b52bece Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_27_59_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_28_19_training_observables 
b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_28_19_training_observables new file mode 100644 index 0000000..c18e575 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_28_19_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_28_39_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_28_39_training_observables new file mode 100644 index 0000000..2abb85c Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_28_39_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_28_59_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_28_59_training_observables new file mode 100644 index 0000000..6d9a4f0 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_28_59_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_29_19_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_29_19_training_observables new file mode 100644 index 0000000..f355331 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_29_19_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_29_39_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_29_39_training_observables new file mode 100644 index 0000000..65e74c6 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_29_39_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_29_59_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_29_59_training_observables new file mode 100644 index 0000000..b253683 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_29_59_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_30_19_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_30_19_training_observables new file mode 
100644 index 0000000..7f2d322 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_30_19_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_30_39_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_30_39_training_observables new file mode 100644 index 0000000..2c21bc6 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_30_39_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_30_59_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_30_59_training_observables new file mode 100644 index 0000000..52cc2f1 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_30_59_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_31_19_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_31_19_training_observables new file mode 100644 index 0000000..c544a5b Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_31_19_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_31_39_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_31_39_training_observables new file mode 100644 index 0000000..400f621 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_31_39_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_32_58_plot_model_0 b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_32_58_plot_model_0 new file mode 100644 index 0000000..ba2a616 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_32_58_plot_model_0 differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_32_59_plot_model_1 b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_32_59_plot_model_1 new file mode 100644 index 0000000..b85ae4c Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_32_59_plot_model_1 differ 
diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_32_59_plot_model_2 b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_32_59_plot_model_2 new file mode 100644 index 0000000..cf6bf8f Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_32_59_plot_model_2 differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_34_13_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_34_13_training_observables new file mode 100644 index 0000000..d3f8c67 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_34_13_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_34_33_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_34_33_training_observables new file mode 100644 index 0000000..768f7c0 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_34_33_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_34_53_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_34_53_training_observables new file mode 100644 index 0000000..bb7ac2e Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_34_53_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_35_13_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_35_13_training_observables new file mode 100644 index 0000000..c6a039e Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_35_13_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_35_33_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_35_33_training_observables new file mode 100644 index 0000000..16ae595 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_35_33_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_35_53_training_observables 
b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_35_53_training_observables new file mode 100644 index 0000000..fe13447 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_35_53_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_36_13_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_36_13_training_observables new file mode 100644 index 0000000..1deefb1 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_36_13_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_36_32_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_36_32_training_observables new file mode 100644 index 0000000..cd0da8f Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_36_32_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_36_52_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_36_52_training_observables new file mode 100644 index 0000000..d93ab17 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_36_52_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_37_12_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_37_12_training_observables new file mode 100644 index 0000000..a8030e3 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_37_12_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_37_32_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_37_32_training_observables new file mode 100644 index 0000000..1f5171e Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_37_32_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_37_52_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_37_52_training_observables new file mode 
100644 index 0000000..7e777a8 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_37_52_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_38_12_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_38_12_training_observables new file mode 100644 index 0000000..a1db9f0 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_38_12_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_38_33_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_38_33_training_observables new file mode 100644 index 0000000..28acef8 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_38_33_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_38_53_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_38_53_training_observables new file mode 100644 index 0000000..2500554 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_38_53_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_39_12_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_39_12_training_observables new file mode 100644 index 0000000..2b14c74 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_39_12_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_39_32_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_39_32_training_observables new file mode 100644 index 0000000..abbabda Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_39_32_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_39_53_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_39_53_training_observables new file mode 100644 index 0000000..5091818 Binary files /dev/null and 
b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_39_53_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_40_13_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_40_13_training_observables new file mode 100644 index 0000000..5bc7993 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_40_13_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_40_33_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_40_33_training_observables new file mode 100644 index 0000000..b2b370d Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_40_33_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_40_53_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_40_53_training_observables new file mode 100644 index 0000000..b4936b0 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_40_53_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_41_13_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_41_13_training_observables new file mode 100644 index 0000000..22e80c8 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_41_13_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_41_33_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_41_33_training_observables new file mode 100644 index 0000000..868c62e Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_41_33_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_41_53_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_41_53_training_observables new file mode 100644 index 0000000..506468d Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_41_53_training_observables differ diff --git 
a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_42_13_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_42_13_training_observables new file mode 100644 index 0000000..01c975c Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_42_13_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_42_33_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_42_33_training_observables new file mode 100644 index 0000000..692a1f4 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_42_33_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_42_56_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_42_56_training_observables new file mode 100644 index 0000000..74c1d2b Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_42_56_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_43_17_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_43_17_training_observables new file mode 100644 index 0000000..5658b7f Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_43_17_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_43_37_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_43_37_training_observables new file mode 100644 index 0000000..b01bb3f Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_43_37_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_43_57_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_43_57_training_observables new file mode 100644 index 0000000..95e85ec Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_43_57_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_44_17_training_observables 
b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_44_17_training_observables new file mode 100644 index 0000000..ba009cc Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_44_17_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_44_37_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_44_37_training_observables new file mode 100644 index 0000000..eb1261f Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_44_37_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_44_57_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_44_57_training_observables new file mode 100644 index 0000000..0c1ac44 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_44_57_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_45_18_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_45_18_training_observables new file mode 100644 index 0000000..c4770fb Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_45_18_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_45_38_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_45_38_training_observables new file mode 100644 index 0000000..969a5ce Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_45_38_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_45_58_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_45_58_training_observables new file mode 100644 index 0000000..61eea88 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_45_58_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_46_18_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_46_18_training_observables new file mode 
100644 index 0000000..0dae9b9 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_46_18_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_46_38_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_46_38_training_observables new file mode 100644 index 0000000..9262f29 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_46_38_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_46_58_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_46_58_training_observables new file mode 100644 index 0000000..2f16bc9 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_46_58_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_47_18_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_47_18_training_observables new file mode 100644 index 0000000..498579d Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_47_18_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_47_38_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_47_38_training_observables new file mode 100644 index 0000000..4f2c106 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_47_38_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_47_58_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_47_58_training_observables new file mode 100644 index 0000000..a33077c Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_47_58_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_48_18_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_48_18_training_observables new file mode 100644 index 0000000..4075739 Binary files /dev/null and 
b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_48_18_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_48_38_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_48_38_training_observables new file mode 100644 index 0000000..96e0c85 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_48_38_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_48_58_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_48_58_training_observables new file mode 100644 index 0000000..27e232a Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_48_58_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_49_18_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_49_18_training_observables new file mode 100644 index 0000000..d49d628 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_49_18_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_49_38_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_49_38_training_observables new file mode 100644 index 0000000..508c7dc Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_49_38_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_49_58_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_49_58_training_observables new file mode 100644 index 0000000..212ddf5 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_49_58_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_50_18_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_50_18_training_observables new file mode 100644 index 0000000..b1b64cb Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_50_18_training_observables differ diff --git 
a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_50_38_training_observables b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_50_38_training_observables new file mode 100644 index 0000000..abbba65 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_50_38_training_observables differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_59_17_final b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_59_17_final new file mode 100644 index 0000000..42010b8 Binary files /dev/null and b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/-nr_steps_25-cr_lr-n_ep_13-m_bs_100-sim_steps_3000-m_iter_35-ensnr_3-init_200/11_06_07_59_17_final differ diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/AEdyna_clean_test.py b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/AEdyna_clean_test.py new file mode 100644 index 0000000..07edeb3 --- /dev/null +++ b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/AEdyna_clean_test.py @@ -0,0 +1,1353 @@ +import os +import pickle +from datetime import datetime +import itertools as it +import gym +import matplotlib.pyplot as plt +import numpy as np +import pandas as pd +# from stable_baselines.common.noise import NormalActionNoise + +from stable_baselines.sac.policies import MlpPolicy +# from stable_baselines.td3.policies import MlpPolicy +# from stable_baselines.common.policies import MlpPolicy +from stable_baselines import SAC as Agent +# from stable_baselines import TD3 as Agent +# from stable_baselines import PPO2 as Agent + +import tensorflow as tf + +# from local_fel_simulated_env import FelLocalEnv +# from simulated_tango import SimTangoConnection +from laser_trajectory_control_env import LaserTrajectoryControlEnv +from tango_connection import TangoConnection + +# from naf2 import NAF + +# set random seed +random_seed = 111 +np.random.seed(random_seed) +config = tf.ConfigProto( + device_count={'GPU': 0} +) +# config = None + +# tango = SimTangoConnection() +# real_env = FelLocalEnv(tango=tango) +conf_file = '/home/niky/FERMI/2020_11_05/configuration/conf_fel2.json' +tango = TangoConnection(conf_file=conf_file) +real_env = LaserTrajectoryControlEnv(tango=tango) + +# Hyper papameters +steps_per_env = 25 +init_random_steps = 200 +total_steps = 500 # 350 # 300 # 250 +num_epochs = int((total_steps - init_random_steps) / steps_per_env) + 1 + +print('Number of epochs: ', num_epochs) + +hidden_sizes = [100, 100] + +max_training_iterations = 50 +delay_before_convergence_check = 1 + +algorithm = 'SAC' + +# minibatch_size = 100 +simulated_steps = 3000 # 2500 + +model_batch_size = 100 +num_ensemble_models = 3 + +early_stopping = True +model_iter = 35 # 30 + +model_training_iterations = 10 +network_size = 15 +# Set the priors for the anchor method: +# TODO: How to set these correctly? 
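+# In the anchored-ensemble scheme used here, each entry of lambda_anchor is the assumed data-noise variance +# divided by the prior variance of the corresponding weight group (the init_stddev values below); NN.anchor() +# later penalises the squared distance of each model's weights from their random initial values with these +# factors. Note that with data_noise set to 0.0 the lambda_anchor vector is all zeros, so the anchor +# regularisation term currently has no effect on the loss.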
+init_params = dict(init_stddev_1_w=np.sqrt(1), + init_stddev_1_b=np.sqrt(1), + init_stddev_2_w=1.0 / np.sqrt(network_size)) + +data_noise = 0.00000 # estimated noise variance +lambda_anchor = data_noise / (np.array([init_params['init_stddev_1_w'], + init_params['init_stddev_1_b'], + init_params['init_stddev_2_w']]) ** 2) + +# How often to check the progress of the network training +# e.g. lambda it, episode: (it + 1) % max(3, (ep+1)*2) == 0 +dynamic_wait_time = lambda it, ep: (it + 1) % 1 == 0 # + +# Learning rate as function of ep +lr_start = 1e-3 # 1e-3 +lr_end = 1e-3 # 1e-3 +lr = lambda ep: max(lr_start + ep / 30 * (lr_end - lr_start), lr_end) + +# Create the logging directory: +project_directory = 'Data_logging/Test_plots_5/' + +hyp_str_all = '-nr_steps_' + str(steps_per_env) + '-cr_lr' + '-n_ep_' + str(num_epochs) + \ + '-m_bs_' + str(model_batch_size) + \ + '-sim_steps_' + str(simulated_steps) + \ + '-m_iter_' + str(model_iter) + '-ensnr_' + str(num_ensemble_models) + '-init_' + str( + init_random_steps) + '/' +project_directory = project_directory + hyp_str_all + +# To label the plots: +hyp_str_all = '-nr_steps_' + str(steps_per_env) + '-n_ep_' + str(num_epochs) + \ + '-m_bs_' + str(model_batch_size) + \ + '-sim_steps_' + str(simulated_steps) + \ + '-m_iter_' + str(model_iter) + \ + '\n-ensnr_' + str(num_ensemble_models) + +if not os.path.isdir(project_directory): + os.makedirs(project_directory) + print("created folder : ", project_directory) + + +# Class for data storage during the tests +class TrajectoryBuffer(): + '''Class for data storage during the tests''' + + def __init__(self, name, directory): + self.save_frequency = 100000 + self.directory = directory + self.name = name + self.rews = [] + self.obss = [] + self.acts = [] + self.dones = [] + self.info = "" + self.idx = -1 + + def new_trajectory(self, obs): + self.idx += 1 + self.rews.append([]) + self.acts.append([]) + self.obss.append([]) + self.dones.append([]) + self.store_step(obs=obs) + + def store_step(self, obs=None, act=None, rew=None, done=None): + self.rews[self.idx].append(rew) + self.obss[self.idx].append(obs) + self.acts[self.idx].append(act) + self.dones[self.idx].append(done) + + if self.__len__() % self.save_frequency == 0: + self.save_buffer() + + def __len__(self): + assert (len(self.rews) == len(self.obss) == len(self.acts) == len(self.dones)) + return len(self.obss) + + def save_buffer(self, **kwargs): + if 'info' in kwargs: + self.info = kwargs.get('info') + now = datetime.now() + # clock_time = "{}_{}_{}_{}_".format(now.day, now.hour, now.minute, now.second) + clock_time = f'{now.month:0>2}_{now.day:0>2}_{now.hour:0>2}_{now.minute:0>2}_{now.second:0>2}_' + data = dict(obss=self.obss, + acts=self.acts, + rews=self.rews, + dones=self.dones, + info=self.info) + # print('saving...', data) + out_put_writer = open(self.directory + clock_time + self.name, 'wb') + pickle.dump(data, out_put_writer, -1) + # pickle.dump(self.actions, out_put_writer, -1) + out_put_writer.close() + + def get_data(self): + return dict(obss=self.obss, + acts=self.acts, + rews=self.rews, + dones=self.dones, + info=self.info) + + +class MonitoringEnv(gym.Wrapper): + ''' + Gym Wrapper to store information for scaling to the correct space and for post analysis.
+ ''' + + def __init__(self, env, **kwargs): + gym.Wrapper.__init__(self, env) + self.data_dict = dict() + self.environment_usage = 'default' + self.directory = project_directory + self.data_dict[self.environment_usage] = TrajectoryBuffer(name=self.environment_usage, + directory=self.directory) + self.current_buffer = self.data_dict.get(self.environment_usage) + + self.test_env_flag = False + + self.obs_dim = self.env.observation_space.shape + self.obs_high = self.env.observation_space.high + self.obs_low = self.env.observation_space.low + self.act_dim = self.env.action_space.shape + self.act_high = self.env.action_space.high + self.act_low = self.env.action_space.low + + # state space definition + self.observation_space = gym.spaces.Box(low=-1.0, + high=1.0, + shape=self.obs_dim, + dtype=np.float64) + + # action space definition + self.action_space = gym.spaces.Box(low=-1.0, + high=1.0, + shape=self.act_dim, + dtype=np.float64) + + # if 'test_env' in kwargs: + # self.test_env_flag = True + self.verification = False + if 'verification' in kwargs: + self.verification = kwargs.get('verification') + + def reset(self, **kwargs): + init_obs = self.env.reset(**kwargs) + # print('Reset Env: ', (init_obs),10*'-- ') + self.current_buffer.new_trajectory(init_obs) + init_obs = self.scale_state_env(init_obs) + # print('Reset Menv: ', (init_obs)) + return init_obs + + def step(self, action): + # print('a', action) + action = self.descale_action_env(action) + # print('as', action) + ob, reward, done, info = self.env.step(action) + # print('Env: ', reward) + # print('Env: ', ob, 'r:', reward, done) + self.current_buffer.store_step(obs=ob, act=action, rew=reward, done=done) + ob = self.scale_state_env(ob) + reward = self.rew_scale(reward) + # print('Menv: ', ob, 'r:', reward, done) + # print('Menv: ', reward) + return ob, reward, done, info + + def set_usage(self, usage): + self.environment_usage = usage + if usage in self.data_dict: + self.current_buffer = self.data_dict.get(usage) + else: + self.data_dict[self.environment_usage] = TrajectoryBuffer(name=self.environment_usage, + directory=self.directory) + self.current_buffer = self.data_dict.get(usage) + + def close_usage(self, usage): + # Todo: Implement to save complete data + self.current_buffer = self.data_dict.get(usage) + self.current_buffer.save_buffer() + + def scale_state_env(self, ob): + scale = (self.env.observation_space.high - self.env.observation_space.low) + return (2 * ob - (self.env.observation_space.high + self.env.observation_space.low)) / scale + # return ob + + def descale_action_env(self, act): + scale = (self.env.action_space.high - self.env.action_space.low) + return (scale * act + self.env.action_space.high + self.env.action_space.low) / 2 + # return act + + def rew_scale(self, rew): + # we only scale for the network training: + # if not self.test_env_flag: + # rew = rew * 2 + 1 + + if not self.verification: + '''Rescale reward from [-1,0] to [-1,1] for the training of the network in case of tests''' + rew = rew * 2 + 1 + # pass + # if rew < -1: + # print('Hello, what is going on here: ', rew) + # else: + # print('Okay...', rew) + return rew + + def save_current_buffer(self, info=''): + self.current_buffer = self.data_dict.get(self.environment_usage) + self.current_buffer.save_buffer(info=info) + print('Saved current buffer', self.environment_usage) + + def set_directory(self, directory): + self.directory = directory + + +def make_env(**kwargs): + '''Create the environment''' + return MonitoringEnv(env=real_env, **kwargs) + + +def 
flatten_list(tensor_list): + ''' + Flatten a list of tensors + ''' + return tf.concat([flatten(t) for t in tensor_list], axis=0) + + +def flatten(tensor): + ''' + Flatten a tensor + ''' + return tf.reshape(tensor, shape=(-1,)) + + +def test_agent(env_test, agent_op, num_games=10): + ''' + Test an agent 'agent_op', 'num_games' times + Return mean and std + ''' + games_r = [] + games_length = [] + games_dones = [] + for _ in range(num_games): + d = False + game_r = 0 + o = env_test.reset() + game_length = 0 + while not d: + try: + a_s, _ = agent_op([o]) + except: + a_s, _ = agent_op(o) + o, r, d, _ = env_test.step(a_s[0]) # Niky + game_r += r + game_length += 1 + # print(o, a_s, r) + success = r > -0.05 + # print(r) + games_r.append(game_r) + games_length.append(success) + games_dones.append(d) + return np.mean(games_r), np.std(games_r), np.mean(games_length), np.mean(games_dones) + + +class FullBuffer(): + def __init__(self): + self.rew = [] + self.obs = [] + self.act = [] + self.nxt_obs = [] + self.done = [] + + self.train_idx = [] + self.valid_idx = [] + self.idx = 0 + + def store(self, obs, act, rew, nxt_obs, done): + self.rew.append(rew) + self.obs.append(obs) + self.act.append(act) + self.nxt_obs.append(nxt_obs) + self.done.append(done) + + self.idx += 1 + + def generate_random_dataset(self, ratio=False): + '''ratio: how much for valid taken''' + rnd = np.arange(len(self.obs)) + np.random.shuffle(rnd) + self.valid_idx = rnd[:] + self.train_idx = rnd[:] # change back + if ratio: + self.valid_idx = rnd[: int(len(self.obs) * ratio)] + self.train_idx = rnd[int(len(self.obs) * ratio):] + + print('Train set:', len(self.train_idx), 'Valid set:', len(self.valid_idx)) + + def get_training_batch(self): + return np.array(self.obs)[self.train_idx], np.array(np.expand_dims(self.act, -1))[self.train_idx], \ + np.array(self.rew)[self.train_idx], np.array(self.nxt_obs)[self.train_idx], np.array(self.done)[ + self.train_idx] + + def get_valid_batch(self): + return np.array(self.obs)[self.valid_idx], np.array(np.expand_dims(self.act, -1))[self.valid_idx], \ + np.array(self.rew)[self.valid_idx], np.array(self.nxt_obs)[self.valid_idx], np.array(self.done)[ + self.valid_idx] + + def get_maximum(self): + idx = np.argmax(self.rew) + print('rew', np.array(self.rew)[idx]) + return np.array(self.obs)[idx], np.array(np.expand_dims(self.act, -1))[idx], \ + np.array(self.rew)[idx], np.array(self.nxt_obs)[idx], np.array(self.done)[ + idx] + + def __len__(self): + assert (len(self.rew) == len(self.obs) == len(self.act) == len(self.nxt_obs) == len(self.done)) + return len(self.obs) + + +class NN: + def __init__(self, x, y, y_dim, hidden_size, n, learning_rate, init_params): + self.init_params = init_params + + # set up NN + with tf.variable_scope('model_' + str(n) + '_nn'): + self.inputs = x + self.y_target = y + if True: + self.inputs = tf.scalar_mul(0.8, self.inputs) + self.layer_1_w = tf.layers.Dense(hidden_size, + activation=tf.nn.tanh, + kernel_initializer=tf.random_normal_initializer(mean=0., + stddev=self.init_params.get( + 'init_stddev_1_w'), + dtype=tf.float64), + bias_initializer=tf.random_normal_initializer(mean=0., + stddev=self.init_params.get( + 'init_stddev_1_b'), + dtype=tf.float64)) + + self.layer_1 = self.layer_1_w.apply(self.inputs) + + self.layer_2_w = tf.layers.Dense(hidden_size, + activation=tf.nn.tanh, + kernel_initializer=tf.random_normal_initializer(mean=0., + stddev=self.init_params.get( + 'init_stddev_1_w'), + dtype=tf.float64), + bias_initializer=tf.random_normal_initializer(mean=0., + 
stddev=self.init_params.get( + 'init_stddev_1_b'), + dtype=tf.float64)) + + self.layer_2 = self.layer_2_w.apply(self.layer_1) + # + self.output_w = tf.layers.Dense(y_dim, + activation=None, + use_bias=False, + kernel_initializer=tf.random_normal_initializer(mean=0., + stddev=self.init_params.get( + 'init_stddev_2_w'), + dtype=tf.float64)) + else: + self.layer_1_w = tf.layers.Dense(hidden_size, + activation=tf.nn.tanh + ) + + self.layer_1 = self.layer_1_w.apply(self.inputs) + + self.layer_2_w = tf.layers.Dense(hidden_size, + activation=tf.nn.tanh) + + self.layer_2 = self.layer_2_w.apply(self.layer_1) + + self.output_w = tf.layers.Dense(y_dim, + activation=None) + # # + + self.output = self.output_w.apply(self.layer_2) + + # set up loss and optimiser - we'll modify this later with anchoring regularisation + self.opt_method = tf.train.AdamOptimizer(learning_rate) + # self.mse_ = 1 / tf.shape(self.inputs, out_type=tf.int64)[0] * \ + # tf.reduce_sum(tf.square(self.y_target - self.output)) + self.mse_ = tf.reduce_mean(((self.y_target - self.output)) ** 2) + self.loss_ = 1 / tf.shape(self.inputs, out_type=tf.int64)[0] * \ + tf.reduce_sum(tf.square(self.y_target - self.output)) + self.optimizer = self.opt_method.minimize(self.loss_) + self.optimizer_mse = self.opt_method.minimize(self.mse_) + + def get_weights(self, sess): + '''method to return current params''' + + ops = [self.layer_1_w.kernel, self.layer_1_w.bias, + self.layer_2_w.kernel, self.layer_2_w.bias, + self.output_w.kernel] + w1, b1, w2, b2, w = sess.run(ops) + + return w1, b1, w2, b2, w + + # def get_weights(self, sess): + # '''method to return current params''' + # + # ops = [self.layer_1_w.kernel, self.layer_1_w.bias, + # self.output_w.kernel] + # w1, b1, w = sess.run(ops) + # + # return w1, b1, w + + def anchor(self, lambda_anchor, sess): + '''regularise around initialised parameters after session has started''' + + w1, b1, w2, b2, w = self.get_weights(sess=sess) + + # get initial params to hold for future trainings + self.w1_init, self.b1_init, self.w2_init, self.b2_init, self.w_out_init = w1, b1, w2, b2, w + + loss_anchor = lambda_anchor[0] * tf.reduce_sum(tf.square(self.w1_init - self.layer_1_w.kernel)) + loss_anchor += lambda_anchor[1] * tf.reduce_sum(tf.square(self.b1_init - self.layer_1_w.bias)) + + loss_anchor += lambda_anchor[0] * tf.reduce_sum(tf.square(self.w2_init - self.layer_2_w.kernel)) + loss_anchor += lambda_anchor[1] * tf.reduce_sum(tf.square(self.b2_init - self.layer_2_w.bias)) + + loss_anchor += lambda_anchor[2] * tf.reduce_sum(tf.square(self.w_out_init - self.output_w.kernel)) + + # combine with original loss + self.loss_ = self.loss_ + tf.scalar_mul(1 / tf.shape(self.inputs)[0], loss_anchor) + self.optimizer = self.opt_method.minimize(self.loss_) + return self.optimizer, self.loss_ + + +class NetworkEnv(gym.Wrapper): + ''' + Wrapper to handle the network interaction + ''' + + def __init__(self, env, model_func=None, done_func=None, number_models=1, **kwargs): + gym.Wrapper.__init__(self, env) + + self.model_func = model_func + self.done_func = done_func + self.number_models = number_models + self.len_episode = 0 + # self.threshold = self.env.threshold + # print('the threshold is: ', self.threshold) + self.max_steps = env.max_steps + self.verification = False + if 'verification' in kwargs: + self.verification = kwargs.get('verification') + self.visualize() + + def reset(self, **kwargs): + + # self.threshold = -0.05 * 2 + 1 # rescaled [-1,1] + self.len_episode = 0 + self.done = False + # kwargs['simulation'] = True
+ # action = self.env.reset(**kwargs) + if self.model_func is not None: + obs = np.random.uniform(-1, 1, self.env.observation_space.shape) + # print('reset', obs) + # Todo: remove + # self.obs = self.env.reset() + # obs = self.env.reset() + else: + # obs = self.env.reset(**kwargs) + pass + # Does this work? + self.obs = np.clip(obs, -1.0, 1.0) + # self.obs = obs.copy() + # if self.test_phase: + # print('test reset', self.obs) + # print('Reset : ',self.obs) + self.current_model = np.random.randint(0, max(self.number_models, 1)) + return self.obs + + def step(self, action): + # self.visualize([np.squeeze(action)]) + if self.model_func is not None: + # predict the next state on a random model + # obs, rew = self.model_func(self.obs, [np.squeeze(action)], np.random.randint(0, self.number_models)) + + if self.verification: + # obs_cov = np.diag(np.square(np.std(obss, axis=0, ddof=1)) + data_noise) + # print(obs_std) + # obs = np.squeeze(np.random.multivariate_normal(obs_m, (obs_cov), 1)) + # obs, rew, done, info = self.env.step( + # action) # + obs, rew = self.model_func(self.obs, [np.squeeze(action)], self.number_models) + else: + # obs, rew, done, info = self.env.step( + # action) + obs, rew = self.model_func(self.obs, [np.squeeze(action)], self.current_model) + # obss = [] + # rews = [] + # for i in range(num_ensemble_models): + # + # obs, rew = self.model_func(self.obs, [np.squeeze(action)], i) + # obss.append((obs)) + # rews.append(rew) + # # idx = np.argmin(rews) + # # obs = obss[idx] + # obs_m = np.mean(obss, axis=0) + # obs = obs_m + # print(obs) + # rew = rews[idx] + # rew = np.mean(np.clip(rews, -1, 1)) + self.obs = np.clip(obs.copy(), -1, 1) + # if (self.obs == -1).any() or (self.obs == 1).any(): + # rew = -1 + rew = np.clip(rew, -1, 1) + if not self.verification: + rew = (rew - 1) / 2 + + # obs_real, rew_real, _, _ = self.env.step(action) + # obs, rew, self.done, _ = self.env.step(action) + # print('Diff: ', np.linalg.norm(obs - obs_real), np.linalg.norm(rew - rew_real)) + # print('MEnv: ', np.linalg.norm(obs ), np.linalg.norm(rew )) + # obs += np.random.randn(obs.shape[-1]) + # # Todo: remove + # self.env.state = self.obs + # done = rew > self.threshold + + self.len_episode += 1 + # print('threshold at:', self.threshold) + # For niky hardcoded reward threshold in [-1,1] space from [0,1] -0.05 => 0.9------------------------------------------------------------ + if rew > -0.05: # self.threshold: TODO: to be changed + # ---------------------------------------------------------------------------------------------------------------------- + self.done = True + # if (self.obs == -1).any() or (self.obs == 1).any(): + # print('boundary hit...', self.obs, rew) + # # print("Done", rew) + if self.len_episode >= self.max_steps: + self.done = True + # + return self.obs, rew, self.done, dict() + # return obs, rew, done, info + else: + # self.obs, rew, done, _ = real_env.step(action) + # return self.obs, rew, done, "" + pass + # return env.step(action) + + def visualize(self, data=None, label=None): + + action = [np.zeros(self.env.action_space.shape)] + state = np.zeros(self.env.observation_space.shape) + maximum = 0 + if data is not None: + action = [data[1]] + state = data[0] + maximum = (data[2] - 1) / 2 + delta = 0.05 + x = np.arange(-1, 1, delta) + y = np.arange(-1, 1, delta) + X, Y = np.meshgrid(x, y) + + if self.number_models == num_ensemble_models: + Nr = 1 + Nc = 1 + fig, axs = plt.subplots(Nr, Nc) + fig.subplots_adjust(hspace=0.3) + images = [] + for nr in range(self.number_models): + 
rewards = np.zeros(X.shape) + + # print(self.number_models) + for i1 in range(len(x)): + for j1 in range(len(y)): + state[0] = x[i1] + state[1] = y[j1] + rewards[i1, j1] = (self.model_func(state, [np.squeeze(action)], + nr))[1] / num_ensemble_models + axs.contour(X, Y, (rewards - 1) / 2, alpha=1) + self.save_buffer(nr, data, X, Y, rewards) + # list_combinations = list(it.combinations([0, 1, 2, 3], 2)) + # + # for i in range(Nr): + # for j in range(Nc): + # + # for nr in range(self.number_models): + # rewards = np.zeros(X.shape) + # + # # print(self.number_models) + # for i1 in range(len(x)): + # for j1 in range(len(y)): + # current_pair = list_combinations[i * Nc + j] + # state[current_pair[0]] = x[i1] + # state[current_pair[1]] = y[j1] + # rewards[i1, j1] = (self.model_func(state, [np.squeeze(action)], + # nr))[1] / num_ensemble_models + # axs[i, j].contour(X, Y, (rewards - 1) / 2, alpha=1) + # # plt.plot(np.array(states, dtype=object)[:, 1],) + # # images.append(axs[i, j].contour(X, Y, (rewards - 1) / 2, 25, alpha=1)) + # # axs[i, j].label_outer() + # plt.title(maximum) + plt.title(label) + # plt.colorbar() + # fig.show() + plt.show() + else: + pass + # action = [np.random.uniform(-1, 1, 4)] + # state_vec = np.linspace(-1, 1, 100) + # states = [] + # # print(self.number_models) + # + # for i in state_vec: + # states.append(self.model_func(np.array([i, 0, 0, 0]), action, + # self.number_models)) + # + # plt.plot(np.array(states, dtype=object)[:, 1]) + + # states = np.zeros(X.shape) + # # print(self.number_models) + # for i in range(len(x)): + # for j in range(len(y)): + # states[i, j] = (self.model_func(np.array([x[i], y[j], 0, 0]), action, + # self.number_models)[1]) + # plt.contourf(states) + + def save_buffer(self, model_nr, data, X, Y, rews, **kwargs): + if 'info' in kwargs: + self.info = kwargs.get('info') + now = datetime.now() + # clock_time = "{}_{}_{}_{}_".format(now.day, now.hour, now.minute, now.second) + clock_time = f'{now.month:0>2}_{now.day:0>2}_{now.hour:0>2}_{now.minute:0>2}_{now.second:0>2}_' + data = dict(data=data, + model=model_nr, + rews=rews, + X=X, + Y=Y) + # print('saving...', data) + out_put_writer = open(project_directory + clock_time + 'plot_model_' + str(model_nr), 'wb') + pickle.dump(data, out_put_writer, -1) + # pickle.dump(self.actions, out_put_writer, -1) + out_put_writer.close() + + +class StructEnv(gym.Wrapper): + ''' + Gym Wrapper to store information like number of steps and total reward of the last espisode. 
+ ''' + + def __init__(self, env): + gym.Wrapper.__init__(self, env) + self.n_obs = self.env.reset() + self.total_rew = 0 + self.len_episode = 0 + + def reset(self, **kwargs): + self.n_obs = self.env.reset(**kwargs) + self.total_rew = 0 + self.len_episode = 0 + return self.n_obs.copy() + + def step(self, action): + ob, reward, done, info = self.env.step(action) + # print('reward in struct', reward) + self.total_rew += reward + self.len_episode += 1 + return ob, reward, done, info + + def get_episode_reward(self): + return self.total_rew + + def get_episode_length(self): + return self.len_episode + + +def restore_model(old_model_variables, m_variables): + # variable used as index for restoring the actor's parameters + it_v2 = tf.Variable(0, trainable=False) + + restore_m_params = [] + for m_v in m_variables: + upd_m_rsh = tf.reshape(old_model_variables[it_v2: it_v2 + tf.reduce_prod(m_v.shape)], shape=m_v.shape) + restore_m_params.append(m_v.assign(upd_m_rsh)) + it_v2 += tf.reduce_prod(m_v.shape) + + return tf.group(*restore_m_params) + + +def aedyna(env_name, hidden_sizes=[32, 32], cr_lr=5e-3, num_epochs=50, + critic_iter=10, steps_per_env=100, delta=0.05, algorithm='TRPO', conj_iters=10, + simulated_steps=1000, num_ensemble_models=2, model_iter=15, model_batch_size=512, + init_random_steps=steps_per_env): + ''' + Anchor ensemble dyna reinforcement learning + The states and actions are provided by the gym environment with the correct boxes. + The reward has to be between [-1,0]. + Parameters: + ----------- + env_name: Name of the environment + hidden_sizes: list of the number of hidden units for each layer + num_epochs: number of training epochs + number_envs: number of "parallel" synchronous environments + # NB: it isn't distributed across multiple CPUs + steps_per_env: number of steps per environment + # NB: the total number of steps per epoch will be: steps_per_env*number_envs + algorithm: type of algorithm. 
Either 'TRPO' or 'NPO' + minibatch_size: Batch size used to train the critic + mb_lr: learning rate of the environment model + model_batch_size: batch size of the environment model + simulated_steps: number of simulated steps for each policy update + model_iter: number of iterations without improvement before stopping training the model + ''' + model_batch_size = model_batch_size + tf.reset_default_graph() + + # Create a few environments to collect the trajectories + env = StructEnv(make_env()) + env_test = StructEnv(make_env(verification=True)) + + obs_dim = env.observation_space.shape + act_dim = env.action_space.shape[0] + + # Placeholders for model + act_ph = tf.placeholder(shape=(None, act_dim), dtype=tf.float64, name='act') + obs_ph = tf.placeholder(shape=(None, obs_dim[0]), dtype=tf.float64, name='obs') + # NEW + nobs_ph = tf.placeholder(shape=(None, obs_dim[0]), dtype=tf.float64, name='nobs') + rew_ph = tf.placeholder(shape=(None, 1), dtype=tf.float64, name='rew') + + # Placeholder for learning rate + mb_lr_ = tf.placeholder("float", None) + + old_model_variables = tf.placeholder(shape=(None,), dtype=tf.float64, name='old_model_variables') + + def variables_in_scope(scope): + # get all trainable variables in 'scope' + return tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope) + + ######################################################### + ######################### MODEL ######################### + ######################################################### + + m_opts = [] + m_losses = [] + + nobs_pred_m = [] + act_obs = tf.concat([obs_ph, act_ph], 1) + target = tf.concat([nobs_ph, rew_ph], 1) + + # computational graph of N models and the correct losses for the anchor method + m_classes = [] + + for i in range(num_ensemble_models): + m_class = NN(x=act_obs, y=target, y_dim=obs_dim[0] + 1, + learning_rate=mb_lr_, n=i, + hidden_size=network_size, init_params=init_params) + + nobs_pred = m_class.output + + nobs_pred_m.append(nobs_pred) + + m_classes.append(m_class) + m_losses.append(m_class.mse_) + m_opts.append(m_class.optimizer_mse) + + ##################### RESTORE MODEL ###################### + initialize_models = [] + models_variables = [] + for i in range(num_ensemble_models): + m_variables = variables_in_scope('model_' + str(i) + '_nn') + initialize_models.append(restore_model(old_model_variables, m_variables)) + # List of weights + models_variables.append(flatten_list(m_variables)) + + ######################################################### + ##################### END MODEL ######################### + ######################################################### + # Time + now = datetime.now() + clock_time = "{}_{}_{}_{}".format(now.day, now.hour, now.minute, now.second) + print('Time:', clock_time) + + hyp_str = '-spe_' + str(steps_per_env) + '-cr_lr' + str(cr_lr) + '-crit_it_' + str( + critic_iter) + '-delta_' + str(delta) + '-conj_iters_' + str(conj_iters) + + file_writer = tf.summary.FileWriter('log_dir/' + env_name + '/' + algorithm + '_' + clock_time + '_' + hyp_str, + tf.get_default_graph()) + + ################################################################################################# + + # Session start!!!!!!!! 
+ # create a session + sess = tf.Session(config=config) + # initialize the variables + sess.run(tf.global_variables_initializer()) + + def model_op(o, a, md_idx): + mo = sess.run(nobs_pred_m[md_idx], feed_dict={obs_ph: [o], act_ph: [a[0]]}) + return np.squeeze(mo[:, :-1]), float(np.squeeze(mo[:, -1])) + + def run_model_loss(model_idx, r_obs, r_act, r_nxt_obs, r_rew): + # print({'obs_ph': r_obs.shape, 'act_ph': r_act.shape, 'nobs_ph': r_nxt_obs.shape}) + r_act = np.squeeze(r_act, axis=2) + # print(r_act.shape) + r_rew = np.reshape(r_rew, (-1, 1)) + # print(r_rew.shape) + return_val = sess.run(m_loss_anchor[model_idx], + feed_dict={obs_ph: r_obs, act_ph: r_act, nobs_ph: r_nxt_obs, rew_ph: r_rew}) + return return_val + + def run_model_opt_loss(model_idx, r_obs, r_act, r_nxt_obs, r_rew, mb_lr): + r_act = np.squeeze(r_act, axis=2) + r_rew = np.reshape(r_rew, (-1, 1)) + # return sess.run([m_opts[model_idx], m_losses[model_idx]], + # feed_dict={obs_ph: r_obs, act_ph: r_act, nobs_ph: r_nxt_obs, rew_ph: r_rew, mb_lr_: mb_lr}) + return sess.run([m_opts_anchor[model_idx], m_loss_anchor[model_idx]], + feed_dict={obs_ph: r_obs, act_ph: r_act, nobs_ph: r_nxt_obs, rew_ph: r_rew, mb_lr_: mb_lr}) + + def model_assign(i, model_variables_to_assign): + ''' + Update the i-th model's parameters + ''' + return sess.run(initialize_models[i], feed_dict={old_model_variables: model_variables_to_assign}) + + def train_model(tr_obs, tr_act, tr_nxt_obs, tr_rew, v_obs, v_act, v_nxt_obs, v_rew, step_count, model_idx, mb_lr): + + # Get validation loss on the old model only used for monitoring + mb_valid_loss1 = run_model_loss(model_idx, v_obs, v_act, v_nxt_obs, v_rew) + + # Restore the initial random weights to have a new, clean neural network + # initial_variables_models - list stored before already in the code below - + # important for the anchor method + model_assign(model_idx, initial_variables_models[model_idx]) + + # Get validation loss on the now initialized model + mb_valid_loss = run_model_loss(model_idx, v_obs, v_act, v_nxt_obs, v_rew) + + acc_m_losses = [] + + md_params = sess.run(models_variables[model_idx]) + best_mb = {'iter': 0, 'loss': mb_valid_loss, 'params': md_params} + it = 0 + + # Create mini-batch for training + lb = len(tr_obs) + + shuffled_batch = np.arange(lb) + np.random.shuffle(shuffled_batch) + + if not early_stopping: + # model_batch_size = lb + # Take a fixed accuracy + not_converged = True + while not_converged: + + # update the model on each mini-batch + last_m_losses = [] + for idx in range(0, lb, lb): + minib = shuffled_batch + + _, ml = run_model_opt_loss(model_idx, tr_obs[minib], tr_act[minib], tr_nxt_obs[minib], + tr_rew[minib], mb_lr=mb_lr) + acc_m_losses.append(ml) + last_m_losses.append(ml) + mb_valid_loss = run_model_loss(model_idx, v_obs, v_act, v_nxt_obs, v_rew) + # mb_lr + if mb_valid_loss < max(mb_lr, 1e-4) or it > 1e5: + not_converged = False + it += 1 + + best_mb['loss'] = mb_valid_loss + best_mb['iter'] = it + # store the parameters to the array + best_mb['params'] = sess.run(models_variables[model_idx]) + else: + # Run until the number of model_iter has passed from the best val loss at it on... 
+ ml = 1 + # while not (best_mb['iter'] < it - model_iter and ml < 5e-3): + while best_mb['iter'] > it - model_iter: + # update the model on each mini-batch + last_m_losses = [] + for idx in range(0, lb, model_batch_size): + minib = shuffled_batch[idx:min(idx + model_batch_size, lb)] + _, ml = run_model_opt_loss(model_idx, tr_obs[minib], tr_act[minib], tr_nxt_obs[minib], + tr_rew[minib], mb_lr=mb_lr) + acc_m_losses.append(ml) + last_m_losses.append(ml) + + # Check if the loss on the validation set has improved + mb_valid_loss = run_model_loss(model_idx, v_obs, v_act, v_nxt_obs, v_rew) + + if mb_valid_loss < best_mb['loss']: + best_mb['loss'] = mb_valid_loss + best_mb['iter'] = it + # store the parameters to the array + best_mb['params'] = sess.run(models_variables[model_idx]) + + it += 1 + + # Restore the model with the lower validation loss + model_assign(model_idx, best_mb['params']) + + print('Model:{}, iter:{} -- Old Val loss:{:.6f} New Val loss:{:.6f} -- ' + 'New Train loss:{:.6f} -- Loss_data {:.6f}'.format(model_idx, + it, + mb_valid_loss1, + best_mb[ + 'loss'], + np.mean( + last_m_losses), ml)) + summary = tf.Summary() + summary.value.add(tag='supplementary/m_loss', simple_value=np.mean(acc_m_losses)) + summary.value.add(tag='supplementary/iterations', simple_value=it) + file_writer.add_summary(summary, step_count) + file_writer.flush() + + def plot_results(env_wrapper, label, **kwargs): + # plotting + print('now plotting...') + rewards = env_wrapper.env.current_buffer.get_data()['rews'] + + # initial_states = env.initial_conditions + + iterations = [] + finals = [] + means = [] + stds = [] + + # init_states = pd.read_pickle('/Users/shirlaen/PycharmProjects/DeepLearning/spinningup/Environments/initData') + + for i in range(len(rewards)): + if (len(rewards[i]) > 1): + # finals.append(rewards[i][len(rewards[i]) - 1]) + finals.append(rewards[i][-1]) + means.append(np.mean(rewards[i][1:])) + stds.append(np.std(rewards[i][1:])) + iterations.append(len(rewards[i])) + # print(iterations) + x = range(len(iterations)) + iterations = np.array(iterations) + finals = np.array(finals) + means = np.array(means) + stds = np.array(stds) + + plot_suffix = label # , Fermi time: {env.TOTAL_COUNTER / 600:.1f} h' + + fig, axs = plt.subplots(2, 1, sharex=True) + + ax = axs[0] + ax.plot(x, iterations) + ax.set_ylabel('Iterations (1)') + ax.set_title(plot_suffix) + # fig.suptitle(label, fontsize=12) + if 'data_number' in kwargs: + ax1 = plt.twinx(ax) + color = 'lime' + ax1.set_ylabel('Mean reward', color=color) # we already handled the x-label with ax1 + ax1.tick_params(axis='y', labelcolor=color) + ax1.plot(x, kwargs.get('data_number'), color=color) + + ax = axs[1] + color = 'blue' + ax.set_ylabel('Final reward', color=color) # we already handled the x-label with ax1 + ax.tick_params(axis='y', labelcolor=color) + ax.plot(x, finals, color=color) + + ax.set_title('Final reward per episode') # + plot_suffix) + ax.set_xlabel('Episodes (1)') + + ax1 = plt.twinx(ax) + color = 'lime' + ax1.set_ylabel('Mean reward', color=color) # we already handled the x-label with ax1 + ax1.tick_params(axis='y', labelcolor=color) + ax1.fill_between(x, means - stds, means + stds, + alpha=0.5, edgecolor=color, facecolor='#FF9848') + ax1.plot(x, means, color=color) + + # ax.set_ylim(ax1.get_ylim()) + if 'save_name' in kwargs: + plt.savefig(kwargs.get('save_name') + '.pdf') + # fig.tight_layout() + plt.show() + + def plot_observables(data, label, **kwargs): + """plot observables during the test""" + + sim_rewards_all = 
np.array(data.get('sim_rewards_all')) + step_counts_all = np.array(data.get('step_counts_all')) + batch_rews_all = np.array(data.get('batch_rews_all')) + tests_all = np.array(data.get('tests_all')) + + fig, axs = plt.subplots(2, 1, sharex=True) + x = np.arange(len(batch_rews_all[0])) + ax = axs[0] + ax.step(x, batch_rews_all[0]) + ax.fill_between(x, batch_rews_all[0] - batch_rews_all[1], batch_rews_all[0] + batch_rews_all[1], + alpha=0.5) + ax.set_ylabel('rews per batch') + + ax.set_title(label) + + # plt.tw + ax2 = ax.twinx() + + color = 'lime' + ax2.set_ylabel('data points', color=color) # we already handled the x-label with ax1 + ax2.tick_params(axis='y', labelcolor=color) + ax2.step(x, step_counts_all, color=color) + + ax = axs[1] + ax.plot(sim_rewards_all[0], ls=':') + ax.fill_between(x, sim_rewards_all[0] - sim_rewards_all[1], sim_rewards_all[0] + sim_rewards_all[1], + alpha=0.5) + try: + ax.plot(tests_all[0]) + ax.fill_between(x, tests_all[0] - tests_all[1], tests_all[0] + tests_all[1], + alpha=0.5) + ax.axhline(y=np.max(tests_all[0]), c='orange') + except: + pass + ax.set_ylabel('rewards tests') + # plt.tw + ax.grid(True) + ax2 = ax.twinx() + + color = 'lime' + ax2.set_ylabel('success', color=color) # we already handled the x-label with ax1 + ax2.tick_params(axis='y', labelcolor=color) + ax2.plot(length_all, color=color) + fig.align_labels() + plt.show() + + def save_data(data, **kwargs): + '''logging function''' + now = datetime.now() + clock_time = f'{now.month:0>2}_{now.day:0>2}_{now.hour:0>2}_{now.minute:0>2}_{now.second:0>2}' + out_put_writer = open(project_directory + clock_time + '_training_observables', 'wb') + pickle.dump(data, out_put_writer, -1) + out_put_writer.close() + + # variable to store the total number of steps + step_count = 0 + model_buffer = FullBuffer() + print('Env batch size:', steps_per_env, ' Batch size:', steps_per_env) + + # Create a simulated environment + sim_env = NetworkEnv(make_env(), model_op, None, num_ensemble_models) + + # ------------------------------------------------------------------------------------------------------ + # -------------------------------------Try to set correct anchors--------------------------------------- + # Get the initial parameters of each model + # These are used in later epochs when we aim to re-train the models anew with the new dataset + initial_variables_models = [] + for model_var in models_variables: + initial_variables_models.append(sess.run(model_var)) + + # update the anchor model losses: + m_opts_anchor = [] + m_loss_anchor = [] + for i in range(num_ensemble_models): + opt, loss = m_classes[i].anchor(lambda_anchor=lambda_anchor, sess=sess) + m_opts_anchor.append(opt) + m_loss_anchor.append(loss) + + # ------------------------------------------------------------------------------------------------------ + # -------------------------------------Try to set correct anchors--------------------------------------- + + total_iterations = 0 + + sim_rewards_all = [] + sim_rewards_std_all = [] + length_all = [] + tests_all = [] + tests_std_all = [] + batch_rews_all = [] + batch_rews_std_all = [] + step_counts_all = [] + + agent = Agent(MlpPolicy, sim_env, verbose=1) + for ep in range(num_epochs): + + # lists to store rewards and length of the trajectories completed + batch_rew = [] + batch_len = [] + print('============================', ep, '============================') + # Execute in serial the environment, storing temporarily the trajectories. 
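+ # Data collection phase: in the first epoch only random actions are applied (init_random_steps of them, + # with a reset every 5 steps); in later epochs the current SAC agent proposes the actions. Every observed + # transition is stored in model_buffer and later used to (re)train the ensemble of dynamics models.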
+ + # Todo: Test randomization stronger if reward lower...we need a good scheme + env.reset() + + # iterate over a fixed number of steps + steps_train = init_random_steps if ep == 0 else steps_per_env + + for _ in range(steps_train): + # run the policy + + if ep == 0: + # Sample random action during the first epoch + if (step_count+1) % 5 == 0: + env.reset() + act = np.random.uniform(-1, 1, size=env.action_space.shape[-1]) + + else: + + # act = sess.run(a_sampl, feed_dict={obs_ph: [env.n_obs], log_std: init_log_std}) + # act = np.clip(act + np.random.randn(act.shape[0], act.shape[1]) * 0.1, -1, 1) + act, _ = agent.predict(env.n_obs) + + act = np.squeeze(act) + # take a step in the environment + obs2, rew, done, _ = env.step(np.array(act)) + + # add the new transition to the temporary buffer + model_buffer.store(env.n_obs.copy(), act, rew.copy(), obs2.copy(), done) + + env.n_obs = obs2.copy() + step_count += 1 + + if done: + batch_rew.append(env.get_episode_reward()) + batch_len.append(env.get_episode_length()) + + env.reset() + + # save the data for plotting the collected data for the model + # env.save_current_buffer() + + print('Ep:%d Rew:%.2f -- Step:%d' % (ep, np.mean(batch_rew), step_count)) + + # env_test.env.set_usage('default') + # plot_results(env_test, f'Total {total_iterations}, ' + # f'data points: {len(model_buffer.train_idx) + len(model_buffer.valid_idx)}, ' + # f'modelit: {ep}') + + ############################################################ + ###################### MODEL LEARNING ###################### + ############################################################ + + target_threshold = max(model_buffer.rew) + sim_env.threshold = target_threshold # min(target_threshold, -0.05) + print('maximum: ', sim_env.threshold) + + mb_lr = lr(ep) + print('mb_lr: ', mb_lr) + + # Initialize randomly a training and validation set + if early_stopping: + model_buffer.generate_random_dataset(ratio=0.1) # 0.2) + else: + model_buffer.generate_random_dataset() + for i in range(num_ensemble_models): + # Initialize randomly a training and validation set + # if early_stopping: + # model_buffer.generate_random_dataset(ratio=0.1) # 0.2) + # else: + # model_buffer.generate_random_dataset() + # get both datasets + train_obs, train_act, train_rew, train_nxt_obs, _ = model_buffer.get_training_batch() + valid_obs, valid_act, valid_rew, valid_nxt_obs, _ = model_buffer.get_valid_batch() + # train the dynamic model on the datasets just sampled + train_model(train_obs, train_act, train_nxt_obs, train_rew, valid_obs, valid_act, valid_nxt_obs, valid_rew, + step_count, i, mb_lr=mb_lr) + + ############################################################ + ###################### POLICY LEARNING ###################### + ############################################################ + data = model_buffer.get_maximum() + print(data) + label = f'Total {total_iterations}, ' + \ + f'data points: {len(model_buffer)}, ' + \ + f'ep: {ep}, max: {data}\n' + hyp_str_all + sim_env.visualize(data=data, label=label) + # sim_env.visualize() + best_sim_test = -1e16 * np.ones(num_ensemble_models) + agent = Agent(MlpPolicy, sim_env, verbose=1) + for it in range(max_training_iterations): + total_iterations += 1 + print('\t Policy it', it, end='..') + + ################# Agent UPDATE ################ + agent.learn(total_timesteps=simulated_steps, log_interval=1000, reset_num_timesteps=False) + # Testing the policy on a real environment + # summary = tf.Summary() + # summary.value.add(tag='test/performance', simple_value=mn_test) + # 
file_writer.add_summary(summary, step_count) + # file_writer.flush() + + # Test the policy on simulated environment. + # dynamic_wait_time_count = dynamic_wait_time(ep) + if dynamic_wait_time(it, ep): + print('Iterations: ', total_iterations) + + # for niky perform test! ----------------------------- + # env_test.env.set_usage('test') + # + # mn_test, mn_test_std, mn_length, mn_success = test_agent(env_test, agent.predict, num_games=50) + # # perform test! ----------------------------- + label = f'Total {total_iterations}, ' + \ + f'data points: {len(model_buffer)}, ' + \ + f'ep: {ep}, it: {it}\n' + hyp_str_all + # + # # for niky plot results of test ----------------------------- + # # plot_results(env_test, label=label) + # + # # env_test.save_current_buffer(info=label) + # + # print(' Test score: ', np.round(mn_test, 2), np.round(mn_test_std, 2), + # np.round(mn_length, 2), np.round(mn_success, 2)) + # + # # save the data for plotting the tests + # tests_all.append(mn_test) + # tests_std_all.append(mn_test_std) + # length_all.append(mn_length) + # perform test end! ----------------------------- + env_test.env.set_usage('default') + + print('Simulated test:', end=' ** ') + + sim_rewards = [] + for i in range(num_ensemble_models): + sim_m_env = NetworkEnv(make_env(), model_op, None, number_models=i, verification=True) + mn_sim_rew, _, _, _ = test_agent(sim_m_env, agent.predict, num_games=10) + sim_rewards.append(mn_sim_rew) + print(mn_sim_rew, end=' ** ') + + print("") + + step_counts_all.append(step_count) + + sim_rewards = np.array(sim_rewards) + sim_rewards_all.append(np.mean(sim_rewards)) + sim_rewards_std_all.append(np.std(sim_rewards)) + + batch_rews_all.append(np.mean(batch_rew)) + batch_rews_std_all.append(np.std(batch_rew)) + + data = dict(sim_rewards_all=[sim_rewards_all, sim_rewards_std_all], + entropy_all=length_all, + step_counts_all=step_counts_all, + batch_rews_all=[batch_rews_all, batch_rews_std_all], + tests_all=[tests_all, tests_std_all], + info=label) + + # save the data for plotting the progress ------------------- + save_data(data=data) + + # plotting the progress ------------------- + # if it % 10 == 0: + # plot_observables(data=data, label=label) + + # stop training if the policy hasn't improved + if (np.sum(best_sim_test >= sim_rewards) > int(num_ensemble_models * 0.7)): + if it > delay_before_convergence_check and ep < num_epochs - 1: + print('break') + break + else: + best_sim_test = sim_rewards + + # Final verification: + env_test.env.set_usage('final') + mn_test, mn_test_std, mn_length, _ = test_agent(env_test, agent.predict, num_games=50) + + label = f'Verification : total {total_iterations}, ' + \ + f'data points: {len(model_buffer.train_idx) + len(model_buffer.valid_idx)}, ' + \ + f'ep: {ep}, it: {it}\n' + \ + f'rew: {mn_test}, std: {mn_test_std}' + plot_results(env_test, label=label) + + env_test.save_current_buffer(info=label) + + env_test.env.set_usage('default') + + # closing environments.. 
+ env.close() + file_writer.close() + + +if __name__ == '__main__': + aedyna('', hidden_sizes=hidden_sizes, num_epochs=num_epochs, + steps_per_env=steps_per_env, algorithm='TRPO', model_batch_size=model_batch_size, + simulated_steps=simulated_steps, + num_ensemble_models=num_ensemble_models, model_iter=model_iter, init_random_steps=init_random_steps) + + + + + + + + + + + diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/laser_trajectory_control_env.py b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/laser_trajectory_control_env.py new file mode 100644 index 0000000..ea7fe4c --- /dev/null +++ b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/laser_trajectory_control_env.py @@ -0,0 +1,240 @@ +import numpy as np +import gym + +# from tango_connection import TangoConnection + +class LaserTrajectoryControlEnv(gym.Env): + + def __init__(self, tango, **kwargs): + self.init_rewards = [] + self.done = False + self.current_length = 0 + self.__name__ = 'LaserTrajectoryControlEnv' + + self.curr_episode = -1 + self.TOTAL_COUNTER = -1 + self.rewards = [] + self.actions = [] + self.states = [] + self.dones = [] + self.initial_conditions = [] + + self.max_length = 25 + self.max_steps = 10 + + # + self.tango = tango + + # some information from tango + self.system = self.tango.system + + self.state_size = self.tango.state_size + self.action_size = self.tango.action_size + + self.init_state = self.tango.init_state + self.init_intensity = self.tango.init_intensity + + # scaling factor definition + if 'half_range' in kwargs: + self.half_range = kwargs.get('half_range') + else: + self.half_range = 3000 + if self.system == 'eos': + self.half_range = 30000 # 30000 + + self.state_range = self.get_range() + self.state_scale = 2 * self.half_range + + # target intensity + if 'target_intensity' in kwargs: + self.target_intensity = kwargs.get('target_intensity') + else: + self.target_intensity = self.init_intensity + + # state, intensity and reward definition + self.init_state_norm = self.scale(self.init_state) + self.init_intensity_norm = self.get_intensity() + self.state = self.init_state_norm.copy() + self.intensity = self.init_intensity_norm.copy() + self.reward = self.get_reward() + + ## max action allowed + if 'max_action' in kwargs: + max_action = kwargs.get('max_action') + else: + max_action = 500 + # bigger max_action... evalueate if the size is correct! 
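# Worked example of the scaling used in this class (default values from this file,
# given as an illustration): the state is mapped to [0, 1] via
#     state_scaled = (state - (init_state - half_range)) / (2 * half_range)
# so with half_range = 3000 a raw step of max_action = 500 motor counts becomes
# 500 / 6000 ≈ 0.083 in normalized units; for the 'eos' system (half_range = 30000,
# max_action = 5000) the normalized limit is likewise 5000 / 60000 ≈ 0.083.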
+ # max_action = 6000 # 3000 + if self.system == 'eos': + max_action = 5000 # 2500 # 5000 + + self.max_action = max_action/self.state_scale + + # observation space definition + self.observation_space = gym.spaces.Box(low=0.0, #+ self.max_action, + high=1.0, #- self.max_action, + shape=(self.state_size,), + dtype=np.float64) + + # action spacec definition + self.action_space = gym.spaces.Box(low=-self.max_action, + high=self.max_action, + shape=(self.action_size,), + dtype=np.float64) + + self.test = False + + def get_range(self): + # define the available state space + state_range = np.c_[self.init_state - self.half_range, self.init_state + self.half_range] + return state_range + + def scale(self, state): + # scales the state from state_range values to [0, 1] + state_scaled = (state - self.state_range[:, 0]) / self.state_scale + return state_scaled + + def descale(self, state): + # descales the state from [0, 1] to state_range values + state_descaled = state * self.state_scale + self.state_range[:, 0] + return state_descaled + + def set_state(self, state): + # writes descaled state + state_descaled = self.descale(state) + self.tango.set_state(state_descaled) + + def get_state(self): + # read scaled state + state = self.tango.get_state() + state_scaled = self.scale(state) + return state_scaled + + def norm_intensity(self, intensity): + # normalize the intensity with respect to target_intensity + intensity_norm = intensity/self.target_intensity + return intensity_norm + + def get_intensity(self): + # read normalized intensity + intensity = self.tango.get_intensity() + intensity_norm = self.norm_intensity(intensity) + return intensity_norm + + def step(self, action): + # step method + self.current_length += 1 + state, reward = self.take_action(action) + + intensity = self.get_intensity() + if intensity > 0.95: + self.done = True + + #elif self.current_length >= self.max_length: + elif self.current_length >= self.max_steps: + self.done = True + self.add_trajectory_data(state=state, action=action, reward=reward, done=self.done) + + print('step', self.current_length,'state ', state, 'a ', action, 'r ', reward) + # self.rewards[self.curr_episode].append(reward) + + return state, reward, self.done, {} + + def take_action(self, action): + # initial value: action /= 12 (maybe too small) + # action /= 12 + # take action method + new_state = self.state + action + + # state must remain in [0, 1] + if any(np.squeeze(new_state) < 0.0) or any(np.squeeze(new_state) > 1.0): + new_state = np.clip(new_state, 0.0, 1.0) + # print('WARNING: state boundaries!') + + # set new state to the machine + self.set_state(new_state) + state = self.get_state() + self.state = state + + # get new intensity from the machine + intensity = self.get_intensity() + self.intensity = intensity + + # reward calculation + reward = self.get_reward() + self.reward = reward + + return state, reward + + def get_reward(self): + # You can change reward function, but it should depend on intensity + # e.g. 
next line + # reward = -(1 - self.intensity / self.target_intensity) + reward = -(1 - self.intensity / 1.0) + + # reward = self.intensity + return reward + + def reset(self): + # reset method + + self.done = False + self.current_length = 0 + + # self.curr_episode += 1 + # self.rewards.append([]) + + bad_init = True + while bad_init: + new_state = self.observation_space.sample() + + self.set_state(new_state) + state = self.get_state() + self.state = state + + intensity = self.get_intensity() + self.intensity = intensity + self.init_rewards.append(-(1 - self.intensity / 1.0)) + + bad_init = False + + self.curr_episode += 1 + self.rewards.append([]) + self.actions.append([]) + self.states.append([]) + self.dones.append([]) + # self.add_trajectory_data(state=state, action=action, reward=reward, done=done) + self.states[self.curr_episode].append(state) + + return state + + def add_trajectory_data(self, state, action, reward, done): + self.rewards[self.curr_episode].append(reward) + self.actions[self.curr_episode].append(action) + self.states[self.curr_episode].append(state) + self.dones[self.curr_episode].append(done) + + def seed(self, seed=None): + # seed method + np.random.seed(seed) + + def render(self, mode='human'): + # render method + print('ERROR\nnot yet implemented!') + pass + + +if __name__ == '__main__': + + # fel + ''' + # system = 'eos' + system = 'fel2' + path = '/home/niky/FERMI/2020_10_06/configuration/' + conf_file = 'conf_'+system+'.json' + + filename = path+conf_file + tng = TangoConnection(conf_file=filename) + env = LaserTrajectoryControlEnv(tng) + #''' + diff --git a/Data_Experiments/2020_11_05_AE_Dyna@FERMI/tango_connection.py b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/tango_connection.py new file mode 100644 index 0000000..ad1af35 --- /dev/null +++ b/Data_Experiments/2020_11_05_AE_Dyna@FERMI/tango_connection.py @@ -0,0 +1,253 @@ +import json +import time +import numpy as np +import PyTango as tango + +class TangoConnection: + + def __init__(self, conf_file, **kwargs): + + # load json configuration file + with open(conf_file) as f: + self.conf_data = json.load(f) + + self.system = self.conf_data['system'] + + # get actuators data + conf_actuators = self.conf_data['actuators'] + self.actuators_data= self.get_confdata(conf_actuators) + self.actuators_device_num = self.actuators_data[0] + self.actuators_device_list = self.actuators_data[1] + self.actuators_device_attr_num = self.actuators_data[2] + self.actuators_device_attr_list = self.actuators_data[3] + + self.actuators_size = np.sum(self.actuators_device_attr_num) + self.state_size = self.actuators_size.copy() + self.action_size = self.actuators_size.copy() + self.state = np.zeros(self.state_size) + + # get sensors data + conf_sensors = self.conf_data['sensors'] + self.sensors_data = self.get_confdata(conf_sensors) + self.sensors_device_num = self.sensors_data[0] + self.sensors_device_list = self.sensors_data[1] + self.sensors_device_attr_num = self.sensors_data[2] + self.sensors_device_attr_list = self.sensors_data[3] + + self.sensors_size = np.sum(self.sensors_device_attr_num) + self.intensity = np.zeros(1) + + # get spectrometer data + conf_spectrometer = self.conf_data['spectrometer'] + self.spectrometer_data = self.get_confdata(conf_spectrometer) + self.spectrometer_device_num = self.spectrometer_data[0] + self.spectrometer_device_list = self.spectrometer_data[1] + self.spectrometer_device_attr_num = self.spectrometer_data[2] + self.spectrometer_device_attr_list = self.spectrometer_data[3] + + # get security data 
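# Assumed layout of the JSON configuration file, inferred from the keys read by this
# class; the device names and attributes below are placeholders, not the actual FERMI
# configuration:
# {
#   "system": "fel2",
#   "actuators":    [{"host": "tango-host:20000/", "address": "dev/piezo/1",  "attributes": ["axis_a", "axis_b"]}],
#   "sensors":      [{"host": "tango-host:20000/", "address": "dev/ccd/1",    "attributes": ["intensity"]}],
#   "spectrometer": [{"host": "tango-host:20000/", "address": "dev/spec/1",   "attributes": ["image"]}],
#   "security":     [{"host": "tango-host:20000/", "address": "dev/charge/1", "attributes": ["charge"]}]
# }
# Each device proxy is created from dev_data['host'] + dev_data['address'] in get_confdata().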
+ conf_security = self.conf_data['security'] + self.security_data = self.get_confdata(conf_security) + self.security_device_num = self.security_data[0] + self.security_device_list = self.security_data[1] + self.security_device_attr_num = self.security_data[2] + self.security_device_attr_list = self.security_data[3] + self.security_threshold = 100. + + if 'num_samples' in kwargs: + self.num_samples = kwargs.get('num_samples') + else: + self.num_samples = 25 # 11 # 25 # 51 # 25 + + # self.pause = 0.5 + 0.02*self.num_samples + self.pause = 0.5 + 0.02*self.num_samples + 0.25 + # self.pause = 0.5 + 0.02*self.num_samples + 1 + + if 'target_state' in kwargs: + self.target_actuators = kwargs.get('target_state') + else: + self.target_actuators = 131072 * np.ones(self.actuators_size) + + if self.system == 'sequencer': + self.set_state(self.target_actuators) + self.target_position = self.get_position() + + # read initial values for actuators and sensors + self.init_state = self.get_state() + self.init_intensity = self.get_intensity() + + self.state = self.init_state.copy() + self.intensity = self.init_intensity.copy() + + + def get_confdata(self, conf_dev): + dev_list, dev_attr_num, dev_attr_list = [], [], [] + dev_num = len(conf_dev) + for j in range(dev_num): + dev_data = conf_dev[j] + dev_name = dev_data['host'] + dev_data['address'] + dev = tango.DeviceProxy(dev_name) + dev_attr = dev_data['attributes'] + + dev_list.append(dev) + dev_attr_num.append(len(dev_attr)) + dev_attr_list.append(dev_attr) + return [dev_num, dev_list, dev_attr_num, dev_attr_list] + + def get_position(self): + position = np.zeros(self.sensors_size) + for i in range(self.sensors_device_num): + dev = self.sensors_device_list[i] + for j in range(self.sensors_device_attr_num[i]): + idx = self.sensors_device_num * i + j + attr_name = self.sensors_device_attr_list[i][j] + position[idx] = dev.read_attribute(attr_name).value + + return position + + def set_state(self, state): + self.check_charge() + self.set_actuators(state) + self.state = state + + + def get_state(self): + self.check_charge() + state = self.get_actuators() + self.state = state + return state + + def set_actuators(self, actuators_val): + + for i in range(self.actuators_device_num): + dev = self.actuators_device_list[i] + for j in range(self.actuators_device_attr_num[i]): + idx = self.actuators_device_num * i + j + attr_name = self.actuators_device_attr_list[i][j] + attr_val = actuators_val[idx] + dev.write_attribute(attr_name, attr_val) + + time.sleep(self.pause) + pass + + def get_actuators(self): + attr_val = np.zeros(self.actuators_size) + for i in range(self.actuators_device_num): + dev = self.actuators_device_list[i] + for j in range(self.actuators_device_attr_num[i]): + idx = self.actuators_device_num * i + j + attr_name = self.actuators_device_attr_list[i][j] + attr_val[idx] = dev.read_attribute(attr_name).value + return attr_val + + def get_sensors(self): + attr_val = [] + + if self.system in ['fel', 'fel1', 'fel2']: + #if self.system == 'fel' or self.system == 'fel1' or self.system == 'fel2': + attr_val = np.zeros(self.sensors_size) + attr_val_seq = np.zeros((self.sensors_size, self.num_samples)) + for i in range(self.sensors_device_num): + dev = self.sensors_device_list[i] + for j in range(self.sensors_device_attr_num[i]): + idx = self.sensors_device_num * i + j + attr_name = self.sensors_device_attr_list[i][j] + attr_val_seq[idx] = dev.command_inout(attr_name, [0, int(self.num_samples)]) + attr_val[idx] = np.median(attr_val_seq[idx]) + + elif self.system 
== 'sequencer': + position = self.get_position() + screen_intensity = np.zeros(self.sensors_device_num) + for i in range(self.sensors_device_num): + screen_position = position[self.sensors_device_num * i:self.sensors_device_num * i + 2] + target_position = self.target_position[self.sensors_device_num * i:self.sensors_device_num * i + 2] + difference = screen_position - target_position + distance = np.sqrt(np.power(difference, 2)) + if any(distance > 0.1): + screen_intensity[i] = 0.0 + else: + den = 2 * np.power(0.04, 2) + screen_intensity[i] = np.exp(-np.sum(np.power(difference, 2)) / den) + attr_val = screen_intensity + #''' + elif self.system == 'eos': + attr_val = np.zeros(self.sensors_size) + attr_val_seq = np.zeros((self.sensors_size, self.num_samples)) + idx = 0 + for i in range(self.sensors_device_num): + dev = self.sensors_device_list[i] + for j in range(self.sensors_device_attr_num[i]): + # idx = self.sensors_device_num * i + j + attr_name = self.sensors_device_attr_list[i][j] + attr_val_seq[idx] = dev.command_inout(attr_name, [0, int(self.num_samples)]) + attr_val[idx] = np.median(attr_val_seq[idx]) + idx += 1 + #''' + return attr_val + + def get_intensity(self): + self.check_charge() + attr_val = self.get_sensors() + intensity = np.prod(attr_val) + self.intensity = intensity + return intensity + + def get_image(self): + self.check_charge() + attr_val = [] + for i in range(self.spectrometer_device_num): + dev = self.spectrometer_device_list[i] + for j in range(self.spectrometer_device_attr_num[i]): + # idx = self.spectrometer_device_num * i + j + attr_name = self.spectrometer_device_attr_list[i][j] + attr_val.append(dev.read_attribute(attr_name).value) + return attr_val[0] + + def get_security_check(self): + attr_val = [] + for i in range(self.security_device_num): + dev = self.security_device_list[i] + for j in range(self.spectrometer_device_attr_num[i]): + # idx = self.security_device_num * i + j + attr_name = self.security_device_attr_list[i][j] + attr_val.append(dev.read_attribute(attr_name).value) + return attr_val[0] + + def check_charge(self): + if self.system in ['fel', 'fel1', 'fel2']: + #if self.system == 'fel' or self.system == 'fel1' or self.system == 'fel2': + #if self.system in ['eos', 'fel']: + # print('\nSECURITY CHECK\n') + flag = 0 + charge = self.get_security_check() + #while charge < 100.: + while charge < self.security_threshold: + flag = 1 + print('\nwait...\n') + time.sleep(5) + charge = self.get_security_check() + + if flag: + print('FEL is coming back!\nWait 1 minute more...\n') + time.sleep(60) + + + +if __name__ == '__main__': + + # sequencer + # system = 'sequencer' + # path = '/home/niky/PycharmProjects/FERMI/devel/sequencer_new/configuration/' + + # fel + ''' + # system = 'eos' + system = 'fel2' + path = '/home/niky/FERMI/2020_10_06/configuration/' + conf_file = 'conf_'+system+'.json' + + filename = path+conf_file + + tng = TangoConnection(conf_file=filename) + ''' + diff --git a/Figures/AE-DYNA_observables.png b/Figures/AE-DYNA_observables.png new file mode 100644 index 0000000..b76c7a2 Binary files /dev/null and b/Figures/AE-DYNA_observables.png differ diff --git a/Figures/AE-DYNA_verification.png b/Figures/AE-DYNA_verification.png new file mode 100644 index 0000000..6b27ec2 Binary files /dev/null and b/Figures/AE-DYNA_verification.png differ diff --git a/Figures/FERMI_all_experiments_NAF_convergence.png b/Figures/FERMI_all_experiments_NAF_convergence.png new file mode 100644 index 0000000..4b7de1f Binary files /dev/null and 
b/Figures/FERMI_all_experiments_NAF_convergence.png differ diff --git a/Figures/FERMI_all_experiments_NAF_episodes.png b/Figures/FERMI_all_experiments_NAF_episodes.png new file mode 100644 index 0000000..7d6bcff Binary files /dev/null and b/Figures/FERMI_all_experiments_NAF_episodes.png differ diff --git a/Figures/ME-TRPO_observables.png b/Figures/ME-TRPO_observables.png new file mode 100644 index 0000000..a2caeed Binary files /dev/null and b/Figures/ME-TRPO_observables.png differ diff --git a/Figures/ME-TRPO_verification.png b/Figures/ME-TRPO_verification.png new file mode 100644 index 0000000..630b1ad Binary files /dev/null and b/Figures/ME-TRPO_verification.png differ diff --git a/NAF_results.py b/NAF_results.py new file mode 100644 index 0000000..40ee0e3 --- /dev/null +++ b/NAF_results.py @@ -0,0 +1,205 @@ +import os +import pickle +import numpy as np +import matplotlib.pyplot as plt + + +def load_pickle_logging(file_name): + # directory = 'checkpoints/' + file_name + '/' + directory = file_name + '/' + files = [] + directory = directory + 'data/' + for f in os.listdir(directory): + if 'trajectory_data' in f and 'pkl' in f: + files.append(f) + files.sort() + print(files[-1]) + + with open(directory + files[-1], 'rb') as f: + states = pickle.load(f) + actions = pickle.load(f) + rewards = pickle.load(f) + dones = pickle.load(f) + return states, actions, rewards, dones + + +def load_pickle_final(file_name): + # directory = 'checkpoints/' + file_name + '/' + directory = file_name + '/' + file = 'plot_data_0.pkl' + + with open(directory + file, 'rb') as f: + rews = pickle.load(f) + inits = pickle.load(f) + losses = pickle.load(f) + v_s = pickle.load(f) + return rews, inits, losses, v_s + + +file_name = 'Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_double_q_Tango_11' +states_0, actions_0, rewards_0, dones_0 = load_pickle_logging(file_name) +file_name = 'Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_double_q_Tango_11_bis' +states_1, actions_1, rewards_1, dones_1 = load_pickle_logging(file_name) +rewards = [rewards_0, rewards_1] + +file_name_s = 'Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11' +states_s0, actions_s0, rewards_s0, dones_s0 = load_pickle_logging(file_name_s) +file_name_s = 'Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis' +states_s, actions_s, rewards_s, dones_s = load_pickle_logging(file_name_s) +rewards_s = [rewards_s, rewards_s0] + + +def read_rewards(rewards_in): + iterations_all = [] + final_rews_all = [] + mean_rews_all = [] + for k in range(len(rewards_in)): + rewards = rewards_in[k] + + iterations = [] + final_rews = [] + mean_rews = [] + for i in range(len(rewards)): + if len(rewards[i]) > 0: + final_rews.append(rewards[i][len(rewards[i]) - 1]) + iterations.append(len(rewards[i])) + try: + mean_rews.append(np.sum(rewards[i][1:])) + except: + mean_rews.append([]) + iterations_all.append(iterations) + final_rews_all.append(final_rews) + mean_rews_all.append(mean_rews) + + iterations = np.mean(np.array(iterations_all), axis=0) + final_rews = np.mean(np.array(final_rews_all), axis=0) + mean_rews = np.mean(np.array(mean_rews_all), axis=0) + return iterations, final_rews, mean_rews + + +def plot_results(rewards, rewards_single, **kwargs): + + iterations, final_rews, mean_rews = read_rewards(rewards) + iterations_s, final_rews_s, mean_rews_s = read_rewards(rewards_single) + + plot_suffix = "" # f', number of iterations: {env.TOTAL_COUNTER}, Linac4 time: {env.TOTAL_COUNTER / 600:.1f} h' + fig, axs = 
plt.subplots(2, 1, sharex=True) + + ax = axs[0] + ax.axvspan(0,100, alpha=0.2, color='coral') + color = 'blue' + ax.plot(iterations, c=color) + ax.plot(iterations_s, c=color, ls=':') + ax.set_ylabel('steps', color=color) + ax.tick_params(axis='y', labelcolor=color) + ax1 = plt.twinx(ax) + color = 'k' + ax1.plot(np.cumsum(iterations), c=color) + ax1.plot(np.cumsum(iterations_s), c=color, ls=':') + ax1.set_ylabel('cumulative steps', color=color) + ax.set_title('Iterations' + plot_suffix) + # fig.suptitle(label, fontsize=12) + + ax = axs[1] + ax.axvspan(0, 100, alpha=0.2, color='coral') + color = 'blue' + # ax.plot(starts, c=color) + ax.plot(mean_rews, c=color) + ax.plot(mean_rews_s, c=color, ls=':') + ax.set_ylabel('cum. return', color=color) + # ax.axhline(-0.05, ls=':', color='r') + ax.tick_params(axis='y', labelcolor=color) + ax.set_title('Reward per episode') # + plot_suffix) + ax.set_xlabel('episodes') + + ax1 = plt.twinx(ax) + color = 'lime' + ax1.plot(final_rews[:-1], color=color) + ax1.plot(final_rews_s[:-1], color=color, ls=':') + + ax1.set_ylabel('final return', color=color) + ax1.axhline(-0.05, ls=':', color=color) + ax1.tick_params(axis='y', labelcolor=color) + + + fig.align_labels() + fig.tight_layout() + # fig.suptitle('NonUniformImage class', fontsize='large') + if 'save_name' in kwargs: + save_name = kwargs.get('save_name') + plt.savefig(save_name + '_episodes.pdf') + plt.savefig(save_name + '_episodes.png') + plt.show() + + + +label = 'FERMI_all_experiments_NAF' +# +# plot_results(rewards, rewards_s, label) + + +def read_losses_v_s(losses0, v_s0, max_length): + losses_all = [] + v_s_all = [] + for k in range(len(losses0)): + + losses = losses0[k] + print(len(losses)) + v_s = v_s0[k] + losses_all.append(losses[:max_length]) + v_s_all.append(v_s[:max_length]) + losses = np.mean(losses_all, axis=0) + v_s = np.mean(v_s_all, axis=0) + return losses, v_s + + +file_name = 'Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_double_q_Tango_11' +rews0, inits0, losses0, v_s0 = load_pickle_final(file_name) +file_name = 'Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_double_q_Tango_11_bis' +rews1, inits1, losses1, v_s1 = load_pickle_final(file_name) +losses, v_s = read_losses_v_s([losses0, losses1], [v_s0, v_s1], 691) +rewards = [rews0, rews1] + +file_name = 'Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11' +rews0, inits0, losses0, v_s0 = load_pickle_final(file_name) +file_name = 'Data_Experiments/2020_07_20_NAF@FERMI/FEL_training_100_single_q_Tango_11_bis' +rews1, inits1, losses1, v_s1 = load_pickle_final(file_name) +losses_s, v_s_s = read_losses_v_s([losses0, losses1], [v_s0, v_s1], 691) + +rewards_s = [rews0, rews1] + + +def plot_convergence(losses, v_s, losses_s, v_s_s, label, **kwargs): + fig, ax = plt.subplots() + ax.set_title(label) + ax.set_xlabel('steps') + + color = 'tab:blue' + ax.semilogy(losses, color=color) + ax.semilogy(losses_s, color=color, ls=':') + ax.tick_params(axis='y', labelcolor=color) + ax.set_ylabel('Bellman error', color=color) + # ax.set_ylim(0, 1) + + ax1 = plt.twinx(ax) + # ax1.set_ylim(-2, 1) + color = 'lime' + + ax1.set_ylabel('V', color=color) # we already handled the x-label with ax1 + ax1.tick_params(axis='y', labelcolor=color) + ax1.plot(v_s, color=color) + ax1.plot(v_s_s, color=color, ls=':') + plt.tight_layout() + if 'save_name' in kwargs: + save_name = kwargs.get('save_name') + plt.savefig(save_name + '_convergence' + '.pdf') + plt.savefig(save_name + '_convergence' + '.png') + plt.show() + +label = 
'FERMI_all_experiments_NAF' +save_name = 'Figures/' + label +plot_convergence(losses, v_s, losses_s, v_s_s, label=label, save_name=save_name) + +label = 'FERMI_all_experiments_NAF' +save_name = 'Figures/' + label +plot_results(rewards, rewards_s, save_name=save_name) \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..3aead26 --- /dev/null +++ b/README.md @@ -0,0 +1,15 @@ +# FermiPaper + +These are the results of the RL tests at FERMI-FEL. +A new implementation of the NAF with double Q-learning: +![NAF2_training](Figures/FERMI_all_experiments_NAF_episodes.png) +![NAF2_training](Figures/FERMI_all_experiments_NAF_convergence.png) + +A new implementation of an AE-Dyna: +![AE-DYNA](Figures/AE-DYNA_observables.png) +![AE-DYNA](Figures/AE-DYNA_verification.png) +A variant of the ME-TRPO: +![ME-TRPO](Figures/ME-TRPO_observables.png) +![ME-TRPO](Figures/ME-TRPO_verification.png) +The learning evolution: +![ME-TRPO](Figures/Learning_evolution.png) \ No newline at end of file diff --git a/local_fel_simulated_env.py b/local_fel_simulated_env.py new file mode 100644 index 0000000..0d66356 --- /dev/null +++ b/local_fel_simulated_env.py @@ -0,0 +1,377 @@ +import pickle + +import numpy as np +import gym +# import pybobyqa +import tensorflow as tf +import matplotlib.pyplot as plt +import pandas as pd + +from simulated_tango import SimTangoConnection + + +class FelLocalEnv(gym.Env): + + def __init__(self, tango, **kwargs): + self.max_steps = 10 + print('init env ' * 20) + self.init_rewards = [] + self.done = False + self.current_length = 0 + self.__name__ = 'FelLocalEnv' + + self.curr_episode = -1 + self.TOTAL_COUNTER = -1 + + self.rewards = [] + self.states = [] + self.actions = [] + self.dones = [] + + self.initial_conditions = [] + + # tango = SimTangoConnection() simulates the behaviour of the system we want to control + self.tango = tango + + # some information from tango + self.state_size = self.tango.state_size + self.action_size = self.tango.action_size + + self.target_state = self.tango.target_state + self.target_intensity = self.tango.target_intensity + + # current state + self.init_state = self.tango.state + + # scaling factor definition + if 'half_range' in kwargs: + self.half_range = kwargs.get('half_range') + else: + self.half_range = 3000 + + self.state_range = self.get_range() + self.state_scale = 2 * self.half_range + + # state, intensity and reward first definition + self.state = self.scale(self.init_state) + self.intensity = self.get_intensity() + self.reward = self.get_reward() + + # max action allowed + if 'max_action' in kwargs: + max_action = kwargs.get('max_action') + else: + max_action = 500 + # max_action = 6000 + self.max_action = max_action / self.state_scale + + print('max_action', max_action) + + # state space definition + self.observation_space = gym.spaces.Box(low=0.0, + high=1.0, + shape=(self.state_size,), + dtype=np.float64) + + # action space definition + self.action_space = gym.spaces.Box(low=-self.max_action, + high=self.max_action, + shape=(self.state_size,), + dtype=np.float64) + self.test = False + + print('real env scale:', self.action_space.low, self.action_space.high, self.observation_space.low, + self.observation_space.high) + + def get_range(self): + # defines the available state space + state_range = np.c_[self.init_state - self.half_range, self.init_state + self.half_range] + return state_range + + def scale(self, state): + # scales the state from state_range values to [0, 1] + state_scaled = (state - self.state_range[:, 0]) /
self.state_scale + return state_scaled + + def descale(self, state): + # descales the state from [0, 1] to state_range values + state_descaled = state * self.state_scale + self.state_range[:, 0] + return state_descaled + + def set_state(self, state): + # writes descaled state + state_descaled = self.descale(state) + self.tango.set_state(state_descaled) + + def get_state(self): + # read scaled state + state = self.tango.get_state() + state_scaled = self.scale(state) + return state_scaled + + def set_state_ext(self, state): + state_descaled = self.descale(state) + self.tango.set_state(state_descaled) + state = self.tango.get_state() + self.state = self.scale(state) + + def norm_intensity(self, intensity): + # normalize the intensity with respect to target_intensity + intensity_norm = intensity / self.target_intensity + return intensity_norm + + def get_intensity(self): + # read normalized intensity + intensity = self.tango.get_intensity() + intensity_norm = self.norm_intensity(intensity) + return intensity_norm + + def step(self, action): + action = np.squeeze(action) + # print('a', action) + # step method + self.current_length += 1 + # rescale action + # action /= 6 + # action = np.clip(action, -1, 1) + state, reward = self.take_action(action.copy()) + # state = state + 1e-4*np.random.randn(self.observation_space.shape[-1]) + # reward += 1e-4 * np.random.randn(1)[0] + intensity = self.get_intensity() + # print('intensity', intensity) + # if any(self.states[self.curr_episode][-1] == state): + # self.boundary += 1 + # print('boundary hit nr: ', self.boundary) + # else: + # self.boundary = -1 + + if intensity > .95: + self.done = True + # print('passed at', intensity) + # elif self.boundary > 10: + # self.done = True + elif self.current_length >= self.max_steps: + # print('failed at', intensity) + self.done = True + + # elif any(self.state + action)<0 or any(self.state + action)>1: + # self.done = True + + # print('step:') + # print() + # # print('s ', state) + + ######################################################################################################## + # print(self.curr_episode, self.current_length, 'state ', state, 'a ', action, 'r ', reward) + ######################################################################################################## + if self.test: + self.add_trajectory_data(state=state, action=action, reward=reward, done=self.done) + # if self.done: + # print('done at ', reward) + return state, reward, self.done, {} + + def take_action(self, action): + # print('action inner: ', np.round(action*12,2)) + # action /= 12 + # take action method + new_state = self.state + action # + 0.05*np.random.randn(action.shape[-1]) + # state must remain in [0, 1] + if any(new_state < 0.0) or any(new_state > 1.0): + new_state = np.clip(new_state, 0.0, 1.0) + # self.done = True + # print('WARNING: state boundaries!') + + # set new state to the machine + self.set_state(new_state) + state = self.get_state() + self.state = state + + # get new intensity from the machine + intensity = self.get_intensity() + self.intensity = intensity + + # reward calculation + reward = self.get_reward() + self.reward = reward + + return state, reward + + def get_reward(self): + # You can change reward function, but it should depend on intensity + # e.g. 
next line + reward = -(1 - self.intensity / self.target_intensity) + + # reward = self.intensity + return reward + + def reset(self, **kwargs): + # print('reset true env') + self.boundary = -1 + # reset method + self.done = False + self.current_length = 0 + # self.curr_step = 0 + + bad_init = True + while bad_init: + if 'set_state' in kwargs: + new_state = kwargs.get('set_state') + print('set_state') + else: + new_state = self.observation_space.sample() + + self.set_state(new_state) + state = self.get_state() + self.state = state + + intensity = self.get_intensity() + self.intensity = intensity + # bad_init = False if -(1 - self.intensity / self.target_intensity) > -1 else True + reward = -(1 - self.intensity / self.target_intensity) + self.init_rewards.append(reward) + bad_init = False + + done = self.intensity > .95 + action = np.zeros(self.action_space.shape) + self.curr_episode += 1 + if self.test: + # self.curr_episode += 1 + self.rewards.append([]) + self.actions.append([]) + self.states.append([]) + self.dones.append([]) + self.add_trajectory_data(state=state, action=action, reward=reward, done=done) + # print('reset',self.dones) + + # print('\n init:', -(1 - self.intensity / self.target_intensity)) + # return 2 * (state - 0.5) + return state + + def add_trajectory_data(self, state, action, reward, done): + self.rewards[self.curr_episode].append(reward) + self.actions[self.curr_episode].append(action) + self.states[self.curr_episode].append(state) + self.dones[self.curr_episode].append(done) + + def seed(self, seed=None): + # seed method + np.random.seed(seed) + + def render(self, mode='human'): + # render method + print('ERROR\nnot yet implemented!') + pass + + def store_trajectories_to_pkl(self, name, directory): + out_put_writer = open(directory + name, 'wb') + pickle.dump(self.states, out_put_writer, -1) + pickle.dump(self.actions, out_put_writer, -1) + pickle.dump(self.rewards, out_put_writer, -1) + pickle.dump(self.dones, out_put_writer, -1) + out_put_writer.close() + + +if __name__ == '__main__': + import scipy.optimize as opt + + tng = SimTangoConnection() + env = FelLocalEnv(tng) + low = env.action_space.low + high = env.action_space.high + + + def normalize(input, box): + low = tf.convert_to_tensor(box.low, dtype=tf.float64) + high = tf.convert_to_tensor(box.high, dtype=tf.float64) + return tf.math.scalar_mul(tf.convert_to_tensor(2, dtype=tf.float64), + tf.math.add(tf.convert_to_tensor(-0.5, dtype=tf.float64), + tf.multiply(tf.math.add(input, -low), 1 / (high - low)))) + + + def de_normalize(input, box): + low = tf.convert_to_tensor(box.low, dtype=tf.float64) + high = tf.convert_to_tensor(box.high, dtype=tf.float64) + return tf.math.add( + tf.multiply(tf.math.add(tf.math.scalar_mul(tf.convert_to_tensor(1 / 2, dtype=tf.float64), input), + tf.convert_to_tensor(0.5, dtype=tf.float64)), + (high - low)), low) + + + # print((env.action_space.sample() - low)/(high-low)) + # print('') + # for _ in range(1): + # s = env.reset() + # a = env.action_space.sample() + # box = env.action_space + # # ns, r = env.step(a) + # print(a) + # print(normalize(a, box=box)) + # print(de_normalize(normalize(a, box=box), box=box)) + # # print(env.action_space.low) + # # print('state:', env.descale(s)) + # # # print(a) + # # print('new state:', env.descale(ns)) + # # print('reward:', r) + # # print('') + class WrappedEnv(gym.Wrapper): + def __init__(self, env, **kwargs): + gym.Wrapper.__init__(self, env) + self.current_action = np.zeros(env.action_space.shape[0]) + + def reset(self, **kwargs): + 
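# Cache the observation from reset() so that step() can restore self.env.state
# from self.current_obs before each call; this lets the derivative-free optimiser
# below evaluate every candidate action from the same starting state.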
self.current_obs = self.env.reset(**kwargs) + return self.current_obs + + def step(self, action): + self.env.state = self.current_obs + ob, reward, done, info = self.env.step(action) + return ob, reward, done, info + + + environment_instance = WrappedEnv(env=env) + + rews = [] + actions = [] + states = [] + + + def objective(action): + actions.append(action.copy()) + _, r, _, _ = environment_instance.step(action=action.copy()) + rews.append(abs(r)) + return abs(r) + + + if True: + + def constr(action): + if any(action > environment_instance.action_space.high[0]): + return -1 + elif any(action < environment_instance.action_space.low[0]): + return -1 + else: + return 1 + + init = environment_instance.reset() + print('init: ', init) + start_vector = np.zeros(environment_instance.action_space.shape[0]) + # rhobeg = 1 * environment_instance.action_space.high[0] + # print('rhobeg: ', rhobeg) + # res = opt.fmin_cobyla(objective, start_vector, [constr], rhobeg=rhobeg, rhoend=.001) + # constr = {'type': 'ineq', 'fun': lambda x: any(abs(x) > 1/12)} + # minimizer_kwargs = {"method": "COBYLA", "constraints": constr} + # res = opt.basinhopping(objective, start_vector, minimizer_kwargs=minimizer_kwargs) + # print(res) + upper = environment_instance.action_space.high*12 + lower = environment_instance.action_space.low*12 + soln = pybobyqa.solve(objective, start_vector, maxfun=500, bounds=(lower, upper), + rhobeg=1, seek_global_minimum=True) + print(soln) + + fig, axs = plt.subplots(2, sharex=True) + axs[1].plot(rews) + + pd.DataFrame(actions).plot(ax=axs[0]) + plt.show() + environment_instance.state = init + print(environment_instance.step(soln.x)) \ No newline at end of file diff --git a/main.py b/main.py new file mode 100644 index 0000000..94e3a87 --- /dev/null +++ b/main.py @@ -0,0 +1,16 @@ +# This is a sample Python script. + +# Press ⌃R to execute it or replace it with your code. +# Press Double ⇧ to search everywhere for classes, files, tool windows, actions, and settings. + + +def print_hi(name): + # Use a breakpoint in the code line below to debug your script. + print(f'Hi, {name}') # Press ⌘F8 to toggle the breakpoint. + + +# Press the green button in the gutter to run the script. 
+if __name__ == '__main__': + print_hi('PyCharm') + +# See PyCharm help at https://www.jetbrains.com/help/pycharm/ diff --git a/show_models.py b/show_models.py new file mode 100644 index 0000000..704f81a --- /dev/null +++ b/show_models.py @@ -0,0 +1,63 @@ +import os +import numpy as np +import matplotlib.pyplot as plt +# data = dict(data=data, +# model=model_nr, +# rews=rews, +# X=X, +# Y=Y) +import pickle + +project_directory = 'Data_logging/Test_plots/'+'-nr_steps_25-cr_lr-n_ep_7-m_bs_100-sim_steps_2500-m_iter_30-ensnr_3-init_100/' +files = [] +inner_list = [] +for f in os.listdir(project_directory): + if 'plot_model' in f: + if f[-1] == '0': + if len(inner_list)>0: + files.append(inner_list) + inner_list = [] + inner_list.append(f) + else: + inner_list.append(f) + +# print(files) +number = 3 +with open(project_directory+files[number][0], 'rb') as f: + file_data = pickle.load(f) +# print(file_data) + +maximum = 0 +data = file_data['data'] +if data is not None: + action = [data[1]] + state = data[0] + maximum = (data[2] - 1) / 2 +else: + action = np.zeros(4) + state = np.zeros(4) + +delta = 0.05 +x = np.arange(-1, 1, delta) +y = np.arange(-1, 1, delta) +X, Y = np.meshgrid(x, y) +Nr = 1 +Nc = 1 +fig, axs = plt.subplots(Nr, Nc) +fig.subplots_adjust(hspace=0.3) +images = [] +for pos in range(len(files[0])): + with open(project_directory+files[number][pos], 'rb') as f: + file_data = pickle.load(f) + rewards= file_data['rews'] + + # print(self.number_models) + for i1 in range(len(x)): + for j1 in range(len(y)): + state[0] = x[i1] + state[1] = y[j1] + + axs.contour(X, Y, (rewards - 1) / 2, alpha=1) + +# plt.colorbar() +fig.show() \ No newline at end of file diff --git a/simulated_tango.py b/simulated_tango.py new file mode 100644 index 0000000..5fa80fb --- /dev/null +++ b/simulated_tango.py @@ -0,0 +1,115 @@ +import numpy as np + + +class SimTangoConnection: + # Seed laser control simulator + + def __init__(self, **kwargs): + + self.system = 'LocalSimulator' + + # actuators: 2 piezo motors with 2 degree of freedom each + self.actuators_num = 2 + self.actuators_attr_num = 2 + + # sensors: 2 Coupled Charge Device + self.sensors_num = 2 + + # state: piezo-motor inputs + self.state_size = self.actuators_num * self.actuators_attr_num + + # action: variation of piezo-motor inputs + self.action_size = self.actuators_num * self.actuators_attr_num + + # state with highest intensity + if 'target_state' in kwargs: + self.target_state = kwargs.get('target_state') + else: + self.target_state = 131072 * np.ones(self.state_size) + # piezo-motor range: integers in [0, 262144] + + # response matrix + self.rm = self.get_respmatrix() + + # state definition + self.state = self.target_state.copy() + + # position on CCD when the highest intensity is reached + self.target_position = self.get_position() + + # position definition + self.position = self.target_position.copy() + + # highest intensity + self.target_intensity = self.get_intensity() + + # intensity definition + self.intensity = self.target_intensity.copy() + + + + + def get_respmatrix(self): + # response matrix + # position on CCDs = response matrix * piezo-motor inputs + rm = np.array([[-1.5570540161682593E-5, +3.2428289038152253E-7, +0.0000000000000000E-0, +0.0000000000000000E-0], + [+1.7061003855705661E-6, +1.3362442319301898E-5, +0.0000000000000000E-0, +0.0000000000000000E-0], + [+3.6504472405940234E-5, -2.7883739555787350E-8, +2.1631117516360490E-5, -1.0906340491205139E-6], + [-2.9017613830940000E-6, -2.6667704592363296E-5, -5.2804805443334150E-7, + 
+8.0338913621924470E-6]]) + return rm + + def get_position(self): + # return current position on CCD + position = self.rm.dot(self.state) + return position + + def set_state(self, state): + # set state in simulator + self.state = state + + def get_state(self): + # return current state + state = self.state + return state + + def get_intensity(self): + # return current intensity + # intensity calculated on laser spot position on CCDs + + position = self.get_position() + + # initialization of the intensity on each CCD + screen_intensity = np.zeros(self.sensors_num) + # acquisition of the intensity on each CCD + for i in range(self.sensors_num): + # laser spot position on CCD_i + screen_position = position[self.sensors_num*i:self.sensors_num*i+2] + # target position on CCD_i + target_position = self.target_position[self.sensors_num*i:self.sensors_num*i+2] + # current position error with respect to target position on CCD_i + difference = screen_position - target_position + # absolute value of the distance between the 2 positions + distance = np.sqrt(np.power(difference, 2)) + + # You can adapt this if condition + if any(distance > 0.1): + screen_intensity[i] = 0.0 + # if the spot is more distant than 0.1 then the spot is not in the CCD + else: + # a gaussian represent the intensity on CCD_i + + # screen_intensity[i] = 1 - np.sqrt(np.sum(distance)) + den = 2*np.power(0.04, 2) + screen_intensity[i] = np.exp(-np.sum(np.power(difference, 2))/den) + # NOTICE: + # here you can play with 0.1 in any(distance > 0.1) and 0.07 in den = 2*np.power(0.07, 2) + + # intensity is given by the product of the CCD intensities + intensity = np.prod(screen_intensity) + self.intensity = intensity + return intensity + + +if __name__ == '__main__': + tng = SimTangoConnection() diff --git a/tex/Untitled.bib b/tex/Untitled.bib new file mode 100644 index 0000000..b730ba2 --- /dev/null +++ b/tex/Untitled.bib @@ -0,0 +1,158 @@ +% Encoding: UTF-8 + +@Article{Levine2020, + author = {Sergey Levine and Aviral Kumar and George Tucker and Justin Fu}, + title = {Offline Reinforcement Learning: Tutorial, Review, and Perspectives on Open Problems}, + abstract = {In this tutorial article, we aim to provide the reader with the conceptual tools needed to get started on research on offline reinforcement learning algorithms: reinforcement learning algorithms that utilize previously collected data, without additional online data collection. Offline reinforcement learning algorithms hold tremendous promise for making it possible to turn large datasets into powerful decision making engines. Effective offline reinforcement learning methods would be able to extract policies with the maximum possible utility out of the available data, thereby allowing automation of a wide range of decision-making domains, from healthcare and education to robotics. However, the limitations of current algorithms make this difficult. 
We will aim to provide the reader with an understanding of these challenges, particularly in the context of modern deep reinforcement learning methods, and describe some potential solutions that have been explored in recent work to mitigate these challenges, along with recent applications, and a discussion of perspectives on open problems in the field.}, + date = {2020-05-04}, + eprint = {2005.01643v3}, + eprintclass = {cs.LG}, + eprinttype = {arXiv}, + file = {:http\://arxiv.org/pdf/2005.01643v3:PDF}, + keywords = {cs.LG, cs.AI, stat.ML}, +} + +@WWW{, + url = {https://gym.openai.com}, +} + +@Article{Dabney2020, + author = {Will Dabney and Zeb Kurth-Nelson and Naoshige Uchida and Clara Kwon Starkweather and Demis Hassabis and R{\'{e}}mi Munos and Matthew Botvinick}, + title = {A distributional code for value in dopamine-based reinforcement learning}, + doi = {10.1038/s41586-019-1924-6}, + number = {7792}, + pages = {671--675}, + volume = {577}, + journal = {Nature}, + month = {jan}, + publisher = {Springer Science and Business Media {LLC}}, + year = {2020}, +} + +@Misc{fujimoto2018addressing, + author = {Scott Fujimoto and Herke van Hoof and David Meger}, + title = {Addressing Function Approximation Error in Actor-Critic Methods}, + eprint = {1802.09477}, + archiveprefix = {arXiv}, + primaryclass = {cs.AI}, + year = {2018}, +} + +@InProceedings{Gal2016, + author = {Gal, Yarin and McAllister, Rowan and Rasmussen, Carl Edward}, + booktitle = {Data-Efficient Machine Learning workshop, ICML}, + title = {Improving PILCO with Bayesian neural network dynamics models}, + pages = {34}, + volume = {4}, + year = {2016}, +} + +@SuppBook{Haarnoja2018, + author = {Tuomas Haarnoja and Aurick Zhou and Pieter Abbeel and Sergey Levine}, + date = {2018-01-04}, + title = {Soft Actor-Critic: Off-Policy Maximum Entropy Deep Reinforcement Learning with a Stochastic Actor}, + eprint = {1801.01290v2}, + eprintclass = {cs.LG}, + eprinttype = {arXiv}, + abstract = {Model-free deep reinforcement learning (RL) algorithms have been demonstrated on a range of challenging decision making and control tasks. However, these methods typically suffer from two major challenges: very high sample complexity and brittle convergence properties, which necessitate meticulous hyperparameter tuning. Both of these challenges severely limit the applicability of such methods to complex, real-world domains. In this paper, we propose soft actor-critic, an off-policy actor-critic deep RL algorithm based on the maximum entropy reinforcement learning framework. In this framework, the actor aims to maximize expected reward while also maximizing entropy. That is, to succeed at the task while acting as randomly as possible. Prior deep RL methods based on this framework have been formulated as Q-learning methods. By combining off-policy updates with a stable stochastic actor-critic formulation, our method achieves state-of-the-art performance on a range of continuous control benchmark tasks, outperforming prior on-policy and off-policy methods. 
Furthermore, we demonstrate that, in contrast to other off-policy algorithms, our approach is very stable, achieving very similar performance across different random seeds.}, + file = {:http\://arxiv.org/pdf/1801.01290v2:PDF}, + keywords = {cs.LG, cs.AI, stat.ML}, +} + +@Misc{Charles2013, + author = {Hirlaender, S.}, + title = {PER-NAF}, + howpublished = {\url{https://github.com/MathPhysSim/PER-NAF}}, + commit = {4f57d6a0e4c030202a07a60bc1bb1ed1544bf679}, + journal = {GitHub repository}, + publisher = {GitHub}, + year = {2013}, +} + +@Article{Kumar2020, + author = {Aviral Kumar and Abhishek Gupta and Sergey Levine}, + date = {2020-03-16}, + title = {DisCor: Corrective Feedback in Reinforcement Learning via Distribution Correction}, + eprint = {2003.07305v1}, + eprintclass = {cs.LG}, + eprinttype = {arXiv}, + abstract = {Deep reinforcement learning can learn effective policies for a wide range of tasks, but is notoriously difficult to use due to instability and sensitivity to hyperparameters. The reasons for this remain unclear. When using standard supervised methods (e.g., for bandits), on-policy data collection provides "hard negatives" that correct the model in precisely those states and actions that the policy is likely to visit. We call this phenomenon "corrective feedback." We show that bootstrapping-based Q-learning algorithms do not necessarily benefit from this corrective feedback, and training on the experience collected by the algorithm is not sufficient to correct errors in the Q-function. In fact, Q-learning and related methods can exhibit pathological interactions between the distribution of experience collected by the agent and the policy induced by training on that experience, leading to potential instability, sub-optimal convergence, and poor results when learning from noisy, sparse or delayed rewards. We demonstrate the existence of this problem, both theoretically and empirically. We then show that a specific correction to the data distribution can mitigate this issue. Based on these observations, we propose a new algorithm, DisCor, which computes an approximation to this optimal distribution and uses it to re-weight the transitions used for training, resulting in substantial improvements in a range of challenging RL settings, such as multi-task learning and learning from noisy reward signals. Blog post presenting a summary of this work is available at: https://bair.berkeley.edu/blog/2020/03/16/discor/.}, + file = {:http\://arxiv.org/pdf/2003.07305v1:PDF}, + keywords = {cs.LG, stat.ML}, +} + +@Article{Kumar2019, + author = {Aviral Kumar and Justin Fu and George Tucker and Sergey Levine}, + date = {2019-06-03}, + title = {Stabilizing Off-Policy Q-Learning via Bootstrapping Error Reduction}, + eprint = {1906.00949v2}, + eprintclass = {cs.LG}, + eprinttype = {arXiv}, + abstract = {Off-policy reinforcement learning aims to leverage experience collected from prior policies for sample-efficient learning. However, in practice, commonly used off-policy approximate dynamic programming methods based on Q-learning and actor-critic methods are highly sensitive to the data distribution, and can make only limited progress without collecting additional on-policy data. As a step towards more robust off-policy algorithms, we study the setting where the off-policy experience is fixed and there is no further interaction with the environment. We identify bootstrapping error as a key source of instability in current methods. 
Bootstrapping error is due to bootstrapping from actions that lie outside of the training data distribution, and it accumulates via the Bellman backup operator. We theoretically analyze bootstrapping error, and demonstrate how carefully constraining action selection in the backup can mitigate it. Based on our analysis, we propose a practical algorithm, bootstrapping error accumulation reduction (BEAR). We demonstrate that BEAR is able to learn robustly from different off-policy distributions, including random and suboptimal demonstrations, on a range of continuous control tasks.}, + file = {:http\://arxiv.org/pdf/1906.00949v2:PDF}, + keywords = {cs.LG, stat.ML}, +} + +@Article{Schaul2015, + author = {Tom Schaul and John Quan and Ioannis Antonoglou and David Silver}, + date = {2015-11-18}, + title = {Prioritized Experience Replay}, + eprint = {1511.05952v4}, + eprintclass = {cs.LG}, + eprinttype = {arXiv}, + abstract = {Experience replay lets online reinforcement learning agents remember and reuse experiences from the past. In prior work, experience transitions were uniformly sampled from a replay memory. However, this approach simply replays transitions at the same frequency that they were originally experienced, regardless of their significance. In this paper we develop a framework for prioritizing experience, so as to replay important transitions more frequently, and therefore learn more efficiently. We use prioritized experience replay in Deep Q-Networks (DQN), a reinforcement learning algorithm that achieved human-level performance across many Atari games. DQN with prioritized experience replay achieves a new state-of-the-art, outperforming DQN with uniform replay on 41 out of 49 games.}, + file = {:http\://arxiv.org/pdf/1511.05952v4:PDF}, + keywords = {cs.LG}, +} + +@Article{DBLP:journals/corr/SchulmanWDRK17, + author = {John Schulman and Filip Wolski and Prafulla Dhariwal and Alec Radford and Oleg Klimov}, + title = {Proximal Policy Optimization Algorithms}, + eprint = {1707.06347}, + url = {http://arxiv.org/abs/1707.06347}, + volume = {abs/1707.06347}, + archiveprefix = {arXiv}, + bibsource = {dblp computer science bibliography, https://dblp.org}, + biburl = {https://dblp.org/rec/journals/corr/SchulmanWDRK17.bib}, + journal = {CoRR}, + timestamp = {Mon, 13 Aug 2018 16:47:34 +0200}, + year = {2017}, +} + +@Article{Schulman2015, + author = {John Schulman and Sergey Levine and Philipp Moritz and Michael I. Jordan and Pieter Abbeel}, + date = {2015-02-19}, + title = {Trust Region Policy Optimization}, + eprint = {1502.05477v5}, + eprintclass = {cs.LG}, + eprinttype = {arXiv}, + abstract = {We describe an iterative procedure for optimizing policies, with guaranteed monotonic improvement. By making several approximations to the theoretically-justified procedure, we develop a practical algorithm, called Trust Region Policy Optimization (TRPO). This algorithm is similar to natural policy gradient methods and is effective for optimizing large nonlinear policies such as neural networks. Our experiments demonstrate its robust performance on a wide variety of tasks: learning simulated robotic swimming, hopping, and walking gaits; and playing Atari games using images of the screen as input. 
Despite its approximations that deviate from the theory, TRPO tends to give monotonic improvement, with little tuning of hyperparameters.}, + file = {:http\://arxiv.org/pdf/1502.05477v5:PDF}, + keywords = {cs.LG}, +} + +@Article{Wang2019, + author = {Tingwu Wang and Xuchan Bao and Ignasi Clavera and Jerrick Hoang and Yeming Wen and Eric Langlois and Shunshi Zhang and Guodong Zhang and Pieter Abbeel and Jimmy Ba}, + date = {2019-07-03}, + title = {Benchmarking Model-Based Reinforcement Learning}, + eprint = {1907.02057v1}, + eprintclass = {cs.LG}, + eprinttype = {arXiv}, + abstract = {Model-based reinforcement learning (MBRL) is widely seen as having the potential to be significantly more sample efficient than model-free RL. However, research in model-based RL has not been very standardized. It is fairly common for authors to experiment with self-designed environments, and there are several separate lines of research, which are sometimes closed-sourced or not reproducible. Accordingly, it is an open question how these various existing MBRL algorithms perform relative to each other. To facilitate research in MBRL, in this paper we gather a wide collection of MBRL algorithms and propose over 18 benchmarking environments specially designed for MBRL. We benchmark these algorithms with unified problem settings, including noisy environments. Beyond cataloguing performance, we explore and unify the underlying algorithmic differences across MBRL algorithms. We characterize three key research challenges for future MBRL research: the dynamics bottleneck, the planning horizon dilemma, and the early-termination dilemma. Finally, to maximally facilitate future research on MBRL, we open-source our benchmark in http://www.cs.toronto.edu/~tingwuwang/mbrl.html.}, + file = {:http\://arxiv.org/pdf/1907.02057v1:PDF}, + keywords = {cs.LG, cs.AI, cs.RO, stat.ML}, +} + +@Article{Wang2019a, + author = {Tingwu Wang and Xuchan Bao and Ignasi Clavera and Jerrick Hoang and Yeming Wen and Eric Langlois and Shunshi Zhang and Guodong Zhang and Pieter Abbeel and Jimmy Ba}, + date = {2019-07-03}, + title = {Benchmarking Model-Based Reinforcement Learning}, + eprint = {1907.02057v1}, + eprintclass = {cs.LG}, + eprinttype = {arXiv}, + abstract = {Model-based reinforcement learning (MBRL) is widely seen as having the potential to be significantly more sample efficient than model-free RL. However, research in model-based RL has not been very standardized. It is fairly common for authors to experiment with self-designed environments, and there are several separate lines of research, which are sometimes closed-sourced or not reproducible. Accordingly, it is an open question how these various existing MBRL algorithms perform relative to each other. To facilitate research in MBRL, in this paper we gather a wide collection of MBRL algorithms and propose over 18 benchmarking environments specially designed for MBRL. We benchmark these algorithms with unified problem settings, including noisy environments. Beyond cataloguing performance, we explore and unify the underlying algorithmic differences across MBRL algorithms. We characterize three key research challenges for future MBRL research: the dynamics bottleneck, the planning horizon dilemma, and the early-termination dilemma. 
Finally, to maximally facilitate future research on MBRL, we open-source our benchmark in http://www.cs.toronto.edu/~tingwuwang/mbrl.html.}, + file = {:http\://arxiv.org/pdf/1907.02057v1:PDF}, + keywords = {cs.LG, cs.AI, cs.RO, stat.ML}, +} + +@Comment{jabref-meta: databaseType:bibtex;} diff --git a/tex/Untitled.bib.bak b/tex/Untitled.bib.bak new file mode 100644 index 0000000..3dc31e5 --- /dev/null +++ b/tex/Untitled.bib.bak @@ -0,0 +1,15 @@ +% Encoding: UTF-8 + +@Article{Levine2020, + author = {Sergey Levine and Aviral Kumar and George Tucker and Justin Fu}, + title = {Offline Reinforcement Learning: Tutorial, Review, and Perspectives on Open Problems}, + abstract = {In this tutorial article, we aim to provide the reader with the conceptual tools needed to get started on research on offline reinforcement learning algorithms: reinforcement learning algorithms that utilize previously collected data, without additional online data collection. Offline reinforcement learning algorithms hold tremendous promise for making it possible to turn large datasets into powerful decision making engines. Effective offline reinforcement learning methods would be able to extract policies with the maximum possible utility out of the available data, thereby allowing automation of a wide range of decision-making domains, from healthcare and education to robotics. However, the limitations of current algorithms make this difficult. We will aim to provide the reader with an understanding of these challenges, particularly in the context of modern deep reinforcement learning methods, and describe some potential solutions that have been explored in recent work to mitigate these challenges, along with recent applications, and a discussion of perspectives on open problems in the field.}, + date = {2020-05-04}, + eprint = {2005.01643v3}, + eprintclass = {cs.LG}, + eprinttype = {arXiv}, + file = {:http\://arxiv.org/pdf/2005.01643v3:PDF}, + keywords = {cs.LG, cs.AI, stat.ML}, +} + +@Comment{jabref-meta: databaseType:bibtex;} diff --git a/tex/Untitled.bib.sav.bak b/tex/Untitled.bib.sav.bak new file mode 100644 index 0000000..b730ba2 --- /dev/null +++ b/tex/Untitled.bib.sav.bak @@ -0,0 +1,158 @@ +% Encoding: UTF-8 + +@Article{Levine2020, + author = {Sergey Levine and Aviral Kumar and George Tucker and Justin Fu}, + title = {Offline Reinforcement Learning: Tutorial, Review, and Perspectives on Open Problems}, + abstract = {In this tutorial article, we aim to provide the reader with the conceptual tools needed to get started on research on offline reinforcement learning algorithms: reinforcement learning algorithms that utilize previously collected data, without additional online data collection. Offline reinforcement learning algorithms hold tremendous promise for making it possible to turn large datasets into powerful decision making engines. Effective offline reinforcement learning methods would be able to extract policies with the maximum possible utility out of the available data, thereby allowing automation of a wide range of decision-making domains, from healthcare and education to robotics. However, the limitations of current algorithms make this difficult. 
We will aim to provide the reader with an understanding of these challenges, particularly in the context of modern deep reinforcement learning methods, and describe some potential solutions that have been explored in recent work to mitigate these challenges, along with recent applications, and a discussion of perspectives on open problems in the field.}, + date = {2020-05-04}, + eprint = {2005.01643v3}, + eprintclass = {cs.LG}, + eprinttype = {arXiv}, + file = {:http\://arxiv.org/pdf/2005.01643v3:PDF}, + keywords = {cs.LG, cs.AI, stat.ML}, +} + +@WWW{OpenAIGym, + url = {https://gym.openai.com}, +} + +@Article{Dabney2020, + author = {Will Dabney and Zeb Kurth-Nelson and Naoshige Uchida and Clara Kwon Starkweather and Demis Hassabis and R{\'{e}}mi Munos and Matthew Botvinick}, + title = {A distributional code for value in dopamine-based reinforcement learning}, + doi = {10.1038/s41586-019-1924-6}, + number = {7792}, + pages = {671--675}, + volume = {577}, + journal = {Nature}, + month = {jan}, + publisher = {Springer Science and Business Media {LLC}}, + year = {2020}, +} + +@Misc{fujimoto2018addressing, + author = {Scott Fujimoto and Herke van Hoof and David Meger}, + title = {Addressing Function Approximation Error in Actor-Critic Methods}, + eprint = {1802.09477}, + archiveprefix = {arXiv}, + primaryclass = {cs.AI}, + year = {2018}, +} + +@InProceedings{Gal2016, + author = {Gal, Yarin and McAllister, Rowan and Rasmussen, Carl Edward}, + booktitle = {Data-Efficient Machine Learning workshop, ICML}, + title = {Improving PILCO with Bayesian neural network dynamics models}, + pages = {34}, + volume = {4}, + year = {2016}, +} + +@Article{Haarnoja2018, + author = {Tuomas Haarnoja and Aurick Zhou and Pieter Abbeel and Sergey Levine}, + date = {2018-01-04}, + title = {Soft Actor-Critic: Off-Policy Maximum Entropy Deep Reinforcement Learning with a Stochastic Actor}, + eprint = {1801.01290v2}, + eprintclass = {cs.LG}, + eprinttype = {arXiv}, + abstract = {Model-free deep reinforcement learning (RL) algorithms have been demonstrated on a range of challenging decision making and control tasks. However, these methods typically suffer from two major challenges: very high sample complexity and brittle convergence properties, which necessitate meticulous hyperparameter tuning. Both of these challenges severely limit the applicability of such methods to complex, real-world domains. In this paper, we propose soft actor-critic, an off-policy actor-critic deep RL algorithm based on the maximum entropy reinforcement learning framework. In this framework, the actor aims to maximize expected reward while also maximizing entropy. That is, to succeed at the task while acting as randomly as possible. Prior deep RL methods based on this framework have been formulated as Q-learning methods. By combining off-policy updates with a stable stochastic actor-critic formulation, our method achieves state-of-the-art performance on a range of continuous control benchmark tasks, outperforming prior on-policy and off-policy methods. 
Furthermore, we demonstrate that, in contrast to other off-policy algorithms, our approach is very stable, achieving very similar performance across different random seeds.}, + file = {:http\://arxiv.org/pdf/1801.01290v2:PDF}, + keywords = {cs.LG, cs.AI, stat.ML}, +} + +@Misc{Charles2013, + author = {Hirlaender, S.}, + title = {PER-NAF}, + howpublished = {\url{https://github.com/MathPhysSim/PER-NAF}}, + commit = {4f57d6a0e4c030202a07a60bc1bb1ed1544bf679}, + journal = {GitHub repository}, + publisher = {GitHub}, + year = {2013}, +} + +@Article{Kumar2020, + author = {Aviral Kumar and Abhishek Gupta and Sergey Levine}, + date = {2020-03-16}, + title = {DisCor: Corrective Feedback in Reinforcement Learning via Distribution Correction}, + eprint = {2003.07305v1}, + eprintclass = {cs.LG}, + eprinttype = {arXiv}, + abstract = {Deep reinforcement learning can learn effective policies for a wide range of tasks, but is notoriously difficult to use due to instability and sensitivity to hyperparameters. The reasons for this remain unclear. When using standard supervised methods (e.g., for bandits), on-policy data collection provides "hard negatives" that correct the model in precisely those states and actions that the policy is likely to visit. We call this phenomenon "corrective feedback." We show that bootstrapping-based Q-learning algorithms do not necessarily benefit from this corrective feedback, and training on the experience collected by the algorithm is not sufficient to correct errors in the Q-function. In fact, Q-learning and related methods can exhibit pathological interactions between the distribution of experience collected by the agent and the policy induced by training on that experience, leading to potential instability, sub-optimal convergence, and poor results when learning from noisy, sparse or delayed rewards. We demonstrate the existence of this problem, both theoretically and empirically. We then show that a specific correction to the data distribution can mitigate this issue. Based on these observations, we propose a new algorithm, DisCor, which computes an approximation to this optimal distribution and uses it to re-weight the transitions used for training, resulting in substantial improvements in a range of challenging RL settings, such as multi-task learning and learning from noisy reward signals. Blog post presenting a summary of this work is available at: https://bair.berkeley.edu/blog/2020/03/16/discor/.}, + file = {:http\://arxiv.org/pdf/2003.07305v1:PDF}, + keywords = {cs.LG, stat.ML}, +} + +@Article{Kumar2019, + author = {Aviral Kumar and Justin Fu and George Tucker and Sergey Levine}, + date = {2019-06-03}, + title = {Stabilizing Off-Policy Q-Learning via Bootstrapping Error Reduction}, + eprint = {1906.00949v2}, + eprintclass = {cs.LG}, + eprinttype = {arXiv}, + abstract = {Off-policy reinforcement learning aims to leverage experience collected from prior policies for sample-efficient learning. However, in practice, commonly used off-policy approximate dynamic programming methods based on Q-learning and actor-critic methods are highly sensitive to the data distribution, and can make only limited progress without collecting additional on-policy data. As a step towards more robust off-policy algorithms, we study the setting where the off-policy experience is fixed and there is no further interaction with the environment. We identify bootstrapping error as a key source of instability in current methods. 
Bootstrapping error is due to bootstrapping from actions that lie outside of the training data distribution, and it accumulates via the Bellman backup operator. We theoretically analyze bootstrapping error, and demonstrate how carefully constraining action selection in the backup can mitigate it. Based on our analysis, we propose a practical algorithm, bootstrapping error accumulation reduction (BEAR). We demonstrate that BEAR is able to learn robustly from different off-policy distributions, including random and suboptimal demonstrations, on a range of continuous control tasks.}, + file = {:http\://arxiv.org/pdf/1906.00949v2:PDF}, + keywords = {cs.LG, stat.ML}, +} + +@Article{Schaul2015, + author = {Tom Schaul and John Quan and Ioannis Antonoglou and David Silver}, + date = {2015-11-18}, + title = {Prioritized Experience Replay}, + eprint = {1511.05952v4}, + eprintclass = {cs.LG}, + eprinttype = {arXiv}, + abstract = {Experience replay lets online reinforcement learning agents remember and reuse experiences from the past. In prior work, experience transitions were uniformly sampled from a replay memory. However, this approach simply replays transitions at the same frequency that they were originally experienced, regardless of their significance. In this paper we develop a framework for prioritizing experience, so as to replay important transitions more frequently, and therefore learn more efficiently. We use prioritized experience replay in Deep Q-Networks (DQN), a reinforcement learning algorithm that achieved human-level performance across many Atari games. DQN with prioritized experience replay achieves a new state-of-the-art, outperforming DQN with uniform replay on 41 out of 49 games.}, + file = {:http\://arxiv.org/pdf/1511.05952v4:PDF}, + keywords = {cs.LG}, +} + +@Article{DBLP:journals/corr/SchulmanWDRK17, + author = {John Schulman and Filip Wolski and Prafulla Dhariwal and Alec Radford and Oleg Klimov}, + title = {Proximal Policy Optimization Algorithms}, + eprint = {1707.06347}, + url = {http://arxiv.org/abs/1707.06347}, + volume = {abs/1707.06347}, + archiveprefix = {arXiv}, + bibsource = {dblp computer science bibliography, https://dblp.org}, + biburl = {https://dblp.org/rec/journals/corr/SchulmanWDRK17.bib}, + journal = {CoRR}, + timestamp = {Mon, 13 Aug 2018 16:47:34 +0200}, + year = {2017}, +} + +@Article{Schulman2015, + author = {John Schulman and Sergey Levine and Philipp Moritz and Michael I. Jordan and Pieter Abbeel}, + date = {2015-02-19}, + title = {Trust Region Policy Optimization}, + eprint = {1502.05477v5}, + eprintclass = {cs.LG}, + eprinttype = {arXiv}, + abstract = {We describe an iterative procedure for optimizing policies, with guaranteed monotonic improvement. By making several approximations to the theoretically-justified procedure, we develop a practical algorithm, called Trust Region Policy Optimization (TRPO). This algorithm is similar to natural policy gradient methods and is effective for optimizing large nonlinear policies such as neural networks. Our experiments demonstrate its robust performance on a wide variety of tasks: learning simulated robotic swimming, hopping, and walking gaits; and playing Atari games using images of the screen as input. 
Despite its approximations that deviate from the theory, TRPO tends to give monotonic improvement, with little tuning of hyperparameters.}, + file = {:http\://arxiv.org/pdf/1502.05477v5:PDF}, + keywords = {cs.LG}, +} + +@Article{Wang2019, + author = {Tingwu Wang and Xuchan Bao and Ignasi Clavera and Jerrick Hoang and Yeming Wen and Eric Langlois and Shunshi Zhang and Guodong Zhang and Pieter Abbeel and Jimmy Ba}, + date = {2019-07-03}, + title = {Benchmarking Model-Based Reinforcement Learning}, + eprint = {1907.02057v1}, + eprintclass = {cs.LG}, + eprinttype = {arXiv}, + abstract = {Model-based reinforcement learning (MBRL) is widely seen as having the potential to be significantly more sample efficient than model-free RL. However, research in model-based RL has not been very standardized. It is fairly common for authors to experiment with self-designed environments, and there are several separate lines of research, which are sometimes closed-sourced or not reproducible. Accordingly, it is an open question how these various existing MBRL algorithms perform relative to each other. To facilitate research in MBRL, in this paper we gather a wide collection of MBRL algorithms and propose over 18 benchmarking environments specially designed for MBRL. We benchmark these algorithms with unified problem settings, including noisy environments. Beyond cataloguing performance, we explore and unify the underlying algorithmic differences across MBRL algorithms. We characterize three key research challenges for future MBRL research: the dynamics bottleneck, the planning horizon dilemma, and the early-termination dilemma. Finally, to maximally facilitate future research on MBRL, we open-source our benchmark in http://www.cs.toronto.edu/~tingwuwang/mbrl.html.}, + file = {:http\://arxiv.org/pdf/1907.02057v1:PDF}, + keywords = {cs.LG, cs.AI, cs.RO, stat.ML}, +} + +@Article{Wang2019a, + author = {Tingwu Wang and Xuchan Bao and Ignasi Clavera and Jerrick Hoang and Yeming Wen and Eric Langlois and Shunshi Zhang and Guodong Zhang and Pieter Abbeel and Jimmy Ba}, + date = {2019-07-03}, + title = {Benchmarking Model-Based Reinforcement Learning}, + eprint = {1907.02057v1}, + eprintclass = {cs.LG}, + eprinttype = {arXiv}, + abstract = {Model-based reinforcement learning (MBRL) is widely seen as having the potential to be significantly more sample efficient than model-free RL. However, research in model-based RL has not been very standardized. It is fairly common for authors to experiment with self-designed environments, and there are several separate lines of research, which are sometimes closed-sourced or not reproducible. Accordingly, it is an open question how these various existing MBRL algorithms perform relative to each other. To facilitate research in MBRL, in this paper we gather a wide collection of MBRL algorithms and propose over 18 benchmarking environments specially designed for MBRL. We benchmark these algorithms with unified problem settings, including noisy environments. Beyond cataloguing performance, we explore and unify the underlying algorithmic differences across MBRL algorithms. We characterize three key research challenges for future MBRL research: the dynamics bottleneck, the planning horizon dilemma, and the early-termination dilemma. 
Finally, to maximally facilitate future research on MBRL, we open-source our benchmark in http://www.cs.toronto.edu/~tingwuwang/mbrl.html.}, + file = {:http\://arxiv.org/pdf/1907.02057v1:PDF}, + keywords = {cs.LG, cs.AI, cs.RO, stat.ML}, +} + +@Comment{jabref-meta: databaseType:bibtex;}