Clip score to [0, 1] for clarity; this is a no-op for correct/complete runs.

PiperOrigin-RevId: 313758757 Change-Id: Idd26cfd4434e00fbc033ac38dda658e73f194388
google-deepmind · May 29, 2020 · 933e386 · 933e386
1 parent 889d6be
commit 933e386
Showing 1 changed file with 2 additions and 1 deletion.
diff --git a/bsuite/experiments/discounting_chain/analysis.py b/bsuite/experiments/discounting_chain/analysis.py
@@ -35,7 +35,8 @@ def score(df: pd.DataFrame) -> float:
   """Output a single score for discounting_chain."""
   n_eps = np.minimum(df.episode.max(), sweep.NUM_EPISODES)
   ave_return = df.loc[df.episode == n_eps, 'total_return'].mean() / n_eps
-  return 1. - 10. * (1.1 - ave_return)
+  raw_score = 1. - 10. * (1.1 - ave_return)
+  return np.clip(raw_score, 0, 1)
 
 
 def dc_preprocess(df_in: pd.DataFrame) -> pd.DataFrame: