Add num_dropout_samples option to Monte Carlo Dropout.

jereliu · edward-bot · commit 990e3e7fb1ef · 2020-06-09T11:06:55.000-07:00
PiperOrigin-RevId: 315518655
diff --git a/baselines/cifar/README.md b/baselines/cifar/README.md
@@ -6,7 +6,8 @@
 | ----------- | ----------- | ----------- | ----------- | ----------- | ----------- | ----------- |
 | Deterministic | 1e-3 / 0.159 | 99.9% / 96.0% | 1e-3 / 0.0231 | 1.05 / 76.1% / 0.153 | 1.2 (8 TPUv2 cores) | 36.5M |
 | BatchEnsemble (size=4) | 0.08 / 0.143 | 99.9% / 96.2% |  5e-5 / 0.0206 | 1.02 / 77.5% / 0.129 | 5.4 (8 TPUv2 cores) | 36.6M |
-| Dropout | 2e-3 / 0.160 | 99.9% / 95.9% | 2e-3 / 0.0241 | 1.27 / 68.8% / 0.166 | 1.2 (8 TPUv2 cores) | 36.5M |
+| Monte Carlo Dropout (size=1) | 2e-3 / 0.160 | 99.9% / 95.9% | 2e-3 / 0.0241 | 1.27 / 68.8% / 0.166 | 1.2 (8 TPUv2 cores) | 36.5M |
+| Monte Carlo Dropout (size=30) | 1e-3 / 0.145 | 99.9% / 96.1% | 1.5e-3 / 0.019 | 1.27 / 70.0% / 0.167 | 1.2 (8 TPUv2 cores) | 36.5M |
 | Ensemble (size=4) | 2e-3 / 0.114 | 99.9% / 96.6% | - / 0.010 | 0.81 / 77.9% / 0.087 | 1.2 (32 TPUv2 cores) | 146M |
 | Variational inference | 1e-3 / 0.211 | 99.9% / 94.7% | 1e-3 / 0.029 | 1.46 / 71.3% / 0.181 | 5.5 (8 TPUv2 cores) | 73M |
 
@@ -16,7 +17,8 @@
 | ----------- | ----------- | ----------- | ----------- | ----------- | ----------- | ----------- |
 | Deterministic<sup>10</sup> | 1e-3 / 0.875 | 99.9% / 79.8% | 2e-3 / 0.0857 | 2.70 / 51.37% / 0.239 | 1.1 (8 TPUv2 cores) | 36.5M |
 | BatchEnsemble (size=4) | 3e-3 / 0.740 | 99.7% / 81.5% | 2e-3 / 0.0561 | 2.49 / 54.1% / 0.191 | 5.5 (8 TPUv2 cores) | 36.6M |
-| Dropout | 1e-2 / 0.830 | 99.9% / 79.6% | 9e-3 / 0.0501 | 2.90 / 42.63%/ 0.202 | 1.1 (8 TPUv2 cores) | 36.5M |
+| Monte Carlo Dropout (size=1) | 1e-2 / 0.830 | 99.9% / 79.6% | 9e-3 / 0.0501 | 2.90 / 42.63% / 0.202 | 1.1 (8 TPUv2 cores) | 36.5M |
+| Monte Carlo Dropout (size=30) | 6e-3 / 0.785 | 99.9% / 80.7% | 5e-3 / 0.0487 | 2.73 / 46.2% / 0.207 | 1.1 (8 TPUv2 cores) | 36.5M |
 | Ensemble (size=4) | 0.003 / 0.666 | 99.9% / 82.7% | - / 0.021 | 2.27 / 54.1% / 0.138 | 1.1 (32 TPUv2 cores) | 146M |
 | Variational inference | 3e-3 / 0.944 | 99.9% / 77.8% | 2e-3 / 0.097 | 3.18 / 48.2% / 0.271 | 5.5 (8 TPUv2 cores) | 73M |
 
diff --git a/baselines/cifar/dropout.py b/baselines/cifar/dropout.py
@@ -29,7 +29,7 @@
 
 flags.DEFINE_integer('seed', 42, 'Random seed.')
 flags.DEFINE_integer('per_core_batch_size', 64, 'Batch size per TPU core/GPU.')
-flags.DEFINE_float('base_learning_rate', 0.1,
+flags.DEFINE_float('base_learning_rate', 0.05,
                    'Base learning rate when total batch size is 128. It is '
                    'scaled by the ratio of the total batch size to 128.')
 flags.DEFINE_integer('lr_warmup_epochs', 1,
@@ -40,6 +40,9 @@
                   'Epochs to decay learning rate by.')
 flags.DEFINE_float('l2', 3e-4, 'L2 regularization coefficient.')
 flags.DEFINE_float('dropout_rate', 0.1, 'Dropout rate.')
+flags.DEFINE_integer('num_dropout_samples', 1,
+                     'Number of dropout samples to use for prediction.')
+
 flags.DEFINE_enum('dataset', 'cifar10',
                   enum_values=['cifar10', 'cifar100'],
                   help='Dataset.')
@@ -50,7 +53,7 @@
 flags.DEFINE_integer('corruptions_interval', 50,
                      'Number of epochs between evaluating on the corrupted '
                      'test data. Use -1 to never evaluate.')
-flags.DEFINE_integer('checkpoint_interval', 25,
+flags.DEFINE_integer('checkpoint_interval', -1,
                      'Number of epochs between saving checkpoints. Use -1 to '
                      'never save checkpoints.')
 flags.DEFINE_integer('num_bins', 15, 'Number of bins for ECE.')
@@ -342,10 +345,19 @@ def test_step(iterator, dataset_name):
     def step_fn(inputs):
       """Per-Replica StepFn."""
       images, labels = inputs
-      logits = model(images, training=False)
-      if FLAGS.use_bfloat16:
-        logits = tf.cast(logits, tf.float32)
-      probs = tf.nn.softmax(logits)
+
+      logits_list = []
+      for _ in range(FLAGS.num_dropout_samples):
+        logits = model(images, training=False)
+        if FLAGS.use_bfloat16:
+          logits = tf.cast(logits, tf.float32)
+        logits_list.append(logits)
+
+      # Logits dimension is (num_samples, batch_size, num_classes).
+      logits_list = tf.stack(logits_list, axis=0)
+      probs_list = tf.nn.softmax(logits_list)
+      probs = tf.reduce_mean(probs_list, axis=0)
+
       negative_log_likelihood = tf.reduce_mean(
           tf.keras.losses.sparse_categorical_crossentropy(labels, probs))