From a54b468ca2379d0c5a1d3d56e40a52beed52b53a Mon Sep 17 00:00:00 2001 From: EmmaRenauld Date: Mon, 29 May 2023 10:14:08 -0400 Subject: [PATCH 01/13] Fix small bug: only use generation-validation if asked --- .../projects/trainers_for_generation.py | 26 ++++++++++--------- 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/dwi_ml/training/projects/trainers_for_generation.py b/dwi_ml/training/projects/trainers_for_generation.py index ae6c3936..256a0baa 100644 --- a/dwi_ml/training/projects/trainers_for_generation.py +++ b/dwi_ml/training/projects/trainers_for_generation.py @@ -132,18 +132,20 @@ def _get_latest_loss_to_supervise_best(self): def validate_one_batch(self, data, epoch): mean_loss, n = super().validate_one_batch(data, epoch) - if (epoch + 1) % self.tracking_phase_frequency == 0: - logger.info("Additional tracking-like generation validation " - "from batch.") - gen_mean_loss, gen_n, percent_inv = self.generate_from_one_batch(data) - gen_mean_loss = gen_mean_loss.cpu().item() - self.tracking_valid_loss_monitor.update(gen_mean_loss, weight=n) - self.tracking_valid_IS_monitor.update(percent_inv, weight=n) - else: - self.tracking_valid_loss_monitor.update( - self.tracking_valid_loss_monitor.average_per_epoch[-1]) - self.tracking_valid_IS_monitor.update( - self.tracking_valid_IS_monitor.average_per_epoch[-1]) + if self.add_a_tracking_validation_phase: + if (epoch + 1) % self.tracking_phase_frequency == 0: + logger.info("Additional tracking-like generation validation " + "from batch.") + gen_mean_loss, gen_n, percent_inv = \ + self.generate_from_one_batch(data) + gen_mean_loss = gen_mean_loss.cpu().item() + self.tracking_valid_loss_monitor.update(gen_mean_loss, weight=n) + self.tracking_valid_IS_monitor.update(percent_inv, weight=n) + else: + self.tracking_valid_loss_monitor.update( + self.tracking_valid_loss_monitor.average_per_epoch[-1]) + self.tracking_valid_IS_monitor.update( + self.tracking_valid_IS_monitor.average_per_epoch[-1]) return mean_loss, n From ba75b5a7662917248f65ead1ad801248d1bdd4d7 Mon Sep 17 00:00:00 2001 From: EmmaRenauld Date: Tue, 30 May 2023 10:49:16 -0400 Subject: [PATCH 02/13] Add many graphs. WIP: remove Unique stuff --- .../projects/trainers_for_generation.py | 276 ++++++++++++++---- dwi_ml/training/trainers.py | 16 +- 2 files changed, 231 insertions(+), 61 deletions(-) diff --git a/dwi_ml/training/projects/trainers_for_generation.py b/dwi_ml/training/projects/trainers_for_generation.py index 256a0baa..442dda17 100644 --- a/dwi_ml/training/projects/trainers_for_generation.py +++ b/dwi_ml/training/projects/trainers_for_generation.py @@ -6,9 +6,7 @@ import numpy as np import torch from torch.nn import PairwiseDistance -from tqdm import tqdm -from dwi_ml.experiment_utils.tqdm_logging import tqdm_logging_redirect from dwi_ml.models.main_models import ModelWithDirectionGetter from dwi_ml.tracking.propagation import propagate_multiple_lines from dwi_ml.tracking.projects.utils import prepare_tracking_mask @@ -17,9 +15,13 @@ logger = logging.getLogger('train_logger') -# Emma tests in ISMRM: a box of 30x30x30 mm sounds good. -# So half of it, max distance = sqrt( 3 * 15^2) = -IS_THRESHOLD = 25.98 +# Emma tests in ISMRM: sphere of 50 mm of diameter (in MI-Brain, imagining a +# sphere encapsulated in a cube box of 50x50x50) around any point seems to +# englobe mostly acceptable streamlines. +# So half of it, a ray of 25 mm seems ok. 
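Side note on the numbers: the single IS_THRESHOLD removed above was the half-diagonal of the old 30x30x30 mm box, whereas the ACCEPTABLE_THRESHOLD introduced just below is the radius of the 50 mm sphere described in the comment (with a tighter and a looser radius on either side). A short derivation sketch in plain Python, using only the values quoted in these comments:

    import numpy as np

    # Patch 01 criterion: half the diagonal of a 30 x 30 x 30 mm box.
    old_is_threshold = np.sqrt(3 * 15.0 ** 2)   # ~25.98 mm

    # This patch: radius of a 50 mm diameter sphere around the expected
    # endpoint.
    acceptable_threshold = 50.0 / 2.0           # 25.0 mm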
+VERY_CLOSE_THRESHOLD = 15.0 +ACCEPTABLE_THRESHOLD = 25.0 +VERY_FAR_THRESHOLD = 40.0 class DWIMLTrainerForTrackingOneInput(DWIMLTrainerOneInput): @@ -35,9 +37,6 @@ def __init__(self, add_a_tracking_validation_phase: bool = False, self.add_a_tracking_validation_phase = add_a_tracking_validation_phase self.tracking_phase_frequency = tracking_phase_frequency self.tracking_phase_nb_steps_init = tracking_phase_nb_steps_init - self.tracking_valid_time_monitor = TimeMonitor() - self.tracking_valid_IS_monitor = BatchHistoryMonitor(weighted=True) - self.tracking_valid_loss_monitor = BatchHistoryMonitor(weighted=True) self.tracking_mask_group = tracking_phase_mask_group self.tracking_mask = None @@ -59,6 +58,21 @@ def __init__(self, add_a_tracking_validation_phase: bool = False, mask_interp='nearest') self.tracking_mask.move_to(self.device) + # -------- Monitors + self.tracking_valid_time_monitor = TimeMonitor() + + # Percentage of streamlines inside a radius + self.tracking_very_good_IS_monitor = BatchHistoryMonitor(weighted=True) + self.tracking_acceptable_IS_monitor = BatchHistoryMonitor(weighted=True) + self.tracking_very_far_IS_monitor = BatchHistoryMonitor(weighted=True) + + # Point where the streamline start diverging from "acceptable" + self.tracking_valid_diverg_monitor = BatchHistoryMonitor(weighted=True) + + # Final distance from expected point + self.tracking_mean_final_distance_monitor = BatchHistoryMonitor(weighted=True) + self.tracking_clipped_final_distance_monitor = BatchHistoryMonitor(weighted=True) + @property def params_for_checkpoint(self): p = super().params_for_checkpoint @@ -73,41 +87,87 @@ def params_for_checkpoint(self): def _update_states_from_checkpoint(self, current_states): super()._update_states_from_checkpoint(current_states) - self.tracking_valid_loss_monitor.set_state( + self.tracking_very_good_IS_monitor.set_state( + current_states['tracking_very_good_IS_monitor_state']) + self.tracking_acceptable_IS_monitor.set_state( + current_states['tracking_acceptable_IS_monitor_state']) + self.tracking_very_far_IS_monitor.set_state( + current_states['tracking_very_far_IS_monitor_state']) + + self.tracking_valid_diverg_monitor.set_state( + current_states['tracking_valid_diverg_monitor_state']) + + self.tracking_mean_final_distance_monitor.set_state( current_states['tracking_valid_loss_monitor_state']) - self.tracking_valid_IS_monitor.set_state( - current_states['tracking_valid_IS_monitor_state']) + self.tracking_clipped_final_distance_monitor.set_state( + current_states['tracking_clipped_valid_loss_monitor_state']) def _prepare_checkpoint_info(self) -> dict: checkpoint_info = super()._prepare_checkpoint_info() checkpoint_info['current_states'].update({ + 'tracking_very_good_IS_monitor_state': + self.tracking_very_good_IS_monitor.get_state(), + 'tracking_acceptable_IS_monitor_state': + self.tracking_acceptable_IS_monitor.get_state(), + 'tracking_very_far_IS_monitor_state': + self.tracking_very_far_IS_monitor.get_state(), + + 'tracking_valid_diverg_monitor_state': + self.tracking_valid_diverg_monitor.get_state(), + 'tracking_valid_loss_monitor_state': - self.tracking_valid_loss_monitor.get_state(), - 'tracking_valid_IS_monitor_state': - self.tracking_valid_IS_monitor.get_state(), + self.tracking_mean_final_distance_monitor.get_state(), + 'tracking_clipped_valid_loss_monitor_state': + self.tracking_clipped_final_distance_monitor.get_state(), }) return checkpoint_info def save_local_logs(self): super().save_local_logs() + self._save_log_locally( - 
self.tracking_valid_loss_monitor.average_per_epoch, + self.tracking_very_good_IS_monitor.average_per_epoch, + "tracking_validation_very_good_IS_per_epoch_{}.npy" + .format(VERY_CLOSE_THRESHOLD)) + self._save_log_locally( + self.tracking_acceptable_IS_monitor.average_per_epoch, + "tracking_validation_acceptable_IS_per_epoch_{}.npy" + .format(ACCEPTABLE_THRESHOLD)) + self._save_log_locally( + self.tracking_very_far_IS_monitor.average_per_epoch, + "tracking_validation_very_far_IS_per_epoch_{}.npy" + .format(VERY_FAR_THRESHOLD)) + + self._save_log_locally( + self.tracking_valid_diverg_monitor.average_per_epoch, + "tracking_validation_diverg_per_epoch.npy") + + self._save_log_locally( + self.tracking_mean_final_distance_monitor.average_per_epoch, "tracking_validation_loss_per_epoch.npy") self._save_log_locally( - self.tracking_valid_IS_monitor.average_per_epoch, - "tracking_validation_IS_per_epoch.npy") + self.tracking_clipped_final_distance_monitor.average_per_epoch, + "tracking_clipped_validation_loss_per_epoch.npy") def validate_one_epoch(self, epoch): if self.add_a_tracking_validation_phase: - self.tracking_valid_loss_monitor.start_new_epoch() - self.tracking_valid_IS_monitor.start_new_epoch() + self.tracking_very_good_IS_monitor.start_new_epoch() + self.tracking_acceptable_IS_monitor.start_new_epoch() + self.tracking_very_far_IS_monitor.start_new_epoch() + self.tracking_valid_diverg_monitor.start_new_epoch() + self.tracking_mean_final_distance_monitor.start_new_epoch() + self.tracking_clipped_final_distance_monitor.start_new_epoch() self.tracking_valid_time_monitor.start_new_epoch() super().validate_one_epoch(epoch) if self.add_a_tracking_validation_phase: - self.tracking_valid_loss_monitor.end_epoch() - self.tracking_valid_IS_monitor.end_epoch() + self.tracking_very_good_IS_monitor.end_epoch() + self.tracking_acceptable_IS_monitor.end_epoch() + self.tracking_very_far_IS_monitor.end_epoch() + self.tracking_valid_diverg_monitor.end_epoch() + self.tracking_mean_final_distance_monitor.end_epoch() + self.tracking_clipped_final_distance_monitor.end_epoch() self.tracking_valid_time_monitor.end_epoch() # Save info @@ -119,9 +179,10 @@ def _get_latest_loss_to_supervise_best(self): if self.use_validation: if self.add_a_tracking_validation_phase: # Compared to super, replacing by tracking_valid loss. - mean_epoch_loss = self.tracking_valid_loss_monitor.average_per_epoch[-1] + mean_epoch_loss = self.tracking_clipped_final_distance_monitor.average_per_epoch[-1] - # Could use IS instead. Not implemented. + # Could use IS instead, or non-clipped, or diverging point. + # Not implemented. 
else: mean_epoch_loss = self.valid_loss_monitor.average_per_epoch[-1] else: @@ -136,36 +197,93 @@ def validate_one_batch(self, data, epoch): if (epoch + 1) % self.tracking_phase_frequency == 0: logger.info("Additional tracking-like generation validation " "from batch.") - gen_mean_loss, gen_n, percent_inv = \ - self.generate_from_one_batch(data) - gen_mean_loss = gen_mean_loss.cpu().item() - self.tracking_valid_loss_monitor.update(gen_mean_loss, weight=n) - self.tracking_valid_IS_monitor.update(percent_inv, weight=n) + (gen_n, mean_final_dist, mean_clipped_final_dist, + percent_IS_very_good, percent_IS_acceptable, percent_IS_very_far, + diverging_pnt) = self.generate_from_one_batch(data) + + self.tracking_very_good_IS_monitor.update(percent_IS_very_good, weight=n) + self.tracking_acceptable_IS_monitor.update(percent_IS_acceptable, weight=n) + self.tracking_very_far_IS_monitor.update(percent_IS_very_far, weight=n) + + self.tracking_mean_final_distance_monitor.update(mean_final_dist, weight=n) + self.tracking_clipped_final_distance_monitor.update(mean_clipped_final_dist, weight=n) + self.tracking_valid_diverg_monitor.update(diverging_pnt, weight=n) + elif len(self.tracking_mean_final_distance_monitor.average_per_epoch) == 0: + # Fake values at the beginning + # Bad IS = 100% + self.tracking_very_good_IS_monitor.update(100.0) + self.tracking_acceptable_IS_monitor.update(100.0) + self.tracking_very_far_IS_monitor.update(100.0) + + # Bad diverging = very far from 0. Either 100% (if diverged at + # first point) or anything >0 if diverged further than expected + # point. + self.tracking_valid_diverg_monitor.update(100.0) + + # Bad mean dist = very far. ex, 100, or clipped. + self.tracking_mean_final_distance_monitor.update(100.0) + self.tracking_clipped_final_distance_monitor.update(ACCEPTABLE_THRESHOLD) else: - self.tracking_valid_loss_monitor.update( - self.tracking_valid_loss_monitor.average_per_epoch[-1]) - self.tracking_valid_IS_monitor.update( - self.tracking_valid_IS_monitor.average_per_epoch[-1]) + # Copy previous value + for monitor in [self.tracking_very_good_IS_monitor, + self.tracking_acceptable_IS_monitor, + self.tracking_very_far_IS_monitor, + self.tracking_valid_diverg_monitor, + self.tracking_mean_final_distance_monitor, + self.tracking_clipped_final_distance_monitor]: + monitor.update(monitor.average_per_epoch[-1]) return mean_loss, n def _update_comet_after_epoch(self, context: str, epoch: int, tracking_phase=False): if tracking_phase: - loss = self.tracking_valid_loss_monitor.average_per_epoch[-1] - logger.info(" Mean tracking loss for this epoch: {}".format(loss)) - - percent_inv = self.tracking_valid_IS_monitor.average_per_epoch[-1] - logger.info(" Mean simili-IS ratio for this epoch: {}" - " (threshold {})".format(percent_inv, IS_THRESHOLD)) + torch.set_printoptions(precision=4) + np.set_printoptions(precision=4) + + final_dist = self.tracking_mean_final_distance_monitor.average_per_epoch[-1] + clipped = self.tracking_clipped_final_distance_monitor.average_per_epoch[-1] + logger.info(" Mean final distance for this epoch: {}\n" + " (Clipped at {}: {})" + .format(final_dist, ACCEPTABLE_THRESHOLD, clipped)) + + percent_IS_good = self.tracking_very_good_IS_monitor.average_per_epoch[-1] + percent_IS_ok = self.tracking_acceptable_IS_monitor.average_per_epoch[-1] + percent_IS_bad = self.tracking_very_far_IS_monitor.average_per_epoch[-1] + + logger.info("Mean simili-IS ratio for this epoch:\n" + " Threshold {}: {}\n" + " Threshold {}: {}\n" + " Threshold {}: {}" + 
.format(VERY_CLOSE_THRESHOLD, percent_IS_good, + ACCEPTABLE_THRESHOLD, percent_IS_ok, + VERY_FAR_THRESHOLD, percent_IS_bad)) + + diverg = self.tracking_valid_diverg_monitor.average_per_epoch[-1] + logger.info("Mean diverging point for this epoch: {}\n" + " (percentage of streamline where distance becomes >{}, " + "or percentage above 100% for streamlines longer than " + "expected)".format(diverg, ACCEPTABLE_THRESHOLD)) if self.comet_exp: comet_context = self.comet_exp.validate with comet_context(): self.comet_exp.log_metric( - "generation_loss_per_epoch", loss, step=epoch) + "Mean final distance", final_dist, step=epoch) + self.comet_exp.log_metric( + "Mean final distance (clipped {})" + .format(ACCEPTABLE_THRESHOLD), clipped, step=epoch) + self.comet_exp.log_metric( + "IS ratio at dist {}".format(VERY_CLOSE_THRESHOLD), + percent_IS_good, step=epoch) + self.comet_exp.log_metric( + "IS ratio at dist {}".format(ACCEPTABLE_THRESHOLD), + percent_IS_ok, step=epoch) self.comet_exp.log_metric( - "generation_IS_ratio_per_epoch", percent_inv, step=epoch) + "IS ratio at dist {}".format(VERY_FAR_THRESHOLD), + percent_IS_bad, step=epoch) + self.comet_exp.log_metric( + "Diverging point", diverg, step=epoch) else: super()._update_comet_after_epoch(context, epoch) @@ -174,36 +292,74 @@ def generate_from_one_batch(self, data): # Data interpolation has not been done yet. GPU computations are done # here in the main thread. torch.set_printoptions(precision=4) - np.set_printoptions(precision=4) + np.set_printoptions(precision=2) - lines, ids_per_subj = data - lines = [line.to(self.device, non_blocking=True, dtype=torch.float) - for line in lines] - last_pos = torch.vstack([line[-1, :] for line in lines]) - mean_length = np.mean([len(s) for s in lines]) + real_lines, ids_per_subj = data + real_lines = [line.to(self.device, non_blocking=True, dtype=torch.float) + for line in real_lines] + last_pos = torch.vstack([line[-1, :] for line in real_lines]) + mean_length = np.mean([len(s) for s in real_lines]) # Dataloader always works on CPU. Sending to right device. # (model is already moved). Using only the n first points lines = [s[0:min(len(s), self.tracking_phase_nb_steps_init), :] - for s in lines] + for s in real_lines] lines = self.propagate_multiple_lines(lines, ids_per_subj) - # Verify "loss", i.e. the differences in coordinates - computed_last_pos = torch.vstack([line[-1, :] for line in lines]) compute_mean_length = np.mean([len(s) for s in lines]) + logger.info("-> Average streamline length (nb pts) in this batch: {} \n" + " Average recovered streamline length: {}" + .format(mean_length.astype(np.float64), + compute_mean_length.astype(np.float64))) - logging.debug(" Average streamline length (nb pts) in this batch: {} \n" - " Average recovered streamline length: {}" - .format(mean_length, compute_mean_length)) + # 1. Final distance compared to expected point. + computed_last_pos = torch.vstack([line[-1, :] for line in lines]) l2_loss = PairwiseDistance(p=2) - loss = l2_loss(computed_last_pos, last_pos) - - logging.info(" Best / Worst loss: {} / {}" - .format(torch.max(loss), torch.min(loss))) - - IS_ratio = torch.sum(loss > IS_THRESHOLD).cpu() / len(lines) * 100 - - return torch.mean(loss), len(lines), IS_ratio + final_dist = l2_loss(computed_last_pos, last_pos) + + # Verify "IS ratio", i.e. percentage of streamlines ending inside a + # predefined radius. 
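As a side illustration of the metrics computed in the lines that follow: the values logged as "IS ratio at dist X" are the percentage of streamlines whose final point lands beyond each radius, the mean final distance is also reported clipped at the acceptable radius, and the diverging-point score folds the first index exceeding that radius into abs(100 - x), so 0 is best. A small self-contained sketch with made-up distances (all values below are illustrative only):

    import numpy as np
    import torch

    VERY_CLOSE, ACCEPTABLE, VERY_FAR = 15.0, 25.0, 40.0

    # Hypothetical final endpoint distances (mm) for four generated streamlines.
    final_dist = torch.tensor([2.0, 18.0, 30.0, 55.0])

    # "Simili-IS": percentage of streamlines ending farther than each radius.
    ratios = {t: (final_dist > t).float().mean().item() * 100
              for t in (VERY_CLOSE, ACCEPTABLE, VERY_FAR)}
    clipped_mean = final_dist.clip(max=ACCEPTABLE).mean().item()
    print(ratios)        # {15.0: 75.0, 25.0: 50.0, 40.0: 25.0}
    print(clipped_mean)  # (2 + 18 + 25 + 25) / 4 = 17.5

    # Diverging point, for one streamline: first index whose point-wise
    # distance to the reference line exceeds the acceptable radius, as a %
    # of the expected length, folded into abs(100 - x).
    dist_per_point = np.array([1.0, 2.0, 30.0, 40.0])   # 4 points
    over = np.where(dist_per_point > ACCEPTABLE)[0]
    score = abs(100.0 - over[0] / len(dist_per_point) * 100.0) if len(over) else 0.0
    print(score)         # diverges at index 2 of 4 -> |100 - 50| = 50.0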
+ IS_ratio_good = torch.sum(final_dist > VERY_CLOSE_THRESHOLD) / len(lines) * 100 + IS_ratio_ok = torch.sum(final_dist > ACCEPTABLE_THRESHOLD) / len(lines) * 100 + IS_ratio_bad = torch.sum(final_dist > VERY_FAR_THRESHOLD) / len(lines) * 100 + + final_dist_clipped = torch.clip(final_dist, min=None, + max=ACCEPTABLE_THRESHOLD) + final_dist = torch.mean(final_dist) + final_dist_clipped = torch.mean(final_dist_clipped) + + # Verify point where streamline starts diverging. + # 0% = error at first point --> really bad. + # 100% = reached exactly the right point. + # >100% = went too far (longer than expected). + # We want a decreasing value towards 0. + # abs(100 - score): 0 = good. 100 = bad. + # Using 100 - x, so the score is diminishing, from 100 = perfect. + total_point = 0 + for line, real_line in zip(lines, real_lines): + expected_nb = len(real_line) + diff_nb = abs(len(real_line) - len(line)) + if len(line) < expected_nb: + diff_nb = len(real_line) - len(line) + line = torch.vstack((line, line[-1, :].repeat(diff_nb, 1))) + elif len(line) > expected_nb: + real_line = torch.vstack((real_line, + real_line[-1, :].repeat(diff_nb, 1))) + dist = l2_loss(line, real_line).detach().cpu().numpy() + point, = np.where(dist > ACCEPTABLE_THRESHOLD) + if len(point) > 0: # (else: score = 0. Never out of range). + div_point = point[0] / expected_nb * 100.0 + total_point += abs(100 - div_point) + diverging_point = total_point / len(lines) + + IS_ratio_good = IS_ratio_good.cpu().numpy().astype(np.float32) + IS_ratio_ok = IS_ratio_ok.cpu().numpy().astype(np.float32) + IS_ratio_bad = IS_ratio_bad.cpu().numpy().astype(np.float32) + final_dist = final_dist.cpu().numpy().astype(np.float32) + final_dist_clipped = final_dist_clipped.cpu().numpy().astype(np.float32) + diverging_point = np.asarray(diverging_point, dtype=np.float32) + return (len(lines), final_dist, final_dist_clipped, + IS_ratio_good, IS_ratio_ok, IS_ratio_bad, diverging_point) def propagate_multiple_lines(self, lines: List[torch.Tensor], ids_per_subj): assert self.model.step_size is not None, \ diff --git a/dwi_ml/training/trainers.py b/dwi_ml/training/trainers.py index e4f9c59e..d6a57f2a 100644 --- a/dwi_ml/training/trainers.py +++ b/dwi_ml/training/trainers.py @@ -616,7 +616,7 @@ def train_and_validate(self): if self.comet_exp: self.comet_exp.set_epoch(epoch) - logger.info("******* STARTING : Epoch {} (i.e. #{}) *******" + logger.info("\n\n******* STARTING : Epoch {} (i.e. #{}) *******" .format(epoch, epoch + 1)) # Set learning rate to either current value or last value @@ -820,8 +820,22 @@ def validate_one_epoch(self, epoch): self.valid_loss_monitor.update(mean_loss, weight=n) + with tqdm_logging_redirect(self.valid_dataloader, ncols=100, + total=self.nb_batches_valid, + loggers=[logging.root], + tqdm_class=tqdm) as pbar: + # Explicitly delete iterator to kill threads and free memory before # running training again + valid_iterator = enumerate(pbar) + for batch_id, data in valid_iterator: + logging.warning("BATCH: ******************** REMOVE THIS. 
FOR UNIQUE PICTURE!") + if batch_id == self.nb_batches_valid: + # Explicitly close tqdm's progress bar to fix possible bugs + # when breaking the loop + pbar.close() + break + del valid_iterator # Save info From edadaf485834f3708fd0f10e0e1873144cc636b3 Mon Sep 17 00:00:00 2001 From: EmmaRenauld Date: Fri, 2 Jun 2023 13:51:13 -0400 Subject: [PATCH 03/13] Small fixes to include generation validation to transformers --- .../projects/transforming_tractography.py | 3 +- .../projects/trainers_for_generation.py | 14 ++++++--- .../training/projects/transformer_trainer.py | 29 ++++--------------- dwi_ml/training/trainers.py | 3 +- dwi_ml/training/utils/monitoring.py | 2 +- scripts_python/tto_train_model.py | 12 ++++++-- 6 files changed, 29 insertions(+), 34 deletions(-) diff --git a/dwi_ml/models/projects/transforming_tractography.py b/dwi_ml/models/projects/transforming_tractography.py index 62ed8250..02645f69 100644 --- a/dwi_ml/models/projects/transforming_tractography.py +++ b/dwi_ml/models/projects/transforming_tractography.py @@ -488,7 +488,8 @@ def forward(self, inputs: List[torch.tensor], # Outputs will be all streamlines merged. # To compute loss = ok. During tracking, we will need to split back. outputs = self.direction_getter(outputs) - outputs = copy_prev_dir + outputs + if self.start_from_copy_prev: + outputs = copy_prev_dir + outputs if self._context != 'tracking': outputs = list(torch.split(outputs, list(unpad_lengths))) diff --git a/dwi_ml/training/projects/trainers_for_generation.py b/dwi_ml/training/projects/trainers_for_generation.py index 442dda17..fd03b6e9 100644 --- a/dwi_ml/training/projects/trainers_for_generation.py +++ b/dwi_ml/training/projects/trainers_for_generation.py @@ -159,6 +159,7 @@ def validate_one_epoch(self, epoch): self.tracking_clipped_final_distance_monitor.start_new_epoch() self.tracking_valid_time_monitor.start_new_epoch() + # This will run our modified "validate one batch" for each batch. super().validate_one_epoch(epoch) if self.add_a_tracking_validation_phase: @@ -172,12 +173,12 @@ def validate_one_epoch(self, epoch): # Save info if self.comet_exp: - self._update_comet_after_epoch(self.comet_exp.validate, epoch, + self._update_comet_after_epoch('validation', epoch, tracking_phase=True) def _get_latest_loss_to_supervise_best(self): if self.use_validation: - if self.add_a_tracking_validation_phase: + if False: # self.add_a_tracking_validation_phase: # Compared to super, replacing by tracking_valid loss. mean_epoch_loss = self.tracking_clipped_final_distance_monitor.average_per_epoch[-1] @@ -209,6 +210,8 @@ def validate_one_batch(self, data, epoch): self.tracking_clipped_final_distance_monitor.update(mean_clipped_final_dist, weight=n) self.tracking_valid_diverg_monitor.update(diverging_pnt, weight=n) elif len(self.tracking_mean_final_distance_monitor.average_per_epoch) == 0: + logger.info("Skipping tracking-like generation validation from " + "batch. No values yet: adding fake initial values.") # Fake values at the beginning # Bad IS = 100% self.tracking_very_good_IS_monitor.update(100.0) @@ -224,6 +227,8 @@ def validate_one_batch(self, data, epoch): self.tracking_mean_final_distance_monitor.update(100.0) self.tracking_clipped_final_distance_monitor.update(ACCEPTABLE_THRESHOLD) else: + logger.info("Skipping tracking-like generation validation from " + "batch. 
Copying previous epoch's values.") # Copy previous value for monitor in [self.tracking_very_good_IS_monitor, self.tracking_acceptable_IS_monitor, @@ -285,8 +290,7 @@ def _update_comet_after_epoch(self, context: str, epoch: int, self.comet_exp.log_metric( "Diverging point", diverg, step=epoch) - else: - super()._update_comet_after_epoch(context, epoch) + super()._update_comet_after_epoch(context, epoch) def generate_from_one_batch(self, data): # Data interpolation has not been done yet. GPU computations are done @@ -304,7 +308,9 @@ def generate_from_one_batch(self, data): # (model is already moved). Using only the n first points lines = [s[0:min(len(s), self.tracking_phase_nb_steps_init), :] for s in real_lines] + self.model.set_context('tracking') lines = self.propagate_multiple_lines(lines, ids_per_subj) + self.model.set_context('validation') compute_mean_length = np.mean([len(s) for s in lines]) logger.info("-> Average streamline length (nb pts) in this batch: {} \n" diff --git a/dwi_ml/training/projects/transformer_trainer.py b/dwi_ml/training/projects/transformer_trainer.py index 8ae99c0d..bb76872c 100644 --- a/dwi_ml/training/projects/transformer_trainer.py +++ b/dwi_ml/training/projects/transformer_trainer.py @@ -7,35 +7,16 @@ from dwi_ml.models.projects.transforming_tractography import AbstractTransformerModel from dwi_ml.training.batch_samplers import DWIMLBatchIDSampler from dwi_ml.training.batch_loaders import DWIMLBatchLoaderOneInput -from dwi_ml.training.trainers import DWIMLTrainerOneInput +from dwi_ml.training.projects.trainers_for_generation import \ + DWIMLTrainerForTrackingOneInput -class TransformerTrainer(DWIMLTrainerOneInput): - def __init__(self, - model: AbstractTransformerModel, experiments_path: str, - experiment_name: str, - batch_sampler: DWIMLBatchIDSampler, - batch_loader: DWIMLBatchLoaderOneInput, - learning_rates: List = None, weight_decay: float = 0.01, - optimizer='Adam', max_epochs: int = 10, - max_batches_per_epoch_training: int = 1000, - max_batches_per_epoch_validation: int = 1000, - patience: int = None, patience_delta: float = 1e-6, - nb_cpu_processes: int = 0, use_gpu: bool = False, - comet_workspace: str = None, comet_project: str = None, - from_checkpoint: bool = False, log_level=logging.root.level): +class TransformerTrainer(DWIMLTrainerForTrackingOneInput): + def __init__(self, **kwargs): """ See Super for parameter description. No additional parameters here. """ - super().__init__(model, experiments_path, experiment_name, - batch_sampler, batch_loader, - learning_rates, weight_decay, - optimizer, max_epochs, - max_batches_per_epoch_training, - max_batches_per_epoch_validation, - patience, patience_delta, nb_cpu_processes, use_gpu, - comet_workspace, comet_project, - from_checkpoint, log_level) + super().__init__(**kwargs) def run_model(self, batch_inputs, batch_streamlines): dirs = self.model.format_directions(batch_streamlines) diff --git a/dwi_ml/training/trainers.py b/dwi_ml/training/trainers.py index d6a57f2a..7df0a6a3 100644 --- a/dwi_ml/training/trainers.py +++ b/dwi_ml/training/trainers.py @@ -863,7 +863,8 @@ def _update_comet_after_epoch(self, context: str, epoch: int): elif context == 'validation': loss = self.valid_loss_monitor.average_per_epoch[-1] else: - raise ValueError("Unexpected context.") + raise ValueError("Unexpected context ({}) for comet. 
Expecting " + "training or validation.") logger.info(" Mean loss for this epoch: {}".format(loss)) if self.comet_exp: diff --git a/dwi_ml/training/utils/monitoring.py b/dwi_ml/training/utils/monitoring.py index 44c6cf2a..65e1c2bb 100644 --- a/dwi_ml/training/utils/monitoring.py +++ b/dwi_ml/training/utils/monitoring.py @@ -58,7 +58,7 @@ def __init__(self, weighted: bool = False): self.average_per_epoch = [] self.current_epoch = -1 - def update(self, value, weight=None): + def update(self, value, weight=1): """ Note. Does not save the update if value is inf. diff --git a/scripts_python/tto_train_model.py b/scripts_python/tto_train_model.py index b3fdb266..39e6706d 100755 --- a/scripts_python/tto_train_model.py +++ b/scripts_python/tto_train_model.py @@ -43,7 +43,7 @@ def prepare_arg_parser(): add_logging_arg(p) add_args_batch_sampler(p) add_args_batch_loader(p) - add_training_args(p) + add_training_args(p, add_a_tracking_validation_phase=True) # Specific to Transformers: gt = add_abstract_model_args(p) @@ -102,8 +102,9 @@ def init_from_args(args, sub_loggers_level): with Timer("\n\nPreparing trainer", newline=True, color='red'): lr = format_lr(args.learning_rate) trainer = TransformerTrainer( - model, args.experiments_path, args.experiment_name, - batch_sampler, batch_loader, + model=model, experiments_path=args.experiments_path, + experiment_name=args.experiment_name, batch_sampler=batch_sampler, + batch_loader=batch_loader, # COMET comet_project=args.comet_project, comet_workspace=args.comet_workspace, @@ -114,6 +115,11 @@ def init_from_args(args, sub_loggers_level): max_batches_per_epoch_validation=args.max_batches_per_epoch_validation, patience=args.patience, patience_delta=args.patience_delta, from_checkpoint=False, + # (generation validation:) + add_a_tracking_validation_phase=args.add_a_tracking_validation_phase, + tracking_phase_frequency=args.tracking_phase_frequency, + tracking_phase_nb_steps_init=5, # args.tracking_phase_nb_steps_init + tracking_phase_mask_group=args.tracking_mask, # MEMORY nb_cpu_processes=args.nbr_processes, use_gpu=args.use_gpu, log_level=args.logging) From f45a866e995cd4773facf54c9b40e0032e9b5cbb Mon Sep 17 00:00:00 2001 From: EmmaRenauld Date: Thu, 8 Jun 2023 09:04:46 -0400 Subject: [PATCH 04/13] Clarify code. 
Add connectivity matrix metric to generation --- dwi_ml/data/dataset/mri_data_containers.py | 6 +- .../data/dataset/multi_subject_containers.py | 4 +- .../data/dataset/single_subject_containers.py | 25 +- dwi_ml/data/dataset/streamline_containers.py | 91 +++++- dwi_ml/data/hdf5/hdf5_creation.py | 49 +++- dwi_ml/data/hdf5/utils.py | 18 +- .../processing/streamlines/post_processing.py | 90 +++++- dwi_ml/models/projects/learn2track_model.py | 7 +- .../tracking/projects/learn2track_tracker.py | 2 +- dwi_ml/training/batch_loaders.py | 8 +- .../training/projects/learn2track_trainer.py | 9 +- .../training/projects/transformer_trainer.py | 7 +- dwi_ml/training/trainers.py | 140 +++++---- dwi_ml/training/utils/batch_loaders.py | 5 +- dwi_ml/training/utils/monitoring.py | 9 +- dwi_ml/training/with_generation/__init__.py | 0 .../training/with_generation/batch_loader.py | 38 +++ .../trainer.py} | 272 +++++++----------- dwi_ml/unit_tests/test_connectivity_matrix.py | 29 ++ scripts_python/dwiml_create_hdf5_dataset.py | 9 +- 20 files changed, 516 insertions(+), 302 deletions(-) create mode 100644 dwi_ml/training/with_generation/__init__.py create mode 100644 dwi_ml/training/with_generation/batch_loader.py rename dwi_ml/training/{projects/trainers_for_generation.py => with_generation/trainer.py} (56%) create mode 100644 dwi_ml/unit_tests/test_connectivity_matrix.py diff --git a/dwi_ml/data/dataset/mri_data_containers.py b/dwi_ml/data/dataset/mri_data_containers.py index 378bd7e8..43b292a3 100644 --- a/dwi_ml/data/dataset/mri_data_containers.py +++ b/dwi_ml/data/dataset/mri_data_containers.py @@ -44,7 +44,7 @@ def __init__(self, data: Union[torch.Tensor, h5py.Group], self._data = data @classmethod - def init_from_hdf_info(cls, hdf_group: h5py.Group): + def init_mri_data_from_hdf_info(cls, hdf_group: h5py.Group): """ Allows initiating an instance of this class by sending only the hdf handle. This method will define how to load the data from it @@ -74,7 +74,7 @@ def __init__(self, data: torch.Tensor, voxres: np.ndarray, super().__init__(data, voxres, affine) @classmethod - def init_from_hdf_info(cls, hdf_group: h5py.Group): + def init_mri_data_from_hdf_info(cls, hdf_group: h5py.Group): """ Creating class instance from the hdf in cases where data is not loaded yet. Non-lazy = loading the data here. @@ -106,7 +106,7 @@ def __init__(self, data: Union[h5py.Group, None], voxres: np.ndarray, super().__init__(data, voxres, affine) @classmethod - def init_from_hdf_info(cls, hdf_group: h5py.Group): + def init_mri_data_from_hdf_info(cls, hdf_group: h5py.Group): """ Creating class instance from the hdf in cases where data is not loaded yet. Not loading the data, but loading the voxres. 
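Before the next files: this patch adds compute_triu_connectivity (in dwi_ml/data/processing/streamlines/post_processing.py, further below) and stores its output per streamline group in the hdf5. As a quick illustration of what that function produces, a toy usage sketch; the volume size, streamline coordinates and printed output are illustrative only:

    import numpy as np
    from dwi_ml.data.processing.streamlines.post_processing import \
        compute_triu_connectivity

    # Two toy streamlines in voxel space (corner origin), inside a 10x10x10
    # volume. Only the endpoints matter for the connectivity matrix.
    streamlines = [np.array([[0.5, 0.5, 0.5], [9.0, 9.0, 9.0]]),
                   np.array([[9.0, 0.5, 0.5], [0.5, 9.0, 9.0]])]

    # With a 2x2x2 downsampling, endpoints are binned into 8 blocks and the
    # result is an 8 x 8 upper-triangular count matrix (one count per line).
    matrix = compute_triu_connectivity(streamlines,
                                       volume_size=[10, 10, 10],
                                       downsampled_volume_size=[2, 2, 2])
    print(matrix.shape, matrix.sum())   # (8, 8) 2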
diff --git a/dwi_ml/data/dataset/multi_subject_containers.py b/dwi_ml/data/dataset/multi_subject_containers.py index 860bb7a4..fe4c72d6 100644 --- a/dwi_ml/data/dataset/multi_subject_containers.py +++ b/dwi_ml/data/dataset/multi_subject_containers.py @@ -326,11 +326,11 @@ def _build_empty_data_list(self): def _init_subj_from_hdf(self, hdf_handle, subject_id, volume_groups, nb_features, streamline_groups): if self.is_lazy: - return LazySubjectData.init_from_hdf( + return LazySubjectData.init_single_subject_from_hdf( subject_id, hdf_handle, (volume_groups, nb_features, streamline_groups)) else: - return SubjectData.init_from_hdf( + return SubjectData.init_single_subject_from_hdf( subject_id, hdf_handle, (volume_groups, nb_features, streamline_groups)) diff --git a/dwi_ml/data/dataset/single_subject_containers.py b/dwi_ml/data/dataset/single_subject_containers.py index e0439b6d..8aafb65b 100644 --- a/dwi_ml/data/dataset/single_subject_containers.py +++ b/dwi_ml/data/dataset/single_subject_containers.py @@ -48,7 +48,8 @@ def sft_data_list(self): raise NotImplementedError @classmethod - def init_from_hdf(cls, subject_id: str, hdf_file, group_info=None): + def init_single_subject_from_hdf( + cls, subject_id: str, hdf_file, group_info=None): """Returns an instance of this class, initiated by sending only the hdf handle. The child class's method will define how to load the data based on the child data management.""" @@ -88,7 +89,8 @@ def sft_data_list(self): return self._sft_data_list @classmethod - def init_from_hdf(cls, subject_id: str, hdf_file, group_info=None): + def init_single_subject_from_hdf( + cls, subject_id: str, hdf_file, group_info=None): """ Instantiating a single subject data: load info and use __init__ """ @@ -102,13 +104,14 @@ def init_from_hdf(cls, subject_id: str, hdf_file, group_info=None): logger.debug(' Loading volume group "{}": '.format(group)) # Creating a SubjectMRIData or a LazySubjectMRIData based on # lazy or non-lazy version. - subject_mri_group_data = MRIData.init_from_hdf_info( + subject_mri_group_data = MRIData.init_mri_data_from_hdf_info( hdf_file[subject_id][group]) subject_mri_data_list.append(subject_mri_group_data) for group in streamline_groups: logger.debug(" Loading subject's streamlines") - sft_data = SFTData.init_from_hdf_info(hdf_file[subject_id][group]) + sft_data = SFTData.init_sft_data_from_hdf_info( + hdf_file[subject_id][group]) subject_sft_data_list.append(sft_data) subj_data = cls(subject_id, @@ -140,7 +143,8 @@ def __init__(self, volume_groups: List[str], nb_features: List[int], self.is_lazy = True @classmethod - def init_from_hdf(cls, subject_id: str, hdf_file, group_info=None): + def init_single_subject_from_hdf( + cls, subject_id: str, hdf_file, group_info=None): """ Instantiating a single subject data: NOT LOADING info and use __init__ (so in short: this does basically nothing, the lazy data is kept @@ -168,7 +172,6 @@ def init_from_hdf(cls, subject_id: str, hdf_file, group_info=None): def mri_data_list(self) -> Union[List[LazyMRIData], None]: """As a property, this is only computed if called by the user. 
Returns a List[LazyMRIData]""" - if self.hdf_handle is not None: if not self.hdf_handle.id.valid: logger.warning("Tried to access subject's volumes but its " @@ -176,7 +179,8 @@ def mri_data_list(self) -> Union[List[LazyMRIData], None]: mri_data_list = [] for group in self.volume_groups: hdf_group = self.hdf_handle[self.subject_id][group] - mri_data_list.append(LazyMRIData.init_from_hdf_info(hdf_group)) + mri_data_list.append( + LazyMRIData.init_mri_data_from_hdf_info(hdf_group)) return mri_data_list else: @@ -187,11 +191,16 @@ def mri_data_list(self) -> Union[List[LazyMRIData], None]: def sft_data_list(self) -> Union[List[LazySFTData], None]: """As a property, this is only computed if called by the user. Returns a List[LazyMRIData]""" + # toDo. Reloads the basic information (ex: origin, corner, etc) + # everytime we acces a subject. They are lazy subjects! Why can't + # we keep this list of lazysftdata in memory? + if self.hdf_handle is not None: sft_data_list = [] for group in self.streamline_groups: hdf_group = self.hdf_handle[self.subject_id][group] - sft_data_list.append(LazySFTData.init_from_hdf_info(hdf_group)) + sft_data_list.append( + LazySFTData.init_sft_data_from_hdf_info(hdf_group)) return sft_data_list else: diff --git a/dwi_ml/data/dataset/streamline_containers.py b/dwi_ml/data/dataset/streamline_containers.py index 262755d2..9f66bfe7 100644 --- a/dwi_ml/data/dataset/streamline_containers.py +++ b/dwi_ml/data/dataset/streamline_containers.py @@ -1,8 +1,6 @@ # -*- coding: utf-8 -*- """ We expect the classes here to be used in single_subject_containers - - """ import logging from typing import Tuple, Union, List @@ -135,6 +133,14 @@ def lengths_mm(self): return lengths + def connectivity_matrix(self, indxyz: Tuple = None): + if indxyz: + indx, indy, indz = indxyz + return np.asarray( + self.hdf_group['connectivity_matrix'][indx, indy, indy], + dtype=int) + return np.asarray(self.hdf_group['connectivity_matrix'], dtype=int) + def __len__(self): return len(self.hdf_group['offsets']) @@ -157,7 +163,9 @@ class SFTDataAbstract(object): """ def __init__(self, streamlines: Union[ArraySequence, _LazyStreamlinesGetter], - space_attributes: Tuple, space: Space, origin: Origin): + space_attributes: Tuple, space: Space, origin: Origin, + contains_connectivity: bool, + downsampled_size_for_connectivity: List): """ Params ------ @@ -181,6 +189,8 @@ def __init__(self, self.origin = origin self.streamlines = streamlines self.is_lazy = None + self.contains_connectivity = contains_connectivity + self.downsampled_size_for_connectivity = downsampled_size_for_connectivity @property def lengths(self): @@ -198,8 +208,25 @@ def lengths_mm(self): streamlines.""" raise NotImplementedError + def connectivity_matrix_and_info(self, ind=None): + """New method compared to SFTs: access pre-computed connectivity + matrix. 
Returns the subject's connectivity matrix associated with + current tractogram, together with information required to recompute + a similar matrix: reference volume's shape and downsampled shape.""" + if not self.contains_connectivity: + raise ValueError("No pre-computed connectivity matrix found for " + "this subject.") + + (_, ref_volume_shape, _, _) = self.space_attributes + + return (self._access_connectivity_matrix(ind), ref_volume_shape, + self.downsampled_size_for_connectivity) + + def _access_connectivity_matrix(self, ind): + raise NotImplementedError + @classmethod - def init_from_hdf_info(cls, hdf_group: h5py.Group): + def init_sft_data_from_hdf_info(cls, hdf_group: h5py.Group): """Create an instance of this class by sending directly the hdf5 file. The child class's method will define how to load the data according to the class's data management.""" @@ -230,18 +257,27 @@ def as_sft(self, streamline_ids: List = None): class SFTData(SFTDataAbstract): - def __init__(self, streamlines: ArraySequence, space_attributes: Tuple, - space: Space, origin: Origin, lengths_mm: List): - super().__init__(streamlines, space_attributes, space, origin) + streamlines: ArraySequence + + def __init__(self, lengths_mm: List, connectivity_matrix: np.ndarray, + **kwargs): + super().__init__(**kwargs) self._lengths_mm = lengths_mm + self._connectivity_matrix = connectivity_matrix self.is_lazy = False @property def lengths_mm(self): return np.array(self._lengths_mm) + def _access_connectivity_matrix(self, indxyz: Tuple = None): + if indxyz: + indx, indy, indz = indxyz + return self._connectivity_matrix[indx, indy, indy] + return self._connectivity_matrix + @classmethod - def init_from_hdf_info(cls, hdf_group: h5py.Group): + def init_sft_data_from_hdf_info(cls, hdf_group: h5py.Group): """ Creating class instance from the hdf in cases where data is not loaded yet. Non-lazy = loading the data here. 
@@ -249,12 +285,25 @@ def init_from_hdf_info(cls, hdf_group: h5py.Group): streamlines = _load_streamlines_from_hdf(hdf_group) # Adding non-hidden parameters for nicer later access lengths_mm = hdf_group['euclidean_lengths'] + if 'connectivity_matrix' in hdf_group: + contains_connectivity = True + connectivity_matrix = np.asarray(hdf_group['connectivity_matrix'], + dtype=int) + downsampled_size = hdf_group.attrs['downsampled_size'] + else: + contains_connectivity = False + connectivity_matrix = None + downsampled_size = None space_attributes, space, origin = _load_space_from_hdf(hdf_group) # Return an instance of SubjectMRIData instantiated through __init__ # with this loaded data: - return cls(streamlines, space_attributes, space, origin, lengths_mm) + return cls(lengths_mm, connectivity_matrix, + streamlines=streamlines, space_attributes=space_attributes, + space=space, origin=origin, + contains_connectivity=contains_connectivity, + downsampled_size_for_connectivity=downsampled_size) def _subset_streamlines(self, streamline_ids): if streamline_ids is not None: @@ -265,9 +314,10 @@ def _subset_streamlines(self, streamline_ids): class LazySFTData(SFTDataAbstract): - def __init__(self, streamlines: _LazyStreamlinesGetter, - space_attributes: Tuple, space: Space, origin: Origin): - super().__init__(streamlines, space_attributes, space, origin) + streamlines: _LazyStreamlinesGetter + + def __init__(self, **kwargs): + super().__init__(**kwargs) self.is_lazy = True @property @@ -275,13 +325,26 @@ def lengths_mm(self): # Fetching from the lazy streamline getter return np.array(self.streamlines.lengths_mm) + def _access_connectivity_matrix(self, indxyz: Tuple = None): + # Fetching in a lazy way + return self.streamlines.connectivity_matrix(indxyz) + @classmethod - def init_from_hdf_info(cls, hdf_group: h5py.Group): + def init_sft_data_from_hdf_info(cls, hdf_group: h5py.Group): space_attributes, space, origin = _load_space_from_hdf(hdf_group) + if 'connectivity_matrix' in hdf_group: + contains_connectivity = True + downsampled_size = hdf_group.attrs['downsampled_size'] + else: + contains_connectivity = False + downsampled_size = None streamlines = _LazyStreamlinesGetter(hdf_group) - return cls(streamlines, space_attributes, space, origin) + return cls(streamlines=streamlines, space_attributes=space_attributes, + space=space, origin=origin, + contains_connectivity=contains_connectivity, + downsampled_size_for_connectivity=downsampled_size) def _subset_streamlines(self, streamline_ids): streamlines = self.streamlines.get_array_sequence(streamline_ids) diff --git a/dwi_ml/data/hdf5/hdf5_creation.py b/dwi_ml/data/hdf5/hdf5_creation.py index f255ef6a..e2209c65 100644 --- a/dwi_ml/data/hdf5/hdf5_creation.py +++ b/dwi_ml/data/hdf5/hdf5_creation.py @@ -3,13 +3,14 @@ import logging import os from pathlib import Path -from typing import List +from typing import List, Union from dipy.io.stateful_tractogram import set_sft_logger_level, Space from dipy.io.streamline import load_tractogram, save_tractogram from dipy.io.utils import is_header_compatible from dipy.tracking.utils import length import h5py + from dwi_ml.data.processing.streamlines.data_augmentation import \ resample_or_compress from nested_lookup import nested_lookup @@ -20,6 +21,8 @@ from dwi_ml.data.io import load_file_to4d from dwi_ml.data.processing.dwi.dwi import standardize_data +from dwi_ml.data.processing.streamlines.post_processing import \ + compute_triu_connectivity def _load_and_verify_file(filename: str, subj_input_path, group_name: 
str, @@ -106,7 +109,10 @@ def __init__(self, root_folder: Path, out_hdf_filename: Path, training_subjs: List[str], validation_subjs: List[str], testing_subjs: List[str], groups_config: dict, std_mask: str, step_size: float = None, - compress: float = None, enforce_files_presence: bool = True, + compress: float = None, + compute_connectivity_matrix: bool = False, + downsampled_size_for_connectivity: Union[int, list] = 20, + enforce_files_presence: bool = True, save_intermediate: bool = False, intermediate_folder: Path = None): """ @@ -129,6 +135,11 @@ def __init__(self, root_folder: Path, out_hdf_filename: Path, Step size to resample streamlines. Default: None. compress: float Compress streamlines. Default: None. + compute_connectivity_matrix: bool + Compute connectivity matrix for each streamline group. + Default: False. + downsampled_size_for_connectivity: int or List + See compute_connectivity_matrix's doc. enforce_files_presence: bool If true, will stop if some files are not available for a subject. Default: True. @@ -147,6 +158,18 @@ def __init__(self, root_folder: Path, out_hdf_filename: Path, self.groups_config = groups_config self.step_size = step_size self.compress = compress + self.compute_connectivity = compute_connectivity_matrix + if isinstance(downsampled_size_for_connectivity, List): + assert len(downsampled_size_for_connectivity) == 3, \ + "Expecting to work with 3D volumes. Expecting connectivity " \ + "downsample size to be a list of 3 values." + self.connectivity_downsample_size = downsampled_size_for_connectivity + else: + assert isinstance(downsampled_size_for_connectivity, int), \ + "Expecting the connectivity matrix size to be either a 3D " \ + "list or an integer, but got {}"\ + .format(downsampled_size_for_connectivity) + self.connectivity_downsample_size = [downsampled_size_for_connectivity] * 3 # Optional self.std_mask = std_mask # (could be None) @@ -556,12 +579,14 @@ def _create_streamline_groups(self, ref, subj_input_dir, subj_id, streamlines_group.attrs['dimensions'] = d streamlines_group.attrs['voxel_sizes'] = vs streamlines_group.attrs['voxel_order'] = vo + if self.compute_connectivity: + streamlines_group.attrs['downsampled_size'] = \ + self.connectivity_downsample_size if len(sft.data_per_point) > 0: logging.debug('sft contained data_per_point. Data not kept.') if len(sft.data_per_streamline) > 0: - logging.debug('sft contained data_per_streamlines. Data not ' - 'kept.') + logging.debug('sft contained data_per_streamlines. Data not kept.') # Accessing private Dipy values, but necessary. # We need to deconstruct the streamlines into arrays with @@ -574,6 +599,18 @@ def _create_streamline_groups(self, ref, subj_input_dir, subj_id, data=sft.streamlines._lengths) streamlines_group.create_dataset('euclidean_lengths', data=lengths) + if self.compute_connectivity: + # Can be reduced using sparse tensors notation... always + # minimum 50% of zeros! Then we could save separately + # the indices, values, size of the tensor. But unclear how + # much sparse they need to be to actually save memory. + # Skipping for now. + streamlines_group.create_dataset( + 'connectivity_matrix', + data=compute_triu_connectivity( + sft.streamlines, d, self.connectivity_downsample_size, + binary=True, to_sparse_tensor=False)) + def _process_one_streamline_group( self, subj_dir: Path, group: str, subj_id: str, header: nib.Nifti1Header): @@ -602,7 +639,7 @@ def _process_one_streamline_group( final_tractogram : StatefulTractogram All streamlines in voxel space. 
output_lengths : List[float] - The euclidean length of each streamline + The Euclidean length of each streamline """ tractograms = self.groups_config[group]['files'] @@ -621,7 +658,7 @@ def _process_one_streamline_group( for instructions in tractograms: if instructions.endswith('/ALL'): - # instructions is to get all tractograms in given folder. + # instructions are to get all tractograms in given folder. tractograms_dir = instructions.split('/ALL') tractograms_dir = ''.join(tractograms_dir[:-1]) tractograms_sublist = [ diff --git a/dwi_ml/data/hdf5/utils.py b/dwi_ml/data/hdf5/utils.py index 488b5cc4..16d795ff 100644 --- a/dwi_ml/data/hdf5/utils.py +++ b/dwi_ml/data/hdf5/utils.py @@ -11,7 +11,7 @@ from dwi_ml.io_utils import add_resample_or_compress_arg -def add_basic_args(p: ArgumentParser): +def add_hdf5_creation_args(p: ArgumentParser): # Positional arguments p.add_argument('dwi_ml_ready_folder', @@ -73,6 +73,18 @@ def add_mri_processing_args(p: ArgumentParser): def add_streamline_processing_args(p: ArgumentParser): g = p.add_argument_group('Streamlines processing options:') add_resample_or_compress_arg(g) + g.add_argument( + '--compute_connectivity_matrix', action='store_true', + help="If set, computes the 3D connectivity matrix for each streamline " + "group. \nDefined from downsampled image, not from anatomy! \n" + "Ex: can be used at validation time with our trainer's " + "'generation-validation' step.") + g.add_argument( + '--connectivity_downsample_size', metavar='m', type=int, nargs='+', + help="Number of 3D blocks (m x m x m) for the connectivity matrix. \n" + "(The matrix will be m^3 x m^3). If more than one values are " + "provided, expected to be one per dimension. \n" + "Default: 20x20x20.") def _initialize_intermediate_subdir(hdf5_file, save_intermediate): @@ -127,7 +139,9 @@ def prepare_hdf5_creator(args): creator = HDF5Creator(Path(args.dwi_ml_ready_folder), args.out_hdf5_file, training_subjs, validation_subjs, testing_subjs, groups_config, args.std_mask, args.step_size, - args.compress, args.enforce_files_presence, + args.compress, args.compute_connectivity_matrix, + args.connectivity_downsample_size, + args.enforce_files_presence, args.save_intermediate, intermediate_subdir) return creator diff --git a/dwi_ml/data/processing/streamlines/post_processing.py b/dwi_ml/data/processing/streamlines/post_processing.py index f750222e..40851788 100644 --- a/dwi_ml/data/processing/streamlines/post_processing.py +++ b/dwi_ml/data/processing/streamlines/post_processing.py @@ -1,4 +1,5 @@ # -*- coding: utf-8 -*- +import logging from typing import List import numpy as np @@ -260,6 +261,93 @@ def weight_value_with_angle(values: List, streamlines: List = None, # loss^0 = 1. loss^1 = loss. Also adding 1. # But if values are < 1, pow becomes smaller. # Our losses tend toward 0. Adding 1 before. - #values[i] = torch.pow(1.0 + values[i], angles + 1.0) - 1.0 + # values[i] = torch.pow(1.0 + values[i], angles + 1.0) - 1.0 return values + + +def compute_triu_connectivity( + streamlines, volume_size, downsampled_volume_size, + binary: bool = False, to_sparse_tensor: bool = False, device=None): + """ + Compute a connectivity matrix. + + Parameters + ---------- + streamlines: list of np arrays or list of tensors. + Streamlines, in vox space, corner origin. + volume_size: list + The 3D dimension of the reference volume. + downsampled_volume_size: + Either a 3D size or the size m of the m x m x m downsampled volume + coordinates for the connectivity matrix. 
This means that the matrix + will be a m^d x m^d triangular matrix. In 3D, with 20x20x20, this is an + 8000 x 8000 matrix (triangular = half of it in memory). It probably + contains a lot of zeros with the background being included. Saved as + sparse. + binary: bool + If true, return a binary matrix. + to_sparse_tensor: + If true, return the sparse matrix. + device: + If true and to_sparse_tensor, the matrix will be hosted on device. + """ + # Getting endpoint coordinates + # + Fix types + volume_size = np.asarray(volume_size) + downsampled_volume_size = np.asarray(downsampled_volume_size) + if isinstance(streamlines[0], list): + start_values = [s[0] for s in streamlines] + end_values = [s[-1] for s in streamlines] + elif isinstance(streamlines[0], torch.Tensor): + start_values = [s[0, :].cpu().numpy() for s in streamlines] + end_values = [s[-1, :].cpu().numpy() for s in streamlines] + else: # expecting numpy arrays + start_values = [s[0, :] for s in streamlines] + end_values = [s[-1, :] for s in streamlines] + + assert len(downsampled_volume_size) == len(volume_size) + nb_dims = len(downsampled_volume_size) + nb_voxels_pre = np.prod(volume_size) + nb_voxels_post = np.prod(downsampled_volume_size) + logging.debug("Preparing connectivity matrix of downsampled volume: from " + "{} to {}. Gives a matrix of size {} x {} rather than {} " + "voxels)." + .format(volume_size, downsampled_volume_size, + nb_voxels_post, nb_voxels_post, nb_voxels_pre)) + + # Downsampling + mult_factor = downsampled_volume_size / volume_size + start_values = np.clip((start_values * mult_factor).astype(int), + a_min=0, a_max=downsampled_volume_size - 1) + end_values = np.clip((end_values * mult_factor).astype(int), + a_min=0, a_max=downsampled_volume_size - 1) + + # Blocs go from 0 to m1*m2*m3. + start_block = np.ravel_multi_index( + [start_values[:, d] for d in range(nb_dims)], downsampled_volume_size) + end_block = np.ravel_multi_index( + [end_values[:, d] for d in range(nb_dims)], downsampled_volume_size) + + total_size = np.prod(downsampled_volume_size) + matrix = np.zeros((total_size, total_size), dtype=int) + for s_start, s_end in zip(start_block, end_block): + matrix[s_start, s_end] += 1 + + # Either, at the end, sum lower triangular + upper triangular (except + # diagonal), or: + if s_end != s_start: + matrix[s_end, s_start] += 1 + + matrix = np.triu(matrix) + assert matrix.sum() == len(streamlines) + + if binary: + matrix = matrix.astype(bool) + + if to_sparse_tensor: + logging.debug("Converting matrix to sparse. Contained {}% of zeros." + .format((1 - np.count_nonzero(matrix) / total_size) * 100)) + matrix = torch.as_tensor(matrix, device=device).to_sparse() + + return matrix diff --git a/dwi_ml/models/projects/learn2track_model.py b/dwi_ml/models/projects/learn2track_model.py index d7c1e8c4..74391293 100644 --- a/dwi_ml/models/projects/learn2track_model.py +++ b/dwi_ml/models/projects/learn2track_model.py @@ -386,7 +386,12 @@ def copy_prev_dir(self, dirs, n_prev_dirs): return copy_prev_dir - def update_hidden_state(self, hidden_recurrent_states, lines_to_keep): + def remove_lines_in_hidden_state( + self, hidden_recurrent_states, lines_to_keep): + """ + Utilitary method to remove a few streamlines from the hidden + state. 
+ """ if self.rnn_model.rnn_torch_key == 'lstm': # LSTM: For each layer, states are tuples; (h_t, C_t) # Size of tensors are each [1, nb_streamlines, nb_neurons] diff --git a/dwi_ml/tracking/projects/learn2track_tracker.py b/dwi_ml/tracking/projects/learn2track_tracker.py index ee877362..06ec3216 100644 --- a/dwi_ml/tracking/projects/learn2track_tracker.py +++ b/dwi_ml/tracking/projects/learn2track_tracker.py @@ -89,5 +89,5 @@ def update_memory_after_removing_lines(self, can_continue: np.ndarray, _): Indexes of lines that are kept. """ # Hidden states: list[states] (One value per layer). - self.hidden_recurrent_states = self.model.update_hidden_state( + self.hidden_recurrent_states = self.model.remove_lines_in_hidden_state( self.hidden_recurrent_states, can_continue) diff --git a/dwi_ml/training/batch_loaders.py b/dwi_ml/training/batch_loaders.py index 9a00343a..80ea704a 100644 --- a/dwi_ml/training/batch_loaders.py +++ b/dwi_ml/training/batch_loaders.py @@ -5,9 +5,9 @@ These classes define how to sample the streamlines available in the MultiSubjectData. -AbstractBatchSampler: +AbstractBatchLoader: -- Define the load_batch method: +- Defines the load_batch method: - Loads the streamlines associated to sampled ids. Can resample them. - Performs data augmentation (on-the-fly to avoid having to multiply data @@ -21,9 +21,9 @@ ---------- Implemented child classes -BatchStreamlinesSamplerOneInput: +BatchLoaderOneInput: -- Redefines the load_batch method: +- Defines the load_batch_inputs method: - Now also loads the input data under each point of the streamline (and possibly its neighborhood), for one input volume. diff --git a/dwi_ml/training/projects/learn2track_trainer.py b/dwi_ml/training/projects/learn2track_trainer.py index c37dedcb..50e17af5 100644 --- a/dwi_ml/training/projects/learn2track_trainer.py +++ b/dwi_ml/training/projects/learn2track_trainer.py @@ -7,7 +7,7 @@ from dwi_ml.models.projects.learn2track_model import Learn2TrackModel from dwi_ml.tracking.propagation import propagate_multiple_lines -from dwi_ml.training.projects.trainers_for_generation import \ +from dwi_ml.training.with_generation.trainer import \ DWIMLTrainerForTrackingOneInput logger = logging.getLogger('trainer_logger') @@ -72,8 +72,8 @@ def propagate_multiple_lines(self, lines: List[torch.Tensor], ids_per_subj): def update_memory_after_removing_lines(can_continue: np.ndarray, _): nonlocal hidden_states - hidden_states = self.model.update_hidden_state(hidden_states, - can_continue) + hidden_states = self.model.remove_lines_in_hidden_state( + hidden_states, can_continue) def get_dirs_at_last_pos(_lines: List[torch.Tensor], n_last_pos): nonlocal hidden_states @@ -83,7 +83,8 @@ def get_dirs_at_last_pos(_lines: List[torch.Tensor], n_last_pos): ids_per_subj) _model_outputs, hidden_states = self.model( - batch_inputs, _lines, return_hidden=True, point_idx=-1) + batch_inputs, _lines, hidden_recurrent_states=hidden_states, + return_hidden=True, point_idx=-1) next_dirs = self.model.get_tracking_directions( _model_outputs, algo='det', eos_stopping_thresh=0.5) diff --git a/dwi_ml/training/projects/transformer_trainer.py b/dwi_ml/training/projects/transformer_trainer.py index bb76872c..2553f2f6 100644 --- a/dwi_ml/training/projects/transformer_trainer.py +++ b/dwi_ml/training/projects/transformer_trainer.py @@ -1,13 +1,8 @@ # -*- coding: utf-8 -*- -import logging -from typing import List import torch -from dwi_ml.models.projects.transforming_tractography import AbstractTransformerModel -from dwi_ml.training.batch_samplers 
import DWIMLBatchIDSampler -from dwi_ml.training.batch_loaders import DWIMLBatchLoaderOneInput -from dwi_ml.training.projects.trainers_for_generation import \ +from dwi_ml.training.with_generation.trainer import \ DWIMLTrainerForTrackingOneInput diff --git a/dwi_ml/training/trainers.py b/dwi_ml/training/trainers.py index 7df0a6a3..93ce44f2 100644 --- a/dwi_ml/training/trainers.py +++ b/dwi_ml/training/trainers.py @@ -267,17 +267,26 @@ def __init__(self, # D. Monitors # grad_norm = The total norm (sqrt(sum(params**2))) of parameters # before gradient clipping, if any. - self.train_loss_monitor = BatchHistoryMonitor(weighted=True) - self.valid_loss_monitor = BatchHistoryMonitor(weighted=True) - self.grad_norm_monitor = BatchHistoryMonitor(weighted=False) - self.training_time_monitor = TimeMonitor() - self.validation_time_monitor = TimeMonitor() - if patience: - self.best_epoch_monitor = BestEpochMonitor(patience, patience_delta) - else: - # We won't use early stopping to stop the epoch, but we will use - # it as monitor of the best epochs. - self.best_epoch_monitor = BestEpochMonitor(patience=np.inf) + self.train_loss_monitor = BatchHistoryMonitor( + 'train_loss_monitor', weighted=True) + self.valid_loss_monitor = BatchHistoryMonitor( + 'valid_loss_monitor', weighted=True) + self.grad_norm_monitor = BatchHistoryMonitor( + 'grad_norm_monitor', weighted=False) + self.training_time_monitor = TimeMonitor('training_time_monitor') + self.validation_time_monitor = TimeMonitor('validation_time_monitor') + if not patience: + patience = np.inf + self.best_epoch_monitor = BestEpochMonitor( + 'best_epoch_monitor', patience, patience_delta) + self.monitors = [self.train_loss_monitor, self.valid_loss_monitor, + self.grad_norm_monitor, self.training_time_monitor, + self.validation_time_monitor, self.best_epoch_monitor] + self.training_monitors = [self.train_loss_monitor, + self.grad_norm_monitor, + self.training_time_monitor] + self.validation_monitors = [self.valid_loss_monitor, + self.validation_time_monitor] # E. Comet Experiment # Values will be instantiated in train(). @@ -401,19 +410,16 @@ def _prepare_checkpoint_info(self) -> dict: # C. Nb of batches per epoch. 'nb_batches_train': self.nb_batches_train, 'nb_batches_valid': self.nb_batches_valid, - # D. Monitors - 'best_epoch_monitoring_state': self.best_epoch_monitor.get_state(), - 'train_loss_monitor_state': self.train_loss_monitor.get_state(), - 'valid_loss_monitor_state': self.valid_loss_monitor.get_state(), - 'grad_norm_monitor_state': self.grad_norm_monitor.get_state(), - 'training_time_monitor_state': self.training_time_monitor.get_state(), - 'validation_time_monitor_state': self.validation_time_monitor.get_state(), - # E. Comet Experiment + # D. Comet Experiment 'comet_key': self.comet_key, - # F. Optimizer + # E. Optimizer 'optimizer_state': self.optimizer.state_dict(), } + # F. Monitors + for monitor in self.monitors: + current_states[monitor.name + '_state'] = monitor.get_state() + # Additional params are the parameters necessary to load data, batch # samplers/loaders (see the example script dwiml_train_model.py). checkpoint_info = { @@ -483,21 +489,17 @@ def _update_states_from_checkpoint(self, current_states): self.nb_batches_train = current_states['nb_batches_train'] self.nb_batches_valid = current_states['nb_batches_valid'] - # D. 
Monitors - self.best_epoch_monitor.set_state(current_states['best_epoch_monitoring_state']) - self.train_loss_monitor.set_state(current_states['train_loss_monitor_state']) - self.valid_loss_monitor.set_state(current_states['valid_loss_monitor_state']) - self.grad_norm_monitor.set_state(current_states['grad_norm_monitor_state']) - self.training_time_monitor.set_state(current_states['training_time_monitor_state']) - self.validation_time_monitor.set_state(current_states['validation_time_monitor_state']) - - # E. Comet Experiment + # D. Comet Experiment # Experiment will be instantiated in train(). self.comet_key = current_states['comet_key'] - # F. Optimizer + # E. Optimizer self.optimizer.load_state_dict(current_states['optimizer_state']) + # F. Monitors + for monitor in self.monitors: + monitor.set_state(current_states[monitor.name + '_state']) + def _init_comet(self): """ For more information on comet, see our doc/Getting Started @@ -670,6 +672,8 @@ def train_and_validate(self): break def _get_latest_loss_to_supervise_best(self): + # This can be overriden by child classes if you possesss other + # test metrics than the loss. if self.use_validation: mean_epoch_loss = self.valid_loss_monitor.average_per_epoch[-1] else: @@ -678,16 +682,13 @@ def _get_latest_loss_to_supervise_best(self): return mean_epoch_loss def save_local_logs(self): - self._save_log_locally(self.train_loss_monitor.average_per_epoch, - "training_loss_per_epoch.npy") - self._save_log_locally(self.valid_loss_monitor.average_per_epoch, - "validation_loss_per_epoch.npy") - self._save_log_locally(self.grad_norm_monitor.average_per_epoch, - "gradient_norm.npy") - self._save_log_locally(self.training_time_monitor.epoch_durations, - "training_epochs_duration") - self._save_log_locally(self.validation_time_monitor.epoch_durations, - "validation_epochs_duration") + for monitor in self.monitors: + if isinstance(monitor, BatchHistoryMonitor): + self._save_log_locally(monitor.average_per_epoch, + monitor.name + '_per_epoch.npy') + elif isinstance(monitor, TimeMonitor): + self._save_log_locally(monitor.epoch_durations, + monitor.name + '_duration.npy') def _clear_handles(self): # Make sure there are no existing HDF handles if using parallel workers @@ -719,9 +720,8 @@ def train_one_epoch(self, epoch): """ Train one epoch of the model: loop on all batches (forward + backward). """ - self.training_time_monitor.start_new_epoch() - self.train_loss_monitor.start_new_epoch() - self.grad_norm_monitor.start_new_epoch() + for monitor in self.training_monitors: + monitor.start_new_epoch() # Setting contexts self.batch_loader.set_context('training') @@ -768,9 +768,8 @@ def train_one_epoch(self, epoch): del train_iterator # Saving epoch's information - self.train_loss_monitor.end_epoch() - self.grad_norm_monitor.end_epoch() - self.training_time_monitor.end_epoch() + for monitor in self.training_monitors: + monitor.end_epoch() self._update_comet_after_epoch('training', epoch) all_n = self.train_loss_monitor.current_epoch_batch_weights @@ -781,8 +780,8 @@ def validate_one_epoch(self, epoch): """ Validate one epoch of the model: loop on all batches. 
""" - self.validation_time_monitor.start_new_epoch() - self.valid_loss_monitor.start_new_epoch() + for monitor in self.validation_monitors: + monitor.start_new_epoch() # Setting contexts # Turn gradients off (no back-propagation) @@ -839,8 +838,8 @@ def validate_one_epoch(self, epoch): del valid_iterator # Save info - self.valid_loss_monitor.end_epoch() - self.validation_time_monitor.end_epoch() + for monitor in self.validation_monitors: + monitor.end_epoch() self._update_comet_after_epoch('validation', epoch) def validate_one_batch(self, data, epoch): @@ -858,21 +857,16 @@ def _update_comet_after_epoch(self, context: str, epoch: int): local_context: prefix when saving log. Training_ or Validate_ for instance. """ - if context == 'training': - loss = self.train_loss_monitor.average_per_epoch[-1] - elif context == 'validation': - loss = self.valid_loss_monitor.average_per_epoch[-1] - else: - raise ValueError("Unexpected context ({}) for comet. Expecting " - "training or validation.") - logger.info(" Mean loss for this epoch: {}".format(loss)) - if self.comet_exp: if context == 'training': comet_context = self.comet_exp.train - self._update_gradnorm_logs_after_epoch(comet_context, epoch) - else: # context == 'validation': + monitors = self.training_monitors + elif context == 'validation': comet_context = self.comet_exp.validate + monitors = self.validation_monitors + else: + raise ValueError("Unexpected context ({}) for comet. Expecting " + "training or validation.") with comet_context(): # Not really implemented yet. @@ -880,16 +874,17 @@ def _update_comet_after_epoch(self, context: str, epoch: int): # Cheating. To have a correct plotting per epoch (no step) # using step = epoch. In comet_ml, it is intended to be # step = batch. - self.comet_exp.log_metric("loss_per_epoch", loss, epoch=0, - step=epoch) - - def _update_gradnorm_logs_after_epoch(self, comet_context, epoch: int): - if self.comet_exp: - with comet_context(): - self.comet_exp.log_metric( - "mean_gradient_norm_per_epoch", - self.grad_norm_monitor.average_per_epoch[epoch], - epoch=None, step=epoch) + for monitor in monitors: + if isinstance(monitor, BatchHistoryMonitor): + value = monitor.average_per_epoch[-1] + elif isinstance(monitor, TimeMonitor): + value = monitor.epoch_durations[-1] + else: + continue + logger.info(" Mean {} for this epoch: {}" + .format(monitor.name, value)) + self.comet_exp.log_metric( + monitor.name, value, epoch=0, step=epoch) def _save_best_model(self): logger.info(" Best epoch yet! Saving model and loss history.") @@ -945,7 +940,6 @@ def fix_parameters(self): """ pass - def _save_log_locally(self, array: np.ndarray, fname: str): np.save(os.path.join(self.log_dir, fname), array) @@ -969,7 +963,7 @@ def check_stopping_cause(checkpoint_state, new_patience=None, # 1. Check if early stopping had been triggered. best_monitor_state = \ - checkpoint_state['current_states']['best_epoch_monitoring_state'] + checkpoint_state['current_states']['best_epoch_monitor_state'] bad_epochs = best_monitor_state['n_bad_epochs'] if new_patience is None: # No new patience: checking if early stopping had been triggered. 
diff --git a/dwi_ml/training/utils/batch_loaders.py b/dwi_ml/training/utils/batch_loaders.py index 3f0d8e7d..4a9652b3 100644 --- a/dwi_ml/training/utils/batch_loaders.py +++ b/dwi_ml/training/utils/batch_loaders.py @@ -4,7 +4,8 @@ from dwi_ml.experiment_utils.prints import format_dict_to_str from dwi_ml.experiment_utils.timer import Timer -from dwi_ml.training.batch_loaders import DWIMLBatchLoaderOneInput +from dwi_ml.training.with_generation.batch_loader import \ + DWIMLBatchLoaderWithConnectivity def add_args_batch_loader(p: argparse.ArgumentParser): @@ -37,7 +38,7 @@ def add_args_batch_loader(p: argparse.ArgumentParser): def prepare_batch_loader(dataset, model, args, sub_loggers_level): # Preparing the batch loader. with Timer("\nPreparing batch loader...", newline=True, color='pink'): - batch_loader = DWIMLBatchLoaderOneInput( + batch_loader = DWIMLBatchLoaderWithConnectivity( dataset=dataset, model=model, input_group_name=args.input_group_name, streamline_group_name=args.streamline_group_name, diff --git a/dwi_ml/training/utils/monitoring.py b/dwi_ml/training/utils/monitoring.py index 65e1c2bb..89fd9eac 100644 --- a/dwi_ml/training/utils/monitoring.py +++ b/dwi_ml/training/utils/monitoring.py @@ -8,7 +8,8 @@ class TimeMonitor(object): - def __init__(self): + def __init__(self, name): + self.name = name self.epoch_durations = [] self._start_time = None @@ -49,7 +50,8 @@ class BatchHistoryMonitor(object): loss_monitor.epochs_means # returns the loss curve as a list """ - def __init__(self, weighted: bool = False): + def __init__(self, name, weighted: bool = False): + self.name = name self.is_weighted = weighted # State: @@ -127,7 +129,7 @@ class BestEpochMonitor(object): number of epochs ("patience"). """ - def __init__(self, patience: int, patience_delta: float = 1e-6): + def __init__(self, name, patience: int, patience_delta: float = 1e-6): """ Parameters ----------- @@ -137,6 +139,7 @@ def __init__(self, patience: int, patience_delta: float = 1e-6): Precision term to define what we consider as "improving": when the loss is at least min_eps smaller than the previous best loss. """ + self.name = name self.patience = patience self.min_eps = patience_delta diff --git a/dwi_ml/training/with_generation/__init__.py b/dwi_ml/training/with_generation/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/dwi_ml/training/with_generation/batch_loader.py b/dwi_ml/training/with_generation/batch_loader.py new file mode 100644 index 00000000..a56a0a8c --- /dev/null +++ b/dwi_ml/training/with_generation/batch_loader.py @@ -0,0 +1,38 @@ +# -*- coding: utf-8 -*- +from typing import List, Dict + +import torch + +from dwi_ml.training.batch_loaders import DWIMLBatchLoaderOneInput + + +class DWIMLBatchLoaderWithConnectivity(DWIMLBatchLoaderOneInput): + def __init__(self, **kwargs): + assert "hdf5 contains connectivity" + super().__init__(**kwargs) + + def load_batch_connectivity_matrices( + self, streamline_ids_per_subj: Dict[int, slice]): + # The batch's streamline ids will change throughout processing because + # of data augmentation, so we need to do it subject by subject to + # keep track of the streamline ids. These final ids will correspond to + # the loaded, processed streamlines, not to the ids in the hdf5 file. + subjs = list(streamline_ids_per_subj.keys()) + nb_subjs = len(subjs) + matrices = [None] * nb_subjs + volume_sizes = [None] * nb_subjs + downsampled_sizes = [None] * nb_subjs + for i, subj in enumerate(subjs): + # No cache for the sft data. Accessing it directly. 
+ # Note: If this is used through the dataloader, multiprocessing + # is used. Each process will open a handle. + subj_data = \ + self.context_subset.subjs_data_list.get_subj_with_handle(subj) + subj_sft_data = subj_data.sft_data_list[self.streamline_group_idx] + + # We could access it only at required index maybe. Loading the + # whole matrix here. + matrices[i], volume_sizes[i], downsampled_sizes[i] = \ + subj_sft_data.connectivity_matrix_and_info() + + return matrices, volume_sizes, downsampled_sizes diff --git a/dwi_ml/training/projects/trainers_for_generation.py b/dwi_ml/training/with_generation/trainer.py similarity index 56% rename from dwi_ml/training/projects/trainers_for_generation.py rename to dwi_ml/training/with_generation/trainer.py index fd03b6e9..12423e27 100644 --- a/dwi_ml/training/projects/trainers_for_generation.py +++ b/dwi_ml/training/with_generation/trainer.py @@ -7,11 +7,15 @@ import torch from torch.nn import PairwiseDistance +from dwi_ml.data.processing.streamlines.post_processing import \ + compute_triu_connectivity from dwi_ml.models.main_models import ModelWithDirectionGetter from dwi_ml.tracking.propagation import propagate_multiple_lines from dwi_ml.tracking.projects.utils import prepare_tracking_mask from dwi_ml.training.trainers import DWIMLTrainerOneInput from dwi_ml.training.utils.monitoring import BatchHistoryMonitor, TimeMonitor +from dwi_ml.training.with_generation.batch_loader import \ + DWIMLBatchLoaderWithConnectivity logger = logging.getLogger('train_logger') @@ -26,6 +30,7 @@ class DWIMLTrainerForTrackingOneInput(DWIMLTrainerOneInput): model: ModelWithDirectionGetter + batch_loader: DWIMLBatchLoaderWithConnectivity def __init__(self, add_a_tracking_validation_phase: bool = False, tracking_phase_frequency: int = 5, @@ -59,19 +64,44 @@ def __init__(self, add_a_tracking_validation_phase: bool = False, self.tracking_mask.move_to(self.device) # -------- Monitors - self.tracking_valid_time_monitor = TimeMonitor() + self.tracking_valid_time_monitor = TimeMonitor( + 'tracking_valid_time_monitor') + + # A lot of exploratory metrics monitors: # Percentage of streamlines inside a radius - self.tracking_very_good_IS_monitor = BatchHistoryMonitor(weighted=True) - self.tracking_acceptable_IS_monitor = BatchHistoryMonitor(weighted=True) - self.tracking_very_far_IS_monitor = BatchHistoryMonitor(weighted=True) + self.tracking_very_good_IS_monitor = BatchHistoryMonitor( + 'tracking_very_good_IS_monitor', weighted=True) + self.tracking_acceptable_IS_monitor = BatchHistoryMonitor( + 'tracking_acceptable_IS_monitor', weighted=True) + self.tracking_very_far_IS_monitor = BatchHistoryMonitor( + 'tracking_very_far_IS_monitor', weighted=True) # Point where the streamline start diverging from "acceptable" - self.tracking_valid_diverg_monitor = BatchHistoryMonitor(weighted=True) + self.tracking_valid_diverg_monitor = BatchHistoryMonitor( + 'tracking_valid_diverg_monitor', weighted=True) # Final distance from expected point - self.tracking_mean_final_distance_monitor = BatchHistoryMonitor(weighted=True) - self.tracking_clipped_final_distance_monitor = BatchHistoryMonitor(weighted=True) + self.tracking_mean_final_distance_monitor = BatchHistoryMonitor( + 'tracking_mean_final_distance_monitor', weighted=True) + self.tracking_clipped_final_distance_monitor = BatchHistoryMonitor( + 'tracking_clipped_final_distance_monitor', weighted=True) + + # Connectivity matrix accordance + self.tracking_connectivity_score_monitor = BatchHistoryMonitor( + 'tracking_connectivity_score_monitor', 
weighted=True) + + if self.add_a_tracking_validation_phase: + new_monitors = [self.tracking_valid_time_monitor, + self.tracking_very_good_IS_monitor, + self.tracking_acceptable_IS_monitor, + self.tracking_very_far_IS_monitor, + self.tracking_valid_diverg_monitor, + self.tracking_mean_final_distance_monitor, + self.tracking_clipped_final_distance_monitor, + self.tracking_connectivity_score_monitor] + self.monitors += new_monitors + self.validation_monitors += new_monitors @property def params_for_checkpoint(self): @@ -85,102 +115,12 @@ def params_for_checkpoint(self): return p - def _update_states_from_checkpoint(self, current_states): - super()._update_states_from_checkpoint(current_states) - self.tracking_very_good_IS_monitor.set_state( - current_states['tracking_very_good_IS_monitor_state']) - self.tracking_acceptable_IS_monitor.set_state( - current_states['tracking_acceptable_IS_monitor_state']) - self.tracking_very_far_IS_monitor.set_state( - current_states['tracking_very_far_IS_monitor_state']) - - self.tracking_valid_diverg_monitor.set_state( - current_states['tracking_valid_diverg_monitor_state']) - - self.tracking_mean_final_distance_monitor.set_state( - current_states['tracking_valid_loss_monitor_state']) - self.tracking_clipped_final_distance_monitor.set_state( - current_states['tracking_clipped_valid_loss_monitor_state']) - - def _prepare_checkpoint_info(self) -> dict: - checkpoint_info = super()._prepare_checkpoint_info() - checkpoint_info['current_states'].update({ - 'tracking_very_good_IS_monitor_state': - self.tracking_very_good_IS_monitor.get_state(), - 'tracking_acceptable_IS_monitor_state': - self.tracking_acceptable_IS_monitor.get_state(), - 'tracking_very_far_IS_monitor_state': - self.tracking_very_far_IS_monitor.get_state(), - - 'tracking_valid_diverg_monitor_state': - self.tracking_valid_diverg_monitor.get_state(), - - 'tracking_valid_loss_monitor_state': - self.tracking_mean_final_distance_monitor.get_state(), - 'tracking_clipped_valid_loss_monitor_state': - self.tracking_clipped_final_distance_monitor.get_state(), - }) - return checkpoint_info - - def save_local_logs(self): - super().save_local_logs() - - self._save_log_locally( - self.tracking_very_good_IS_monitor.average_per_epoch, - "tracking_validation_very_good_IS_per_epoch_{}.npy" - .format(VERY_CLOSE_THRESHOLD)) - self._save_log_locally( - self.tracking_acceptable_IS_monitor.average_per_epoch, - "tracking_validation_acceptable_IS_per_epoch_{}.npy" - .format(ACCEPTABLE_THRESHOLD)) - self._save_log_locally( - self.tracking_very_far_IS_monitor.average_per_epoch, - "tracking_validation_very_far_IS_per_epoch_{}.npy" - .format(VERY_FAR_THRESHOLD)) - - self._save_log_locally( - self.tracking_valid_diverg_monitor.average_per_epoch, - "tracking_validation_diverg_per_epoch.npy") - - self._save_log_locally( - self.tracking_mean_final_distance_monitor.average_per_epoch, - "tracking_validation_loss_per_epoch.npy") - self._save_log_locally( - self.tracking_clipped_final_distance_monitor.average_per_epoch, - "tracking_clipped_validation_loss_per_epoch.npy") - - def validate_one_epoch(self, epoch): - if self.add_a_tracking_validation_phase: - self.tracking_very_good_IS_monitor.start_new_epoch() - self.tracking_acceptable_IS_monitor.start_new_epoch() - self.tracking_very_far_IS_monitor.start_new_epoch() - self.tracking_valid_diverg_monitor.start_new_epoch() - self.tracking_mean_final_distance_monitor.start_new_epoch() - self.tracking_clipped_final_distance_monitor.start_new_epoch() - 
self.tracking_valid_time_monitor.start_new_epoch() - - # This will run our modified "validate one batch" for each batch. - super().validate_one_epoch(epoch) - - if self.add_a_tracking_validation_phase: - self.tracking_very_good_IS_monitor.end_epoch() - self.tracking_acceptable_IS_monitor.end_epoch() - self.tracking_very_far_IS_monitor.end_epoch() - self.tracking_valid_diverg_monitor.end_epoch() - self.tracking_mean_final_distance_monitor.end_epoch() - self.tracking_clipped_final_distance_monitor.end_epoch() - self.tracking_valid_time_monitor.end_epoch() - - # Save info - if self.comet_exp: - self._update_comet_after_epoch('validation', epoch, - tracking_phase=True) - def _get_latest_loss_to_supervise_best(self): if self.use_validation: - if False: # self.add_a_tracking_validation_phase: + if self.add_a_tracking_validation_phase: # Compared to super, replacing by tracking_valid loss. - mean_epoch_loss = self.tracking_clipped_final_distance_monitor.average_per_epoch[-1] + mean_epoch_loss = \ + self.tracking_connectivity_score_monitor.average_per_epoch[-1] # Could use IS instead, or non-clipped, or diverging point. # Not implemented. @@ -200,15 +140,23 @@ def validate_one_batch(self, data, epoch): "from batch.") (gen_n, mean_final_dist, mean_clipped_final_dist, percent_IS_very_good, percent_IS_acceptable, percent_IS_very_far, - diverging_pnt) = self.generate_from_one_batch(data) - - self.tracking_very_good_IS_monitor.update(percent_IS_very_good, weight=n) - self.tracking_acceptable_IS_monitor.update(percent_IS_acceptable, weight=n) - self.tracking_very_far_IS_monitor.update(percent_IS_very_far, weight=n) - - self.tracking_mean_final_distance_monitor.update(mean_final_dist, weight=n) - self.tracking_clipped_final_distance_monitor.update(mean_clipped_final_dist, weight=n) - self.tracking_valid_diverg_monitor.update(diverging_pnt, weight=n) + diverging_pnt, connectivity) = self.generate_from_one_batch(data) + + self.tracking_very_good_IS_monitor.update( + percent_IS_very_good, weight=n) + self.tracking_acceptable_IS_monitor.update( + percent_IS_acceptable, weight=n) + self.tracking_very_far_IS_monitor.update( + percent_IS_very_far, weight=n) + + self.tracking_mean_final_distance_monitor.update( + mean_final_dist, weight=n) + self.tracking_clipped_final_distance_monitor.update( + mean_clipped_final_dist, weight=n) + self.tracking_valid_diverg_monitor.update( + diverging_pnt, weight=n) + + self.tracking_connectivity_score_monitor.update(connectivity) elif len(self.tracking_mean_final_distance_monitor.average_per_epoch) == 0: logger.info("Skipping tracking-like generation validation from " "batch. No values yet: adding fake initial values.") @@ -226,6 +174,8 @@ def validate_one_batch(self, data, epoch): # Bad mean dist = very far. ex, 100, or clipped. self.tracking_mean_final_distance_monitor.update(100.0) self.tracking_clipped_final_distance_monitor.update(ACCEPTABLE_THRESHOLD) + + self.tracking_connectivity_score_monitor.update(1) else: logger.info("Skipping tracking-like generation validation from " "batch. 
Copying previous epoch's values.") @@ -240,58 +190,6 @@ def validate_one_batch(self, data, epoch): return mean_loss, n - def _update_comet_after_epoch(self, context: str, epoch: int, - tracking_phase=False): - if tracking_phase: - torch.set_printoptions(precision=4) - np.set_printoptions(precision=4) - - final_dist = self.tracking_mean_final_distance_monitor.average_per_epoch[-1] - clipped = self.tracking_clipped_final_distance_monitor.average_per_epoch[-1] - logger.info(" Mean final distance for this epoch: {}\n" - " (Clipped at {}: {})" - .format(final_dist, ACCEPTABLE_THRESHOLD, clipped)) - - percent_IS_good = self.tracking_very_good_IS_monitor.average_per_epoch[-1] - percent_IS_ok = self.tracking_acceptable_IS_monitor.average_per_epoch[-1] - percent_IS_bad = self.tracking_very_far_IS_monitor.average_per_epoch[-1] - - logger.info("Mean simili-IS ratio for this epoch:\n" - " Threshold {}: {}\n" - " Threshold {}: {}\n" - " Threshold {}: {}" - .format(VERY_CLOSE_THRESHOLD, percent_IS_good, - ACCEPTABLE_THRESHOLD, percent_IS_ok, - VERY_FAR_THRESHOLD, percent_IS_bad)) - - diverg = self.tracking_valid_diverg_monitor.average_per_epoch[-1] - logger.info("Mean diverging point for this epoch: {}\n" - " (percentage of streamline where distance becomes >{}, " - "or percentage above 100% for streamlines longer than " - "expected)".format(diverg, ACCEPTABLE_THRESHOLD)) - - if self.comet_exp: - comet_context = self.comet_exp.validate - with comet_context(): - self.comet_exp.log_metric( - "Mean final distance", final_dist, step=epoch) - self.comet_exp.log_metric( - "Mean final distance (clipped {})" - .format(ACCEPTABLE_THRESHOLD), clipped, step=epoch) - self.comet_exp.log_metric( - "IS ratio at dist {}".format(VERY_CLOSE_THRESHOLD), - percent_IS_good, step=epoch) - self.comet_exp.log_metric( - "IS ratio at dist {}".format(ACCEPTABLE_THRESHOLD), - percent_IS_ok, step=epoch) - self.comet_exp.log_metric( - "IS ratio at dist {}".format(VERY_FAR_THRESHOLD), - percent_IS_bad, step=epoch) - self.comet_exp.log_metric( - "Diverging point", diverg, step=epoch) - - super()._update_comet_after_epoch(context, epoch) - def generate_from_one_batch(self, data): # Data interpolation has not been done yet. GPU computations are done # here in the main thread. @@ -308,10 +206,15 @@ def generate_from_one_batch(self, data): # (model is already moved). Using only the n first points lines = [s[0:min(len(s), self.tracking_phase_nb_steps_init), :] for s in real_lines] + + # Propagation: no backward tracking. self.model.set_context('tracking') lines = self.propagate_multiple_lines(lines, ids_per_subj) self.model.set_context('validation') + # 1. Connectivity scores + connectivity_score = self._compare_connectivity(lines, ids_per_subj) + compute_mean_length = np.mean([len(s) for s in lines]) logger.info("-> Average streamline length (nb pts) in this batch: {} \n" " Average recovered streamline length: {}" @@ -323,18 +226,18 @@ def generate_from_one_batch(self, data): l2_loss = PairwiseDistance(p=2) final_dist = l2_loss(computed_last_pos, last_pos) - # Verify "IS ratio", i.e. percentage of streamlines ending inside a + # 2. Verify "IS ratio", i.e. percentage of streamlines ending inside a # predefined radius. 
- IS_ratio_good = torch.sum(final_dist > VERY_CLOSE_THRESHOLD) / len(lines) * 100 - IS_ratio_ok = torch.sum(final_dist > ACCEPTABLE_THRESHOLD) / len(lines) * 100 - IS_ratio_bad = torch.sum(final_dist > VERY_FAR_THRESHOLD) / len(lines) * 100 + invalid_ratio_severe = torch.sum(final_dist > VERY_CLOSE_THRESHOLD) / len(lines) * 100 + invalid_ratio_acceptable = torch.sum(final_dist > ACCEPTABLE_THRESHOLD) / len(lines) * 100 + invalid_ratio_loose = torch.sum(final_dist > VERY_FAR_THRESHOLD) / len(lines) * 100 final_dist_clipped = torch.clip(final_dist, min=None, max=ACCEPTABLE_THRESHOLD) final_dist = torch.mean(final_dist) final_dist_clipped = torch.mean(final_dist_clipped) - # Verify point where streamline starts diverging. + # 3. Verify point where streamline starts diverging. # 0% = error at first point --> really bad. # 100% = reached exactly the right point. # >100% = went too far (longer than expected). @@ -358,14 +261,43 @@ def generate_from_one_batch(self, data): total_point += abs(100 - div_point) diverging_point = total_point / len(lines) - IS_ratio_good = IS_ratio_good.cpu().numpy().astype(np.float32) - IS_ratio_ok = IS_ratio_ok.cpu().numpy().astype(np.float32) - IS_ratio_bad = IS_ratio_bad.cpu().numpy().astype(np.float32) + invalid_ratio_severe = invalid_ratio_severe.cpu().numpy().astype(np.float32) + invalid_ratio_acceptable = invalid_ratio_acceptable.cpu().numpy().astype(np.float32) + invalid_ratio_loose = invalid_ratio_loose.cpu().numpy().astype(np.float32) final_dist = final_dist.cpu().numpy().astype(np.float32) final_dist_clipped = final_dist_clipped.cpu().numpy().astype(np.float32) diverging_point = np.asarray(diverging_point, dtype=np.float32) return (len(lines), final_dist, final_dist_clipped, - IS_ratio_good, IS_ratio_ok, IS_ratio_bad, diverging_point) + invalid_ratio_severe, invalid_ratio_acceptable, invalid_ratio_loose, diverging_point, + connectivity_score) + + def _compare_connectivity(self, lines, ids_per_subj): + connectivity_matrices, volume_sizes, downsampled_sizes = \ + self.batch_loader.load_batch_connectivity_matrices(ids_per_subj) + + score = 0.0 + for i, subj in enumerate(ids_per_subj.keys()): + real_matrix = connectivity_matrices[i] + volume_size = volume_sizes[i] + downsampled_size = downsampled_sizes[i] + _lines = lines[ids_per_subj[subj]] + + batch_matrix = compute_triu_connectivity( + _lines, volume_size, downsampled_size, + binary=False, to_sparse_tensor=False, device=self.device) + + # Where our batch has a 1, if there was really a one: score should + # be 0. Else, score should be 1. + # If two streamlines in a voxel, score is 0 or 2. + + # Real matrices are saved as binary in create_hdf5. + where_one = np.where(batch_matrix > 0) + score += np.sum(batch_matrix[where_one] * + (1.0 - real_matrix[where_one])) + + # Average for batch + score = score / len(lines) + return score def propagate_multiple_lines(self, lines: List[torch.Tensor], ids_per_subj): assert self.model.step_size is not None, \ diff --git a/dwi_ml/unit_tests/test_connectivity_matrix.py b/dwi_ml/unit_tests/test_connectivity_matrix.py new file mode 100644 index 00000000..9fbac217 --- /dev/null +++ b/dwi_ml/unit_tests/test_connectivity_matrix.py @@ -0,0 +1,29 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +import numpy as np + +from dwi_ml.data.processing.streamlines.post_processing import compute_triu_connectivity + + +def test_connectivity(): + # Ex: Volume is 16 x 16 + + # Streamline starting at the lowest left side to the highest right side. 
+ streamline = [[0, 0], [15.9, 15.9]] + streamlines = [streamline, streamline] + + # to a 4x4 matrix, should have two values from "ROI" 0 to "ROI" 15. + expected_m = np.zeros((16, 16), dtype=int) + expected_m[0, 15] = 2 + # expected_m[15, 0] = 2 ----> but triu + print("Expected connectivity matrix: {}".format(expected_m)) + + m = compute_triu_connectivity(streamlines, (16, 16), (4, 4)) + print("Got {}".format(m)) + assert np.array_equal(m, expected_m) + + m = compute_triu_connectivity(streamlines, (16, 16), (4, 4), + to_sparse_tensor=True) + m2 = m.to_dense().numpy().astype(int) + print("Converting to sparse and back to dense: {}".format(m2)) + assert np.array_equal(m2, expected_m) diff --git a/scripts_python/dwiml_create_hdf5_dataset.py b/scripts_python/dwiml_create_hdf5_dataset.py index 94759b27..a0445178 100644 --- a/scripts_python/dwiml_create_hdf5_dataset.py +++ b/scripts_python/dwiml_create_hdf5_dataset.py @@ -29,7 +29,7 @@ from dipy.io.stateful_tractogram import set_sft_logger_level from dwi_ml.data.hdf5.utils import ( - add_basic_args, add_mri_processing_args, add_streamline_processing_args, + add_hdf5_creation_args, add_mri_processing_args, add_streamline_processing_args, prepare_hdf5_creator) from dwi_ml.experiment_utils.timer import Timer @@ -38,7 +38,7 @@ def _parse_args(): p = argparse.ArgumentParser(description=__doc__, formatter_class=argparse.RawTextHelpFormatter) - add_basic_args(p) + add_hdf5_creation_args(p) add_mri_processing_args(p) add_streamline_processing_args(p) add_overwrite_arg(p) @@ -74,6 +74,11 @@ def main(): "received {}".format(ext)) assert_outputs_exist(p, args, args.out_hdf5_file) + # Default value with arparser '+' not possible. Setting manually. + if args.compute_connectivity_matrix and \ + args.connectivity_downsample_size is None: + args.connectivity_downsample_size = 20 + # Prepare creator and load config file. creator = prepare_hdf5_creator(args) From 51cfb515a879349edea0b114274fef35823195dd Mon Sep 17 00:00:00 2001 From: EmmaRenauld Date: Thu, 8 Jun 2023 10:56:21 -0400 Subject: [PATCH 05/13] Fix generation for transformer --- dwi_ml/data/hdf5/hdf5_creation.py | 21 ++- dwi_ml/models/direction_getter_models.py | 7 +- dwi_ml/models/main_models.py | 11 +- .../projects/transforming_tractography.py | 3 +- dwi_ml/testing/visu_loss.py | 7 +- dwi_ml/tracking/propagation.py | 2 +- dwi_ml/tracking/tracker.py | 13 +- .../training/projects/learn2track_trainer.py | 6 +- .../training/projects/transformer_trainer.py | 69 +++---- dwi_ml/training/trainers.py | 123 +++++++------ dwi_ml/training/utils/trainer.py | 8 +- dwi_ml/training/with_generation/trainer.py | 174 +++++++++--------- .../utils/data_and_models_for_tests.py | 2 +- scripts_python/l2t_train_from_pretrained.py | 141 ++++++++++++++ scripts_python/l2t_train_model.py | 2 +- scripts_python/tto_train_model.py | 2 +- scripts_python/ttst_train_model.py | 12 +- 17 files changed, 385 insertions(+), 218 deletions(-) create mode 100644 scripts_python/l2t_train_from_pretrained.py diff --git a/dwi_ml/data/hdf5/hdf5_creation.py b/dwi_ml/data/hdf5/hdf5_creation.py index e2209c65..3ed62346 100644 --- a/dwi_ml/data/hdf5/hdf5_creation.py +++ b/dwi_ml/data/hdf5/hdf5_creation.py @@ -159,16 +159,17 @@ def __init__(self, root_folder: Path, out_hdf_filename: Path, self.step_size = step_size self.compress = compress self.compute_connectivity = compute_connectivity_matrix - if isinstance(downsampled_size_for_connectivity, List): - assert len(downsampled_size_for_connectivity) == 3, \ - "Expecting to work with 3D volumes. 
Expecting connectivity " \ - "downsample size to be a list of 3 values." - self.connectivity_downsample_size = downsampled_size_for_connectivity - else: - assert isinstance(downsampled_size_for_connectivity, int), \ - "Expecting the connectivity matrix size to be either a 3D " \ - "list or an integer, but got {}"\ - .format(downsampled_size_for_connectivity) + if self.compute_connectivity: + if isinstance(downsampled_size_for_connectivity, List): + assert len(downsampled_size_for_connectivity) == 3, \ + "Expecting to work with 3D volumes. Expecting " \ + "connectivity downsample size to be a list of 3 values." + self.connectivity_downsample_size = downsampled_size_for_connectivity + else: + assert isinstance(downsampled_size_for_connectivity, int), \ + "Expecting the connectivity matrix size to be either " \ + "a 3D list or an integer, but got {}" \ + .format(downsampled_size_for_connectivity) self.connectivity_downsample_size = [downsampled_size_for_connectivity] * 3 # Optional diff --git a/dwi_ml/models/direction_getter_models.py b/dwi_ml/models/direction_getter_models.py index 41865982..61c3d648 100644 --- a/dwi_ml/models/direction_getter_models.py +++ b/dwi_ml/models/direction_getter_models.py @@ -291,9 +291,8 @@ def get_tracking_directions(self, outputs, algo: str, Returns ------- - next_dirs: list - A list of numpy arrays (one per streamline), each of size (1, 3): - the three coordinates of the next direction's vector. + next_dirs: torch.Tensor + A tensor of shape [n, 3] with the next direction for each output. """ if algo == 'det': next_dirs = self._get_tracking_direction_det( @@ -301,7 +300,7 @@ def get_tracking_directions(self, outputs, algo: str, else: next_dirs = self._sample_tracking_direction_prob( outputs, eos_stopping_thresh) - return next_dirs.detach() + return next_dirs class AbstractRegressionDG(AbstractDirectionGetterModel): diff --git a/dwi_ml/models/main_models.py b/dwi_ml/models/main_models.py index 9cc864d8..74a8f928 100644 --- a/dwi_ml/models/main_models.py +++ b/dwi_ml/models/main_models.py @@ -98,6 +98,10 @@ def set_context(self, context): assert context in ['training', 'tracking'] self._context = context + @property + def context(self): + return self._context + def move_to(self, device): """ Careful. Calling model.to(a_device) does not influence the self.device. @@ -514,11 +518,12 @@ def get_tracking_directions(self, model_outputs: Tensor, algo: str, Returns ------- - next_dir: list[array(3,)] - Numpy arrays with x,y,z value, one per streamline data point. + next_dir: torch.Tensor + A tensor of shape [n, 3] with the next direction for each output. """ - return self.direction_getter.get_tracking_directions( + dirs = self.direction_getter.get_tracking_directions( model_outputs, algo, eos_stopping_thresh) + return dirs def compute_loss(self, model_outputs: List[Tensor], target_streamlines, average_results=True, **kw): diff --git a/dwi_ml/models/projects/transforming_tractography.py b/dwi_ml/models/projects/transforming_tractography.py index 02645f69..dfff723c 100644 --- a/dwi_ml/models/projects/transforming_tractography.py +++ b/dwi_ml/models/projects/transforming_tractography.py @@ -469,7 +469,8 @@ def forward(self, inputs: List[torch.tensor], # restack when computing loss. [Chosen here. See if we can improve] # b) loop on direction getter. Stack when computing loss. if self._context == 'tracking': - outputs = outputs.detach() + # If needs to detach: error? Should be using witch torch.no_grad. 
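+            # (Detaching should not be needed here anymore: callers now run
+            #  the tracking context under torch.no_grad(); see e.g. the
+            #  tracker.py and visu_loss.py changes below.)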
+ outputs = outputs # No need to actually unpad, we only take the last (unpadded) # point, newly created. (-1 for python indexing) if use_padding: # Not all the same length (backward tracking) diff --git a/dwi_ml/testing/visu_loss.py b/dwi_ml/testing/visu_loss.py index eef186b2..18b420ff 100644 --- a/dwi_ml/testing/visu_loss.py +++ b/dwi_ml/testing/visu_loss.py @@ -263,9 +263,10 @@ def run_visu_save_colored_displacement( # Either concat, run, split or (chosen:) loop # Use eos_thresh of 1 to be sure we don't output a NaN - out_dirs = [model.get_tracking_directions( - s_output, algo='det', eos_stopping_thresh=1.0).numpy() - for s_output in outputs] + with torch.no_grad(): + out_dirs = [model.get_tracking_directions( + s_output, algo='det', eos_stopping_thresh=1.0).numpy() + for s_output in outputs] # Save error together with ref sft = combine_displacement_with_ref(out_dirs, sft, model.step_size) diff --git a/dwi_ml/tracking/propagation.py b/dwi_ml/tracking/propagation.py index ecf95786..edd2ee8e 100644 --- a/dwi_ml/tracking/propagation.py +++ b/dwi_ml/tracking/propagation.py @@ -147,7 +147,7 @@ def _take_one_step_or_go_straight( next_dirs = torch.vstack(next_dirs) if normalize_directions: - next_dirs /= torch.linalg.norm(next_dirs, dim=-1)[:, None] + next_dirs = next_dirs / torch.linalg.norm(next_dirs, dim=-1)[:, None] if previous_dirs is not None: # Verify angle diff --git a/dwi_ml/tracking/tracker.py b/dwi_ml/tracking/tracker.py index 68d7b958..9f0bd582 100644 --- a/dwi_ml/tracking/tracker.py +++ b/dwi_ml/tracking/tracker.py @@ -449,12 +449,13 @@ def _get_multiple_lines_both_directions(self, seeds: List[np.ndarray]): return clean_lines, clean_seeds def _propagate_multiple_lines(self, lines: List[Tensor]): - return propagate_multiple_lines( - lines, self.update_memory_after_removing_lines, - self.get_next_dirs, self.theta, self.step_size, - self.verify_opposite_direction, self.mask, self.max_nbr_pts, - append_last_point=self.append_last_point, - normalize_directions=self.normalize_directions) + with torch.no_grad(): + return propagate_multiple_lines( + lines, self.update_memory_after_removing_lines, + self.get_next_dirs, self.theta, self.step_size, + self.verify_opposite_direction, self.mask, self.max_nbr_pts, + append_last_point=self.append_last_point, + normalize_directions=self.normalize_directions) def get_next_dirs(self, lines: List[Tensor], n_last_pos: List[Tensor]): """ diff --git a/dwi_ml/training/projects/learn2track_trainer.py b/dwi_ml/training/projects/learn2track_trainer.py index 50e17af5..5267b3ce 100644 --- a/dwi_ml/training/projects/learn2track_trainer.py +++ b/dwi_ml/training/projects/learn2track_trainer.py @@ -82,15 +82,14 @@ def get_dirs_at_last_pos(_lines: List[torch.Tensor], n_last_pos): batch_inputs = self.batch_loader.load_batch_inputs(n_last_pos, ids_per_subj) - _model_outputs, hidden_states = self.model( + model_outputs, hidden_states = self.model( batch_inputs, _lines, hidden_recurrent_states=hidden_states, return_hidden=True, point_idx=-1) next_dirs = self.model.get_tracking_directions( - _model_outputs, algo='det', eos_stopping_thresh=0.5) + model_outputs, algo='det', eos_stopping_thresh=0.5) return next_dirs - self.model.set_context('tracking') theta = 2 * np.pi # theta = 360 degrees max_nbr_pts = int(200 / self.model.step_size) results = propagate_multiple_lines( @@ -99,5 +98,4 @@ def get_dirs_at_last_pos(_lines: List[torch.Tensor], n_last_pos): verify_opposite_direction=False, mask=self.tracking_mask, max_nbr_pts=max_nbr_pts, append_last_point=False, 
normalize_directions=True) - self.model.set_context('training') return results diff --git a/dwi_ml/training/projects/transformer_trainer.py b/dwi_ml/training/projects/transformer_trainer.py index 2553f2f6..88e34a96 100644 --- a/dwi_ml/training/projects/transformer_trainer.py +++ b/dwi_ml/training/projects/transformer_trainer.py @@ -1,6 +1,9 @@ # -*- coding: utf-8 -*- +from typing import List +import numpy as np import torch +from dwi_ml.tracking.propagation import propagate_multiple_lines from dwi_ml.training.with_generation.trainer import \ DWIMLTrainerForTrackingOneInput @@ -13,34 +16,38 @@ def __init__(self, **kwargs): """ super().__init__(**kwargs) - def run_model(self, batch_inputs, batch_streamlines): - dirs = self.model.format_directions(batch_streamlines) - - # Formatting the previous dirs for all points. - n_prev_dirs = self.model.format_previous_dirs(dirs, self.device) - - # Not keeping the last point: only useful to get the last direction - # (last target), but won't be used as an input. - if n_prev_dirs is not None: - n_prev_dirs = [s[:-1] for s in n_prev_dirs] - - try: - # Apply model. This calls our model's forward function - # (the hidden states are not used here, neither as input nor - # outputs. We need them only during tracking). - model_outputs, _ = self.model(batch_inputs, n_prev_dirs, - self.device) - except RuntimeError: - # Training RNNs with variable-length sequences on the GPU can - # cause memory fragmentation in the pytorch-managed cache, - # possibly leading to "random" OOM RuntimeError during - # training. Emptying the GPU cache seems to fix the problem for - # now. We don't do it every update because it can be time - # consuming. - torch.cuda.empty_cache() - model_outputs, _ = self.model(batch_inputs, n_prev_dirs, - self.device) - - # Returning the directions too, to be re-used in compute_loss - # later instead of computing them twice. - return model_outputs, dirs + def propagate_multiple_lines(self, lines: List[torch.Tensor], ids_per_subj): + assert self.model.step_size is not None, \ + "We can't propagate compressed streamlines." + + # Getting the first inputs + tmp_lines = [line[:-1, :] for line in lines] + batch_inputs = self.batch_loader.load_batch_inputs(tmp_lines, ids_per_subj) + del tmp_lines + + def update_memory_after_removing_lines(can_continue: np.ndarray, __): + nonlocal batch_inputs + batch_inputs = [inp for i, inp in enumerate(batch_inputs) if + can_continue[i]] + + def get_dirs_at_last_pos(_lines: List[torch.Tensor], n_last_pos): + nonlocal batch_inputs + n_last_pos = [pos[None, :] for pos in n_last_pos] + latest_inputs = self.batch_loader.load_batch_inputs( + n_last_pos, ids_per_subj) + batch_inputs = [torch.vstack((first, last)) for first, last in + zip(batch_inputs, latest_inputs)] + + model_outputs = self.model(batch_inputs, _lines) + next_dirs = self.model.get_tracking_directions( + model_outputs, algo='det', eos_stopping_thresh=0.5) + return next_dirs + + theta = 2 * np.pi # theta = 360 degrees + max_nbr_pts = int(200 / self.model.step_size) + return propagate_multiple_lines( + lines, update_memory_after_removing_lines, get_dirs_at_last_pos, + theta=theta, step_size=self.model.step_size, + verify_opposite_direction=False, mask=self.tracking_mask, + max_nbr_pts=max_nbr_pts, append_last_point=False, + normalize_directions=True) diff --git a/dwi_ml/training/trainers.py b/dwi_ml/training/trainers.py index 93ce44f2..f8dcbaaa 100644 --- a/dwi_ml/training/trainers.py +++ b/dwi_ml/training/trainers.py @@ -267,10 +267,14 @@ def __init__(self, # D. 
Monitors # grad_norm = The total norm (sqrt(sum(params**2))) of parameters # before gradient clipping, if any. + + # Training: only one monitor. self.train_loss_monitor = BatchHistoryMonitor( 'train_loss_monitor', weighted=True) - self.valid_loss_monitor = BatchHistoryMonitor( - 'valid_loss_monitor', weighted=True) + + # Validation: As many supervision losses as we want. + self.valid_local_loss_monitor = BatchHistoryMonitor( + 'valid_local_loss_monitor', weighted=True) self.grad_norm_monitor = BatchHistoryMonitor( 'grad_norm_monitor', weighted=False) self.training_time_monitor = TimeMonitor('training_time_monitor') @@ -279,13 +283,14 @@ def __init__(self, patience = np.inf self.best_epoch_monitor = BestEpochMonitor( 'best_epoch_monitor', patience, patience_delta) - self.monitors = [self.train_loss_monitor, self.valid_loss_monitor, + self.monitors = [self.train_loss_monitor, + self.valid_local_loss_monitor, self.grad_norm_monitor, self.training_time_monitor, self.validation_time_monitor, self.best_epoch_monitor] self.training_monitors = [self.train_loss_monitor, self.grad_norm_monitor, self.training_time_monitor] - self.validation_monitors = [self.valid_loss_monitor, + self.validation_monitors = [self.valid_local_loss_monitor, self.validation_time_monitor] # E. Comet Experiment @@ -343,8 +348,7 @@ def params_for_checkpoint(self): def save_params_to_json(self): """ Utility method to save the parameters to a json file in the same - folder as the experiment. Suggestion, call this after instantiating - your trainer. + folder as the experiment. """ now = datetime.now() json_filename = os.path.join(self.saving_path, "parameters_{}.json" @@ -399,24 +403,24 @@ def _prepare_checkpoint_info(self) -> dict: # Note. batch sampler's rng state and batch loader's are the same. current_states = { - # A. Rng value. + # Rng value. 'torch_rng_state': torch.random.get_rng_state(), 'torch_cuda_state': torch.cuda.get_rng_state() if self.use_gpu else None, 'sampler_np_rng_state': self.batch_sampler.np_rng.get_state(), 'loader_np_rng_state': self.batch_loader.np_rng.get_state(), - # B. Current epoch. + # Current epoch. 'current_epoch': self.current_epoch, - # C. Nb of batches per epoch. + # Nb of batches per epoch. 'nb_batches_train': self.nb_batches_train, 'nb_batches_valid': self.nb_batches_valid, - # D. Comet Experiment + # Comet Experiment 'comet_key': self.comet_key, - # E. Optimizer + # Optimizer 'optimizer_state': self.optimizer.state_dict(), } - # F. Monitors + # Monitors for monitor in self.monitors: current_states[monitor.name + '_state'] = monitor.get_state() @@ -675,7 +679,7 @@ def _get_latest_loss_to_supervise_best(self): # This can be overriden by child classes if you possesss other # test metrics than the loss. if self.use_validation: - mean_epoch_loss = self.valid_loss_monitor.average_per_epoch[-1] + mean_epoch_loss = self.valid_local_loss_monitor.average_per_epoch[-1] else: mean_epoch_loss = self.train_loss_monitor.average_per_epoch[-1] @@ -754,13 +758,9 @@ def train_one_epoch(self, epoch): # Enable gradients for backpropagation. Uses torch's module # train(), which "turns on" the training mode. 
with grad_context(): - mean_loss, n = self.run_one_batch(data) + mean_loss = self.train_one_batch(data, epoch) grad_norm = self.back_propagation(mean_loss) - - # Update information and logs - mean_loss = mean_loss.cpu().item() - self.train_loss_monitor.update(mean_loss, weight=n) self.grad_norm_monitor.update(grad_norm) # Explicitly delete iterator to kill threads and free memory before @@ -790,7 +790,6 @@ def validate_one_epoch(self, epoch): self.batch_sampler.set_context('validation') self.model.set_context('validation') self.model.eval() - grad_context = torch.no_grad # Make sure there are no existing HDF handles if using parallel workers if (self.nb_cpu_processes > 0 and @@ -812,29 +811,11 @@ def validate_one_epoch(self, epoch): break # Validate this batch: forward propagation + loss - with grad_context(): - mean_loss, n = self.validate_one_batch(data, epoch) - - mean_loss = mean_loss.cpu().item() - - self.valid_loss_monitor.update(mean_loss, weight=n) - - with tqdm_logging_redirect(self.valid_dataloader, ncols=100, - total=self.nb_batches_valid, - loggers=[logging.root], - tqdm_class=tqdm) as pbar: + with torch.no_grad(): + self.validate_one_batch(data, epoch) # Explicitly delete iterator to kill threads and free memory before # running training again - valid_iterator = enumerate(pbar) - for batch_id, data in valid_iterator: - logging.warning("BATCH: ******************** REMOVE THIS. FOR UNIQUE PICTURE!") - if batch_id == self.nb_batches_valid: - # Explicitly close tqdm's progress bar to fix possible bugs - # when breaking the loop - pbar.close() - break - del valid_iterator # Save info @@ -842,10 +823,21 @@ def validate_one_epoch(self, epoch): monitor.end_epoch() self._update_comet_after_epoch('validation', epoch) + def train_one_batch(self, data, epoch): + """ + Returns: The loss to be backpropagated. + """ + # Encapsulated for easier management of child classes. + mean_local_loss, n = self.run_one_batch(data) + self.train_loss_monitor.update(mean_local_loss.cpu().item(), + weight=n) + return mean_local_loss + def validate_one_batch(self, data, epoch): # Encapsulated for easier management of child classes. - mean_loss, n = self.run_one_batch(data) - return mean_loss, n + mean_local_loss, n = self.run_one_batch(data) + self.valid_local_loss_monitor.update(mean_local_loss.cpu().item(), + weight=n) def _update_comet_after_epoch(self, context: str, epoch: int): """ @@ -857,16 +849,31 @@ def _update_comet_after_epoch(self, context: str, epoch: int): local_context: prefix when saving log. Training_ or Validate_ for instance. """ + if context == 'training': + monitors = self.training_monitors + elif context == 'validation': + monitors = self.validation_monitors + else: + raise ValueError("Unexpected context ({}). Expecting " + "training or validation.") + + logs = [] + for monitor in monitors: + if isinstance(monitor, BatchHistoryMonitor): + value = monitor.average_per_epoch[-1] + elif isinstance(monitor, TimeMonitor): + value = monitor.epoch_durations[-1] + else: + continue + logger.info(" Mean {} for this epoch: {}" + .format(monitor.name, value)) + logs.append((value, monitor.name)) + if self.comet_exp: if context == 'training': comet_context = self.comet_exp.train - monitors = self.training_monitors - elif context == 'validation': + else: # context == 'validation': comet_context = self.comet_exp.validate - monitors = self.validation_monitors - else: - raise ValueError("Unexpected context ({}) for comet. 
Expecting " - "training or validation.") with comet_context(): # Not really implemented yet. @@ -874,17 +881,9 @@ def _update_comet_after_epoch(self, context: str, epoch: int): # Cheating. To have a correct plotting per epoch (no step) # using step = epoch. In comet_ml, it is intended to be # step = batch. - for monitor in monitors: - if isinstance(monitor, BatchHistoryMonitor): - value = monitor.average_per_epoch[-1] - elif isinstance(monitor, TimeMonitor): - value = monitor.epoch_durations[-1] - else: - continue - logger.info(" Mean {} for this epoch: {}" - .format(monitor.name, value)) + for log in logs: self.comet_exp.log_metric( - monitor.name, value, epoch=0, step=epoch) + log[1], log[0], epoch=0, step=epoch) def _save_best_model(self): logger.info(" Best epoch yet! Saving model and loss history.") @@ -1003,7 +1002,7 @@ def check_stopping_cause(checkpoint_state, new_patience=None, class DWIMLTrainerOneInput(DWIMLAbstractTrainer): batch_loader: DWIMLBatchLoaderOneInput - def run_one_batch(self, data): + def run_one_batch(self, data, average_results=True): """ Run a batch of data through the model (calling its forward method) and return the mean loss. If training, run the backward method too. @@ -1066,12 +1065,14 @@ def run_one_batch(self, data): logger.debug('*** Computing loss') if self.model.loss_uses_streamlines: - mean_loss, n = self.model.compute_loss(model_outputs, targets) + results = self.model.compute_loss(model_outputs, targets, + average_results=average_results) else: - mean_loss, n = self.model.compute_loss(model_outputs) + results = self.model.compute_loss(model_outputs, + average_results=average_results) if self.use_gpu: log_gpu_memory_usage(logger) # The mean tensor is a single value. Converting to float using item(). - return mean_loss, n + return results diff --git a/dwi_ml/training/utils/trainer.py b/dwi_ml/training/utils/trainer.py index 1eb30b67..78ed94a0 100644 --- a/dwi_ml/training/utils/trainer.py +++ b/dwi_ml/training/utils/trainer.py @@ -54,7 +54,13 @@ def add_training_args(p: argparse.ArgumentParser, training_group.add_argument( '--tracking_phase_frequency', type=int, default=5) training_group.add_argument( - '--tracking_mask') + '--tracking_mask', + help="Volume group to use as tracking mask during the generation " + "phase.") + training_group.add_argument( + '--tracking_phase_nb_steps_init', type=int, default=5, + help="Number of segments copied from the 'real' streamlines " + "before starting propagation during generation phases.") comet_g = p.add_argument_group("Comet") comet_g.add_argument( diff --git a/dwi_ml/training/with_generation/trainer.py b/dwi_ml/training/with_generation/trainer.py index 12423e27..971a8c7e 100644 --- a/dwi_ml/training/with_generation/trainer.py +++ b/dwi_ml/training/with_generation/trainer.py @@ -13,7 +13,7 @@ from dwi_ml.tracking.propagation import propagate_multiple_lines from dwi_ml.tracking.projects.utils import prepare_tracking_mask from dwi_ml.training.trainers import DWIMLTrainerOneInput -from dwi_ml.training.utils.monitoring import BatchHistoryMonitor, TimeMonitor +from dwi_ml.training.utils.monitoring import BatchHistoryMonitor from dwi_ml.training.with_generation.batch_loader import \ DWIMLBatchLoaderWithConnectivity @@ -35,8 +35,7 @@ class DWIMLTrainerForTrackingOneInput(DWIMLTrainerOneInput): def __init__(self, add_a_tracking_validation_phase: bool = False, tracking_phase_frequency: int = 5, tracking_phase_nb_steps_init: int = 5, - tracking_phase_mask_group: str = None, - *args, **kw): + tracking_phase_mask_group: str 
= None, *args, **kw): super().__init__(*args, **kw) self.add_a_tracking_validation_phase = add_a_tracking_validation_phase @@ -64,10 +63,8 @@ def __init__(self, add_a_tracking_validation_phase: bool = False, self.tracking_mask.move_to(self.device) # -------- Monitors - self.tracking_valid_time_monitor = TimeMonitor( - 'tracking_valid_time_monitor') - - # A lot of exploratory metrics monitors: + # At training time: only the one metric used for training. + # At validation time: A lot of exploratory metrics monitors. # Percentage of streamlines inside a radius self.tracking_very_good_IS_monitor = BatchHistoryMonitor( @@ -77,7 +74,7 @@ def __init__(self, add_a_tracking_validation_phase: bool = False, self.tracking_very_far_IS_monitor = BatchHistoryMonitor( 'tracking_very_far_IS_monitor', weighted=True) - # Point where the streamline start diverging from "acceptable" + # Point where the streamline starts diverging from "acceptable" self.tracking_valid_diverg_monitor = BatchHistoryMonitor( 'tracking_valid_diverg_monitor', weighted=True) @@ -92,8 +89,7 @@ def __init__(self, add_a_tracking_validation_phase: bool = False, 'tracking_connectivity_score_monitor', weighted=True) if self.add_a_tracking_validation_phase: - new_monitors = [self.tracking_valid_time_monitor, - self.tracking_very_good_IS_monitor, + new_monitors = [self.tracking_very_good_IS_monitor, self.tracking_acceptable_IS_monitor, self.tracking_very_far_IS_monitor, self.tracking_valid_diverg_monitor, @@ -110,7 +106,7 @@ def params_for_checkpoint(self): 'add_a_tracking_validation_phase': self.add_a_tracking_validation_phase, 'tracking_phase_frequency': self.tracking_phase_frequency, 'tracking_phase_nb_steps_init': self.tracking_phase_nb_steps_init, - 'tracking_phase_mask_group': self.tracking_mask_group + 'tracking_phase_mask_group': self.tracking_mask_group, }) return p @@ -118,45 +114,47 @@ def params_for_checkpoint(self): def _get_latest_loss_to_supervise_best(self): if self.use_validation: if self.add_a_tracking_validation_phase: - # Compared to super, replacing by tracking_valid loss. + # Choosing connectivity. mean_epoch_loss = \ self.tracking_connectivity_score_monitor.average_per_epoch[-1] - - # Could use IS instead, or non-clipped, or diverging point. - # Not implemented. else: - mean_epoch_loss = self.valid_loss_monitor.average_per_epoch[-1] + mean_epoch_loss = self.valid_local_loss_monitor.average_per_epoch[-1] else: + # Without a validation set: take the training loss. mean_epoch_loss = self.train_loss_monitor.average_per_epoch[-1] return mean_epoch_loss def validate_one_batch(self, data, epoch): - mean_loss, n = super().validate_one_batch(data, epoch) + # 1. Compute local loss. + super().validate_one_batch(data, epoch) + # 2. Compute generation losses. 
if self.add_a_tracking_validation_phase: if (epoch + 1) % self.tracking_phase_frequency == 0: - logger.info("Additional tracking-like generation validation " - "from batch.") + logger.debug("Additional tracking-like generation validation " + "from batch.") (gen_n, mean_final_dist, mean_clipped_final_dist, percent_IS_very_good, percent_IS_acceptable, percent_IS_very_far, - diverging_pnt, connectivity) = self.generate_from_one_batch(data) + diverging_pnt, connectivity) = self.generate_from_one_batch( + data, compute_all_scores=True) self.tracking_very_good_IS_monitor.update( - percent_IS_very_good, weight=n) + percent_IS_very_good, weight=gen_n) self.tracking_acceptable_IS_monitor.update( - percent_IS_acceptable, weight=n) + percent_IS_acceptable, weight=gen_n) self.tracking_very_far_IS_monitor.update( - percent_IS_very_far, weight=n) + percent_IS_very_far, weight=gen_n) self.tracking_mean_final_distance_monitor.update( - mean_final_dist, weight=n) + mean_final_dist, weight=gen_n) self.tracking_clipped_final_distance_monitor.update( - mean_clipped_final_dist, weight=n) + mean_clipped_final_dist, weight=gen_n) self.tracking_valid_diverg_monitor.update( - diverging_pnt, weight=n) + diverging_pnt, weight=gen_n) - self.tracking_connectivity_score_monitor.update(connectivity) + self.tracking_connectivity_score_monitor.update( + connectivity, weight=gen_n) elif len(self.tracking_mean_final_distance_monitor.average_per_epoch) == 0: logger.info("Skipping tracking-like generation validation from " "batch. No values yet: adding fake initial values.") @@ -173,7 +171,8 @@ def validate_one_batch(self, data, epoch): # Bad mean dist = very far. ex, 100, or clipped. self.tracking_mean_final_distance_monitor.update(100.0) - self.tracking_clipped_final_distance_monitor.update(ACCEPTABLE_THRESHOLD) + self.tracking_clipped_final_distance_monitor.update( + ACCEPTABLE_THRESHOLD) self.tracking_connectivity_score_monitor.update(1) else: @@ -185,12 +184,11 @@ def validate_one_batch(self, data, epoch): self.tracking_very_far_IS_monitor, self.tracking_valid_diverg_monitor, self.tracking_mean_final_distance_monitor, - self.tracking_clipped_final_distance_monitor]: + self.tracking_clipped_final_distance_monitor, + self.tracking_connectivity_score_monitor]: monitor.update(monitor.average_per_epoch[-1]) - return mean_loss, n - - def generate_from_one_batch(self, data): + def generate_from_one_batch(self, data, compute_all_scores=False): # Data interpolation has not been done yet. GPU computations are done # here in the main thread. torch.set_printoptions(precision=4) @@ -200,7 +198,6 @@ def generate_from_one_batch(self, data): real_lines = [line.to(self.device, non_blocking=True, dtype=torch.float) for line in real_lines] last_pos = torch.vstack([line[-1, :] for line in real_lines]) - mean_length = np.mean([len(s) for s in real_lines]) # Dataloader always works on CPU. Sending to right device. # (model is already moved). Using only the n first points @@ -208,68 +205,71 @@ def generate_from_one_batch(self, data): for s in real_lines] # Propagation: no backward tracking. + previous_context = self.model.context self.model.set_context('tracking') lines = self.propagate_multiple_lines(lines, ids_per_subj) - self.model.set_context('validation') - - # 1. 
Connectivity scores - connectivity_score = self._compare_connectivity(lines, ids_per_subj) - - compute_mean_length = np.mean([len(s) for s in lines]) - logger.info("-> Average streamline length (nb pts) in this batch: {} \n" - " Average recovered streamline length: {}" - .format(mean_length.astype(np.float64), - compute_mean_length.astype(np.float64))) + self.model.set_context(previous_context) # 1. Final distance compared to expected point. computed_last_pos = torch.vstack([line[-1, :] for line in lines]) l2_loss = PairwiseDistance(p=2) final_dist = l2_loss(computed_last_pos, last_pos) - # 2. Verify "IS ratio", i.e. percentage of streamlines ending inside a - # predefined radius. - invalid_ratio_severe = torch.sum(final_dist > VERY_CLOSE_THRESHOLD) / len(lines) * 100 - invalid_ratio_acceptable = torch.sum(final_dist > ACCEPTABLE_THRESHOLD) / len(lines) * 100 - invalid_ratio_loose = torch.sum(final_dist > VERY_FAR_THRESHOLD) / len(lines) * 100 - - final_dist_clipped = torch.clip(final_dist, min=None, - max=ACCEPTABLE_THRESHOLD) - final_dist = torch.mean(final_dist) - final_dist_clipped = torch.mean(final_dist_clipped) - - # 3. Verify point where streamline starts diverging. - # 0% = error at first point --> really bad. - # 100% = reached exactly the right point. - # >100% = went too far (longer than expected). - # We want a decreasing value towards 0. - # abs(100 - score): 0 = good. 100 = bad. - # Using 100 - x, so the score is diminishing, from 100 = perfect. - total_point = 0 - for line, real_line in zip(lines, real_lines): - expected_nb = len(real_line) - diff_nb = abs(len(real_line) - len(line)) - if len(line) < expected_nb: - diff_nb = len(real_line) - len(line) - line = torch.vstack((line, line[-1, :].repeat(diff_nb, 1))) - elif len(line) > expected_nb: - real_line = torch.vstack((real_line, - real_line[-1, :].repeat(diff_nb, 1))) - dist = l2_loss(line, real_line).detach().cpu().numpy() - point, = np.where(dist > ACCEPTABLE_THRESHOLD) - if len(point) > 0: # (else: score = 0. Never out of range). - div_point = point[0] / expected_nb * 100.0 - total_point += abs(100 - div_point) - diverging_point = total_point / len(lines) - - invalid_ratio_severe = invalid_ratio_severe.cpu().numpy().astype(np.float32) - invalid_ratio_acceptable = invalid_ratio_acceptable.cpu().numpy().astype(np.float32) - invalid_ratio_loose = invalid_ratio_loose.cpu().numpy().astype(np.float32) - final_dist = final_dist.cpu().numpy().astype(np.float32) - final_dist_clipped = final_dist_clipped.cpu().numpy().astype(np.float32) - diverging_point = np.asarray(diverging_point, dtype=np.float32) - return (len(lines), final_dist, final_dist_clipped, - invalid_ratio_severe, invalid_ratio_acceptable, invalid_ratio_loose, diverging_point, - connectivity_score) + if not compute_all_scores: + return final_dist + else: + # 1. Also clipping final dist + final_dist_clipped = torch.clip(final_dist, min=None, + max=ACCEPTABLE_THRESHOLD) + final_dist = torch.mean(final_dist) + final_dist_clipped = torch.mean(final_dist_clipped) + + # 2. Connectivity scores + connectivity_score = self._compare_connectivity(lines, ids_per_subj) + + # 3. Verify "IS ratio", i.e. percentage of streamlines ending + # inside a predefined radius. + invalid_ratio_severe = torch.sum( + final_dist > VERY_CLOSE_THRESHOLD) / len(lines) * 100 + invalid_ratio_acceptable = torch.sum( + final_dist > ACCEPTABLE_THRESHOLD) / len(lines) * 100 + invalid_ratio_loose = torch.sum( + final_dist > VERY_FAR_THRESHOLD) / len(lines) * 100 + + # 4. 
Verify point where streamline starts diverging. + # 0% = error at first point --> really bad. + # 100% = reached exactly the right point. + # >100% = went too far (longer than expected). + # We want a decreasing value towards 0. + # abs(100 - score): 0 = good. 100 = bad. + # Using 100 - x, so the score is diminishing, from 100 = perfect. + total_point = 0 + for line, real_line in zip(lines, real_lines): + expected_nb = len(real_line) + diff_nb = abs(len(real_line) - len(line)) + if len(line) < expected_nb: + diff_nb = len(real_line) - len(line) + line = torch.vstack((line, line[-1, :].repeat(diff_nb, 1))) + elif len(line) > expected_nb: + real_line = torch.vstack((real_line, + real_line[-1, :].repeat(diff_nb, 1))) + dist = l2_loss(line, real_line).cpu().numpy() + point, = np.where(dist > ACCEPTABLE_THRESHOLD) + if len(point) > 0: # (else: score = 0. Never out of range). + div_point = point[0] / expected_nb * 100.0 + total_point += abs(100 - div_point) + diverging_point = total_point / len(lines) + + invalid_ratio_severe = invalid_ratio_severe.cpu().numpy().astype(np.float32) + invalid_ratio_acceptable = invalid_ratio_acceptable.cpu().numpy().astype(np.float32) + invalid_ratio_loose = invalid_ratio_loose.cpu().numpy().astype(np.float32) + final_dist = final_dist.cpu().numpy().astype(np.float32) + final_dist_clipped = final_dist_clipped.cpu().numpy().astype(np.float32) + diverging_point = np.asarray(diverging_point, dtype=np.float32) + return (len(lines), final_dist, final_dist_clipped, + invalid_ratio_severe, invalid_ratio_acceptable, + invalid_ratio_loose, diverging_point, + connectivity_score) def _compare_connectivity(self, lines, ids_per_subj): connectivity_matrices, volume_sizes, downsampled_sizes = \ diff --git a/dwi_ml/unit_tests/utils/data_and_models_for_tests.py b/dwi_ml/unit_tests/utils/data_and_models_for_tests.py index 28b2cad0..bdfac4ba 100644 --- a/dwi_ml/unit_tests/utils/data_and_models_for_tests.py +++ b/dwi_ml/unit_tests/utils/data_and_models_for_tests.py @@ -140,7 +140,7 @@ def compute_loss(self, model_outputs: List[torch.Tensor], def get_tracking_directions(self, regressed_dirs, algo): if algo == 'det': - return regressed_dirs.detach() + return regressed_dirs elif algo == 'prob': raise NotImplementedError( "Our test model uses (fake) regression and does not allow " diff --git a/scripts_python/l2t_train_from_pretrained.py b/scripts_python/l2t_train_from_pretrained.py new file mode 100644 index 00000000..5fe5bc16 --- /dev/null +++ b/scripts_python/l2t_train_from_pretrained.py @@ -0,0 +1,141 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +""" +Train a model for Learn2Track +""" +import argparse +import logging +import os + +# comet_ml not used, but comet_ml requires to be imported before torch. +# See bug report here https://github.com/Lightning-AI/lightning/issues/5829 +# Importing now to solve issues later. 
+import comet_ml +import torch + +from scilpy.io.utils import assert_inputs_exist, assert_outputs_exist + +from dwi_ml.data.dataset.utils import prepare_multisubjectdataset +from dwi_ml.experiment_utils.prints import format_dict_to_str +from dwi_ml.experiment_utils.timer import Timer +from dwi_ml.io_utils import add_logging_arg, add_memory_args +from dwi_ml.models.projects.learn2track_model import Learn2TrackModel +from dwi_ml.training.projects.learn2track_trainer import Learn2TrackTrainer +from dwi_ml.training.utils.batch_samplers import (add_args_batch_sampler, + prepare_batch_sampler) +from dwi_ml.training.utils.batch_loaders import (add_args_batch_loader, + prepare_batch_loader) +from dwi_ml.training.utils.experiment import ( + add_mandatory_args_training_experiment) +from dwi_ml.training.utils.trainer import run_experiment, add_training_args, \ + format_lr + + +def prepare_arg_parser(): + p = argparse.ArgumentParser(description=__doc__, + formatter_class=argparse.RawTextHelpFormatter) + add_mandatory_args_training_experiment(p) + p.add_argument('pretrained_model', + help="Name of the pretrained experiment (from the same " + "experiments path) from which to load the model. " + "Should contain a 'best_model' folder with pickle " + "information to load the model") + add_args_batch_sampler(p) + add_args_batch_loader(p) + training_group = add_training_args(p, add_a_tracking_validation_phase=True) + add_memory_args(p, add_lazy_options=True, add_rng=True) + add_logging_arg(p) + + # Additional arg for projects + training_group.add_argument( + '--clip_grad', type=float, default=None, + help="Value to which the gradient norms to avoid exploding gradients." + "\nDefault = None (not clipping).") + + return p + + +def init_from_args(args, sub_loggers_level): + torch.manual_seed(args.rng) # Set torch seed + + # Prepare the dataset + dataset = prepare_multisubjectdataset(args, load_testing=False, + log_level=sub_loggers_level) + + # Loading an existing model + logging.info("Loading existing model") + best_model_path = os.path.join(args.experiments_path, + args.pretrained_model, 'best_model') + model = Learn2TrackModel.load_params_and_state( + best_model_path, sub_loggers_level) + + # Preparing the batch samplers + batch_sampler = prepare_batch_sampler(dataset, args, sub_loggers_level) + batch_loader = prepare_batch_loader(dataset, model, args, sub_loggers_level) + + # Instantiate trainer + with Timer("\n\nPreparing trainer", newline=True, color='red'): + lr = format_lr(args.learning_rate) + trainer = Learn2TrackTrainer( + model=model, experiments_path=args.experiments_path, + experiment_name=args.experiment_name, batch_sampler=batch_sampler, + batch_loader=batch_loader, + # COMET + comet_project=args.comet_project, + comet_workspace=args.comet_workspace, + # TRAINING + learning_rates=lr, weight_decay=args.weight_decay, + optimizer=args.optimizer, max_epochs=args.max_epochs, + max_batches_per_epoch_training=args.max_batches_per_epoch_training, + max_batches_per_epoch_validation=args.max_batches_per_epoch_validation, + patience=args.patience, patience_delta=args.patience_delta, + from_checkpoint=False, clip_grad=args.clip_grad, + # (generation validation:) + add_a_tracking_validation_phase=args.add_a_tracking_validation_phase, + tracking_phase_frequency=args.tracking_phase_frequency, + tracking_phase_nb_steps_init=args.tracking_phase_nb_steps_init, + tracking_phase_mask_group=args.tracking_mask, + # MEMORY + nb_cpu_processes=args.nbr_processes, use_gpu=args.use_gpu, + log_level=args.logging) + 
logging.info("Trainer params : " + + format_dict_to_str(trainer.params_for_checkpoint)) + + return trainer + + +def main(): + p = prepare_arg_parser() + args = p.parse_args() + + # Setting log level to INFO maximum for sub-loggers, else it becomes ugly, + # but we will set trainer to user-defined level. + sub_loggers_level = args.logging + if args.logging == 'DEBUG': + sub_loggers_level = 'INFO' + + logging.getLogger().setLevel(level=logging.INFO) + + # Check that all files exist + assert_inputs_exist(p, [args.hdf5_file]) + assert_outputs_exist(p, args, args.experiments_path) + + # Verify if a checkpoint has been saved. Else create an experiment. + if os.path.exists(os.path.join(args.experiments_path, args.experiment_name, + "checkpoint")): + raise FileExistsError("This experiment already exists. Delete or use " + "script l2t_resume_training_from_checkpoint.py.") + + trainer = init_from_args(args, sub_loggers_level) + + # Supervising that we loaded everything correctly. + print("Validation 0 = Initial verification: pre-trained results!") + trainer.validate_one_epoch(-1) + + print("Now starting training") + run_experiment(trainer) + + +if __name__ == '__main__': + main() diff --git a/scripts_python/l2t_train_model.py b/scripts_python/l2t_train_model.py index 38cc1215..184f28a6 100755 --- a/scripts_python/l2t_train_model.py +++ b/scripts_python/l2t_train_model.py @@ -125,7 +125,7 @@ def init_from_args(args, sub_loggers_level): # (generation validation:) add_a_tracking_validation_phase=args.add_a_tracking_validation_phase, tracking_phase_frequency=args.tracking_phase_frequency, - tracking_phase_nb_steps_init=5, # args.tracking_phase_nb_steps_init + tracking_phase_nb_steps_init=args.tracking_phase_nb_steps_init, tracking_phase_mask_group=args.tracking_mask, # MEMORY nb_cpu_processes=args.nbr_processes, use_gpu=args.use_gpu, diff --git a/scripts_python/tto_train_model.py b/scripts_python/tto_train_model.py index 39e6706d..96d1eee8 100755 --- a/scripts_python/tto_train_model.py +++ b/scripts_python/tto_train_model.py @@ -118,7 +118,7 @@ def init_from_args(args, sub_loggers_level): # (generation validation:) add_a_tracking_validation_phase=args.add_a_tracking_validation_phase, tracking_phase_frequency=args.tracking_phase_frequency, - tracking_phase_nb_steps_init=5, # args.tracking_phase_nb_steps_init + tracking_phase_nb_steps_init=args.tracking_phase_nb_steps_init, tracking_phase_mask_group=args.tracking_mask, # MEMORY nb_cpu_processes=args.nbr_processes, use_gpu=args.use_gpu, diff --git a/scripts_python/ttst_train_model.py b/scripts_python/ttst_train_model.py index d917e837..8a8a9227 100755 --- a/scripts_python/ttst_train_model.py +++ b/scripts_python/ttst_train_model.py @@ -48,7 +48,7 @@ def prepare_arg_parser(): add_logging_arg(p) add_args_batch_sampler(p) add_args_batch_loader(p) - add_training_args(p) + add_training_args(p, add_a_tracking_validation_phase=True) # Specific to Transformers: gt = add_abstract_model_args(p) @@ -110,8 +110,9 @@ def init_from_args(args, sub_loggers_level): with Timer("\n\nPreparing trainer", newline=True, color='red'): lr = format_lr(args.learning_rate) trainer = TransformerTrainer( - model, args.experiments_path, args.experiment_name, - batch_sampler, batch_loader, + model=model, experiments_path=args.experiments_path, + experiment_name=args.experiment_name, batch_sampler=batch_sampler, + batch_loader=batch_loader, # COMET comet_project=args.comet_project, comet_workspace=args.comet_workspace, @@ -122,6 +123,11 @@ def init_from_args(args, sub_loggers_level): 
max_batches_per_epoch_validation=args.max_batches_per_epoch_validation, patience=args.patience, patience_delta=args.patience_delta, from_checkpoint=False, + # (generation validation:) + add_a_tracking_validation_phase=args.add_a_tracking_validation_phase, + tracking_phase_frequency=args.tracking_phase_frequency, + tracking_phase_nb_steps_init=args.tracking_phase_nb_steps_init, + tracking_phase_mask_group=args.tracking_mask, # MEMORY nb_cpu_processes=args.nbr_processes, use_gpu=args.use_gpu, log_level=args.logging) From 2c6a4606f5c1037582fb59b89b4c06240115b8b7 Mon Sep 17 00:00:00 2001 From: EmmaRenauld Date: Mon, 12 Jun 2023 09:39:27 -0400 Subject: [PATCH 06/13] Small fixes --- dwi_ml/training/trainers.py | 5 +++-- dwi_ml/training/with_generation/trainer.py | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/dwi_ml/training/trainers.py b/dwi_ml/training/trainers.py index f8dcbaaa..da2cad53 100644 --- a/dwi_ml/training/trainers.py +++ b/dwi_ml/training/trainers.py @@ -460,6 +460,9 @@ def init_from_checkpoint( trainer.max_epochs = new_max_epochs # Save params to json to help user remember. + current_states = checkpoint_state['current_states'] + trainer._update_states_from_checkpoint(current_states) + if new_patience: trainer.best_epoch_monitor.patience = new_patience logger.info("Starting from checkpoint! Starting from epoch #{}.\n" @@ -470,8 +473,6 @@ def init_from_checkpoint( trainer.best_epoch_monitor.best_epoch, trainer.best_epoch_monitor.n_bad_epochs)) - current_states = checkpoint_state['current_states'] - trainer._update_states_from_checkpoint(current_states) return trainer def _update_states_from_checkpoint(self, current_states): diff --git a/dwi_ml/training/with_generation/trainer.py b/dwi_ml/training/with_generation/trainer.py index 971a8c7e..8de9ca24 100644 --- a/dwi_ml/training/with_generation/trainer.py +++ b/dwi_ml/training/with_generation/trainer.py @@ -221,7 +221,6 @@ def generate_from_one_batch(self, data, compute_all_scores=False): # 1. Also clipping final dist final_dist_clipped = torch.clip(final_dist, min=None, max=ACCEPTABLE_THRESHOLD) - final_dist = torch.mean(final_dist) final_dist_clipped = torch.mean(final_dist_clipped) # 2. Connectivity scores @@ -235,6 +234,7 @@ def generate_from_one_batch(self, data, compute_all_scores=False): final_dist > ACCEPTABLE_THRESHOLD) / len(lines) * 100 invalid_ratio_loose = torch.sum( final_dist > VERY_FAR_THRESHOLD) / len(lines) * 100 + final_dist = torch.mean(final_dist) # 4. Verify point where streamline starts diverging. # 0% = error at first point --> really bad. From 08b98cb5edb982f426ecbeba49e587f284b2e908 Mon Sep 17 00:00:00 2001 From: EmmaRenauld Date: Mon, 12 Jun 2023 11:54:56 -0400 Subject: [PATCH 07/13] Update comments and methods' parameters for all trainers --- dwi_ml/training/trainers.py | 176 +++++++++++--------- dwi_ml/training/with_generation/trainer.py | 184 ++++++++++++++------- 2 files changed, 221 insertions(+), 139 deletions(-) diff --git a/dwi_ml/training/trainers.py b/dwi_ml/training/trainers.py index da2cad53..a4010608 100644 --- a/dwi_ml/training/trainers.py +++ b/dwi_ml/training/trainers.py @@ -40,19 +40,15 @@ class DWIMLAbstractTrainer: """ This Trainer class's train_and_validate() method: - Creates DataLoaders from the data_loaders. Collate_fn will be the - loader.load_batch() method, and the dataset will be - sampler.source_data. - - Trains each epoch by using compute_batch_loss, which should be - implemented in each project's child class. 
+ found in the batch loader, and the dataset will be found in the data + sampler. + - Trains each epoch by using the model's loss computation method. Comet is used to save training information, but some logs will also be saved locally in the saving_path. - NOTE: TRAINER USES STREAMLINES COORDINATES IN VOXEL SPACE, TO CORNER. + NOTE: TRAINER USES STREAMLINES COORDINATES IN VOXEL SPACE, CORNER ORIGIN. """ - # For now, this is ugly... But the option is there if you want. - save_logs_per_batch = False - def __init__(self, model: MainModelAbstract, experiments_path: str, experiment_name: str, batch_sampler: DWIMLBatchIDSampler, @@ -323,14 +319,16 @@ def __init__(self, @property def params_for_checkpoint(self): - # These are the parameters necessary to use _init_, together with - # instantiated classes (model, batch loader, batch sampler). - + """ + Returns the parameters necessary to initialize an identical Trainer. + However, the trainer's state could need to be updated (see checkpoint + management). + """ # Not saving experiment_path and experiment_name. Allowing user to # move the experiment on his computer between training sessions. - # Patience and patience delta will be taken from the best epoch monitor. - + # Patience is not saved here: we manage it separately to allow the + # user to increase the patience when running again. params = { 'learning_rates': self.learning_rates, 'weight_decay': self.weight_decay, @@ -347,8 +345,8 @@ def params_for_checkpoint(self): def save_params_to_json(self): """ - Utility method to save the parameters to a json file in the same - folder as the experiment. + Save the trainer's parameters to a json file in the same folder as the + experiment. """ now = datetime.now() json_filename = os.path.join(self.saving_path, "parameters_{}.json" @@ -370,14 +368,12 @@ def save_params_to_json(self): def save_checkpoint(self): """ - Save an experiment checkpoint that can be resumed from. + Saves an experiment checkpoint, with parameters and states. """ logger.debug("Saving checkpoint...") - - # Make checkpoint directory checkpoint_dir = os.path.join(self.saving_path, "checkpoint") - # Backup old checkpoint before saving, and erase it afterwards + # Backup old checkpoint before saving, and erase it afterward. to_remove = None if os.path.exists(checkpoint_dir): to_remove = os.path.join(self.saving_path, "checkpoint_old") @@ -399,8 +395,11 @@ def save_checkpoint(self): shutil.rmtree(to_remove) def _prepare_checkpoint_info(self) -> dict: - # These are parameters that should be updated after instantiating cls. - + """ + To instantiate a Trainer, we need the initialization parameters + (self.params_for_checkpoint), and the states. This method returns + the dictionary of required states. + """ # Note. batch sampler's rng state and batch loader's are the same. current_states = { # Rng value. @@ -443,16 +442,13 @@ def init_from_checkpoint( batch_loader: DWIMLAbstractBatchLoader, checkpoint_state: dict, new_patience, new_max_epochs, log_level): """ - During save_checkpoint(), checkpoint_state.pkl is saved. Loading it - back offers a dict that can be used to instantiate an experiment and - set it at the same state as previously. (Current_epoch is updated +1). - - Hint: If you want to use this in your child class, use: - experiment, checkpoint_state = super(cls, cls).init_from_checkpoint(... + Loads checkpoint information (parameters and states) to instantiate + a Trainer. Current_epoch is updated +1. 
""" trainer_params = checkpoint_state['params_for_init'] trainer = cls(model=model, experiments_path=experiments_path, - experiment_name=experiment_name, batch_sampler=batch_sampler, + experiment_name=experiment_name, + batch_sampler=batch_sampler, batch_loader=batch_loader, from_checkpoint=True, log_level=log_level, **trainer_params) @@ -475,7 +471,22 @@ def init_from_checkpoint( return trainer + @staticmethod + def load_params_from_checkpoint(experiments_path: str, experiment_name: str): + total_path = os.path.join( + experiments_path, experiment_name, "checkpoint", + "checkpoint_state.pkl") + if not os.path.isfile(total_path): + raise FileNotFoundError('Checkpoint was not found! ({})' + .format(total_path)) + checkpoint_state = torch.load(total_path) + + return checkpoint_state + def _update_states_from_checkpoint(self, current_states): + """ + Updates all states from the checkpoint dictionary of states. + """ # A. Rng value. # RNG: # - numpy @@ -507,7 +518,8 @@ def _update_states_from_checkpoint(self, current_states): def _init_comet(self): """ - For more information on comet, see our doc/Getting Started + Initialize comet's experiment. User's account and workspace must be + already set. """ try: if self.comet_key: @@ -550,11 +562,10 @@ def _init_comet(self): def estimate_nb_batches_per_epoch(self): """ - Please override in your child class if you have a better way to - define the epochs sizes. + Counts the number of training / validation batches required to see all + the data (up to the maximum number of allowed batches). - Returns: - (nb_training_batches_per_epoch, nb_validation_batches_per_epoch) + Data must be already loaded to access the information. """ streamline_group = self.batch_sampler.streamline_group_idx train_set = self.batch_sampler.dataset.training_set @@ -585,16 +596,16 @@ def estimate_nb_batches_per_epoch(self): def train_and_validate(self): """ - Train + validates the model (+ computes loss) + Trains + validates the model. Computes the training loss at each + training loop, and many validation metrics at each validation loop. - Starts comet, - Creates DataLoaders from the BatchSamplers, - For each epoch - uses _train_one_epoch and _validate_one_epoch, - - checks for earlyStopping if the loss is bad, + - saves a checkpoint, + - checks for earlyStopping if the loss is bad or patience is reached, - saves the model if the loss is good. - - Checks if allowed training time is exceeded. - """ logger.debug("Trainer {}: \nRunning the model {}.\n\n" .format(type(self), type(self.model))) @@ -677,8 +688,10 @@ def train_and_validate(self): break def _get_latest_loss_to_supervise_best(self): - # This can be overriden by child classes if you possesss other - # test metrics than the loss. + """ + Defines the metric to be used to define the best model. Override if + you have other validation metrics. + """ if self.use_validation: mean_epoch_loss = self.valid_local_loss_monitor.average_per_epoch[-1] else: @@ -687,6 +700,9 @@ def _get_latest_loss_to_supervise_best(self): return mean_epoch_loss def save_local_logs(self): + """ + Save logs locally as numpy arrays. 
+ """ for monitor in self.monitors: if isinstance(monitor, BatchHistoryMonitor): self._save_log_locally(monitor.average_per_epoch, @@ -695,7 +711,15 @@ def save_local_logs(self): self._save_log_locally(monitor.epoch_durations, monitor.name + '_duration.npy') + def _save_log_locally(self, array: np.ndarray, fname: str): + np.save(os.path.join(self.log_dir, fname), array) + def _clear_handles(self): + """ + Trying to improve the handles management. + Todo. Improve again. CPU multiprocessing fails because of handles + management. + """ # Make sure there are no existing HDF handles if using parallel workers if (self.nb_cpu_processes > 0 and self.batch_sampler.context_subset.is_lazy): @@ -706,11 +730,15 @@ def back_propagation(self, loss): logger.debug('*** Computing back propagation') loss.backward() - self.fix_parameters() # Ex: clip gradients + # Any other steps. Ex: clip gradients. Not implemented here. + # See Learn2track's Trainer for an example. + self.fix_parameters() + + # Supervizing the gradient's norm. grad_norm = compute_gradient_norm(self.model.parameters()) # Update parameters - # toDo. We could update only every n steps. + # Future work: We could update only every n steps. # Effective batch size is n time bigger. # See here https://towardsdatascience.com/optimize-pytorch-performance-for-speed-and-memory-efficiency-2022-84f453916ea6 self.optimizer.step() @@ -723,7 +751,8 @@ def back_propagation(self, loss): def train_one_epoch(self, epoch): """ - Train one epoch of the model: loop on all batches (forward + backward). + Trains one epoch of the model: loops on all batches + (forward + backpropagation). """ for monitor in self.training_monitors: monitor.start_new_epoch() @@ -779,7 +808,7 @@ def train_one_epoch(self, epoch): def validate_one_epoch(self, epoch): """ - Validate one epoch of the model: loop on all batches. + Validates one epoch of the model: loops on all batches. """ for monitor in self.validation_monitors: monitor.start_new_epoch() @@ -826,7 +855,8 @@ def validate_one_epoch(self, epoch): def train_one_batch(self, data, epoch): """ - Returns: The loss to be backpropagated. + Computes the loss for the current batch and updates monitors. + Returns the loss to be used for backpropagation. """ # Encapsulated for easier management of child classes. mean_local_loss, n = self.run_one_batch(data) @@ -835,20 +865,16 @@ def train_one_batch(self, data, epoch): return mean_local_loss def validate_one_batch(self, data, epoch): - # Encapsulated for easier management of child classes. + """ + Computes the loss(es) for the current batch and updates monitors. + """ mean_local_loss, n = self.run_one_batch(data) self.valid_local_loss_monitor.update(mean_local_loss.cpu().item(), weight=n) def _update_comet_after_epoch(self, context: str, epoch: int): """ - Update logs: - - logging to user - - get values from monitors and save final log locally. - - send mean data to comet - - local_context: prefix when saving log. Training_ or Validate_ for - instance. + Sends monitors information to comet. """ if context == 'training': monitors = self.training_monitors @@ -871,6 +897,8 @@ def _update_comet_after_epoch(self, context: str, epoch: int): logs.append((value, monitor.name)) if self.comet_exp: + # Comet context: will add train_(loss) or valid_(loss) to the + # monitors name in comet. 
if context == 'training': comet_context = self.comet_exp.train else: # context == 'validation': @@ -887,6 +915,10 @@ def _update_comet_after_epoch(self, context: str, epoch: int): log[1], log[0], epoch=0, step=epoch) def _save_best_model(self): + """ + Saves the current state of the model in the best_model folder. + Saves the loss to a json folder. + """ logger.info(" Best epoch yet! Saving model and loss history.") # Save model @@ -907,25 +939,18 @@ def _save_best_model(self): def run_one_batch(self, data): """ - Run a batch of data through the model (calling its forward method) - and return the mean loss. If training, run the backward method too. + Runs a batch of data through the model (calling its forward method) + and returns the mean loss. Parameters ---------- data : tuple of (List[StatefulTractogram], dict) - This is the output of the AbstractBatchLoader's - load_batch_streamlines() method. data is a tuple + Output of the batch loader's collate_fn. + With our basic BatchLoader class, data is a tuple - batch_sfts: one sft per subject - final_streamline_ids_per_subj: the dict of streamlines ids from the list of all streamlines (if we concatenate all sfts' streamlines) - - Returns - ------- - mean_loss : float - The mean loss of the provided batch. - n: int - Total number of points for this batch. """ raise NotImplementedError @@ -940,25 +965,14 @@ def fix_parameters(self): """ pass - def _save_log_locally(self, array: np.ndarray, fname: str): - np.save(os.path.join(self.log_dir, fname), array) - - @staticmethod - def load_params_from_checkpoint(experiments_path: str, experiment_name: str): - total_path = os.path.join( - experiments_path, experiment_name, "checkpoint", - "checkpoint_state.pkl") - if not os.path.isfile(total_path): - raise FileNotFoundError('Checkpoint was not found! ({})' - .format(total_path)) - checkpoint_state = torch.load(total_path) - - return checkpoint_state - @staticmethod def check_stopping_cause(checkpoint_state, new_patience=None, new_max_epochs=None): - + """ + This method should be used before starting the training. Verifies that + it makes sense to continue training based on number of epochs and + patience. + """ current_epoch = checkpoint_state['current_states']['current_epoch'] # 1. Check if early stopping had been triggered. @@ -1017,7 +1031,9 @@ def run_one_batch(self, data, average_results=True): - batch_sfts: one sft per subject - final_streamline_ids_per_subj: the dict of streamlines ids from the list of all streamlines (if we concatenate all sfts' - streamlines) + streamlines). + average_results: bool + If true, returns the averaged loss (as defined by the model). Returns ------- diff --git a/dwi_ml/training/with_generation/trainer.py b/dwi_ml/training/with_generation/trainer.py index 8de9ca24..e7af4489 100644 --- a/dwi_ml/training/with_generation/trainer.py +++ b/dwi_ml/training/with_generation/trainer.py @@ -1,4 +1,39 @@ # -*- coding: utf-8 -*- +""" +Adds a tracking step to verify the generation process. Metrics on the +streamlines are: + +- Very good / acceptable / very far IS threshold: + Percentage of streamlines ending inside a radius of 15 / 25 / 40 voxels of + the expected endpoint. This metric has the drawback that streamlines + following a correct path different from the "true" validation streamline + contribute negatively to the metric. +- 'diverg': + The point where the streamline becomes significantly far (i.e. > 25 voxels) + from the "true" path. Values range between 100 (100% bad, i.e. 
diverging + from the start) to 0 (0% bad; ended correclty). If the generated streamline + is longer than the "true" one, values range between 0 (0% bad) and infinit + (ex: 100% = went 100% too far before becoming far from the expected point. + I.e. the generated streamline is at least twice as long as expected). Same + drawback as above. +- Mean distance from expected endpoint: + In voxel space. Same drawback as above. Also, a single bad streamline may + contribute intensively to the score. +- Idem, clipped. + Distances are clipped at 25. We consider that bad streamlines are bad, no + matter if they end up near or far. +- Connectivity fit: + Percentage of streamlines ending in a block of the volume indeed connected + in the validation subject. Real connectivity matrices must be saved in the + hdf5. Right now, volumes are simply downsampled (the same way as in the + hdf5, ex, to 10x10x10 volumes for a total of 1000 blocks), not based on + anatomical ROIs. It has the advantage that it does not rely on the quality + of segmentation. It had the drawback that a generated streamline ending + very close to the "true" streamline, but in another block, if the + expected endpoint is close to the border of the block, contributes + negatively to the metric. It does not however have the drawback of other + metrics stated before. +""" import logging from typing import List @@ -33,9 +68,31 @@ class DWIMLTrainerForTrackingOneInput(DWIMLTrainerOneInput): batch_loader: DWIMLBatchLoaderWithConnectivity def __init__(self, add_a_tracking_validation_phase: bool = False, - tracking_phase_frequency: int = 5, + tracking_phase_frequency: int = 1, tracking_phase_nb_steps_init: int = 5, tracking_phase_mask_group: str = None, *args, **kw): + """ + Parameters + ---------- + add_a_tracking_validation_phase: bool + If true, the validation phase is extended with a generation (i.e. + tracking) step: the first N points of the validation streamlines + are kept as is, and streamlines are propagated through tractography + until they get out of the mask, or until the EOS criteria is + reached (if any) (threshold = 0.5). + In current implementation, the metric defining the best model is + the connectivity metric. + tracking_phase_frequency: int + There is the possibility to compute this additional step only every + X epochs. + tracking_phase_nb_steps_init: int + Number of initial points to keep in the validation step. Adding + enough should ensure that the generated streamlines go in the same + direction as the "true" validation streamline to generate good + metrics. + tracking_phase_mask_group: str + Name of the volume group to use as tracking mask. + """ super().__init__(*args, **kw) self.add_a_tracking_validation_phase = add_a_tracking_validation_phase @@ -62,6 +119,9 @@ def __init__(self, add_a_tracking_validation_phase: bool = False, mask_interp='nearest') self.tracking_mask.move_to(self.device) + # todo verify if available in hdf5 + self.compute_connectivity = True + # -------- Monitors # At training time: only the one metric used for training. # At validation time: A lot of exploratory metrics monitors. @@ -112,21 +172,18 @@ def params_for_checkpoint(self): return p def _get_latest_loss_to_supervise_best(self): - if self.use_validation: - if self.add_a_tracking_validation_phase: - # Choosing connectivity. 
- mean_epoch_loss = \ - self.tracking_connectivity_score_monitor.average_per_epoch[-1] - else: - mean_epoch_loss = self.valid_local_loss_monitor.average_per_epoch[-1] + """Using the connectivity score, if available.""" + if (self.use_validation and self.add_a_tracking_validation_phase and + self.compute_connectivity): + # Choosing connectivity. + mean_epoch_loss = \ + self.tracking_connectivity_score_monitor.average_per_epoch[-1] + return mean_epoch_loss else: - # Without a validation set: take the training loss. - mean_epoch_loss = self.train_loss_monitor.average_per_epoch[-1] - - return mean_epoch_loss + return super()._get_latest_loss_to_supervise_best() def validate_one_batch(self, data, epoch): - # 1. Compute local loss. + # 1. Compute the local loss as usual. super().validate_one_batch(data, epoch) # 2. Compute generation losses. @@ -136,7 +193,7 @@ def validate_one_batch(self, data, epoch): "from batch.") (gen_n, mean_final_dist, mean_clipped_final_dist, percent_IS_very_good, percent_IS_acceptable, percent_IS_very_far, - diverging_pnt, connectivity) = self.generate_from_one_batch( + diverging_pnt, connectivity) = self.validation_generation_one_batch( data, compute_all_scores=True) self.tracking_very_good_IS_monitor.update( @@ -188,19 +245,21 @@ def validate_one_batch(self, data, epoch): self.tracking_connectivity_score_monitor]: monitor.update(monitor.average_per_epoch[-1]) - def generate_from_one_batch(self, data, compute_all_scores=False): - # Data interpolation has not been done yet. GPU computations are done - # here in the main thread. - torch.set_printoptions(precision=4) - np.set_printoptions(precision=2) - + def validation_generation_one_batch(self, data, compute_all_scores=False): + """ + Use tractography to generate streamlines starting from the "true" + seeds and first few segments. Expected results are the batch's + validation streamlines. + """ real_lines, ids_per_subj = data + + # Possibly sending again to GPU even if done in the local loss + # computation, but easier with current implementation. real_lines = [line.to(self.device, non_blocking=True, dtype=torch.float) for line in real_lines] last_pos = torch.vstack([line[-1, :] for line in real_lines]) - # Dataloader always works on CPU. Sending to right device. - # (model is already moved). Using only the n first points + # Starting from the n first points lines = [s[0:min(len(s), self.tracking_phase_nb_steps_init), :] for s in real_lines] @@ -218,16 +277,16 @@ def generate_from_one_batch(self, data, compute_all_scores=False): if not compute_all_scores: return final_dist else: - # 1. Also clipping final dist + # 1. (bis) Also clipping final dist final_dist_clipped = torch.clip(final_dist, min=None, max=ACCEPTABLE_THRESHOLD) final_dist_clipped = torch.mean(final_dist_clipped) - # 2. Connectivity scores + # 2. Connectivity scores, if available (else None) connectivity_score = self._compare_connectivity(lines, ids_per_subj) - # 3. Verify "IS ratio", i.e. percentage of streamlines ending - # inside a predefined radius. + # 3. "IS ratio", i.e. percentage of streamlines ending inside a + # predefined radius. invalid_ratio_severe = torch.sum( final_dist > VERY_CLOSE_THRESHOLD) / len(lines) * 100 invalid_ratio_acceptable = torch.sum( @@ -237,12 +296,9 @@ def generate_from_one_batch(self, data, compute_all_scores=False): final_dist = torch.mean(final_dist) # 4. Verify point where streamline starts diverging. - # 0% = error at first point --> really bad. - # 100% = reached exactly the right point. 
- # >100% = went too far (longer than expected). - # We want a decreasing value towards 0. - # abs(100 - score): 0 = good. 100 = bad. - # Using 100 - x, so the score is diminishing, from 100 = perfect. + # abs(100 - score): 0 = good. 100 = bad (either abs(100) -> diverged + # at first point or abs(-100) = diverged after twice the expected + # length. total_point = 0 for line, real_line in zip(lines, real_lines): expected_nb = len(real_line) @@ -251,8 +307,8 @@ def generate_from_one_batch(self, data, compute_all_scores=False): diff_nb = len(real_line) - len(line) line = torch.vstack((line, line[-1, :].repeat(diff_nb, 1))) elif len(line) > expected_nb: - real_line = torch.vstack((real_line, - real_line[-1, :].repeat(diff_nb, 1))) + real_line = torch.vstack( + (real_line, real_line[-1, :].repeat(diff_nb, 1))) dist = l2_loss(line, real_line).cpu().numpy() point, = np.where(dist > ACCEPTABLE_THRESHOLD) if len(point) > 0: # (else: score = 0. Never out of range). @@ -272,34 +328,44 @@ def generate_from_one_batch(self, data, compute_all_scores=False): connectivity_score) def _compare_connectivity(self, lines, ids_per_subj): - connectivity_matrices, volume_sizes, downsampled_sizes = \ - self.batch_loader.load_batch_connectivity_matrices(ids_per_subj) - - score = 0.0 - for i, subj in enumerate(ids_per_subj.keys()): - real_matrix = connectivity_matrices[i] - volume_size = volume_sizes[i] - downsampled_size = downsampled_sizes[i] - _lines = lines[ids_per_subj[subj]] - - batch_matrix = compute_triu_connectivity( - _lines, volume_size, downsampled_size, - binary=False, to_sparse_tensor=False, device=self.device) - - # Where our batch has a 1, if there was really a one: score should - # be 0. Else, score should be 1. - # If two streamlines in a voxel, score is 0 or 2. - - # Real matrices are saved as binary in create_hdf5. - where_one = np.where(batch_matrix > 0) - score += np.sum(batch_matrix[where_one] * - (1.0 - real_matrix[where_one])) - - # Average for batch - score = score / len(lines) + """ + If available, computes connectivity matrices for the batch and + compares with expected values for the subject. + """ + if self.compute_connectivity: + connectivity_matrices, volume_sizes, downsampled_sizes = \ + self.batch_loader.load_batch_connectivity_matrices(ids_per_subj) + + score = 0.0 + for i, subj in enumerate(ids_per_subj.keys()): + real_matrix = connectivity_matrices[i] + volume_size = volume_sizes[i] + downsampled_size = downsampled_sizes[i] + _lines = lines[ids_per_subj[subj]] + + batch_matrix = compute_triu_connectivity( + _lines, volume_size, downsampled_size, + binary=False, to_sparse_tensor=False, device=self.device) + + # Where our batch has a 1, if there was really a one: score should + # be 0. Else, score should be 1. + # If two streamlines in a voxel, score is 0 or 2. + + # Real matrices are saved as binary in create_hdf5. + where_one = np.where(batch_matrix > 0) + score += np.sum(batch_matrix[where_one] * + (1.0 - real_matrix[where_one])) + + # Average for batch + score = score / len(lines) + else: + score = None return score def propagate_multiple_lines(self, lines: List[torch.Tensor], ids_per_subj): + """ + Tractography propagation of 'lines'. + """ assert self.model.step_size is not None, \ "We can't propagate compressed streamlines." 
From 5d3de5b3efab20c248311e550206c85139b14e9f Mon Sep 17 00:00:00 2001 From: EmmaRenauld Date: Mon, 12 Jun 2023 13:44:20 -0400 Subject: [PATCH 08/13] Verify that connectivity matrices are there --- dwi_ml/data/dataset/checks_for_groups.py | 21 +++++++++++++----- .../data/dataset/multi_subject_containers.py | 22 +++++++++++++------ .../data/dataset/single_subject_containers.py | 4 ++-- .../training/with_generation/batch_loader.py | 8 +++++-- dwi_ml/training/with_generation/trainer.py | 3 +-- 5 files changed, 39 insertions(+), 19 deletions(-) diff --git a/dwi_ml/data/dataset/checks_for_groups.py b/dwi_ml/data/dataset/checks_for_groups.py index 20adc1f8..658d5861 100644 --- a/dwi_ml/data/dataset/checks_for_groups.py +++ b/dwi_ml/data/dataset/checks_for_groups.py @@ -2,6 +2,8 @@ import logging from typing import Tuple +import numpy as np + def _find_groups_info_for_subj(hdf_file, subj_id: str): """ @@ -25,10 +27,14 @@ def _find_groups_info_for_subj(hdf_file, subj_id: str): volume's last dimension). streamline_groups: List[str] The list of streamline groups for this subject. + contains_connectivity: np.ndarray + A list of boolean for each streamline_group stating if it contains the + pre-computed connectivity matrices for that subject. """ volume_groups = [] nb_features = [] streamline_groups = [] + contains_connectivity = [] hdf_groups = hdf_file[subj_id] for hdf_group in hdf_groups: @@ -39,6 +45,8 @@ def _find_groups_info_for_subj(hdf_file, subj_id: str): hdf_file[subj_id][hdf_group].attrs['nb_features']) elif group_type == 'streamlines': streamline_groups.append(hdf_group) + found_matrix = 'connectivity_matrix' in hdf_file[subj_id][hdf_group] + contains_connectivity.append(found_matrix) else: raise NotImplementedError( "So far, you can only add 'volume' or 'streamline' groups in " @@ -46,7 +54,8 @@ def _find_groups_info_for_subj(hdf_file, subj_id: str): "example. Your hdf5 contained group of type {} for subj {}" .format(group_type, subj_id)) - return volume_groups, nb_features, streamline_groups + contains_connectivity = np.asarray(contains_connectivity, dtype=bool) + return volume_groups, nb_features, streamline_groups, contains_connectivity def _compare_groups_info(volume_groups, nb_features, streamline_groups, @@ -73,7 +82,7 @@ def _compare_groups_info(volume_groups, nb_features, streamline_groups, .format(s, streamline_groups)) -def prepare_groups_info(subject_id: str, hdf_file, group_info=None): +def prepare_groups_info(subject_id: str, hdf_file, ref_group_info=None): """ Read the hdf5 file for this subject and get the groups information (volume and streamlines groups names, number of features for volumes). @@ -81,11 +90,11 @@ def prepare_groups_info(subject_id: str, hdf_file, group_info=None): If group_info is given, compare subject's information with database expected information. 
""" - volume_groups, nb_features, streamline_groups = \ + volume_groups, nb_features, streamline_groups, contains_connectivity = \ _find_groups_info_for_subj(hdf_file, subject_id) - if group_info is not None: + if ref_group_info is not None: _compare_groups_info(volume_groups, nb_features, streamline_groups, - group_info) + ref_group_info) - return volume_groups, nb_features, streamline_groups + return volume_groups, nb_features, streamline_groups, contains_connectivity diff --git a/dwi_ml/data/dataset/multi_subject_containers.py b/dwi_ml/data/dataset/multi_subject_containers.py index fe4c72d6..1922f24c 100644 --- a/dwi_ml/data/dataset/multi_subject_containers.py +++ b/dwi_ml/data/dataset/multi_subject_containers.py @@ -46,6 +46,7 @@ def __init__(self, set_name: str, hdf5_file: str, lazy: bool, self.volume_groups = [] # type: List[str] self.nb_features = [] # type: List[int] self.streamline_groups = [] # type: List[str] + self.contains_connectivity = [] # type: np.ndarray # The subjects data list will be either a SubjectsDataList or a # LazySubjectsDataList depending on MultisubjectDataset.is_lazy. @@ -90,10 +91,11 @@ def close_all_handles(self): s.hdf_handle = None def set_subset_info(self, volume_groups, nb_features, streamline_groups, - step_size, compress): + contains_connectivity, step_size, compress): self.volume_groups = volume_groups - self.streamline_groups = streamline_groups self.nb_features = nb_features + self.streamline_groups = streamline_groups + self.contains_connectivity = contains_connectivity self.step_size = step_size self.compress = compress @@ -372,6 +374,7 @@ def __init__(self, hdf5_file: str, lazy: bool, self.volume_groups = [] # type: List[str] self.nb_features = [] # type: List[int] self.streamline_groups = [] # type: List[str] + self.streamlines_contain_connectivity = [] self.is_lazy = lazy self.subset_cache_size = cache_size @@ -445,9 +448,9 @@ def load_data(self, load_training=True, load_validation=True, # Loading the first training subject's group information. # Others should fit. one_subj = hdf_handle.attrs['training_subjs'][0] - group_info = \ - prepare_groups_info(one_subj, hdf_handle, group_info=None) - (poss_volume_groups, nb_features, poss_strea_groups) = group_info + (poss_volume_groups, nb_features, poss_strea_groups, + contains_connectivity) = prepare_groups_info( + one_subj, hdf_handle, ref_group_info=None) logger.info(" Possible volume groups are: {}" .format(poss_volume_groups)) logger.info(" Number of features in each of these groups: " @@ -479,14 +482,19 @@ def load_data(self, load_training=True, load_validation=True, raise ValueError("Streamlines {} were not found in the " "first subject of your hdf5 file." 
.format(missing_str)) - self.streamline_groups = np.intersect1d(streamline_groups, - poss_strea_groups) + self.streamline_groups, _, ind = np.intersect1d( + streamline_groups, poss_strea_groups, return_indices=True) logger.info("Chosen streamline groups are: {}" .format(self.streamline_groups)) + self.streamlines_contain_connectivity = contains_connectivity[ind] else: logger.info("Using all streamline groups.") self.streamline_groups = poss_strea_groups + self.streamlines_contain_connectivity = contains_connectivity + group_info = (self.volume_groups, self.nb_features, + self.streamline_groups, + self.streamlines_contain_connectivity) self.training_set.set_subset_info(*group_info, step_size, compress) self.validation_set.set_subset_info(*group_info, step_size, compress) self.testing_set.set_subset_info(*group_info, step_size, compress) diff --git a/dwi_ml/data/dataset/single_subject_containers.py b/dwi_ml/data/dataset/single_subject_containers.py index 8aafb65b..8d057c99 100644 --- a/dwi_ml/data/dataset/single_subject_containers.py +++ b/dwi_ml/data/dataset/single_subject_containers.py @@ -160,8 +160,8 @@ def init_single_subject_from_hdf( Tuple containing (volume_groups, nb_features, streamline_groups) for this subject. """ - volume_groups, nb_features, streamline_groups = prepare_groups_info( - subject_id, hdf_file, group_info) + volume_groups, nb_features, streamline_groups, _ = \ + prepare_groups_info(subject_id, hdf_file, group_info) logger.debug(' Lazy: not loading data.') diff --git a/dwi_ml/training/with_generation/batch_loader.py b/dwi_ml/training/with_generation/batch_loader.py index a56a0a8c..b1e69b5a 100644 --- a/dwi_ml/training/with_generation/batch_loader.py +++ b/dwi_ml/training/with_generation/batch_loader.py @@ -8,11 +8,15 @@ class DWIMLBatchLoaderWithConnectivity(DWIMLBatchLoaderOneInput): def __init__(self, **kwargs): - assert "hdf5 contains connectivity" super().__init__(**kwargs) + self.data_contains_connectivity = \ + self.dataset.streamlines_contain_connectivity[self.streamline_group_idx] def load_batch_connectivity_matrices( self, streamline_ids_per_subj: Dict[int, slice]): + if not self.data_contains_connectivity: + raise ValueError("No connectivity matrix in this dataset.") + # The batch's streamline ids will change throughout processing because # of data augmentation, so we need to do it subject by subject to # keep track of the streamline ids. These final ids will correspond to @@ -30,7 +34,7 @@ def load_batch_connectivity_matrices( self.context_subset.subjs_data_list.get_subj_with_handle(subj) subj_sft_data = subj_data.sft_data_list[self.streamline_group_idx] - # We could access it only at required index maybe. Loading the + # We could access it only at required index, maybe. Loading the # whole matrix here. matrices[i], volume_sizes[i], downsampled_sizes[i] = \ subj_sft_data.connectivity_matrix_and_info() diff --git a/dwi_ml/training/with_generation/trainer.py b/dwi_ml/training/with_generation/trainer.py index e7af4489..90ae2ffa 100644 --- a/dwi_ml/training/with_generation/trainer.py +++ b/dwi_ml/training/with_generation/trainer.py @@ -119,8 +119,7 @@ def __init__(self, add_a_tracking_validation_phase: bool = False, mask_interp='nearest') self.tracking_mask.move_to(self.device) - # todo verify if available in hdf5 - self.compute_connectivity = True + self.compute_connectivity = self.batch_loader.data_contains_connectivity # -------- Monitors # At training time: only the one metric used for training. 
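
As a rough illustration of the detection added in this patch (the 'connectivity_matrix' in hdf_file[subj_id][hdf_group] membership test), the sketch below writes a toy hdf5 file and reads the flag back. It is only a sketch: the file name, subject id and group name are made up, and a real hdf5 created by dwi_ml contains more attributes and datasets than shown here.

import h5py
import numpy as np

# Build a toy hdf5 with one subject and one streamline group carrying a
# pre-computed connectivity matrix.
with h5py.File('toy.hdf5', 'w') as f:
    subj = f.create_group('subj1')
    group = subj.create_group('streamlines')
    group.attrs['type'] = 'streamlines'
    group.create_dataset('connectivity_matrix', data=np.zeros((10, 10)))

# Detection: the same membership test as in _find_groups_info_for_subj.
with h5py.File('toy.hdf5', 'r') as f:
    contains_connectivity = 'connectivity_matrix' in f['subj1']['streamlines']
    print(contains_connectivity)  # True: the connectivity score can be used.

When the dataset is absent the flag is False, and the batch loader's load_batch_connectivity_matrices now raises a ValueError instead of relying on the earlier placeholder assert.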
From db2494a6e872f45740a182342e28ecaac99b7706 Mon Sep 17 00:00:00 2001 From: EmmaRenauld Date: Mon, 12 Jun 2023 14:00:43 -0400 Subject: [PATCH 09/13] Load the tracking mask correctly --- .../training/projects/learn2track_trainer.py | 30 ++++++++--- .../training/projects/transformer_trainer.py | 30 ++++++++--- dwi_ml/training/with_generation/trainer.py | 52 +++++++++---------- 3 files changed, 73 insertions(+), 39 deletions(-) diff --git a/dwi_ml/training/projects/learn2track_trainer.py b/dwi_ml/training/projects/learn2track_trainer.py index 5267b3ce..37dcc5e3 100644 --- a/dwi_ml/training/projects/learn2track_trainer.py +++ b/dwi_ml/training/projects/learn2track_trainer.py @@ -2,11 +2,13 @@ import logging from typing import List +import h5py import numpy as np import torch from dwi_ml.models.projects.learn2track_model import Learn2TrackModel from dwi_ml.tracking.propagation import propagate_multiple_lines +from dwi_ml.tracking.utils import prepare_tracking_mask from dwi_ml.training.with_generation.trainer import \ DWIMLTrainerForTrackingOneInput @@ -92,10 +94,24 @@ def get_dirs_at_last_pos(_lines: List[torch.Tensor], n_last_pos): theta = 2 * np.pi # theta = 360 degrees max_nbr_pts = int(200 / self.model.step_size) - results = propagate_multiple_lines( - lines, update_memory_after_removing_lines, get_dirs_at_last_pos, - theta=theta, step_size=self.model.step_size, - verify_opposite_direction=False, mask=self.tracking_mask, - max_nbr_pts=max_nbr_pts, append_last_point=False, - normalize_directions=True) - return results + + final_lines = [] + for subj_idx, line_idx in ids_per_subj.items(): + + with h5py.File(self.batch_loader.dataset.hdf5_file, 'r') as hdf_handle: + subj_id = self.batch_loader.context_subset.subjects[subj_idx] + logging.debug("Loading subj {} ({})'s tracking mask." 
+ .format(subj_idx, subj_id)) + tracking_mask, _ = prepare_tracking_mask( + hdf_handle, self.tracking_mask_group, subj_id=subj_id, + mask_interp='nearest') + tracking_mask.move_to(self.device) + + final_lines.extend(propagate_multiple_lines( + lines[line_idx], update_memory_after_removing_lines, + get_dirs_at_last_pos, theta=theta, + step_size=self.model.step_size, verify_opposite_direction=False, + mask=tracking_mask, max_nbr_pts=max_nbr_pts, + append_last_point=False, normalize_directions=True)) + + return final_lines diff --git a/dwi_ml/training/projects/transformer_trainer.py b/dwi_ml/training/projects/transformer_trainer.py index 88e34a96..12e7bd14 100644 --- a/dwi_ml/training/projects/transformer_trainer.py +++ b/dwi_ml/training/projects/transformer_trainer.py @@ -1,9 +1,12 @@ # -*- coding: utf-8 -*- +import logging from typing import List +import h5py import numpy as np import torch from dwi_ml.tracking.propagation import propagate_multiple_lines +from dwi_ml.tracking.utils import prepare_tracking_mask from dwi_ml.training.with_generation.trainer import \ DWIMLTrainerForTrackingOneInput @@ -45,9 +48,24 @@ def get_dirs_at_last_pos(_lines: List[torch.Tensor], n_last_pos): theta = 2 * np.pi # theta = 360 degrees max_nbr_pts = int(200 / self.model.step_size) - return propagate_multiple_lines( - lines, update_memory_after_removing_lines, get_dirs_at_last_pos, - theta=theta, step_size=self.model.step_size, - verify_opposite_direction=False, mask=self.tracking_mask, - max_nbr_pts=max_nbr_pts, append_last_point=False, - normalize_directions=True) + + final_lines = [] + for subj_idx, line_idx in ids_per_subj.items(): + + with h5py.File(self.batch_loader.dataset.hdf5_file, 'r') as hdf_handle: + subj_id = self.batch_loader.context_subset.subjects[subj_idx] + logging.debug("Loading subj {} ({})'s tracking mask." + .format(subj_idx, subj_id)) + tracking_mask, _ = prepare_tracking_mask( + hdf_handle, self.tracking_mask_group, subj_id=subj_id, + mask_interp='nearest') + tracking_mask.move_to(self.device) + + final_lines.extend(propagate_multiple_lines( + lines[line_idx], update_memory_after_removing_lines, + get_dirs_at_last_pos, theta=theta, + step_size=self.model.step_size, verify_opposite_direction=False, + mask=tracking_mask, max_nbr_pts=max_nbr_pts, + append_last_point=False, normalize_directions=True)) + + return final_lines diff --git a/dwi_ml/training/with_generation/trainer.py b/dwi_ml/training/with_generation/trainer.py index 90ae2ffa..7f3d5e80 100644 --- a/dwi_ml/training/with_generation/trainer.py +++ b/dwi_ml/training/with_generation/trainer.py @@ -100,26 +100,7 @@ def __init__(self, add_a_tracking_validation_phase: bool = False, self.tracking_phase_nb_steps_init = tracking_phase_nb_steps_init self.tracking_mask_group = tracking_phase_mask_group - self.tracking_mask = None - if add_a_tracking_validation_phase: - # Right now, using any subject's, and supposing that they are all - # in the same space. Else, code would need refactoring to allow - # tracking on multiple subjects. Or we can loop on each subject. - logging.warning("***************\n" - "CODE NEEDS REFACTORING. 
USING THE SAME TRACKING " - "MASK FOR ALL SUBJECTS.\n" - "***************\n") - any_subj = self.batch_loader.dataset.training_set.subjects[0] - if tracking_phase_mask_group is not None: - with h5py.File(self.batch_loader.dataset.hdf5_file, 'r') \ - as hdf_handle: - logging.info("Loading tracking mask.") - self.tracking_mask, _ = prepare_tracking_mask( - hdf_handle, tracking_phase_mask_group, subj_id=any_subj, - mask_interp='nearest') - self.tracking_mask.move_to(self.device) - - self.compute_connectivity = self.batch_loader.data_contains_connectivity + self.compute_connectivity = self.batch_loader.data_contains_connectivity # -------- Monitors # At training time: only the one metric used for training. @@ -387,9 +368,28 @@ def get_dirs_at_last_pos(_lines: List[torch.Tensor], n_last_pos): theta = 2 * np.pi # theta = 360 degrees max_nbr_pts = int(200 / self.model.step_size) - return propagate_multiple_lines( - lines, update_memory_after_removing_lines, get_dirs_at_last_pos, - theta=theta, step_size=self.model.step_size, - verify_opposite_direction=False, mask=self.tracking_mask, - max_nbr_pts=max_nbr_pts, append_last_point=False, - normalize_directions=True) + + # Looping on subjects because current implementation requires a single + # tracking mask. But all the rest (get_dirs_at_last_pos, particularly) + # work on multiple subjects because the batch loader loads input + # according to subject id. Could refactor "propagate_multiple_line" to + # accept multiple masks or manage it differently. + final_lines = [] + for subj_idx, line_idx in ids_per_subj.items(): + with h5py.File(self.batch_loader.dataset.hdf5_file, 'r') as hdf_handle: + subj_id = self.batch_loader.context_subset.subjects[subj_idx] + logging.debug("Loading subj {} ({})'s tracking mask." + .format(subj_idx, subj_id)) + tracking_mask, _ = prepare_tracking_mask( + hdf_handle, self.tracking_mask_group, subj_id=subj_id, + mask_interp='nearest') + tracking_mask.move_to(self.device) + + final_lines.extend(propagate_multiple_lines( + lines[line_idx], update_memory_after_removing_lines, + get_dirs_at_last_pos, theta=theta, + step_size=self.model.step_size, verify_opposite_direction=False, + mask=tracking_mask, max_nbr_pts=max_nbr_pts, + append_last_point=False, normalize_directions=True)) + + return final_lines From ae3593a43075e4d8daf0da99af83abb39ee43314 Mon Sep 17 00:00:00 2001 From: EmmaRenauld Date: Tue, 27 Jun 2023 11:40:34 -0400 Subject: [PATCH 10/13] Remove debugging lines weight with angle --- dwi_ml/data/processing/streamlines/post_processing.py | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/dwi_ml/data/processing/streamlines/post_processing.py b/dwi_ml/data/processing/streamlines/post_processing.py index 40851788..342c88fe 100644 --- a/dwi_ml/data/processing/streamlines/post_processing.py +++ b/dwi_ml/data/processing/streamlines/post_processing.py @@ -254,14 +254,7 @@ def weight_value_with_angle(values: List, streamlines: List = None, # Mult choice: # We don't want to multiply by 0. Multiplying by angles + 1. - # values[i] = values[i] * (angles + 1.0) - values[i] = values[i] * (angles + 1.0)**2 - - # Pow choice: - # loss^0 = 1. loss^1 = loss. Also adding 1. - # But if values are < 1, pow becomes smaller. - # Our losses tend toward 0. Adding 1 before. 
- # values[i] = torch.pow(1.0 + values[i], angles + 1.0) - 1.0 + values[i] = values[i] * (angles + 1.0) return values From 5bb597b4ece783f7b1a6b9f8e000d5ba9c6e4537 Mon Sep 17 00:00:00 2001 From: EmmaRenauld Date: Tue, 27 Jun 2023 14:54:26 -0400 Subject: [PATCH 11/13] Fix group verification when skipping some groups --- dwi_ml/data/dataset/checks_for_groups.py | 42 ++++++++++------- .../data/dataset/multi_subject_containers.py | 45 ++++++++++--------- .../data/dataset/single_subject_containers.py | 4 +- dwi_ml/models/main_models.py | 5 ++- dwi_ml/models/projects/learn2track_model.py | 7 +++ 5 files changed, 60 insertions(+), 43 deletions(-) diff --git a/dwi_ml/data/dataset/checks_for_groups.py b/dwi_ml/data/dataset/checks_for_groups.py index 658d5861..d63fa430 100644 --- a/dwi_ml/data/dataset/checks_for_groups.py +++ b/dwi_ml/data/dataset/checks_for_groups.py @@ -58,28 +58,31 @@ def _find_groups_info_for_subj(hdf_file, subj_id: str): return volume_groups, nb_features, streamline_groups, contains_connectivity -def _compare_groups_info(volume_groups, nb_features, streamline_groups, - group_info: Tuple): +def _compare_groups_info(subject_group_info, ref_group_info: Tuple): """ - Compares the three lists (volume_groups, nb_features, streamline_groups) of - one subject to the expected list for this database, included in group_info. + Compares the two tuple (volume_groups, nb_features, streamline_groups, + contains_connectivity) between one subject to the expected list for this + database, included in group_info. """ - v, f, s = group_info - if volume_groups != v: + sv, sf, ss, sc = subject_group_info + rv, rf, rs, rc = ref_group_info + if not set(rv).issubset(set(sv)): logging.warning("Subject's hdf5 groups with attributes 'type' set as " "'volume' are not the same as expected with this " "dataset! Expected: {}. Found: {}" - .format(v, volume_groups)) - if nb_features != f: + .format(rv, sv)) + + if not set(rf).issubset(set(sf)): # not a good verification but ok for now. logging.warning("Among subject's hdf5 groups with attributes 'type' " "set as 'volume', some data to not have the same " "number of features as expected for this dataset! " - "Expected: {}. Found: {}".format(f, nb_features)) - if streamline_groups != s: + "Expected: {}. Found: {}".format(rf, sf)) + + if not set(rs).issubset(set(ss)): logging.warning("Subject's hdf5 groups with attributes 'type' set as " "'streamlines' are not the same as expected with this " "dataset! Expected: {}. Found: {}" - .format(s, streamline_groups)) + .format(rs, ss)) def prepare_groups_info(subject_id: str, hdf_file, ref_group_info=None): @@ -88,13 +91,18 @@ def prepare_groups_info(subject_id: str, hdf_file, ref_group_info=None): (volume and streamlines groups names, number of features for volumes). If group_info is given, compare subject's information with database - expected information. + expected information. If subject has more information than the reference, + (ex, non-useful volume groups), they will be ignored. 
+ + Returns + ------- + subject_group_info = (volume_groups, nb_features, + streamline_groups, contains_connectivity) """ - volume_groups, nb_features, streamline_groups, contains_connectivity = \ - _find_groups_info_for_subj(hdf_file, subject_id) + subject_group_info = _find_groups_info_for_subj(hdf_file, subject_id) if ref_group_info is not None: - _compare_groups_info(volume_groups, nb_features, streamline_groups, - ref_group_info) + _compare_groups_info(subject_group_info, ref_group_info) + return ref_group_info - return volume_groups, nb_features, streamline_groups, contains_connectivity + return subject_group_info diff --git a/dwi_ml/data/dataset/multi_subject_containers.py b/dwi_ml/data/dataset/multi_subject_containers.py index 1922f24c..ccd236d3 100644 --- a/dwi_ml/data/dataset/multi_subject_containers.py +++ b/dwi_ml/data/dataset/multi_subject_containers.py @@ -226,7 +226,6 @@ def load(self, hdf_handle: h5py.File, subj_id=None): Load all subjects for this subjset (either training, validation or testing). """ - # Checking if there are any subjects to load subject_keys = sorted(hdf_handle.attrs[self.set_name + '_subjs']) if subj_id is not None: @@ -258,6 +257,9 @@ def load(self, hdf_handle: h5py.File, subj_id=None): lengths = [[] for _ in self.streamline_groups] lengths_mm = [[] for _ in self.streamline_groups] + ref_group_info = (self.volume_groups, self.nb_features, + self.streamline_groups, self.contains_connectivity) + # Using tqdm progress bar, load all subjects from hdf_file with logging_redirect_tqdm(loggers=[logging.root], tqdm_class=tqdm): for subj_id in tqdm(subject_keys, ncols=100, total=self.nb_subjects): @@ -266,8 +268,7 @@ def load(self, hdf_handle: h5py.File, subj_id=None): # calling this method. logger.debug(" Creating subject '{}'.".format(subj_id)) subj_data = self._init_subj_from_hdf( - hdf_handle, subj_id, self.volume_groups, self.nb_features, - self.streamline_groups) + hdf_handle, subj_id, ref_group_info) # Add subject to the list logger.debug(" Adding it to the list of subjects.") @@ -325,16 +326,13 @@ def _build_empty_data_list(self): else: return SubjectsDataList(self.hdf5_file, logger) - def _init_subj_from_hdf(self, hdf_handle, subject_id, volume_groups, - nb_features, streamline_groups): + def _init_subj_from_hdf(self, hdf_handle, subject_id, ref_group_info): if self.is_lazy: return LazySubjectData.init_single_subject_from_hdf( - subject_id, hdf_handle, - (volume_groups, nb_features, streamline_groups)) + subject_id, hdf_handle, ref_group_info) else: return SubjectData.init_single_subject_from_hdf( - subject_id, hdf_handle, - (volume_groups, nb_features, streamline_groups)) + subject_id, hdf_handle, ref_group_info) class MultiSubjectDataset: @@ -350,7 +348,7 @@ class MultiSubjectDataset: 'streamlines/lengths', 'streamlines/euclidean_lengths'. 
""" def __init__(self, hdf5_file: str, lazy: bool, - cache_size: int = 0, log_level=logging.root.level): + cache_size: int = 0, log_level=None): """ Params ------ @@ -369,7 +367,8 @@ def __init__(self, hdf5_file: str, lazy: bool, # Dataset info self.hdf5_file = hdf5_file - logger.setLevel(log_level) + if log_level is not None: + logger.setLevel(log_level) self.volume_groups = [] # type: List[str] self.nb_features = [] # type: List[int] @@ -451,12 +450,14 @@ def load_data(self, load_training=True, load_validation=True, (poss_volume_groups, nb_features, poss_strea_groups, contains_connectivity) = prepare_groups_info( one_subj, hdf_handle, ref_group_info=None) - logger.info(" Possible volume groups are: {}" - .format(poss_volume_groups)) - logger.info(" Number of features in each of these groups: " - "{}".format(nb_features)) - logger.info(" Possible streamline groups are: {}" - .format(poss_strea_groups)) + logger.debug("Possible volume groups are: {}" + .format(poss_volume_groups)) + logger.debug("Number of features in each of these groups: {}" + .format(nb_features)) + logger.debug("Possible streamline groups are: {}" + .format(poss_strea_groups)) + logger.debug("Streamline groups containing a connectivity matrix: " + "{}".format(contains_connectivity)) # Verifying groups of interest if volume_groups is not None: @@ -467,12 +468,12 @@ def load_data(self, load_training=True, load_validation=True, .format(missing_vol)) vol, indv, indposs = np.intersect1d( volume_groups, poss_volume_groups, return_indices=True) - self.volume_groups = vol + self.volume_groups = list(vol) self.nb_features = [nb_features[i] for i in indposs] - logger.info("Chosen volume groups are: {}" + logger.info("--> Chosen volume groups are: {}" .format(self.volume_groups)) else: - logger.info("Using all volume groups.") + logger.info("--> Using all volume groups.") self.volume_groups = poss_volume_groups self.nb_features = nb_features @@ -484,11 +485,11 @@ def load_data(self, load_training=True, load_validation=True, .format(missing_str)) self.streamline_groups, _, ind = np.intersect1d( streamline_groups, poss_strea_groups, return_indices=True) - logger.info("Chosen streamline groups are: {}" + logger.info("--> Chosen streamline groups are: {}" .format(self.streamline_groups)) self.streamlines_contain_connectivity = contains_connectivity[ind] else: - logger.info("Using all streamline groups.") + logger.info("--> Using all streamline groups.") self.streamline_groups = poss_strea_groups self.streamlines_contain_connectivity = contains_connectivity diff --git a/dwi_ml/data/dataset/single_subject_containers.py b/dwi_ml/data/dataset/single_subject_containers.py index 8d057c99..b4d17096 100644 --- a/dwi_ml/data/dataset/single_subject_containers.py +++ b/dwi_ml/data/dataset/single_subject_containers.py @@ -94,8 +94,8 @@ def init_single_subject_from_hdf( """ Instantiating a single subject data: load info and use __init__ """ - volume_groups, nb_features, streamline_groups = prepare_groups_info( - subject_id, hdf_file, group_info) + (volume_groups, nb_features, streamline_groups, _) = \ + prepare_groups_info(subject_id, hdf_file, group_info) subject_mri_data_list = [] subject_sft_data_list = [] diff --git a/dwi_ml/models/main_models.py b/dwi_ml/models/main_models.py index 74a8f928..0d6e6f7b 100644 --- a/dwi_ml/models/main_models.py +++ b/dwi_ml/models/main_models.py @@ -10,6 +10,7 @@ import torch from torch import Tensor +from dwi_ml.data.dataset.multi_subject_containers import MultisubjectSubset from 
dwi_ml.data.processing.volume.interpolation import \ interpolate_volume_in_neighborhood from dwi_ml.data.processing.space.neighborhood import prepare_neighborhood_vectors @@ -377,8 +378,8 @@ def forward(self, inputs, target_streamlines: List[torch.tensor], **kw): class MainModelOneInput(MainModelAbstract): - def prepare_batch_one_input(self, streamlines, subset, subj, - input_group_idx, prepare_mask=False): + def prepare_batch_one_input(self, streamlines, subset: MultisubjectSubset, + subj, input_group_idx, prepare_mask=False): """ These params are passed by either the batch loader or the propagator, which manage the data. diff --git a/dwi_ml/models/projects/learn2track_model.py b/dwi_ml/models/projects/learn2track_model.py index 74391293..1062a366 100644 --- a/dwi_ml/models/projects/learn2track_model.py +++ b/dwi_ml/models/projects/learn2track_model.py @@ -234,6 +234,13 @@ def forward(self, inputs: List[torch.tensor], if self._context is None: raise ValueError("Please set context before usage.") + # Verifying the first input + assert inputs[0].shape[-1] == self.input_size, \ + "Not the expected input size! Should be {} (i.e. {} features for " \ + "each {} neighbor), but got {} (input shape {})." \ + .format(self.input_size, self.nb_features, self.nb_neighbors + 1, + inputs[0].shape[-1], inputs[0].shape) + # Making sure we can use default 'enforce_sorted=True' with packed # sequences. unsorted_indices = None From ebfee3b7e46576546195dfb4ff085ed497a51e49 Mon Sep 17 00:00:00 2001 From: EmmaRenauld Date: Tue, 27 Jun 2023 16:11:43 -0400 Subject: [PATCH 12/13] Visualizing connectivity matrix's blocs --- dwi_ml/data/hdf5/hdf5_creation.py | 3 +- .../processing/streamlines/post_processing.py | 11 ++-- scripts_python/dwiml_create_hdf5_dataset.py | 8 ++- .../dwiml_divide_volume_into_blocs.py | 61 +++++++++++++++++++ 4 files changed, 73 insertions(+), 10 deletions(-) create mode 100644 scripts_python/dwiml_divide_volume_into_blocs.py diff --git a/dwi_ml/data/hdf5/hdf5_creation.py b/dwi_ml/data/hdf5/hdf5_creation.py index 3ed62346..728bada9 100644 --- a/dwi_ml/data/hdf5/hdf5_creation.py +++ b/dwi_ml/data/hdf5/hdf5_creation.py @@ -163,7 +163,8 @@ def __init__(self, root_folder: Path, out_hdf_filename: Path, if isinstance(downsampled_size_for_connectivity, List): assert len(downsampled_size_for_connectivity) == 3, \ "Expecting to work with 3D volumes. Expecting " \ - "connectivity downsample size to be a list of 3 values." + "connectivity downsample size to be a list of 3 values, " \ + "but got {}.".format(downsampled_size_for_connectivity) self.connectivity_downsample_size = downsampled_size_for_connectivity else: assert isinstance(downsampled_size_for_connectivity, int), \ diff --git a/dwi_ml/data/processing/streamlines/post_processing.py b/dwi_ml/data/processing/streamlines/post_processing.py index 342c88fe..c31f6e14 100644 --- a/dwi_ml/data/processing/streamlines/post_processing.py +++ b/dwi_ml/data/processing/streamlines/post_processing.py @@ -272,12 +272,11 @@ def compute_triu_connectivity( volume_size: list The 3D dimension of the reference volume. downsampled_volume_size: - Either a 3D size or the size m of the m x m x m downsampled volume - coordinates for the connectivity matrix. This means that the matrix - will be a m^d x m^d triangular matrix. In 3D, with 20x20x20, this is an - 8000 x 8000 matrix (triangular = half of it in memory). It probably - contains a lot of zeros with the background being included. Saved as - sparse. 
+ The m1 x m2 x m3 = mm downsampled volume size for the connectivity matrix. + This means that the matrix will be a mm x mm triangular matrix. + In 3D, with 20x20x20, this is an 8000 x 8000 matrix (triangular). It + probably contains a lot of zeros with the background being included. + Can be saved as sparse. binary: bool If true, return a binary matrix. to_sparse_tensor: diff --git a/scripts_python/dwiml_create_hdf5_dataset.py b/scripts_python/dwiml_create_hdf5_dataset.py index a0445178..1902548f 100644 --- a/scripts_python/dwiml_create_hdf5_dataset.py +++ b/scripts_python/dwiml_create_hdf5_dataset.py @@ -75,9 +75,11 @@ def main(): assert_outputs_exist(p, args, args.out_hdf5_file) # Default value with arparser '+' not possible. Setting manually. - if args.compute_connectivity_matrix and \ - args.connectivity_downsample_size is None: - args.connectivity_downsample_size = 20 + if args.compute_connectivity_matrix: + if args.connectivity_downsample_size is None: + args.connectivity_downsample_size = 20 + elif len(args.connectivity_downsample_size) == 1: + args.connectivity_downsample_size = args.connectivity_downsample_size[0] # Prepare creator and load config file. creator = prepare_hdf5_creator(args) diff --git a/scripts_python/dwiml_divide_volume_into_blocs.py b/scripts_python/dwiml_divide_volume_into_blocs.py new file mode 100644 index 00000000..c5b3b472 --- /dev/null +++ b/scripts_python/dwiml_divide_volume_into_blocs.py @@ -0,0 +1,61 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +import argparse + +import nibabel as nib +import numpy as np + +from scilpy.io.utils import assert_inputs_exist, assert_outputs_exist, \ + add_overwrite_arg + + +def _build_arg_parser(): + p = argparse.ArgumentParser( + description=__doc__, formatter_class=argparse.RawTextHelpFormatter) + p.add_argument('in_image', metavar='IN_FILE', + help='Input file name, in nifti format.') + + p.add_argument( + 'out', metavar='OUT_FILE', dest='out_filename', + help='name of the output file, which will be saved as a text file.') + + add_overwrite_arg(p) + + return p + + +def color_mri_connectivity_blocs(downsampled_volume_size, volume_size): + + # For tracking coordinates: we can work with float. + # Here, dividing as ints. + volume_size = np.asarray(volume_size) + downsampled_volume_size = np.asarray(downsampled_volume_size) + sizex, sizey, sizez = (volume_size / downsampled_volume_size).astype(int) + print("Coloring into blocs of size: ", sizex, sizey, sizez) + + final_volume = np.zeros(volume_size) + for i in range(downsampled_volume_size[0]): + for j in range(downsampled_volume_size[1]): + for k in range(downsampled_volume_size[2]): + final_volume[i*sizex: (i+1)*sizex, + j*sizey: (j+1)*sizey, + k*sizez: (k+1)*sizez] = i + 10*j + 100*k + + return final_volume + + +def main(): + parser = _build_arg_parser() + args = parser.parse_args() + + assert_inputs_exist(parser, args.in_image) + assert_outputs_exist(parser, args, required=args.out_filename) + + volume = nib.load(args.in_image) + final_volume = color_mri_connectivity_blocs([6, 6, 6], volume.shape) + img = nib.Nifti1Image(final_volume, volume.affine) + nib.save(img, args.out_filename) + + +if __name__ == '__main__': + main() From f0384f4575273cebb41849a4a26e1a0da8a24e39 Mon Sep 17 00:00:00 2001 From: EmmaRenauld Date: Fri, 30 Jun 2023 11:06:03 -0400 Subject: [PATCH 13/13] Fixes from rebasing. All tests passing. L2T working correctly. 
--- dwi_ml/testing/testers.py | 5 +++-- dwi_ml/training/projects/learn2track_trainer.py | 2 +- dwi_ml/training/projects/transformer_trainer.py | 3 ++- 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/dwi_ml/testing/testers.py b/dwi_ml/testing/testers.py index 47604d07..5d3d37a7 100644 --- a/dwi_ml/testing/testers.py +++ b/dwi_ml/testing/testers.py @@ -81,8 +81,9 @@ def load_and_format_data(self, subj_id, hdf5_file, subset_name): # we don't verify streamline ids (loading all), and we don't split / # reverse streamlines. But we resample / compress. logging.info("Loading its streamlines as SFT.") - streamline_group_idx = self.subset.streamline_groups.index( - self.streamlines_group) + streamline_group_idx, = np.where(self.subset.streamline_groups == + self.streamlines_group) + streamline_group_idx = streamline_group_idx[0] subj_data = self.subset.subjs_data_list.get_subj_with_handle(self.subj_idx) subj_sft_data = subj_data.sft_data_list[streamline_group_idx] sft = subj_sft_data.as_sft() diff --git a/dwi_ml/training/projects/learn2track_trainer.py b/dwi_ml/training/projects/learn2track_trainer.py index 37dcc5e3..2ef066d9 100644 --- a/dwi_ml/training/projects/learn2track_trainer.py +++ b/dwi_ml/training/projects/learn2track_trainer.py @@ -7,8 +7,8 @@ import torch from dwi_ml.models.projects.learn2track_model import Learn2TrackModel +from dwi_ml.tracking.projects.utils import prepare_tracking_mask from dwi_ml.tracking.propagation import propagate_multiple_lines -from dwi_ml.tracking.utils import prepare_tracking_mask from dwi_ml.training.with_generation.trainer import \ DWIMLTrainerForTrackingOneInput diff --git a/dwi_ml/training/projects/transformer_trainer.py b/dwi_ml/training/projects/transformer_trainer.py index 12e7bd14..4bb0918e 100644 --- a/dwi_ml/training/projects/transformer_trainer.py +++ b/dwi_ml/training/projects/transformer_trainer.py @@ -5,8 +5,9 @@ import h5py import numpy as np import torch + +from dwi_ml.tracking.projects.utils import prepare_tracking_mask from dwi_ml.tracking.propagation import propagate_multiple_lines -from dwi_ml.tracking.utils import prepare_tracking_mask from dwi_ml.training.with_generation.trainer import \ DWIMLTrainerForTrackingOneInput
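
A note on the streamline_group_idx change in testers.py above: after the group-verification rework, self.subset.streamline_groups can be a numpy array (e.g. the output of np.intersect1d) rather than a list, and numpy arrays have no .index() method, hence the switch to np.where. A minimal sketch of the same lookup pattern, using illustrative group names rather than anything read from a real hdf5 file:

    import numpy as np

    # Illustrative names; real group names come from the hdf5 file.
    streamline_groups = np.intersect1d(['target_streamlines', 'bundles'],
                                       ['target_streamlines'])

    # np.ndarray has no .index(); np.where returns a tuple of index arrays.
    group_idx, = np.where(streamline_groups == 'target_streamlines')
    group_idx = group_idx[0]   # same unpacking as in load_and_format_data
    assert group_idx == 0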
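
Similarly, the subset-based checks introduced in checks_for_groups.py (patch 11) only warn when a reference group is missing from a subject; extra groups present in the subject's hdf5 file are now tolerated instead of being reported as a mismatch. A small sketch of that logic, with made-up group names:

    ref_volume_groups = ['input']                # groups the experiment needs
    subj_volume_groups = ['input', 'wm_mask']    # subject may carry extras

    # Same test as _compare_groups_info: only complain if something is missing.
    if not set(ref_volume_groups).issubset(set(subj_volume_groups)):
        print('Missing volume group(s):',
              set(ref_volume_groups) - set(subj_volume_groups))
    else:
        print('OK: extra subject groups (here, wm_mask) are simply ignored.')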
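
Finally, the new dwiml_divide_volume_into_blocs.py script (patch 12) labels every voxel with the index of the downsampled bloc it falls into, which makes it easy to inspect the grid used for the connectivity matrices. A reduced numpy example of the same labelling rule, on a hypothetical 6x6x6 volume split into 3x3x3 blocs (the i + 10*j + 100*k encoding only yields unique labels while each downsampled dimension stays below 10, as with the hardcoded [6, 6, 6] in the script):

    import numpy as np

    volume_size = np.array([6, 6, 6])
    down_size = np.array([3, 3, 3])          # number of blocs per axis
    sx, sy, sz = (volume_size // down_size).astype(int)

    labels = np.zeros(volume_size)
    for i in range(down_size[0]):
        for j in range(down_size[1]):
            for k in range(down_size[2]):
                labels[i*sx:(i+1)*sx, j*sy:(j+1)*sy, k*sz:(k+1)*sz] = \
                    i + 10*j + 100*k

    # 27 blocs of 2x2x2 voxels, each with a distinct label.
    assert len(np.unique(labels)) == 27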