Merge pull request #227 from EmmaRenauld/send_log_to_comet

Send log to comet
scil-vital · Mar 12, 2024 · ad2ba48 · ad2ba48
2 parents bf8d2aa + 1a1e8eb
commit ad2ba48
Show file tree

Hide file tree

Showing 14 changed files with 287 additions and 26 deletions.
diff --git a/requirements.txt b/requirements.txt
@@ -5,22 +5,29 @@
 # Main dependency: scilpy
 #    Scilpy and comet_ml both require requests. In comet: >=2.18.*,
 #    which installs a version >2.28. Adding requests version explicitly.
+#
+#    The scilpy version was changed many times. Tried to have the latest
+#    fixed version, but currently, it works on beluga only when installing
+#    manually. But using the master is not good; it changes too fast for us.
+#    Using a fixed commit for now.
+#    Using a commit preceding all recent changes in scilpy's test data
+#    management: d20d3d4917d40f698aa975f64a575dda34e0c89c
 # -------
 requests==2.28.*
-scilpy @ git+https://github.com/scilus/scilpy.git@1.7.0-dev
-
+-e git+https://github.com/scilus/scilpy.git@d20d3d4917d40f698aa975f64a575dda34e0c89c#egg=scilpy
 
 # -------
 # Other important dependencies
 # -------
 bertviz~=1.4.0  # For transformer's visu
 torch==1.13.*
 tqdm==4.64.*
-comet-ml==3.21.*
+comet-ml>=3.22.0
 contextlib2==21.6.0
 jupyterlab>=3.6.2  # For transformer's visu
 IProgress>=0.4     # For jupyter with tqdm
 nested_lookup==0.2.25  # For lists management
+pynvml>=11.5.0
 
 # -------
 # Necessary but should be installed with scilpy (Last check: 01/2024):

diff --git a/scripts_python/dwiml_compute_connectivity_matrix_from_blocs.py b/scripts_python/dwiml_compute_connectivity_matrix_from_blocs.py
@@ -10,8 +10,9 @@
 from dipy.io.streamline import save_tractogram
 from dipy.io.utils import is_header_compatible
 
+from scilpy.io.streamlines import load_tractogram_with_reference
 from scilpy.io.utils import assert_inputs_exist, assert_outputs_exist, \
-    load_tractogram_with_reference, add_verbose_arg, add_overwrite_arg
+    add_verbose_arg, add_overwrite_arg
 
 from dwi_ml.data.hdf5.utils import format_nb_blocs_connectivity
 from dwi_ml.data.processing.streamlines.post_processing import \

diff --git a/scripts_python/dwiml_compute_connectivity_matrix_from_labels.py b/scripts_python/dwiml_compute_connectivity_matrix_from_labels.py
@@ -17,8 +17,9 @@
 from dipy.io.utils import is_header_compatible
 from scilpy.image.labels import get_data_as_labels
 
+from scilpy.io.streamlines import load_tractogram_with_reference
 from scilpy.io.utils import assert_inputs_exist, assert_outputs_exist, \
-    load_tractogram_with_reference, add_verbose_arg, add_overwrite_arg
+    add_verbose_arg, add_overwrite_arg
 
 from dwi_ml.data.processing.streamlines.post_processing import \
     find_streamlines_with_chosen_connectivity, \

diff --git a/scripts_python/dwiml_compute_connectivity_score.py b/scripts_python/dwiml_compute_connectivity_score.py
@@ -1,6 +1,7 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 import argparse
+import logging
 
 import matplotlib.pyplot as plt
 import numpy as np
@@ -15,7 +16,8 @@ def _build_arg_parser():
                    help="Matrix file (reference). .npy")
     p.add_argument('scored_matrix',
                    help="Matrix being scored. .npy")
-
+    p.add_argument('--show_now', action='store_true',
+                   help="If set, shows the matrix with matplotlib.")
     add_overwrite_arg(p)
 
     return p
@@ -89,7 +91,11 @@ def main():
     axs[2].imshow((ref.astype(int) - m.astype(int))**2)
     plt.suptitle("Binary")
 
-    plt.show()
+    if args.show_now:
+        plt.show()
+    else:
+        logging.warning("Saving of figure not implemented yet! Use --show_now "
+                        "to see the figure.")
 
 
 if __name__ == '__main__':

diff --git a/scripts_python/dwiml_divide_volume_into_blocs.py b/scripts_python/dwiml_divide_volume_into_blocs.py
@@ -8,16 +8,20 @@
 from scilpy.io.utils import assert_inputs_exist, assert_outputs_exist, \
     add_overwrite_arg
 
+from dwi_ml.data.hdf5.utils import format_nb_blocs_connectivity
+
 
 def _build_arg_parser():
     p = argparse.ArgumentParser(
         description=__doc__, formatter_class=argparse.RawTextHelpFormatter)
     p.add_argument('in_image', metavar='IN_FILE',
                    help='Input file name, in nifti format.')
-
-    p.add_argument(
-        'out_filename',
-        help='name of the output file, which will be saved as a text file.')
+    p.add_argument('out_filename',
+                   help='name of the output file, which will be saved as a '
+                        'text file.')
+    p.add_argument('nb_blocs', nargs='+', type=int,
+                   help="Number of blocs. Either a single int, or a list of "
+                        "3 values.")
 
     add_overwrite_arg(p)
 
@@ -48,11 +52,18 @@ def main():
     parser = _build_arg_parser()
     args = parser.parse_args()
 
+    # Checks
     assert_inputs_exist(parser, args.in_image)
     assert_outputs_exist(parser, args, required=args.out_filename)
+    args.nb_blocs = format_nb_blocs_connectivity(args.nb_blocs)
 
+    # Loading
     volume = nib.load(args.in_image)
-    final_volume = color_mri_connectivity_blocs([6, 6, 6], volume.shape)
+
+    # Processing
+    final_volume = color_mri_connectivity_blocs(args.nb_blocs, volume.shape)
+
+    # Saving
     img = nib.Nifti1Image(final_volume, volume.affine)
     nib.save(img, args.out_filename)
 

diff --git a/scripts_python/dwiml_send_value_to_comet_from_log.py b/scripts_python/dwiml_send_value_to_comet_from_log.py
@@ -0,0 +1,144 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+"""Sends all values to comet from a log (expecting the log to be one value per
+epoch). Note that the comet experiment is found by reading the experiment's
+checkpoint (must exist).
+
+USE WITH CARE. This cannot be undone.
+"""
+import argparse
+import glob
+import logging
+import os
+from time import sleep
+
+import numpy as np
+from comet_ml import ExistingExperiment
+from scilpy.io.utils import assert_inputs_exist
+
+from dwi_ml.training.trainers import DWIMLAbstractTrainer
+
+
+def _build_arg_parser():
+    p = argparse.ArgumentParser(
+        description=__doc__, formatter_class=argparse.RawTextHelpFormatter)
+    p.add_argument('experiments_path',
+                   help="Path to the experiments folder.")
+    p.add_argument('experiment_name',
+                   help="Name of the experiment.")
+    p.add_argument('--logs', nargs='+',
+                   help="Log file (s). Expected to be located inside the "
+                        "experiments_path. If not set, uses all logs.")
+    p.add_argument('--metric_names', nargs='+',
+                   help="Comet.ml's metric name(s). Must contain the same "
+                        "number of inputs as --logs.\n"
+                        "If not set, we will suggest you the probable name(s) "
+                        "but we will not run the script.")
+    p.add_argument('--use_suggested_name', action='store_true',
+                   help="If set and --metric_name is not set, will run with "
+                        "the suggested name(s).")
+    p.add_argument('--use_best', action='store_true',
+                   help="If set, uses only the best value in segment [0, t] "
+                        "as value at time t. (Best = lowest).")
+
+    return p
+
+
+def main():
+    parser = _build_arg_parser()
+    args = parser.parse_args()
+
+    # Verifications
+    experiment_path = os.path.join(args.experiments_path, args.experiment_name)
+    if not os.path.isdir(experiment_path):
+        parser.error('Experiment does not exist: {}'.format(experiment_path))
+
+    # Prepare logs, with possible wildcards.
+    log_path = os.path.join(str(experiment_path), 'logs')
+    if args.logs is None:
+        log_paths = glob.glob(os.path.join(log_path, '*'))
+    else:
+        log_paths = [os.path.join(log_path, file) for file in args.logs]
+
+    assert_inputs_exist(parser, log_paths)
+
+    # Prepare associated metric names (if not given)
+    if args.metric_names is None:
+        args.metric_names = []
+        if args.use_best:
+            print("Not sure what to suggest you for --metric_name with "
+                  "option --use_best. Probably 'best_loss'!?")
+            args.use_suggested_name = False
+        else:
+            for log in log_paths:
+                # Based on current implementation of things:
+                # - comet adds train_ or validate_ as prefix
+                # - we add _per_epoch.npy as suffix to the log name.
+                base_name = os.path.basename(log)
+                metric_name, _ = os.path.splitext(base_name)
+
+                # Add comet prefix
+                if 'tracking' in metric_name or 'valid' in metric_name:
+                    metric_name = 'validate_' + metric_name
+                elif 'training' in metric_name:
+                    metric_name = 'train_' + metric_name
+
+                # Remove our suffix
+                if metric_name[-10:] == '_per_epoch':
+                    metric_name = metric_name[:-10]
+
+                print("Suggested --metric_name for log {}: {}"
+                      .format(base_name, metric_name))
+                args.metric_names.append(metric_name)
+
+        # Possibly stop now
+        if not args.use_suggested_name:
+            return
+
+    # Verify
+    if not len(args.metric_names) == len(log_paths):
+        parser.error("Expecting the same number of metrics_names (got {}) "
+                     "than logs (got {})."
+                     .format(len(args.metric_names), len(log_paths)))
+
+    # Loading comet from info in checkpoint
+    checkpoint_state = DWIMLAbstractTrainer.load_params_from_checkpoint(
+        args.experiments_path, args.experiment_name)
+    current_states = checkpoint_state['current_states']
+    comet_key = current_states['comet_key']
+
+    print("Found comet experiment: key {}. Loading.".format(comet_key))
+    comet_exp = ExistingExperiment(previous_experiment=comet_key)
+
+    for log, metric_name in zip(log_paths, args.metric_names):
+        # Loading the log
+        data = np.load(log)
+
+        # Note. comet_exp.metrics exists, but it seems to only contain the
+        # metrics logged in the current session, not those previously logged.
+        # At this point, metrics={}.
+        print("Will send values for all {} epochs to metric {}"
+              .format(len(data), metric_name))
+        best_value = np.inf
+        for t in range(len(data)):
+            value = data[t]
+            if args.use_best:
+                best_value = min(value, best_value)
+                value = best_value
+
+            # Send value
+            comet_exp.log_metric(metric_name, value, step=t)
+
+            # Rate limits:
+            # https://www.comet.com/docs/v2/api-and-sdk/python-sdk/warnings-errors/
+            # 10,000 per minute = 167 per second.
+            # If we wait 0.01 second, we make at most 100 calls per second,
+            # which helps staying under that limit.
+            # Note. Sleep on Linux seems to allow 1 ms sleeps. On Windows, it
+            # could fail. https://python-forum.io/thread-17019.html
+            sleep(0.01)
+
+    print("Done!\n\n")
+
+
+if __name__ == '__main__':
+    main()
diff --git a/...python/dwiml_send_value_manually_comet.py → ...hon/dwiml_send_value_to_comet_manually.py b/...python/dwiml_send_value_manually_comet.py → ...hon/dwiml_send_value_to_comet_manually.py
diff --git a/scripts_python/tests/test_compute_connectivity_matrix_from_labels.py b/scripts_python/tests/test_compute_connectivity_matrix_from_labels.py
@@ -5,3 +5,8 @@ def test_help_option(script_runner):
     ret = script_runner.run('dwiml_compute_connectivity_matrix_from_labels.py',
                             '--help')
     assert ret.success
+
+
+def test_execution(script_runner):
+    # Impossible for now, no labels file. Could use data from scilpy's tests.
+    pass
diff --git a/scripts_python/tests/test_compute_connectivity_score.py b/scripts_python/tests/test_compute_connectivity_score.py
@@ -1,8 +1,34 @@
 #!/usr/bin/env python3
 # -*- coding: utf-8 -*-
+import os
+import tempfile
+
+from dwi_ml.unit_tests.utils.data_and_models_for_tests import \
+    fetch_testing_data
+
+data_dir = fetch_testing_data()
+tmp_dir = tempfile.TemporaryDirectory()
 
 
 def test_help_option(script_runner):
+    ret = script_runner.run('dwiml_compute_connectivity_score.py', '--help')
+    assert ret.success
+
+
+def test_execution(script_runner):
+    os.chdir(os.path.expanduser(tmp_dir.name))
+    dwi_ml_folder = os.path.join(data_dir, 'dwi_ml_ready', 'subjX')
+
+    # Currently no matrix in our tests. Creating one.
+    in_volume = os.path.join(dwi_ml_folder, 'anat', 't1.nii.gz')
+    streamlines = os.path.join(dwi_ml_folder, 'example_bundle', 'Fornix.trk')
+    matrix = 'matrix_connectivity.npy'
+
+    nb_blocs = '4'
+    script_runner.run('dwiml_compute_connectivity_matrix_from_blocs.py',
+                      in_volume, streamlines, matrix, nb_blocs, '--binary')
+
+    # Now scoring
     ret = script_runner.run('dwiml_compute_connectivity_score.py',
-                            '--help')
+                            matrix, matrix)
     assert ret.success
diff --git a/scripts_python/tests/test_divide_volume_into_blocs.py b/scripts_python/tests/test_divide_volume_into_blocs.py
@@ -1,8 +1,29 @@
 #!/usr/bin/env python3
 # -*- coding: utf-8 -*-
+import os
+import tempfile
+
+from dwi_ml.unit_tests.utils.data_and_models_for_tests import \
+    fetch_testing_data
+
+data_dir = fetch_testing_data()
+tmp_dir = tempfile.TemporaryDirectory()
 
 
 def test_help_option(script_runner):
+    ret = script_runner.run('dwiml_divide_volume_into_blocs.py', '--help')
+    assert ret.success
+
+
+def test_run(script_runner):
+    os.chdir(os.path.expanduser(tmp_dir.name))
+
+    dwi_ml_folder = os.path.join(data_dir, 'dwi_ml_ready', 'subjX')
+    in_volume = os.path.join(dwi_ml_folder, 'anat', 't1.nii.gz')
+
+    out_file = 'volume_blocs.nii.gz'
+
+    nb_blocs = '4'
     ret = script_runner.run('dwiml_divide_volume_into_blocs.py',
-                            '--help')
+                            in_volume, out_file, nb_blocs)
     assert ret.success
diff --git a/scripts_python/tests/test_print_hdf5_architecture.py b/scripts_python/tests/test_print_hdf5_architecture.py
@@ -0,0 +1,19 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+import os
+
+from dwi_ml.unit_tests.utils.data_and_models_for_tests import fetch_testing_data
+
+data_dir = fetch_testing_data()
+experiment_name = 'test_experiment'
+
+
+def test_help_option(script_runner):
+    ret = script_runner.run('dwiml_print_hdf5_architecture.py', '--help')
+    assert ret.success
+
+
+def test_execution(script_runner):
+    hdf5_file = os.path.join(data_dir, 'hdf5_file.hdf5')
+    ret = script_runner.run('dwiml_print_hdf5_architecture.py', hdf5_file)
+    assert ret.success
diff --git a/scripts_python/tests/test_send_value_to_comet_from_log.py b/scripts_python/tests/test_send_value_to_comet_from_log.py
@@ -0,0 +1,11 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+def test_help_option(script_runner):
+    ret = script_runner.run('dwiml_send_value_to_comet_from_log.py', '--help')
+    assert ret.success
+
+
+def test_execution(script_runner):
+    # Impossible
+    pass
diff --git a/scripts_python/tests/test_send_value_to_comet_manually.py b/scripts_python/tests/test_send_value_to_comet_manually.py
@@ -0,0 +1,11 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+def test_help_option(script_runner):
+    ret = script_runner.run('dwiml_send_value_to_comet_manually.py', '--help')
+    assert ret.success
+
+
+def test_execution(script_runner):
+    # Impossible
+    pass