User-friendly updates to debugger scripts

uio-bmi · May 20, 2024 · eb92e01
1 parent e42b724
commit eb92e01
Show file tree

Hide file tree

Showing 2 changed files with 37 additions and 13 deletions.
diff --git a/scripts/check_new_encoder.py b/scripts/check_new_encoder.py
@@ -1,4 +1,10 @@
 import argparse
+import sys
+
+# Ensure the immuneML/ project 'root dir' is added to sys.path
+# Adding "." and "../" allows the script to be run from immuneML/ and immuneML/scripts/
+# When encountering ModuleNotFoundError, try adding the absolute path to the project 'root dir' here
+sys.path.extend([".", "../"])
 
 from scripts.checker_util import *
 from immuneML.data_model.dataset.ReceptorDataset import ReceptorDataset
@@ -17,11 +23,15 @@
 
 def parse_commandline_arguments(args):
     parser = argparse.ArgumentParser(description="Tool for testing new immuneML DatasetEncoder classes")
-    parser.add_argument("-e", "--encoder_file", type=str, required=True, help="Path to the (dataset-specific) encoder file, placed in the correct immuneML subfolder. ")
-    parser.add_argument("-d", "--dataset_type", type=str, choices=["repertoire", "sequence", "receptor"], required=True, help="Whether to test using 'sequence', 'receptor' or 'repertoire' dataset.")
-    parser.add_argument("-p", "--no_default_parameters",  action='store_true', help="If enabled, it is assumed that no default parameters file exists, and the Encoder can be run without supplying additional parameters. ")
-    parser.add_argument("-l", "--log_file", type=str, default="check_new_encoder_log.txt", help="Path to the output log file. If already present, the file will be overwritten.")
-    parser.add_argument("-t", "--tmp_path", type=str, default="./tmp", help="Path to the temporary output folder. If already present, the folder will be overwritten.")
+
+    usage_args = parser.add_argument_group('usage arguments')
+    usage_args.add_argument("-e", "--encoder_file", type=str, required=True, help="Path to the (dataset-specific) encoder file, placed in the correct immuneML subfolder. ")
+    usage_args.add_argument("-d", "--dataset_type", type=str, choices=["repertoire", "sequence", "receptor"], required=True, help="Whether to test using 'sequence', 'receptor' or 'repertoire' dataset.")
+    usage_args.add_argument("-p", "--no_default_parameters",  action='store_true', help="If enabled, it is assumed that no default parameters file exists, and the Encoder can be run without supplying additional parameters. ")
+
+    logging_args = parser.add_argument_group('logging arguments')
+    logging_args.add_argument("-l", "--log_file", type=str, default="check_new_encoder_log.txt", help="Path to the output log file. If already present, the file will be overwritten (default='./check_new_encoder_log.txt').")
+    logging_args.add_argument("-t", "--tmp_path", type=str, default="./tmp", help="Path to the temporary output folder. If already present, the folder will be overwritten (default='./tmp').")
 
     return parser.parse_args(args)
 
@@ -45,7 +55,7 @@ def check_encoded_data(encoded_data, dummy_dataset, base_class_name):
     assert isinstance(encoded_data, EncodedData), f"Error: expected the .encoded_data field of the output dataset to be an EncodedData object, found {encoded_data.__class__.__name__}"
 
     assert encoded_data.examples is not None, f"Error: EncodedData.examples is None, but should be a numeric matrix with a number of rows equal to the number of examples in the dataset ({dummy_dataset.get_example_count()})"
-    assert encoded_data.examples.shape[0] == dummy_dataset.get_example_count(), f"Error: the number of rows in EncodedData.examples must be equal to the number of examples in the dataset ({dummy_dataset.get_example_count()})"
+    assert encoded_data.examples.shape[0] == dummy_dataset.get_example_count(), f"Error: the number of rows in EncodedData.examples ({encoded_data.examples.shape[0]}) must be equal to the number of examples in the dataset ({dummy_dataset.get_example_count()})"
 
     assert encoded_data.example_ids == dummy_dataset.get_example_ids(), f"Error: EncodedData.example_ids must match the original dataset: {dummy_dataset.get_example_ids()}, found {encoded_data.example_ids}"
     assert encoded_data.encoding == base_class_name, f"Error: EncodedData.encoding must be set to the base class name ('{base_class_name}'), found {encoded_data.encoding}"

diff --git a/scripts/check_new_ml_method.py b/scripts/check_new_ml_method.py
@@ -1,23 +1,32 @@
+import sys
 import argparse
 import random
-
 import numpy as np
 
+# Ensure the immuneML/ project 'root dir' is added to sys.path
+# Adding "." and "../" allows the script to be run from immuneML/ and immuneML/scripts/
+# When encountering ModuleNotFoundError, try adding the absolute path to the project 'root dir' here
+sys.path.extend([".", "../"])
+
+from scripts.checker_util import *
 from immuneML.data_model.encoded_data.EncodedData import EncodedData
 from immuneML.dsl.DefaultParamsLoader import DefaultParamsLoader
 from immuneML.environment.EnvironmentSettings import EnvironmentSettings
 from immuneML.environment.Label import Label
 from immuneML.ml_methods.classifiers.MLMethod import MLMethod
 from immuneML.util.ReflectionHandler import ReflectionHandler
-from scripts.checker_util import *
 
 
 def parse_commandline_arguments(args):
     parser = argparse.ArgumentParser(description="Tool for testing new immuneML MLMethod classes")
-    parser.add_argument("-m", "--ml_method_file", type=str, required=True, help="Path to the MLMethod file, placed in the correct immuneML subfolder. ")
-    parser.add_argument("-p", "--no_default_parameters",  action='store_true', help="If enabled, it is assumed that no default parameters file exists, and the MLMethod can be run without supplying additional parameters. ")
-    parser.add_argument("-l", "--log_file", type=str, default="check_new_ml_method_log.txt", help="Path to the output log file. If already present, the file will be overwritten.")
-    parser.add_argument("-t", "--tmp_path", type=str, default="./tmp", help="Path to the temporary output folder. If already present, the folder will be overwritten.")
+
+    usage_args = parser.add_argument_group('usage arguments')
+    usage_args.add_argument("-m", "--ml_method_file", type=str, required=True, help="Path to the MLMethod file, placed in the correct immuneML subfolder. ")
+    usage_args.add_argument("-p", "--no_default_parameters",  action='store_true', help="If enabled, it is assumed that no default parameters file exists, and the MLMethod can be run without supplying additional parameters. ")
+
+    logging_args = parser.add_argument_group('logging arguments')
+    logging_args.add_argument("-l", "--log_file", type=str, default="check_new_ml_method_log.txt", help="Path to the output log file. If already present, the file will be overwritten (default='./check_new_ml_method_log.txt').")
+    logging_args.add_argument("-t", "--tmp_path", type=str, default="./tmp", help="Path to the temporary output folder. If already present, the folder will be overwritten (default='./tmp').")
 
     return parser.parse_args(args)
 
@@ -55,12 +64,17 @@ def check_methods(ml_method_instance):
     assert MLMethod._assert_matching_label == ml_method_instance.__class__._assert_matching_label, mssg.format("_assert_matching_label", ml_method_instance.__class__._assert_matching_label)
     assert MLMethod.predict == ml_method_instance.__class__.predict, mssg.format("predict", ml_method_instance.__class__.predict)
     assert MLMethod.predict_proba == ml_method_instance.__class__.predict_proba, mssg.format("predict_proba", ml_method_instance.__class__.predict_proba)
-    assert MLMethod.check_encoder_compatibility == ml_method_instance.__class__.check_encoder_compatibility, mssg.format("check_encoder_compatibility", ml_method_instance.__class__.check_encoder_compatibility)
     assert MLMethod.get_feature_names == ml_method_instance.__class__.get_feature_names, mssg.format("get_feature_names", ml_method_instance.__class__.get_feature_names)
     assert MLMethod.get_label_name == ml_method_instance.__class__.get_label_name, mssg.format("get_label_name", ml_method_instance.__class__.get_label_name)
     assert MLMethod.get_classes == ml_method_instance.__class__.get_classes, mssg.format("get_classes", ml_method_instance.__class__.get_classes)
     assert MLMethod.get_positive_class == ml_method_instance.__class__.get_positive_class, mssg.format("get_positive_class", ml_method_instance.__class__.get_positive_class)
 
+    if MLMethod.check_encoder_compatibility != ml_method_instance.__class__.check_encoder_compatibility:
+        logging.warning(f"class method 'check_encoder_compatibility' was overwritten from MLMethod. Please ensure this was intentional (for example: if more than just the Encoder type needs to be checked). ")
+
+        # , mssg.format("check_encoder_compatibility", ml_method_instance.__class__.check_encoder_compatibility)
+
+
     check_base_vs_instance_methods(MLMethod, ml_method_instance)
 
     compatible_encoders = ml_method_instance.get_compatible_encoders()