Merge pull request #173 from uio-bmi/bug_fixes
Bug fixes
LonnekeScheffer committed May 20, 2024
2 parents cf72dfc + eb92e01 commit 480d6bb
Showing 13 changed files with 57 additions and 31 deletions.
@@ -0,0 +1,2 @@
+ number_of_processes: 4
+ export_formats: [AIRR]
2 changes: 1 addition & 1 deletion immuneML/data_model/dataset/ElementDataset.py
@@ -117,7 +117,7 @@ def make_subset(self, example_indices, path, dataset_type: str):
def get_label_names(self):
"""Returns the list of metadata fields which can be used as labels"""
return [label for label in list(self.labels.keys()) if
- label not in ['region_type', 'receptor_chains', 'organism']] if self.labels else []
+ label not in ['region_type', 'receptor_chains', 'organism', 'type_dict']] if self.labels else []

def clone(self, keep_identifier: bool = False):
raise NotImplementedError
2 changes: 1 addition & 1 deletion immuneML/data_model/dataset/RepertoireDataset.py
@@ -107,7 +107,7 @@ def get_label_names(self, refresh=False):
"""Returns the list of metadata fields which can be used as labels; if refresh=True, it reloads the fields
from disk"""
all_metadata_fields = set(self.get_metadata_fields(refresh))
- for non_label in ["subject_id", "filename", "repertoire_id", "identifier"]:
+ for non_label in ["subject_id", "filename", "repertoire_id", "identifier", "type_dict"]:
if non_label in all_metadata_fields:
all_metadata_fields.remove(non_label)

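The two hunks above fix the same issue: the internal `type_dict` bookkeeping field was being offered as if it were a user-defined label. A minimal standalone sketch of the filtering pattern (the labels dict below is a hypothetical example, not immuneML data):

```python
# Sketch of the label-name filtering pattern from the two hunks above.
# The labels dict is a hypothetical example.
NON_LABEL_FIELDS = ['region_type', 'receptor_chains', 'organism', 'type_dict']

labels = {'disease': ['T1D', 'healthy'],    # an actual label
          'organism': 'human',              # import setting, not a label
          'type_dict': {'disease': 'str'}}  # internal bookkeeping, not a label

label_names = [name for name in labels if name not in NON_LABEL_FIELDS]
print(label_names)  # ['disease']
```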
2 changes: 1 addition & 1 deletion immuneML/data_model/receptor/ElementGenerator.py
@@ -48,7 +48,7 @@ def _load_batch(self, current_file: int, return_objects: bool = True):
elements = bnp_data
except ValueError as error:
raise ValueError(f'{ElementGenerator.__name__}: an error occurred while creating an object from tsv file. '
- f'Details: {error}')
+ f'Details: {error}').with_traceback(error.__traceback__)

return elements

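This hunk, like most of those below, wraps a low-level exception in a more informative one while keeping the original traceback via `BaseException.with_traceback()`. A self-contained sketch of the pattern; `parse_tsv` here is a hypothetical stand-in for the real parsing call:

```python
# Sketch of the traceback-preserving re-raise pattern used throughout this
# commit; parse_tsv is a hypothetical stand-in for the failing call.
def parse_tsv(path: str):
    raise ValueError(f"malformed row in {path}")

def load_batch(path: str):
    try:
        return parse_tsv(path)
    except ValueError as error:
        # with_traceback() attaches the original frames to the new exception,
        # so the log still points at the line that actually failed.
        raise ValueError(f"an error occurred while creating an object from "
                         f"tsv file. Details: {error}"
                         ).with_traceback(error.__traceback__)

load_batch("data.tsv")  # traceback shows both load_batch and parse_tsv frames
```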
2 changes: 1 addition & 1 deletion immuneML/dsl/ObjectParser.py
@@ -66,6 +66,6 @@ def parse_object(specs, valid_class_names: list, class_name_ending: str, class_p
except TypeError as err:
raise AssertionError(f"{location}: invalid parameter {err.args[0]} when specifying parameters in {specs} "
f"under key {key}. Valid parameter names are: "
f"{[name for name in inspect.signature(cls.__init__).parameters.keys()]}")
f"{[name for name in inspect.signature(cls.__init__).parameters.keys()]}").with_traceback(err.__traceback__)

return (obj, {class_name: params}) if return_params_dict else obj
10 changes: 5 additions & 5 deletions immuneML/dsl/import_parsers/ImportParser.py
@@ -63,9 +63,9 @@ def parse_dataset(key: str, dataset_specs: dict, result_path: Path) -> Dataset:
raise KeyError(f"{key_error}\n\nAn error occurred during parsing of dataset {key}. "
f"The keyword {key_error.args[0]} was missing. This either means this argument was "
f"not defined under definitions/datasets/{key}/params, or this column was missing from "
f"an input data file. ")
f"an input data file. ").with_traceback(key_error.__traceback__)
except Exception as ex:
raise Exception(f"{ex}\n\nAn error occurred while parsing the dataset {key}. See the log above for more details.")
raise Exception(f"{ex}\n\nAn error occurred while parsing the dataset {key}. See the log above for more details.").with_traceback(ex.__traceback__)

return dataset

@@ -88,6 +88,6 @@ def _prepare_params(dataset_specs: dict, result_path: Path, dataset_name: str):

@staticmethod
def log_dataset_info(dataset: Dataset):
print_log(f"\nImported {dataset.__class__.__name__.split('Dataset')[0].lower()} dataset {dataset.name}:\n"
f"Example count: {dataset.get_example_count()}\n"
f"Labels: {dataset.get_label_names()}", True)
print_log(f"Imported {dataset.__class__.__name__.split('Dataset')[0].lower()} dataset {dataset.name}:\n"
f"- Example count: {dataset.get_example_count()}\n"
f"- Labels: {dataset.get_label_names()}", True)
4 changes: 2 additions & 2 deletions immuneML/dsl/instruction_parsers/TrainMLModelParser.py
@@ -139,7 +139,7 @@ def _parse_settings(self, instruction: dict, symbol_table: SymbolTable) -> list:
settings.append(s)
return settings
except KeyError as key_error:
raise KeyError(f"{TrainMLModelParser.__name__}: parameter {key_error.args[0]} was not defined under settings in TrainMLModel instruction.")
raise KeyError(f"{TrainMLModelParser.__name__}: parameter {key_error.args[0]} was not defined under settings in TrainMLModel instruction.") from key_error

def _prepare_path(self, instruction: dict) -> Path:
if "path" in instruction:
@@ -192,7 +192,7 @@ def _parse_split_config(self, instruction_key, instruction: dict, split_key: str
if "leave_one_out_config" in instruction[split_key] else None)

except KeyError as key_error:
raise KeyError(f"{TrainMLModelParser.__name__}: parameter {key_error.args[0]} was not defined under {split_key}.")
raise KeyError(f"{TrainMLModelParser.__name__}: parameter {key_error.args[0]} was not defined under {split_key}.") from key_error

def _prepare_report_config(self, instruction_key, instruction, split_key, symbol_table):
if "reports" in instruction[split_key] and len(instruction[split_key]["reports"]) > 0:
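Unlike the `with_traceback()` hunks elsewhere in this commit, the two parser hunks above chain with `raise ... from ...`, which records the original exception as `__cause__` and prints both tracebacks. A standalone sketch of that variant (the settings dict and key are illustrative):

```python
# Sketch of explicit exception chaining as used in TrainMLModelParser above;
# the settings dict and key are illustrative.
def get_setting(settings: dict, key: str):
    try:
        return settings[key]
    except KeyError as key_error:
        # 'from key_error' sets __cause__, so Python prints the original
        # KeyError, then "was the direct cause of", then this one.
        raise KeyError(f"parameter {key!r} was not defined under settings") from key_error

get_setting({"reports": []}, "assessment")
```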
4 changes: 2 additions & 2 deletions immuneML/ml_metrics/ClassificationMetric.py
@@ -18,8 +18,8 @@ class ClassificationMetric(Enum):
def get_metric(metric_name: str):
try:
return ClassificationMetric[metric_name.upper()]
- except KeyError:
-     raise KeyError(f"'{metric_name}' is not a valid performance metric. Valid metrics are: {', '.join([m.name for m in ClassificationMetric])}")
+ except KeyError as e:
+     raise KeyError(f"'{metric_name}' is not a valid performance metric. Valid metrics are: {', '.join([m.name for m in ClassificationMetric])}").with_traceback(e.__traceback__)

@staticmethod
def get_search_criterion(metric):
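The hunk above keeps the enum-lookup-by-name idiom but re-raises the bare `KeyError` with a message listing the valid metric names. A standalone sketch with two illustrative metrics (not the full immuneML list):

```python
from enum import Enum

# Sketch of the enum lookup pattern above; the two metrics are illustrative.
class ClassificationMetric(Enum):
    ACCURACY = "accuracy"
    BALANCED_ACCURACY = "balanced_accuracy"

def get_metric(metric_name: str):
    try:
        return ClassificationMetric[metric_name.upper()]
    except KeyError as e:
        raise KeyError(f"'{metric_name}' is not a valid performance metric. "
                       f"Valid metrics are: "
                       f"{', '.join(m.name for m in ClassificationMetric)}"
                       ).with_traceback(e.__traceback__)

print(get_metric("accuracy"))  # ClassificationMetric.ACCURACY
```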
2 changes: 1 addition & 1 deletion immuneML/util/CompAIRRHelper.py
@@ -46,7 +46,7 @@ def check_compairr_path(compairr_path):
except Exception as e:
raise Exception(f"CompAIRRHelper: failed to call CompAIRR: {e}\n"
f"Please ensure the correct version of CompAIRR has been installed (version {required_major}.{required_minor}.{required_patch} or later), "
f"or provide the path to the CompAIRR executable.")
f"or provide the path to the CompAIRR executable.").with_traceback(e.__traceback__)

return compairr_path

6 changes: 3 additions & 3 deletions immuneML/util/ImportHelper.py
@@ -86,7 +86,7 @@ def import_repertoire_dataset(import_class, params: DatasetImportParams, dataset
except Exception as e:
raise Exception(f"{e}\nAn error occurred while reading in the metadata file {params.metadata_file}. Please "
f"see the error log above for more details on this error and the documentation for the "
f"expected format of the metadata.")
f"expected format of the metadata.").with_traceback(e.__traceback__)

ParameterValidator.assert_keys_present(metadata.columns.tolist(), ["filename"], ImportHelper.__name__,
f'{dataset_name}: params: metadata_file')
@@ -142,7 +142,7 @@ def load_repertoire_as_object(import_class, metadata_row, params: DatasetImportP
return repertoire
except Exception as exception:
raise RuntimeError(
f"{ImportHelper.__name__}: error when importing file {metadata_row['filename']}: {exception}") from exception
f"{ImportHelper.__name__}: error when importing file {metadata_row['filename']}: {exception}").with_traceback(exception.__traceback__)

@staticmethod
def load_sequence_dataframe(filepath, params, alternative_load_func=None):
@@ -156,7 +156,7 @@ def load_sequence_dataframe(filepath, params, alternative_load_func=None):
f"{ex}\n\nImportHelper: an error occurred during dataset import while parsing the input file: {filepath}.\n"
f"Please make sure this is a correct immune receptor data file (not metadata).\n"
f"The parameters used for import are {params}.\nFor technical description of the error, see the log above. "
f"For details on how to specify the dataset import, see the documentation.")
f"For details on how to specify the dataset import, see the documentation.").with_traceback(ex.__traceback__)

ImportHelper.rename_dataframe_columns(df, params)
ImportHelper.standardize_none_values(df)
2 changes: 1 addition & 1 deletion immuneML/util/Logger.py
@@ -14,7 +14,7 @@ def wrapped(*args, **kwargs):
raise Exception(f"{e}\n\n"
f"ImmuneMLParser: an error occurred during parsing in function {func.__name__} "
f" with parameters: {args}.\n\nFor more details on how to write the specification, "
f"see the documentation. For technical description of the error, see the log above.")
f"see the documentation. For technical description of the error, see the log above.").with_traceback(e.__traceback__)
else:
raise e
finally:
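The Logger.py change lives inside a decorator that wraps parsing functions. A simplified standalone sketch of that pattern, assuming the decorator shape shown in the hunk (the real version also has `else` and `finally` branches not visible here; the decorator name and `parse_section` are hypothetical):

```python
import functools

# Simplified sketch of an error-wrapping decorator like the one in Logger.py;
# the decorator name and the parse_section function are hypothetical.
def log_error(func):
    @functools.wraps(func)
    def wrapped(*args, **kwargs):
        try:
            return func(*args, **kwargs)
        except Exception as e:
            raise Exception(f"{e}\n\nan error occurred during parsing in "
                            f"function {func.__name__} with parameters: {args}."
                            ).with_traceback(e.__traceback__)
    return wrapped

@log_error
def parse_section(specs: dict):
    return specs["definitions"]

parse_section({})  # re-raised with context; original traceback preserved
```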
22 changes: 16 additions & 6 deletions scripts/check_new_encoder.py
@@ -1,4 +1,10 @@
import argparse
+ import sys

+ # Ensure the immuneML/ project 'root dir' is added to sys.path
+ # Adding "." and "../" allows the script to be run from immuneML/ and immuneML/scripts/
+ # When encountering ModuleNotFoundError, try adding the absolute path to the project 'root dir' here
+ sys.path.extend([".", "../"])

from scripts.checker_util import *
from immuneML.data_model.dataset.ReceptorDataset import ReceptorDataset
@@ -17,11 +23,15 @@

def parse_commandline_arguments(args):
parser = argparse.ArgumentParser(description="Tool for testing new immuneML DatasetEncoder classes")
parser.add_argument("-e", "--encoder_file", type=str, required=True, help="Path to the (dataset-specific) encoder file, placed in the correct immuneML subfolder. ")
parser.add_argument("-d", "--dataset_type", type=str, choices=["repertoire", "sequence", "receptor"], required=True, help="Whether to test using 'sequence', 'receptor' or 'repertoire' dataset.")
parser.add_argument("-p", "--no_default_parameters", action='store_true', help="If enabled, it is assumed that no default parameters file exists, and the Encoder can be run without supplying additional parameters. ")
parser.add_argument("-l", "--log_file", type=str, default="check_new_encoder_log.txt", help="Path to the output log file. If already present, the file will be overwritten.")
parser.add_argument("-t", "--tmp_path", type=str, default="./tmp", help="Path to the temporary output folder. If already present, the folder will be overwritten.")

+ usage_args = parser.add_argument_group('usage arguments')
+ usage_args.add_argument("-e", "--encoder_file", type=str, required=True, help="Path to the (dataset-specific) encoder file, placed in the correct immuneML subfolder. ")
+ usage_args.add_argument("-d", "--dataset_type", type=str, choices=["repertoire", "sequence", "receptor"], required=True, help="Whether to test using 'sequence', 'receptor' or 'repertoire' dataset.")
+ usage_args.add_argument("-p", "--no_default_parameters", action='store_true', help="If enabled, it is assumed that no default parameters file exists, and the Encoder can be run without supplying additional parameters. ")

+ logging_args = parser.add_argument_group('logging arguments')
+ logging_args.add_argument("-l", "--log_file", type=str, default="check_new_encoder_log.txt", help="Path to the output log file. If already present, the file will be overwritten (default='./check_new_encoder_log.txt').")
+ logging_args.add_argument("-t", "--tmp_path", type=str, default="./tmp", help="Path to the temporary output folder. If already present, the folder will be overwritten (default='./tmp').")

return parser.parse_args(args)
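The hunk above splits the options into named groups with `argparse.ArgumentParser.add_argument_group`; groups change only how `--help` output is organized, not how arguments are parsed. A minimal standalone sketch:

```python
import argparse

# Minimal sketch of the argument-group refactor above; option names and
# defaults are trimmed for brevity.
parser = argparse.ArgumentParser(description="demo checker")

usage_args = parser.add_argument_group('usage arguments')
usage_args.add_argument("-e", "--encoder_file", type=str, required=True)

logging_args = parser.add_argument_group('logging arguments')
logging_args.add_argument("-l", "--log_file", type=str, default="check_log.txt")

args = parser.parse_args(["-e", "MyEncoder.py"])
print(args.encoder_file, args.log_file)  # MyEncoder.py check_log.txt
```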

@@ -45,7 +55,7 @@ def check_encoded_data(encoded_data, dummy_dataset, base_class_name):
assert isinstance(encoded_data, EncodedData), f"Error: expected the .encoded_data field of the output dataset to be an EncodedData object, found {encoded_data.__class__.__name__}"

assert encoded_data.examples is not None, f"Error: EncodedData.examples is None, but should be a numeric matrix with a number of rows equal to the number of examples in the dataset ({dummy_dataset.get_example_count()})"
- assert encoded_data.examples.shape[0] == dummy_dataset.get_example_count(), f"Error: the number of rows in EncodedData.examples must be equal to the number of examples in the dataset ({dummy_dataset.get_example_count()})"
+ assert encoded_data.examples.shape[0] == dummy_dataset.get_example_count(), f"Error: the number of rows in EncodedData.examples ({encoded_data.examples.shape[0]}) must be equal to the number of examples in the dataset ({dummy_dataset.get_example_count()})"

assert encoded_data.example_ids == dummy_dataset.get_example_ids(), f"Error: EncodedData.example_ids must match the original dataset: {dummy_dataset.get_example_ids()}, found {encoded_data.example_ids}"
assert encoded_data.encoding == base_class_name, f"Error: EncodedData.encoding must be set to the base class name ('{base_class_name}'), found {encoded_data.encoding}"
28 changes: 21 additions & 7 deletions scripts/check_new_ml_method.py
@@ -1,23 +1,32 @@
+ import sys
import argparse
import random

import numpy as np

+ # Ensure the immuneML/ project 'root dir' is added to sys.path
+ # Adding "." and "../" allows the script to be run from immuneML/ and immuneML/scripts/
+ # When encountering ModuleNotFoundError, try adding the absolute path to the project 'root dir' here
+ sys.path.extend([".", "../"])

+ from scripts.checker_util import *
from immuneML.data_model.encoded_data.EncodedData import EncodedData
from immuneML.dsl.DefaultParamsLoader import DefaultParamsLoader
from immuneML.environment.EnvironmentSettings import EnvironmentSettings
from immuneML.environment.Label import Label
from immuneML.ml_methods.classifiers.MLMethod import MLMethod
from immuneML.util.ReflectionHandler import ReflectionHandler
- from scripts.checker_util import *


def parse_commandline_arguments(args):
parser = argparse.ArgumentParser(description="Tool for testing new immuneML MLMethod classes")
parser.add_argument("-m", "--ml_method_file", type=str, required=True, help="Path to the MLMethod file, placed in the correct immuneML subfolder. ")
parser.add_argument("-p", "--no_default_parameters", action='store_true', help="If enabled, it is assumed that no default parameters file exists, and the MLMethod can be run without supplying additional parameters. ")
parser.add_argument("-l", "--log_file", type=str, default="check_new_ml_method_log.txt", help="Path to the output log file. If already present, the file will be overwritten.")
parser.add_argument("-t", "--tmp_path", type=str, default="./tmp", help="Path to the temporary output folder. If already present, the folder will be overwritten.")

+ usage_args = parser.add_argument_group('usage arguments')
+ usage_args.add_argument("-m", "--ml_method_file", type=str, required=True, help="Path to the MLMethod file, placed in the correct immuneML subfolder. ")
+ usage_args.add_argument("-p", "--no_default_parameters", action='store_true', help="If enabled, it is assumed that no default parameters file exists, and the MLMethod can be run without supplying additional parameters. ")

+ logging_args = parser.add_argument_group('logging arguments')
+ logging_args.add_argument("-l", "--log_file", type=str, default="check_new_ml_method_log.txt", help="Path to the output log file. If already present, the file will be overwritten (default='./check_new_ml_method_log.txt').")
+ logging_args.add_argument("-t", "--tmp_path", type=str, default="./tmp", help="Path to the temporary output folder. If already present, the folder will be overwritten (default='./tmp').")

return parser.parse_args(args)

@@ -55,12 +64,17 @@ def check_methods(ml_method_instance):
assert MLMethod._assert_matching_label == ml_method_instance.__class__._assert_matching_label, mssg.format("_assert_matching_label", ml_method_instance.__class__._assert_matching_label)
assert MLMethod.predict == ml_method_instance.__class__.predict, mssg.format("predict", ml_method_instance.__class__.predict)
assert MLMethod.predict_proba == ml_method_instance.__class__.predict_proba, mssg.format("predict_proba", ml_method_instance.__class__.predict_proba)
- assert MLMethod.check_encoder_compatibility == ml_method_instance.__class__.check_encoder_compatibility, mssg.format("check_encoder_compatibility", ml_method_instance.__class__.check_encoder_compatibility)
assert MLMethod.get_feature_names == ml_method_instance.__class__.get_feature_names, mssg.format("get_feature_names", ml_method_instance.__class__.get_feature_names)
assert MLMethod.get_label_name == ml_method_instance.__class__.get_label_name, mssg.format("get_label_name", ml_method_instance.__class__.get_label_name)
assert MLMethod.get_classes == ml_method_instance.__class__.get_classes, mssg.format("get_classes", ml_method_instance.__class__.get_classes)
assert MLMethod.get_positive_class == ml_method_instance.__class__.get_positive_class, mssg.format("get_positive_class", ml_method_instance.__class__.get_positive_class)

+ if MLMethod.check_encoder_compatibility != ml_method_instance.__class__.check_encoder_compatibility:
+     logging.warning(f"class method 'check_encoder_compatibility' was overwritten from MLMethod. Please ensure this was intentional (for example: if more than just the Encoder type needs to be checked). ")
+
+ # , mssg.format("check_encoder_compatibility", ml_method_instance.__class__.check_encoder_compatibility)


check_base_vs_instance_methods(MLMethod, ml_method_instance)

compatible_encoders = ml_method_instance.get_compatible_encoders()
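The final hunk relaxes the checker: overriding `check_encoder_compatibility` now triggers a warning rather than a failed assert, since an override can be legitimate when more than the encoder type must be checked. A standalone sketch of the override detection (the classes are illustrative stand-ins):

```python
import logging

# Sketch of the relaxed override check above; MLMethod and MyMethod are
# illustrative stand-ins for the real classes.
class MLMethod:
    def check_encoder_compatibility(self, encoder):
        pass

class MyMethod(MLMethod):
    def check_encoder_compatibility(self, encoder):  # intentional override
        pass

instance = MyMethod()
# Comparing the base-class function with the one on the instance's class
# detects whether the subclass overrode the method.
if MLMethod.check_encoder_compatibility != instance.__class__.check_encoder_compatibility:
    logging.warning("'check_encoder_compatibility' was overwritten from "
                    "MLMethod; please ensure this was intentional.")
```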
