diff --git a/immuneML/config/default_params/instructions/dataset_export.yaml b/immuneML/config/default_params/instructions/dataset_export.yaml
new file mode 100644
index 000000000..daa3f1ef5
--- /dev/null
+++ b/immuneML/config/default_params/instructions/dataset_export.yaml
@@ -0,0 +1,2 @@
+number_of_processes: 4
+export_formats: [AIRR]
\ No newline at end of file
diff --git a/immuneML/data_model/dataset/ElementDataset.py b/immuneML/data_model/dataset/ElementDataset.py
index 7946f52e2..83a1a306b 100644
--- a/immuneML/data_model/dataset/ElementDataset.py
+++ b/immuneML/data_model/dataset/ElementDataset.py
@@ -117,7 +117,7 @@ def make_subset(self, example_indices, path, dataset_type: str):
     def get_label_names(self):
         """Returns the list of metadata fields which can be used as labels"""
         return [label for label in list(self.labels.keys()) if
-                label not in ['region_type', 'receptor_chains', 'organism']] if self.labels else []
+                label not in ['region_type', 'receptor_chains', 'organism', 'type_dict']] if self.labels else []
 
     def clone(self, keep_identifier: bool = False):
         raise NotImplementedError
diff --git a/immuneML/data_model/dataset/RepertoireDataset.py b/immuneML/data_model/dataset/RepertoireDataset.py
index 217ee97b8..b75f64428 100644
--- a/immuneML/data_model/dataset/RepertoireDataset.py
+++ b/immuneML/data_model/dataset/RepertoireDataset.py
@@ -107,7 +107,7 @@ def get_label_names(self, refresh=False):
         """Returns the list of metadata fields which can be used as labels; if refresh=True, it reloads the fields from disk"""
         all_metadata_fields = set(self.get_metadata_fields(refresh))
-        for non_label in ["subject_id", "filename", "repertoire_id", "identifier"]:
+        for non_label in ["subject_id", "filename", "repertoire_id", "identifier", "type_dict"]:
             if non_label in all_metadata_fields:
                 all_metadata_fields.remove(non_label)
 
diff --git a/immuneML/data_model/receptor/ElementGenerator.py b/immuneML/data_model/receptor/ElementGenerator.py
index 0e0a074fe..780145dcf 100644
--- a/immuneML/data_model/receptor/ElementGenerator.py
+++ b/immuneML/data_model/receptor/ElementGenerator.py
@@ -48,7 +48,7 @@ def _load_batch(self, current_file: int, return_objects: bool = True):
                 elements = bnp_data
         except ValueError as error:
             raise ValueError(f'{ElementGenerator.__name__}: an error occurred while creating an object from tsv file. '
-                             f'Details: {error}')
+                             f'Details: {error}').with_traceback(error.__traceback__)
 
         return elements
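
A note on the re-raise pattern introduced above and repeated throughout this diff: calling `.with_traceback(original.__traceback__)` on the replacement exception grafts the original traceback onto it, so the error report points at the line that actually failed rather than at the re-raise, while the original exception also stays attached implicitly as `__context__`. A minimal standalone sketch (all names are illustrative, not immuneML code):

```python
# Minimal sketch of the raise-with-traceback pattern used in ElementGenerator
# and the other modules in this diff. All names below are illustrative.
import traceback


def parse_row(raw: str) -> int:
    return int(raw)  # raises ValueError for non-numeric input


def load_batch(raw: str) -> int:
    try:
        return parse_row(raw)
    except ValueError as error:
        # The new, friendlier exception keeps the frames of the original one,
        # so the printed traceback still shows the int() call that failed.
        raise ValueError(f"an error occurred while creating an object from a tsv row. "
                         f"Details: {error}").with_traceback(error.__traceback__)


if __name__ == "__main__":
    try:
        load_batch("not-a-number")
    except ValueError:
        traceback.print_exc()  # the traceback includes the original int() frame
```

The difference from `raise ... from` (used further down in TrainMLModelParser) is that here the traceback itself, not just a chained `__cause__`, carries the original frames.
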
diff --git a/immuneML/dsl/ObjectParser.py b/immuneML/dsl/ObjectParser.py
index 106a8c9b8..b32201445 100644
--- a/immuneML/dsl/ObjectParser.py
+++ b/immuneML/dsl/ObjectParser.py
@@ -66,6 +66,6 @@ def parse_object(specs, valid_class_names: list, class_name_ending: str, class_p
         except TypeError as err:
             raise AssertionError(f"{location}: invalid parameter {err.args[0]} when specifying parameters in {specs} "
                                  f"under key {key}. Valid parameter names are: "
-                                 f"{[name for name in inspect.signature(cls.__init__).parameters.keys()]}")
+                                 f"{[name for name in inspect.signature(cls.__init__).parameters.keys()]}").with_traceback(err.__traceback__)
 
     return (obj, {class_name: params}) if return_params_dict else obj
diff --git a/immuneML/dsl/import_parsers/ImportParser.py b/immuneML/dsl/import_parsers/ImportParser.py
index 39c4a7dd2..998a990ae 100644
--- a/immuneML/dsl/import_parsers/ImportParser.py
+++ b/immuneML/dsl/import_parsers/ImportParser.py
@@ -63,9 +63,9 @@ def parse_dataset(key: str, dataset_specs: dict, result_path: Path) -> Dataset:
             raise KeyError(f"{key_error}\n\nAn error occurred during parsing of dataset {key}. "
                            f"The keyword {key_error.args[0]} was missing. This either means this argument was "
                            f"not defined under definitions/datasets/{key}/params, or this column was missing from "
-                           f"an input data file. ")
+                           f"an input data file. ").with_traceback(key_error.__traceback__)
         except Exception as ex:
-            raise Exception(f"{ex}\n\nAn error occurred while parsing the dataset {key}. See the log above for more details.")
+            raise Exception(f"{ex}\n\nAn error occurred while parsing the dataset {key}. See the log above for more details.").with_traceback(ex.__traceback__)
 
         return dataset
@@ -88,6 +88,6 @@ def _prepare_params(dataset_specs: dict, result_path: Path, dataset_name: str):
 
     @staticmethod
     def log_dataset_info(dataset: Dataset):
-        print_log(f"\nImported {dataset.__class__.__name__.split('Dataset')[0].lower()} dataset {dataset.name}:\n"
-                  f"Example count: {dataset.get_example_count()}\n"
-                  f"Labels: {dataset.get_label_names()}", True)
+        print_log(f"Imported {dataset.__class__.__name__.split('Dataset')[0].lower()} dataset {dataset.name}:\n"
+                  f"- Example count: {dataset.get_example_count()}\n"
+                  f"- Labels: {dataset.get_label_names()}", True)
diff --git a/immuneML/dsl/instruction_parsers/TrainMLModelParser.py b/immuneML/dsl/instruction_parsers/TrainMLModelParser.py
index 61f8317d5..2696bff0a 100644
--- a/immuneML/dsl/instruction_parsers/TrainMLModelParser.py
+++ b/immuneML/dsl/instruction_parsers/TrainMLModelParser.py
@@ -139,7 +139,7 @@ def _parse_settings(self, instruction: dict, symbol_table: SymbolTable) -> list:
                 settings.append(s)
             return settings
         except KeyError as key_error:
-            raise KeyError(f"{TrainMLModelParser.__name__}: parameter {key_error.args[0]} was not defined under settings in TrainMLModel instruction.")
+            raise KeyError(f"{TrainMLModelParser.__name__}: parameter {key_error.args[0]} was not defined under settings in TrainMLModel instruction.") from key_error
 
     def _prepare_path(self, instruction: dict) -> Path:
         if "path" in instruction:
@@ -192,7 +192,7 @@ def _parse_split_config(self, instruction_key, instruction: dict, split_key: str
                                   if "leave_one_out_config" in instruction[split_key] else None)
         except KeyError as key_error:
-            raise KeyError(f"{TrainMLModelParser.__name__}: parameter {key_error.args[0]} was not defined under {split_key}.")
+            raise KeyError(f"{TrainMLModelParser.__name__}: parameter {key_error.args[0]} was not defined under {split_key}.") from key_error
 
     def _prepare_report_config(self, instruction_key, instruction, split_key, symbol_table):
         if "reports" in instruction[split_key] and len(instruction[split_key]["reports"]) > 0:
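
TrainMLModelParser uses explicit chaining with `raise ... from` instead of `.with_traceback()`: the original exception becomes `__cause__` of the new one and both tracebacks are printed. A minimal sketch with illustrative names:

```python
# Minimal sketch of explicit exception chaining ('raise ... from') as used in
# TrainMLModelParser above. Python prints both tracebacks, joined by "The
# above exception was the direct cause of the following exception:".
def parse_settings(instruction: dict) -> list:
    try:
        return [(s["encoding"], s["ml_method"]) for s in instruction["settings"]]
    except KeyError as key_error:
        raise KeyError(f"parameter {key_error.args[0]} was not defined under "
                       f"settings in the TrainMLModel instruction.") from key_error


if __name__ == "__main__":
    parse_settings({"settings": [{"encoding": "kmer"}]})  # 'ml_method' missing
```
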
diff --git a/immuneML/ml_metrics/ClassificationMetric.py b/immuneML/ml_metrics/ClassificationMetric.py
index 666fa6df4..63ddfc2ac 100644
--- a/immuneML/ml_metrics/ClassificationMetric.py
+++ b/immuneML/ml_metrics/ClassificationMetric.py
@@ -18,8 +18,8 @@ class ClassificationMetric(Enum):
     def get_metric(metric_name: str):
         try:
             return ClassificationMetric[metric_name.upper()]
-        except KeyError:
-            raise KeyError(f"'{metric_name}' is not a valid performance metric. Valid metrics are: {', '.join([m.name for m in ClassificationMetric])}")
+        except KeyError as e:
+            raise KeyError(f"'{metric_name}' is not a valid performance metric. Valid metrics are: {', '.join([m.name for m in ClassificationMetric])}").with_traceback(e.__traceback__)
 
     @staticmethod
     def get_search_criterion(metric):
diff --git a/immuneML/util/CompAIRRHelper.py b/immuneML/util/CompAIRRHelper.py
index c65875f79..564c12f9e 100644
--- a/immuneML/util/CompAIRRHelper.py
+++ b/immuneML/util/CompAIRRHelper.py
@@ -46,7 +46,7 @@ def check_compairr_path(compairr_path):
         except Exception as e:
             raise Exception(f"CompAIRRHelper: failed to call CompAIRR: {e}\n"
                             f"Please ensure the correct version of CompAIRR has been installed (version {required_major}.{required_minor}.{required_patch} or later), "
-                            f"or provide the path to the CompAIRR executable.")
+                            f"or provide the path to the CompAIRR executable.").with_traceback(e.__traceback__)
 
         return compairr_path
diff --git a/immuneML/util/ImportHelper.py b/immuneML/util/ImportHelper.py
index 511d365e2..0860a1c81 100644
--- a/immuneML/util/ImportHelper.py
+++ b/immuneML/util/ImportHelper.py
@@ -86,7 +86,7 @@ def import_repertoire_dataset(import_class, params: DatasetImportParams, dataset
         except Exception as e:
             raise Exception(f"{e}\nAn error occurred while reading in the metadata file {params.metadata_file}. Please "
                             f"see the error log above for more details on this error and the documentation for the "
-                            f"expected format of the metadata.")
+                            f"expected format of the metadata.").with_traceback(e.__traceback__)
 
         ParameterValidator.assert_keys_present(metadata.columns.tolist(), ["filename"], ImportHelper.__name__,
                                                f'{dataset_name}: params: metadata_file')
@@ -142,7 +142,7 @@ def load_repertoire_as_object(import_class, metadata_row, params: DatasetImportP
             return repertoire
         except Exception as exception:
             raise RuntimeError(
-                f"{ImportHelper.__name__}: error when importing file {metadata_row['filename']}: {exception}") from exception
+                f"{ImportHelper.__name__}: error when importing file {metadata_row['filename']}: {exception}").with_traceback(exception.__traceback__)
 
     @staticmethod
     def load_sequence_dataframe(filepath, params, alternative_load_func=None):
@@ -156,7 +156,7 @@ def load_sequence_dataframe(filepath, params, alternative_load_func=None):
             raise Exception(
                 f"{ex}\n\nImportHelper: an error occurred during dataset import while parsing the input file: {filepath}.\n"
                 f"Please make sure this is a correct immune receptor data file (not metadata).\n"
                 f"The parameters used for import are {params}.\nFor technical description of the error, see the log above. "
-                f"For details on how to specify the dataset import, see the documentation.")
+                f"For details on how to specify the dataset import, see the documentation.").with_traceback(ex.__traceback__)
 
         ImportHelper.rename_dataframe_columns(df, params)
         ImportHelper.standardize_none_values(df)
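
The ImportHelper hunks above repeat the same wrap-and-rethrow block three times. If the pattern keeps spreading, it could be factored into a small helper; a hypothetical sketch (`error_context` is not part of this diff or of immuneML):

```python
# Hypothetical helper, not part of this diff: a context manager that rewraps
# any exception with extra user-facing context while keeping the original
# traceback, mirroring the try/except blocks repeated in ImportHelper.
from contextlib import contextmanager


@contextmanager
def error_context(message: str):
    try:
        yield
    except Exception as e:
        raise Exception(f"{e}\n{message}").with_traceback(e.__traceback__)


if __name__ == "__main__":
    with error_context("An error occurred while reading in the metadata file. "
                       "Please see the documentation for the expected format."):
        open("missing_metadata.csv")  # FileNotFoundError, rewrapped above
```
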
diff --git a/immuneML/util/Logger.py b/immuneML/util/Logger.py
index b8dcf8184..6fefe0d70 100644
--- a/immuneML/util/Logger.py
+++ b/immuneML/util/Logger.py
@@ -14,7 +14,7 @@ def wrapped(*args, **kwargs):
                 raise Exception(f"{e}\n\n"
                                 f"ImmuneMLParser: an error occurred during parsing in function {func.__name__} "
                                 f" with parameters: {args}.\n\nFor more details on how to write the specification, "
-                                f"see the documentation. For technical description of the error, see the log above.")
+                                f"see the documentation. For technical description of the error, see the log above.").with_traceback(e.__traceback__)
             else:
                 raise e
         finally:
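
The Logger.py hunk is an excerpt from a wrap-and-rethrow decorator. A minimal standalone sketch of that decorator shape, under the assumption that the surrounding code looks roughly like this (the actual immuneML implementation may differ in its condition and logging calls):

```python
# Hedged sketch of the decorator shape the Logger.py hunk above sits in; the
# real implementation additionally branches (else: raise e) and has a finally
# block, which are omitted here for brevity.
import functools
import logging


def log_error(func):
    @functools.wraps(func)
    def wrapped(*args, **kwargs):
        try:
            return func(*args, **kwargs)
        except Exception as e:
            logging.exception(e)  # write the technical details to the log first
            raise Exception(f"{e}\n\nan error occurred during parsing in function {func.__name__} "
                            f"with parameters: {args}.\n\nFor more details, see the "
                            f"documentation and the log above.").with_traceback(e.__traceback__)
    return wrapped


@log_error
def parse(value: str) -> int:
    return int(value)


if __name__ == "__main__":
    parse("forty-two")  # ValueError is logged, then re-raised with the friendlier message
```
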
diff --git a/scripts/check_new_encoder.py b/scripts/check_new_encoder.py
index c136b20e4..fe6dc397d 100644
--- a/scripts/check_new_encoder.py
+++ b/scripts/check_new_encoder.py
@@ -1,4 +1,10 @@
 import argparse
+import sys
+
+# Ensure the immuneML/ project 'root dir' is added to sys.path
+# Adding "." and "../" allows the script to be run from immuneML/ and immuneML/scripts/
+# When encountering a ModuleNotFoundError, try adding the absolute path to the project 'root dir' here
+sys.path.extend([".", "../"])
 
 from scripts.checker_util import *
 from immuneML.data_model.dataset.ReceptorDataset import ReceptorDataset
@@ -17,11 +23,15 @@ def parse_commandline_arguments(args):
     parser = argparse.ArgumentParser(description="Tool for testing new immuneML DatasetEncoder classes")
-    parser.add_argument("-e", "--encoder_file", type=str, required=True, help="Path to the (dataset-specific) encoder file, placed in the correct immuneML subfolder. ")
-    parser.add_argument("-d", "--dataset_type", type=str, choices=["repertoire", "sequence", "receptor"], required=True, help="Whether to test using 'sequence', 'receptor' or 'repertoire' dataset.")
-    parser.add_argument("-p", "--no_default_parameters", action='store_true', help="If enabled, it is assumed that no default parameters file exists, and the Encoder can be run without supplying additional parameters. ")
-    parser.add_argument("-l", "--log_file", type=str, default="check_new_encoder_log.txt", help="Path to the output log file. If already present, the file will be overwritten.")
-    parser.add_argument("-t", "--tmp_path", type=str, default="./tmp", help="Path to the temporary output folder. If already present, the folder will be overwritten.")
+
+    usage_args = parser.add_argument_group('usage arguments')
+    usage_args.add_argument("-e", "--encoder_file", type=str, required=True, help="Path to the (dataset-specific) encoder file, placed in the correct immuneML subfolder.")
+    usage_args.add_argument("-d", "--dataset_type", type=str, choices=["repertoire", "sequence", "receptor"], required=True, help="Whether to test using a 'sequence', 'receptor' or 'repertoire' dataset.")
+    usage_args.add_argument("-p", "--no_default_parameters", action='store_true', help="If enabled, it is assumed that no default parameters file exists, and the Encoder can be run without supplying additional parameters.")
+
+    logging_args = parser.add_argument_group('logging arguments')
+    logging_args.add_argument("-l", "--log_file", type=str, default="check_new_encoder_log.txt", help="Path to the output log file. If already present, the file will be overwritten (default='./check_new_encoder_log.txt').")
+    logging_args.add_argument("-t", "--tmp_path", type=str, default="./tmp", help="Path to the temporary output folder. If already present, the folder will be overwritten (default='./tmp').")
 
     return parser.parse_args(args)
@@ -45,7 +55,7 @@ def check_encoded_data(encoded_data, dummy_dataset, base_class_name):
     assert isinstance(encoded_data, EncodedData), f"Error: expected the .encoded_data field of the output dataset to be an EncodedData object, found {encoded_data.__class__.__name__}"
 
     assert encoded_data.examples is not None, f"Error: EncodedData.examples is None, but should be a numeric matrix with a number of rows equal to the number of examples in the dataset ({dummy_dataset.get_example_count()})"
-    assert encoded_data.examples.shape[0] == dummy_dataset.get_example_count(), f"Error: the number of rows in EncodedData.examples must be equal to the number of examples in the dataset ({dummy_dataset.get_example_count()})"
+    assert encoded_data.examples.shape[0] == dummy_dataset.get_example_count(), f"Error: the number of rows in EncodedData.examples ({encoded_data.examples.shape[0]}) must be equal to the number of examples in the dataset ({dummy_dataset.get_example_count()})"
 
     assert encoded_data.example_ids == dummy_dataset.get_example_ids(), f"Error: EncodedData.example_ids must match the original dataset: {dummy_dataset.get_example_ids()}, found {encoded_data.example_ids}"
     assert encoded_data.encoding == base_class_name, f"Error: EncodedData.encoding must be set to the base class name ('{base_class_name}'), found {encoded_data.encoding}"
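
As the added comments note, the relative entries in `sys.path.extend([".", "../"])` only resolve when the script is launched from the project root or from scripts/. A sketch of a launch-directory-independent alternative, assuming the script lives in <root>/scripts/:

```python
# Sketch of a cwd-independent alternative to sys.path.extend([".", "../"]):
# resolve the project root from this file's location instead of from the
# current working directory. Assumes this file lives in <root>/scripts/.
import sys
from pathlib import Path

PROJECT_ROOT = Path(__file__).resolve().parent.parent
if str(PROJECT_ROOT) not in sys.path:
    sys.path.insert(0, str(PROJECT_ROOT))
```
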
") + usage_args.add_argument("-p", "--no_default_parameters", action='store_true', help="If enabled, it is assumed that no default parameters file exists, and the MLMethod can be run without supplying additional parameters. ") + + logging_args = parser.add_argument_group('logging arguments') + logging_args.add_argument("-l", "--log_file", type=str, default="check_new_ml_method_log.txt", help="Path to the output log file. If already present, the file will be overwritten (default='./check_new_ml_method_log.txt').") + logging_args.add_argument("-t", "--tmp_path", type=str, default="./tmp", help="Path to the temporary output folder. If already present, the folder will be overwritten (default='./tmp').") return parser.parse_args(args) @@ -55,12 +64,17 @@ def check_methods(ml_method_instance): assert MLMethod._assert_matching_label == ml_method_instance.__class__._assert_matching_label, mssg.format("_assert_matching_label", ml_method_instance.__class__._assert_matching_label) assert MLMethod.predict == ml_method_instance.__class__.predict, mssg.format("predict", ml_method_instance.__class__.predict) assert MLMethod.predict_proba == ml_method_instance.__class__.predict_proba, mssg.format("predict_proba", ml_method_instance.__class__.predict_proba) - assert MLMethod.check_encoder_compatibility == ml_method_instance.__class__.check_encoder_compatibility, mssg.format("check_encoder_compatibility", ml_method_instance.__class__.check_encoder_compatibility) assert MLMethod.get_feature_names == ml_method_instance.__class__.get_feature_names, mssg.format("get_feature_names", ml_method_instance.__class__.get_feature_names) assert MLMethod.get_label_name == ml_method_instance.__class__.get_label_name, mssg.format("get_label_name", ml_method_instance.__class__.get_label_name) assert MLMethod.get_classes == ml_method_instance.__class__.get_classes, mssg.format("get_classes", ml_method_instance.__class__.get_classes) assert MLMethod.get_positive_class == ml_method_instance.__class__.get_positive_class, mssg.format("get_positive_class", ml_method_instance.__class__.get_positive_class) + if MLMethod.check_encoder_compatibility != ml_method_instance.__class__.check_encoder_compatibility: + logging.warning(f"class method 'check_encoder_compatibility' was overwritten from MLMethod. Please ensure this was intentional (for example: if more than just the Encoder type needs to be checked). ") + + # , mssg.format("check_encoder_compatibility", ml_method_instance.__class__.check_encoder_compatibility) + + check_base_vs_instance_methods(MLMethod, ml_method_instance) compatible_encoders = ml_method_instance.get_compatible_encoders()