Merge pull request #173 from uio-bmi/bug_fixes
Bug fixes
LonnekeScheffer committed May 20, 2024
2 parents cf72dfc + eb92e01 commit 480d6bb
Showing 13 changed files with 57 additions and 31 deletions.
@@ -0,0 +1,2 @@
+ number_of_processes: 4
+ export_formats: [AIRR]
2 changes: 1 addition & 1 deletion immuneML/data_model/dataset/ElementDataset.py
@@ -117,7 +117,7 @@ def make_subset(self, example_indices, path, dataset_type: str):
def get_label_names(self):
"""Returns the list of metadata fields which can be used as labels"""
return [label for label in list(self.labels.keys()) if
- label not in ['region_type', 'receptor_chains', 'organism']] if self.labels else []
+ label not in ['region_type', 'receptor_chains', 'organism', 'type_dict']] if self.labels else []

def clone(self, keep_identifier: bool = False):
raise NotImplementedError
2 changes: 1 addition & 1 deletion immuneML/data_model/dataset/RepertoireDataset.py
@@ -107,7 +107,7 @@ def get_label_names(self, refresh=False):
"""Returns the list of metadata fields which can be used as labels; if refresh=True, it reloads the fields
from disk"""
all_metadata_fields = set(self.get_metadata_fields(refresh))
- for non_label in ["subject_id", "filename", "repertoire_id", "identifier"]:
+ for non_label in ["subject_id", "filename", "repertoire_id", "identifier", "type_dict"]:
if non_label in all_metadata_fields:
all_metadata_fields.remove(non_label)

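The two hunks above fix the same issue: the internal `type_dict` bookkeeping field was being offered as if it were a user-defined label. A minimal standalone sketch of the filtering pattern (the labels dict below is a hypothetical example, not immuneML data):

```python
# Sketch of the label-name filtering pattern from the two hunks above.
# The labels dict is a hypothetical example.
NON_LABEL_FIELDS = ['region_type', 'receptor_chains', 'organism', 'type_dict']

labels = {'disease': ['T1D', 'healthy'],    # an actual label
          'organism': 'human',              # import setting, not a label
          'type_dict': {'disease': 'str'}}  # internal bookkeeping, not a label

label_names = [name for name in labels if name not in NON_LABEL_FIELDS]
print(label_names)  # ['disease']
```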
2 changes: 1 addition & 1 deletion immuneML/data_model/receptor/ElementGenerator.py
@@ -48,7 +48,7 @@ def _load_batch(self, current_file: int, return_objects: bool = True):
elements = bnp_data
except ValueError as error:
raise ValueError(f'{ElementGenerator.__name__}: an error occurred while creating an object from tsv file. '
- f'Details: {error}')
+ f'Details: {error}').with_traceback(error.__traceback__)

return elements

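This hunk, like most of those below, wraps a low-level exception in a more informative one while keeping the original traceback via `BaseException.with_traceback()`. A self-contained sketch of the pattern; `parse_tsv` here is a hypothetical stand-in for the real parsing call:

```python
# Sketch of the traceback-preserving re-raise pattern used throughout this
# commit; parse_tsv is a hypothetical stand-in for the failing call.
def parse_tsv(path: str):
    raise ValueError(f"malformed row in {path}")

def load_batch(path: str):
    try:
        return parse_tsv(path)
    except ValueError as error:
        # with_traceback() attaches the original frames to the new exception,
        # so the log still points at the line that actually failed.
        raise ValueError(f"an error occurred while creating an object from "
                         f"tsv file. Details: {error}"
                         ).with_traceback(error.__traceback__)

load_batch("data.tsv")  # traceback shows both load_batch and parse_tsv frames
```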
2 changes: 1 addition & 1 deletion immuneML/dsl/ObjectParser.py
@@ -66,6 +66,6 @@ def parse_object(specs, valid_class_names: list, class_name_ending: str, class_p
except TypeError as err:
raise AssertionError(f"{location}: invalid parameter {err.args[0]} when specifying parameters in {specs} "
f"under key {key}. Valid parameter names are: "
f"{[name for name in inspect.signature(cls.__init__).parameters.keys()]}")
f"{[name for name in inspect.signature(cls.__init__).parameters.keys()]}").with_traceback(err.__traceback__)

return (obj, {class_name: params}) if return_params_dict else obj
10 changes: 5 additions & 5 deletions immuneML/dsl/import_parsers/ImportParser.py
@@ -63,9 +63,9 @@ def parse_dataset(key: str, dataset_specs: dict, result_path: Path) -> Dataset:
raise KeyError(f"{key_error}\n\nAn error occurred during parsing of dataset {key}. "
f"The keyword {key_error.args[0]} was missing. This either means this argument was "
f"not defined under definitions/datasets/{key}/params, or this column was missing from "
f"an input data file. ")
f"an input data file. ").with_traceback(key_error.__traceback__)
except Exception as ex:
raise Exception(f"{ex}\n\nAn error occurred while parsing the dataset {key}. See the log above for more details.")
raise Exception(f"{ex}\n\nAn error occurred while parsing the dataset {key}. See the log above for more details.").with_traceback(ex.__traceback__)

return dataset

@@ -88,6 +88,6 @@ def _prepare_params(dataset_specs: dict, result_path: Path, dataset_name: str):

@staticmethod
def log_dataset_info(dataset: Dataset):
print_log(f"\nImported {dataset.__class__.__name__.split('Dataset')[0].lower()} dataset {dataset.name}:\n"
f"Example count: {dataset.get_example_count()}\n"
f"Labels: {dataset.get_label_names()}", True)
print_log(f"Imported {dataset.__class__.__name__.split('Dataset')[0].lower()} dataset {dataset.name}:\n"
f"- Example count: {dataset.get_example_count()}\n"
f"- Labels: {dataset.get_label_names()}", True)
4 changes: 2 additions & 2 deletions immuneML/dsl/instruction_parsers/TrainMLModelParser.py
@@ -139,7 +139,7 @@ def _parse_settings(self, instruction: dict, symbol_table: SymbolTable) -> list:
settings.append(s)
return settings
except KeyError as key_error:
raise KeyError(f"{TrainMLModelParser.__name__}: parameter {key_error.args[0]} was not defined under settings in TrainMLModel instruction.")
raise KeyError(f"{TrainMLModelParser.__name__}: parameter {key_error.args[0]} was not defined under settings in TrainMLModel instruction.") from key_error

def _prepare_path(self, instruction: dict) -> Path:
if "path" in instruction:
@@ -192,7 +192,7 @@ def _parse_split_config(self, instruction_key, instruction: dict, split_key: str
if "leave_one_out_config" in instruction[split_key] else None)

except KeyError as key_error:
raise KeyError(f"{TrainMLModelParser.__name__}: parameter {key_error.args[0]} was not defined under {split_key}.")
raise KeyError(f"{TrainMLModelParser.__name__}: parameter {key_error.args[0]} was not defined under {split_key}.") from key_error

def _prepare_report_config(self, instruction_key, instruction, split_key, symbol_table):
if "reports" in instruction[split_key] and len(instruction[split_key]["reports"]) > 0:
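Unlike the `with_traceback()` hunks elsewhere in this commit, the two parser hunks above chain with `raise ... from ...`, which records the original exception as `__cause__` and prints both tracebacks. A standalone sketch of that variant (the settings dict and key are illustrative):

```python
# Sketch of explicit exception chaining as used in TrainMLModelParser above;
# the settings dict and key are illustrative.
def get_setting(settings: dict, key: str):
    try:
        return settings[key]
    except KeyError as key_error:
        # 'from key_error' sets __cause__, so Python prints the original
        # KeyError, then "was the direct cause of", then this one.
        raise KeyError(f"parameter {key!r} was not defined under settings") from key_error

get_setting({"reports": []}, "assessment")
```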
4 changes: 2 additions & 2 deletions immuneML/ml_metrics/ClassificationMetric.py
@@ -18,8 +18,8 @@ class ClassificationMetric(Enum):
def get_metric(metric_name: str):
try:
return ClassificationMetric[metric_name.upper()]
- except KeyError:
-     raise KeyError(f"'{metric_name}' is not a valid performance metric. Valid metrics are: {', '.join([m.name for m in ClassificationMetric])}")
+ except KeyError as e:
+     raise KeyError(f"'{metric_name}' is not a valid performance metric. Valid metrics are: {', '.join([m.name for m in ClassificationMetric])}").with_traceback(e.__traceback__)

@staticmethod
def get_search_criterion(metric):
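The hunk above keeps the enum-lookup-by-name idiom but re-raises the bare `KeyError` with a message listing the valid metric names. A standalone sketch with two illustrative metrics (not the full immuneML list):

```python
from enum import Enum

# Sketch of the enum lookup pattern above; the two metrics are illustrative.
class ClassificationMetric(Enum):
    ACCURACY = "accuracy"
    BALANCED_ACCURACY = "balanced_accuracy"

def get_metric(metric_name: str):
    try:
        return ClassificationMetric[metric_name.upper()]
    except KeyError as e:
        raise KeyError(f"'{metric_name}' is not a valid performance metric. "
                       f"Valid metrics are: "
                       f"{', '.join(m.name for m in ClassificationMetric)}"
                       ).with_traceback(e.__traceback__)

print(get_metric("accuracy"))  # ClassificationMetric.ACCURACY
```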
2 changes: 1 addition & 1 deletion immuneML/util/CompAIRRHelper.py
@@ -46,7 +46,7 @@ def check_compairr_path(compairr_path):
except Exception as e:
raise Exception(f"CompAIRRHelper: failed to call CompAIRR: {e}\n"
f"Please ensure the correct version of CompAIRR has been installed (version {required_major}.{required_minor}.{required_patch} or later), "
f"or provide the path to the CompAIRR executable.")
f"or provide the path to the CompAIRR executable.").with_traceback(e.__traceback__)

return compairr_path

6 changes: 3 additions & 3 deletions immuneML/util/ImportHelper.py
@@ -86,7 +86,7 @@ def import_repertoire_dataset(import_class, params: DatasetImportParams, dataset
except Exception as e:
raise Exception(f"{e}\nAn error occurred while reading in the metadata file {params.metadata_file}. Please "
f"see the error log above for more details on this error and the documentation for the "
f"expected format of the metadata.")
f"expected format of the metadata.").with_traceback(e.__traceback__)

ParameterValidator.assert_keys_present(metadata.columns.tolist(), ["filename"], ImportHelper.__name__,
f'{dataset_name}: params: metadata_file')
@@ -142,7 +142,7 @@ def load_repertoire_as_object(import_class, metadata_row, params: DatasetImportP
return repertoire
except Exception as exception:
raise RuntimeError(
f"{ImportHelper.__name__}: error when importing file {metadata_row['filename']}: {exception}") from exception
f"{ImportHelper.__name__}: error when importing file {metadata_row['filename']}: {exception}").with_traceback(exception.__traceback__)

@staticmethod
def load_sequence_dataframe(filepath, params, alternative_load_func=None):
@@ -156,7 +156,7 @@ def load_sequence_dataframe(filepath, params, alternative_load_func=None):
f"{ex}\n\nImportHelper: an error occurred during dataset import while parsing the input file: {filepath}.\n"
f"Please make sure this is a correct immune receptor data file (not metadata).\n"
f"The parameters used for import are {params}.\nFor technical description of the error, see the log above. "
f"For details on how to specify the dataset import, see the documentation.")
f"For details on how to specify the dataset import, see the documentation.").with_traceback(ex.__traceback__)

ImportHelper.rename_dataframe_columns(df, params)
ImportHelper.standardize_none_values(df)
2 changes: 1 addition & 1 deletion immuneML/util/Logger.py
@@ -14,7 +14,7 @@ def wrapped(*args, **kwargs):
raise Exception(f"{e}\n\n"
f"ImmuneMLParser: an error occurred during parsing in function {func.__name__} "
f" with parameters: {args}.\n\nFor more details on how to write the specification, "
f"see the documentation. For technical description of the error, see the log above.")
f"see the documentation. For technical description of the error, see the log above.").with_traceback(e.__traceback__)
else:
raise e
finally:
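The Logger.py change lives inside a decorator that wraps parsing functions. A simplified standalone sketch of that pattern, assuming the decorator shape shown in the hunk (the real version also has `else` and `finally` branches not visible here; the decorator name and `parse_section` are hypothetical):

```python
import functools

# Simplified sketch of an error-wrapping decorator like the one in Logger.py;
# the decorator name and the parse_section function are hypothetical.
def log_error(func):
    @functools.wraps(func)
    def wrapped(*args, **kwargs):
        try:
            return func(*args, **kwargs)
        except Exception as e:
            raise Exception(f"{e}\n\nan error occurred during parsing in "
                            f"function {func.__name__} with parameters: {args}."
                            ).with_traceback(e.__traceback__)
    return wrapped

@log_error
def parse_section(specs: dict):
    return specs["definitions"]

parse_section({})  # re-raised with context; original traceback preserved
```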
22 changes: 16 additions & 6 deletions scripts/check_new_encoder.py
@@ -1,4 +1,10 @@
import argparse
+ import sys

+ # Ensure the immuneML/ project 'root dir' is added to sys.path
+ # Adding "." and "../" allows the script to be run from immuneML/ and immuneML/scripts/
+ # When encountering ModuleNotFoundError, try adding the absolute path to the project 'root dir' here
+ sys.path.extend([".", "../"])

from scripts.checker_util import *
from immuneML.data_model.dataset.ReceptorDataset import ReceptorDataset
@@ -17,11 +23,15 @@

def parse_commandline_arguments(args):
parser = argparse.ArgumentParser(description="Tool for testing new immuneML DatasetEncoder classes")
parser.add_argument("-e", "--encoder_file", type=str, required=True, help="Path to the (dataset-specific) encoder file, placed in the correct immuneML subfolder. ")
parser.add_argument("-d", "--dataset_type", type=str, choices=["repertoire", "sequence", "receptor"], required=True, help="Whether to test using 'sequence', 'receptor' or 'repertoire' dataset.")
parser.add_argument("-p", "--no_default_parameters", action='store_true', help="If enabled, it is assumed that no default parameters file exists, and the Encoder can be run without supplying additional parameters. ")
parser.add_argument("-l", "--log_file", type=str, default="check_new_encoder_log.txt", help="Path to the output log file. If already present, the file will be overwritten.")
parser.add_argument("-t", "--tmp_path", type=str, default="./tmp", help="Path to the temporary output folder. If already present, the folder will be overwritten.")

+ usage_args = parser.add_argument_group('usage arguments')
+ usage_args.add_argument("-e", "--encoder_file", type=str, required=True, help="Path to the (dataset-specific) encoder file, placed in the correct immuneML subfolder. ")
+ usage_args.add_argument("-d", "--dataset_type", type=str, choices=["repertoire", "sequence", "receptor"], required=True, help="Whether to test using 'sequence', 'receptor' or 'repertoire' dataset.")
+ usage_args.add_argument("-p", "--no_default_parameters", action='store_true', help="If enabled, it is assumed that no default parameters file exists, and the Encoder can be run without supplying additional parameters. ")

+ logging_args = parser.add_argument_group('logging arguments')
+ logging_args.add_argument("-l", "--log_file", type=str, default="check_new_encoder_log.txt", help="Path to the output log file. If already present, the file will be overwritten (default='./check_new_encoder_log.txt').")
+ logging_args.add_argument("-t", "--tmp_path", type=str, default="./tmp", help="Path to the temporary output folder. If already present, the folder will be overwritten (default='./tmp').")

return parser.parse_args(args)
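The hunk above splits the options into named groups with `argparse.ArgumentParser.add_argument_group`; groups change only how `--help` output is organized, not how arguments are parsed. A minimal standalone sketch:

```python
import argparse

# Minimal sketch of the argument-group refactor above; option names and
# defaults are trimmed for brevity.
parser = argparse.ArgumentParser(description="demo checker")

usage_args = parser.add_argument_group('usage arguments')
usage_args.add_argument("-e", "--encoder_file", type=str, required=True)

logging_args = parser.add_argument_group('logging arguments')
logging_args.add_argument("-l", "--log_file", type=str, default="check_log.txt")

args = parser.parse_args(["-e", "MyEncoder.py"])
print(args.encoder_file, args.log_file)  # MyEncoder.py check_log.txt
```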

@@ -45,7 +55,7 @@ def check_encoded_data(encoded_data, dummy_dataset, base_class_name):
assert isinstance(encoded_data, EncodedData), f"Error: expected the .encoded_data field of the output dataset to be an EncodedData object, found {encoded_data.__class__.__name__}"

assert encoded_data.examples is not None, f"Error: EncodedData.examples is None, but should be a numeric matrix with a number of rows equal to the number of examples in the dataset ({dummy_dataset.get_example_count()})"
- assert encoded_data.examples.shape[0] == dummy_dataset.get_example_count(), f"Error: the number of rows in EncodedData.examples must be equal to the number of examples in the dataset ({dummy_dataset.get_example_count()})"
+ assert encoded_data.examples.shape[0] == dummy_dataset.get_example_count(), f"Error: the number of rows in EncodedData.examples ({encoded_data.examples.shape[0]}) must be equal to the number of examples in the dataset ({dummy_dataset.get_example_count()})"

assert encoded_data.example_ids == dummy_dataset.get_example_ids(), f"Error: EncodedData.example_ids must match the original dataset: {dummy_dataset.get_example_ids()}, found {encoded_data.example_ids}"
assert encoded_data.encoding == base_class_name, f"Error: EncodedData.encoding must be set to the base class name ('{base_class_name}'), found {encoded_data.encoding}"
28 changes: 21 additions & 7 deletions scripts/check_new_ml_method.py
@@ -1,23 +1,32 @@
+ import sys
import argparse
import random

import numpy as np

+ # Ensure the immuneML/ project 'root dir' is added to sys.path
+ # Adding "." and "../" allows the script to be run from immuneML/ and immuneML/scripts/
+ # When encountering ModuleNotFoundError, try adding the absolute path to the project 'root dir' here
+ sys.path.extend([".", "../"])

+ from scripts.checker_util import *
from immuneML.data_model.encoded_data.EncodedData import EncodedData
from immuneML.dsl.DefaultParamsLoader import DefaultParamsLoader
from immuneML.environment.EnvironmentSettings import EnvironmentSettings
from immuneML.environment.Label import Label
from immuneML.ml_methods.classifiers.MLMethod import MLMethod
from immuneML.util.ReflectionHandler import ReflectionHandler
- from scripts.checker_util import *


def parse_commandline_arguments(args):
parser = argparse.ArgumentParser(description="Tool for testing new immuneML MLMethod classes")
parser.add_argument("-m", "--ml_method_file", type=str, required=True, help="Path to the MLMethod file, placed in the correct immuneML subfolder. ")
parser.add_argument("-p", "--no_default_parameters", action='store_true', help="If enabled, it is assumed that no default parameters file exists, and the MLMethod can be run without supplying additional parameters. ")
parser.add_argument("-l", "--log_file", type=str, default="check_new_ml_method_log.txt", help="Path to the output log file. If already present, the file will be overwritten.")
parser.add_argument("-t", "--tmp_path", type=str, default="./tmp", help="Path to the temporary output folder. If already present, the folder will be overwritten.")

+ usage_args = parser.add_argument_group('usage arguments')
+ usage_args.add_argument("-m", "--ml_method_file", type=str, required=True, help="Path to the MLMethod file, placed in the correct immuneML subfolder. ")
+ usage_args.add_argument("-p", "--no_default_parameters", action='store_true', help="If enabled, it is assumed that no default parameters file exists, and the MLMethod can be run without supplying additional parameters. ")

+ logging_args = parser.add_argument_group('logging arguments')
+ logging_args.add_argument("-l", "--log_file", type=str, default="check_new_ml_method_log.txt", help="Path to the output log file. If already present, the file will be overwritten (default='./check_new_ml_method_log.txt').")
+ logging_args.add_argument("-t", "--tmp_path", type=str, default="./tmp", help="Path to the temporary output folder. If already present, the folder will be overwritten (default='./tmp').")

return parser.parse_args(args)

@@ -55,12 +64,17 @@ def check_methods(ml_method_instance):
assert MLMethod._assert_matching_label == ml_method_instance.__class__._assert_matching_label, mssg.format("_assert_matching_label", ml_method_instance.__class__._assert_matching_label)
assert MLMethod.predict == ml_method_instance.__class__.predict, mssg.format("predict", ml_method_instance.__class__.predict)
assert MLMethod.predict_proba == ml_method_instance.__class__.predict_proba, mssg.format("predict_proba", ml_method_instance.__class__.predict_proba)
- assert MLMethod.check_encoder_compatibility == ml_method_instance.__class__.check_encoder_compatibility, mssg.format("check_encoder_compatibility", ml_method_instance.__class__.check_encoder_compatibility)
assert MLMethod.get_feature_names == ml_method_instance.__class__.get_feature_names, mssg.format("get_feature_names", ml_method_instance.__class__.get_feature_names)
assert MLMethod.get_label_name == ml_method_instance.__class__.get_label_name, mssg.format("get_label_name", ml_method_instance.__class__.get_label_name)
assert MLMethod.get_classes == ml_method_instance.__class__.get_classes, mssg.format("get_classes", ml_method_instance.__class__.get_classes)
assert MLMethod.get_positive_class == ml_method_instance.__class__.get_positive_class, mssg.format("get_positive_class", ml_method_instance.__class__.get_positive_class)

+ if MLMethod.check_encoder_compatibility != ml_method_instance.__class__.check_encoder_compatibility:
+     logging.warning(f"class method 'check_encoder_compatibility' was overwritten from MLMethod. Please ensure this was intentional (for example: if more than just the Encoder type needs to be checked). ")
+
+ # , mssg.format("check_encoder_compatibility", ml_method_instance.__class__.check_encoder_compatibility)


check_base_vs_instance_methods(MLMethod, ml_method_instance)

compatible_encoders = ml_method_instance.get_compatible_encoders()
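The final hunk relaxes the checker: overriding `check_encoder_compatibility` now triggers a warning rather than a failed assert, since an override can be legitimate when more than the encoder type must be checked. A standalone sketch of the override detection (the classes are illustrative stand-ins):

```python
import logging

# Sketch of the relaxed override check above; MLMethod and MyMethod are
# illustrative stand-ins for the real classes.
class MLMethod:
    def check_encoder_compatibility(self, encoder):
        pass

class MyMethod(MLMethod):
    def check_encoder_compatibility(self, encoder):  # intentional override
        pass

instance = MyMethod()
# Comparing the base-class function with the one on the instance's class
# detects whether the subclass overrode the method.
if MLMethod.check_encoder_compatibility != instance.__class__.check_encoder_compatibility:
    logging.warning("'check_encoder_compatibility' was overwritten from "
                    "MLMethod; please ensure this was intentional.")
```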
