Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Bug fixes #173

Merged
merged 4 commits into from
May 20, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
number_of_processes: 4
export_formats: [AIRR]
2 changes: 1 addition & 1 deletion immuneML/data_model/dataset/ElementDataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@ def make_subset(self, example_indices, path, dataset_type: str):
def get_label_names(self):
"""Returns the list of metadata fields which can be used as labels"""
return [label for label in list(self.labels.keys()) if
label not in ['region_type', 'receptor_chains', 'organism']] if self.labels else []
label not in ['region_type', 'receptor_chains', 'organism', 'type_dict']] if self.labels else []

def clone(self, keep_identifier: bool = False):
raise NotImplementedError
Expand Down
2 changes: 1 addition & 1 deletion immuneML/data_model/dataset/RepertoireDataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ def get_label_names(self, refresh=False):
"""Returns the list of metadata fields which can be used as labels; if refresh=True, it reloads the fields
from disk"""
all_metadata_fields = set(self.get_metadata_fields(refresh))
for non_label in ["subject_id", "filename", "repertoire_id", "identifier"]:
for non_label in ["subject_id", "filename", "repertoire_id", "identifier", "type_dict"]:
if non_label in all_metadata_fields:
all_metadata_fields.remove(non_label)

Expand Down
2 changes: 1 addition & 1 deletion immuneML/data_model/receptor/ElementGenerator.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ def _load_batch(self, current_file: int, return_objects: bool = True):
elements = bnp_data
except ValueError as error:
raise ValueError(f'{ElementGenerator.__name__}: an error occurred while creating an object from tsv file. '
f'Details: {error}')
f'Details: {error}').with_traceback(error.__traceback__)

return elements

Expand Down
2 changes: 1 addition & 1 deletion immuneML/dsl/ObjectParser.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,6 @@ def parse_object(specs, valid_class_names: list, class_name_ending: str, class_p
except TypeError as err:
raise AssertionError(f"{location}: invalid parameter {err.args[0]} when specifying parameters in {specs} "
f"under key {key}. Valid parameter names are: "
f"{[name for name in inspect.signature(cls.__init__).parameters.keys()]}")
f"{[name for name in inspect.signature(cls.__init__).parameters.keys()]}").with_traceback(err.__traceback__)

return (obj, {class_name: params}) if return_params_dict else obj
10 changes: 5 additions & 5 deletions immuneML/dsl/import_parsers/ImportParser.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,9 +63,9 @@ def parse_dataset(key: str, dataset_specs: dict, result_path: Path) -> Dataset:
raise KeyError(f"{key_error}\n\nAn error occurred during parsing of dataset {key}. "
f"The keyword {key_error.args[0]} was missing. This either means this argument was "
f"not defined under definitions/datasets/{key}/params, or this column was missing from "
f"an input data file. ")
f"an input data file. ").with_traceback(key_error.__traceback__)
except Exception as ex:
raise Exception(f"{ex}\n\nAn error occurred while parsing the dataset {key}. See the log above for more details.")
raise Exception(f"{ex}\n\nAn error occurred while parsing the dataset {key}. See the log above for more details.").with_traceback(ex.__traceback__)

return dataset

Expand All @@ -88,6 +88,6 @@ def _prepare_params(dataset_specs: dict, result_path: Path, dataset_name: str):

@staticmethod
def log_dataset_info(dataset: Dataset):
print_log(f"\nImported {dataset.__class__.__name__.split('Dataset')[0].lower()} dataset {dataset.name}:\n"
f"Example count: {dataset.get_example_count()}\n"
f"Labels: {dataset.get_label_names()}", True)
print_log(f"Imported {dataset.__class__.__name__.split('Dataset')[0].lower()} dataset {dataset.name}:\n"
f"- Example count: {dataset.get_example_count()}\n"
f"- Labels: {dataset.get_label_names()}", True)
4 changes: 2 additions & 2 deletions immuneML/dsl/instruction_parsers/TrainMLModelParser.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,7 @@ def _parse_settings(self, instruction: dict, symbol_table: SymbolTable) -> list:
settings.append(s)
return settings
except KeyError as key_error:
raise KeyError(f"{TrainMLModelParser.__name__}: parameter {key_error.args[0]} was not defined under settings in TrainMLModel instruction.")
raise KeyError(f"{TrainMLModelParser.__name__}: parameter {key_error.args[0]} was not defined under settings in TrainMLModel instruction.") from key_error

def _prepare_path(self, instruction: dict) -> Path:
if "path" in instruction:
Expand Down Expand Up @@ -192,7 +192,7 @@ def _parse_split_config(self, instruction_key, instruction: dict, split_key: str
if "leave_one_out_config" in instruction[split_key] else None)

except KeyError as key_error:
raise KeyError(f"{TrainMLModelParser.__name__}: parameter {key_error.args[0]} was not defined under {split_key}.")
raise KeyError(f"{TrainMLModelParser.__name__}: parameter {key_error.args[0]} was not defined under {split_key}.") from key_error

def _prepare_report_config(self, instruction_key, instruction, split_key, symbol_table):
if "reports" in instruction[split_key] and len(instruction[split_key]["reports"]) > 0:
Expand Down
4 changes: 2 additions & 2 deletions immuneML/ml_metrics/ClassificationMetric.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,8 @@ class ClassificationMetric(Enum):
def get_metric(metric_name: str):
try:
return ClassificationMetric[metric_name.upper()]
except KeyError:
raise KeyError(f"'{metric_name}' is not a valid performance metric. Valid metrics are: {', '.join([m.name for m in ClassificationMetric])}")
except KeyError as e:
raise KeyError(f"'{metric_name}' is not a valid performance metric. Valid metrics are: {', '.join([m.name for m in ClassificationMetric])}").with_traceback(e.__traceback__)

@staticmethod
def get_search_criterion(metric):
Expand Down
2 changes: 1 addition & 1 deletion immuneML/util/CompAIRRHelper.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ def check_compairr_path(compairr_path):
except Exception as e:
raise Exception(f"CompAIRRHelper: failed to call CompAIRR: {e}\n"
f"Please ensure the correct version of CompAIRR has been installed (version {required_major}.{required_minor}.{required_patch} or later), "
f"or provide the path to the CompAIRR executable.")
f"or provide the path to the CompAIRR executable.").with_traceback(e.__traceback__)

return compairr_path

Expand Down
6 changes: 3 additions & 3 deletions immuneML/util/ImportHelper.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ def import_repertoire_dataset(import_class, params: DatasetImportParams, dataset
except Exception as e:
raise Exception(f"{e}\nAn error occurred while reading in the metadata file {params.metadata_file}. Please "
f"see the error log above for more details on this error and the documentation for the "
f"expected format of the metadata.")
f"expected format of the metadata.").with_traceback(e.__traceback__)

ParameterValidator.assert_keys_present(metadata.columns.tolist(), ["filename"], ImportHelper.__name__,
f'{dataset_name}: params: metadata_file')
Expand Down Expand Up @@ -142,7 +142,7 @@ def load_repertoire_as_object(import_class, metadata_row, params: DatasetImportP
return repertoire
except Exception as exception:
raise RuntimeError(
f"{ImportHelper.__name__}: error when importing file {metadata_row['filename']}: {exception}") from exception
f"{ImportHelper.__name__}: error when importing file {metadata_row['filename']}: {exception}").with_traceback(exception.__traceback__)

@staticmethod
def load_sequence_dataframe(filepath, params, alternative_load_func=None):
Expand All @@ -156,7 +156,7 @@ def load_sequence_dataframe(filepath, params, alternative_load_func=None):
f"{ex}\n\nImportHelper: an error occurred during dataset import while parsing the input file: {filepath}.\n"
f"Please make sure this is a correct immune receptor data file (not metadata).\n"
f"The parameters used for import are {params}.\nFor technical description of the error, see the log above. "
f"For details on how to specify the dataset import, see the documentation.")
f"For details on how to specify the dataset import, see the documentation.").with_traceback(ex.__traceback__)

ImportHelper.rename_dataframe_columns(df, params)
ImportHelper.standardize_none_values(df)
Expand Down
2 changes: 1 addition & 1 deletion immuneML/util/Logger.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ def wrapped(*args, **kwargs):
raise Exception(f"{e}\n\n"
f"ImmuneMLParser: an error occurred during parsing in function {func.__name__} "
f" with parameters: {args}.\n\nFor more details on how to write the specification, "
f"see the documentation. For technical description of the error, see the log above.")
f"see the documentation. For technical description of the error, see the log above.").with_traceback(e.__traceback__)
else:
raise e
finally:
Expand Down
22 changes: 16 additions & 6 deletions scripts/check_new_encoder.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,10 @@
import argparse
import sys

# Ensure the immuneML/ project 'root dir' is added to sys.path
# Adding "." and "../" allows the script to be run from immuneML/ and immuneML/scripts/
# When encountering ModuleNotFoundError, try adding the absolute path to the project 'root dir' here
sys.path.extend([".", "../"])

from scripts.checker_util import *
from immuneML.data_model.dataset.ReceptorDataset import ReceptorDataset
Expand All @@ -17,11 +23,15 @@

def parse_commandline_arguments(args):
parser = argparse.ArgumentParser(description="Tool for testing new immuneML DatasetEncoder classes")
parser.add_argument("-e", "--encoder_file", type=str, required=True, help="Path to the (dataset-specific) encoder file, placed in the correct immuneML subfolder. ")
parser.add_argument("-d", "--dataset_type", type=str, choices=["repertoire", "sequence", "receptor"], required=True, help="Whether to test using 'sequence', 'receptor' or 'repertoire' dataset.")
parser.add_argument("-p", "--no_default_parameters", action='store_true', help="If enabled, it is assumed that no default parameters file exists, and the Encoder can be run without supplying additional parameters. ")
parser.add_argument("-l", "--log_file", type=str, default="check_new_encoder_log.txt", help="Path to the output log file. If already present, the file will be overwritten.")
parser.add_argument("-t", "--tmp_path", type=str, default="./tmp", help="Path to the temporary output folder. If already present, the folder will be overwritten.")

usage_args = parser.add_argument_group('usage arguments')
usage_args.add_argument("-e", "--encoder_file", type=str, required=True, help="Path to the (dataset-specific) encoder file, placed in the correct immuneML subfolder. ")
usage_args.add_argument("-d", "--dataset_type", type=str, choices=["repertoire", "sequence", "receptor"], required=True, help="Whether to test using 'sequence', 'receptor' or 'repertoire' dataset.")
usage_args.add_argument("-p", "--no_default_parameters", action='store_true', help="If enabled, it is assumed that no default parameters file exists, and the Encoder can be run without supplying additional parameters. ")

logging_args = parser.add_argument_group('logging arguments')
logging_args.add_argument("-l", "--log_file", type=str, default="check_new_encoder_log.txt", help="Path to the output log file. If already present, the file will be overwritten (default='./check_new_encoder_log.txt').")
logging_args.add_argument("-t", "--tmp_path", type=str, default="./tmp", help="Path to the temporary output folder. If already present, the folder will be overwritten (default='./tmp').")

return parser.parse_args(args)

Expand All @@ -45,7 +55,7 @@ def check_encoded_data(encoded_data, dummy_dataset, base_class_name):
assert isinstance(encoded_data, EncodedData), f"Error: expected the .encoded_data field of the output dataset to be an EncodedData object, found {encoded_data.__class__.__name__}"

assert encoded_data.examples is not None, f"Error: EncodedData.examples is None, but should be a numeric matrix with a number of rows equal to the number of examples in the dataset ({dummy_dataset.get_example_count()})"
assert encoded_data.examples.shape[0] == dummy_dataset.get_example_count(), f"Error: the number of rows in EncodedData.examples must be equal to the number of examples in the dataset ({dummy_dataset.get_example_count()})"
assert encoded_data.examples.shape[0] == dummy_dataset.get_example_count(), f"Error: the number of rows in EncodedData.examples ({encoded_data.examples.shape[0]}) must be equal to the number of examples in the dataset ({dummy_dataset.get_example_count()})"

assert encoded_data.example_ids == dummy_dataset.get_example_ids(), f"Error: EncodedData.example_ids must match the original dataset: {dummy_dataset.get_example_ids()}, found {encoded_data.example_ids}"
assert encoded_data.encoding == base_class_name, f"Error: EncodedData.encoding must be set to the base class name ('{base_class_name}'), found {encoded_data.encoding}"
Expand Down
28 changes: 21 additions & 7 deletions scripts/check_new_ml_method.py
Original file line number Diff line number Diff line change
@@ -1,23 +1,32 @@
import sys
import argparse
import random

import numpy as np

# Ensure the immuneML/ project 'root dir' is added to sys.path
# Adding "." and "../" allows the script to be run from immuneML/ and immuneML/scripts/
# When encountering ModuleNotFoundError, try adding the absolute path to the project 'root dir' here
sys.path.extend([".", "../"])

from scripts.checker_util import *
from immuneML.data_model.encoded_data.EncodedData import EncodedData
from immuneML.dsl.DefaultParamsLoader import DefaultParamsLoader
from immuneML.environment.EnvironmentSettings import EnvironmentSettings
from immuneML.environment.Label import Label
from immuneML.ml_methods.classifiers.MLMethod import MLMethod
from immuneML.util.ReflectionHandler import ReflectionHandler
from scripts.checker_util import *


def parse_commandline_arguments(args):
parser = argparse.ArgumentParser(description="Tool for testing new immuneML MLMethod classes")
parser.add_argument("-m", "--ml_method_file", type=str, required=True, help="Path to the MLMethod file, placed in the correct immuneML subfolder. ")
parser.add_argument("-p", "--no_default_parameters", action='store_true', help="If enabled, it is assumed that no default parameters file exists, and the MLMethod can be run without supplying additional parameters. ")
parser.add_argument("-l", "--log_file", type=str, default="check_new_ml_method_log.txt", help="Path to the output log file. If already present, the file will be overwritten.")
parser.add_argument("-t", "--tmp_path", type=str, default="./tmp", help="Path to the temporary output folder. If already present, the folder will be overwritten.")

usage_args = parser.add_argument_group('usage arguments')
usage_args.add_argument("-m", "--ml_method_file", type=str, required=True, help="Path to the MLMethod file, placed in the correct immuneML subfolder. ")
usage_args.add_argument("-p", "--no_default_parameters", action='store_true', help="If enabled, it is assumed that no default parameters file exists, and the MLMethod can be run without supplying additional parameters. ")

logging_args = parser.add_argument_group('logging arguments')
logging_args.add_argument("-l", "--log_file", type=str, default="check_new_ml_method_log.txt", help="Path to the output log file. If already present, the file will be overwritten (default='./check_new_ml_method_log.txt').")
logging_args.add_argument("-t", "--tmp_path", type=str, default="./tmp", help="Path to the temporary output folder. If already present, the folder will be overwritten (default='./tmp').")

return parser.parse_args(args)

Expand Down Expand Up @@ -55,12 +64,17 @@ def check_methods(ml_method_instance):
assert MLMethod._assert_matching_label == ml_method_instance.__class__._assert_matching_label, mssg.format("_assert_matching_label", ml_method_instance.__class__._assert_matching_label)
assert MLMethod.predict == ml_method_instance.__class__.predict, mssg.format("predict", ml_method_instance.__class__.predict)
assert MLMethod.predict_proba == ml_method_instance.__class__.predict_proba, mssg.format("predict_proba", ml_method_instance.__class__.predict_proba)
assert MLMethod.check_encoder_compatibility == ml_method_instance.__class__.check_encoder_compatibility, mssg.format("check_encoder_compatibility", ml_method_instance.__class__.check_encoder_compatibility)
assert MLMethod.get_feature_names == ml_method_instance.__class__.get_feature_names, mssg.format("get_feature_names", ml_method_instance.__class__.get_feature_names)
assert MLMethod.get_label_name == ml_method_instance.__class__.get_label_name, mssg.format("get_label_name", ml_method_instance.__class__.get_label_name)
assert MLMethod.get_classes == ml_method_instance.__class__.get_classes, mssg.format("get_classes", ml_method_instance.__class__.get_classes)
assert MLMethod.get_positive_class == ml_method_instance.__class__.get_positive_class, mssg.format("get_positive_class", ml_method_instance.__class__.get_positive_class)

if MLMethod.check_encoder_compatibility != ml_method_instance.__class__.check_encoder_compatibility:
logging.warning(f"class method 'check_encoder_compatibility' was overwritten from MLMethod. Please ensure this was intentional (for example: if more than just the Encoder type needs to be checked). ")

# , mssg.format("check_encoder_compatibility", ml_method_instance.__class__.check_encoder_compatibility)


check_base_vs_instance_methods(MLMethod, ml_method_instance)

compatible_encoders = ml_method_instance.get_compatible_encoders()
Expand Down
Loading