Skip to content

Commit 75dbec3

Browse files
author
SietsmaRJ
committed
- Small QoL update to test_categorical_processor.py
- Processed mypy feedback
- Processed flake8 feedback
1 parent aede9be commit 75dbec3

File tree

8 files changed

+35
-20
lines changed

8 files changed

+35
-20
lines changed

src/molgenis/capice/cli/args_handler_parent.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -119,6 +119,7 @@ def _retrieve_argument_from_list(self,
119119
return self._single_argument_retriever(arg, arg_name, has_default)
120120
except IOError as e:
121121
self.parser.error(e)
122+
return None
122123

123124
@staticmethod
124125
def _single_argument_retriever(arg: list | None,

src/molgenis/capice/cli/args_handler_predict.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,8 @@ def _extension(self):
2020

2121
@property
2222
def _model_extension(self) -> tuple[str]:
23-
return '.json', '.ubj'
23+
# Ignore because the number of values in the tuple does not matter.
24+
return '.json', '.ubj' # type: ignore
2425

2526
def _model_extension_str(self) -> str:
2627
return self._join_extensions(self._model_extension)

src/molgenis/capice/utilities/categorical_processor.py

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ def __init__(self):
2020
def process(
2121
self,
2222
dataset: pd.DataFrame,
23-
processable_features: list | None = None,
23+
processable_features: list[str] | None = None,
2424
predetermined_features: dict[str, list] | None = None
2525
) -> tuple[pd.DataFrame, dict[str, list]]:
2626
"""
@@ -54,7 +54,12 @@ def process(
5454
self._validate_one_feature_list_present(processable_features, predetermined_features)
5555
self._create_preservation_col(dataset)
5656
if predetermined_features is None:
57-
processing_features = self._get_categorical_columns(dataset, processable_features)
57+
# Type ignore, else mypy takes issue with Typing since processable_features can be
58+
# None, so it is considered Optional[list[str]] instead of list[str].
59+
processing_features = self._get_categorical_columns(
60+
dataset,
61+
processable_features # type: ignore
62+
)
5863
else:
5964
processing_features = predetermined_features
6065

@@ -88,7 +93,8 @@ def _create_preservation_col(dataset: pd.DataFrame) -> None:
8893
[Column.chr.value, Column.pos.value, Column.ref.value, Column.alt.value]
8994
].astype(str).agg(UniqueSeparator.unique_separator.value.join, axis=1)
9095

91-
def _get_categorical_columns(self, dataset: pd.DataFrame, processable_features: list) -> dict:
96+
def _get_categorical_columns(self, dataset: pd.DataFrame,
97+
processable_features: list[str]) -> dict[str, list]:
9298
"""
9399
Method for when the predetermined_features is None, usually in case of train,
94100
to determine the top 5 features that should be used for pandas.get_dummies().

src/molgenis/capice/utilities/dynamic_loader.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ def __init__(self, required_attributes: list, path):
2121
self.path = path
2222
self._check_dir_exists()
2323
self.required_attributes = required_attributes
24-
self.modules = {}
24+
self.modules: dict[str, object] = {}
2525

2626
def load_manual_annotators(self):
2727
"""
@@ -82,14 +82,15 @@ def _load_modules_from_path(path):
8282
modules.append(module)
8383
return modules
8484

85-
def _import(self, usable_modules: list):
85+
def _import(self, usable_modules: list[str]) -> dict[str, object]:
8686
"""
8787
Function to dynamically load in the modules using the
8888
import_module library.
8989
:param usable_modules: list of absolute paths to potential modules
9090
:return: dictionary of usable modules
9191
"""
92-
return_modules = {}
92+
# For some reason, mypy wants this line to be Typed instead of the method.
93+
return_modules: dict[str, object] = {}
9394
for module in usable_modules:
9495
name = os.path.basename(module).split('.py')[0]
9596
spec = util.spec_from_file_location(name=name, location=module)

src/molgenis/capice/validators/input_validator.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ def validate_input_path(input_path: os.PathLike, extension: tuple[str]):
1616
"""
1717
if not os.path.exists(input_path):
1818
raise FileNotFoundError(f'{input_path} does not exist!')
19-
if not (input_path.endswith(extension)):
19+
if not str(input_path).endswith(extension):
2020
raise IOError(f'{input_path} does not match required extension: '
2121
f'{", ".join(extension)}')
2222

src/molgenis/capice/validators/predict_validator.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ def validate_data_predict_ready(self, dataset: pd.DataFrame, model: xgb.XGBClass
2323
Raised when a required predict feature is missing from dataset.
2424
"""
2525
missing = []
26-
for feature in model.get_booster().feature_names:
26+
for feature in model.get_booster().feature_names: # type: ignore
2727
if feature not in dataset.columns:
2828
missing.append(feature)
2929
if len(missing) > 0:

src/molgenis/capice/validators/version_validator.py

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -79,16 +79,18 @@ def validate_versions_compatible(self, capice_version: str, model_version: str):
7979
ValueError
8080
Raised when the model and framework versions are not compatible.
8181
"""
82+
# All mypy ignores here are because attributes are not found.
8283
capice = match(self.regex, capice_version)
8384
model = match(self.regex, model_version)
84-
if capice.group('major') != model.group('major'):
85+
if capice.group('major') != model.group('major'): # type: ignore
8586
raise ValueError(
86-
f'CAPICE major version {capice.string} does not match with the model '
87-
f'{model.string}!'
87+
f'CAPICE major version {capice.string} ' # type: ignore
88+
f'does not match with the model '
89+
f'{model.string}!' # type: ignore
8890
)
8991

90-
if capice.group('prerelease') or model.group('prerelease'):
91-
self._validate_prerelease(capice, model)
92+
if capice.group('prerelease') or model.group('prerelease'): # type: ignore
93+
self._validate_prerelease(capice, model) # type: ignore
9294

9395
@staticmethod
9496
def _validate_prerelease(capice_version: re.Match,

tests/capice/utilities/test_categorical_processor.py

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,10 @@ def setUp(cls):
2626
def tearDownClass(cls) -> None:
2727
teardown()
2828

29+
@staticmethod
30+
def creat_other_column(value: str) -> str:
31+
return '_'.join([value, Column.other.value])
32+
2933
def test_unit_preprocessing_file(self):
3034
"""
3135
Unit test for the preprocessor to see if the preprocessor works just
@@ -88,15 +92,15 @@ def test_preprocessing_train(self):
8892
'foo_a': [1, 0, 0, 0, 0, 0],
8993
'foo_b': [0, 1, 0, 0, 0, 0],
9094
'foo_c': [0, 0, 1, 0, 0, 0],
91-
'foo_other_CAPICE_value': [0, 0, 0, 1, 1, 1],
95+
self.creat_other_column('foo'): [0, 0, 0, 1, 1, 1],
9296
'bar_a': [1, 0, 0, 0, 0, 0],
93-
'bar_other_CAPICE_value': [0, 1, 1, 1, 1, 1],
97+
self.creat_other_column('bar'): [0, 1, 1, 1, 1, 1],
9498
'baz_a': [1, 0, 0, 0, 0, 0],
9599
'baz_b': [0, 1, 0, 0, 0, 0],
96100
'baz_c': [0, 0, 1, 0, 0, 0],
97101
'baz_d': [0, 0, 0, 1, 0, 0],
98102
'baz_e': [0, 0, 0, 0, 1, 0],
99-
'baz_other_CAPICE_value': [0, 0, 0, 0, 0, 1],
103+
self.creat_other_column('baz'): [0, 0, 0, 0, 0, 1],
100104
'REF': ['A', 'T', 'A', 'T', 'A', 'T'],
101105
'ALT': ['G', 'C', 'G', 'C', 'G', 'C'],
102106
'feature_1': [1, 2, 3, 4, np.nan, np.nan],
@@ -144,7 +148,7 @@ def test_creation_other(self):
144148
Column.other.value,
145149
observed_dict['foo']
146150
)
147-
self.assertIn('foo_other_CAPICE_value', observed_df.columns)
151+
self.assertIn(self.creat_other_column('foo'), observed_df.columns)
148152

149153
def test_creation_other_notin(self):
150154
test_case = pd.concat(
@@ -167,7 +171,7 @@ def test_creation_other_notin(self):
167171
Column.other.value,
168172
observed_dict['foo']
169173
)
170-
self.assertNotIn('foo_other_CAPICE_value', observed_df.columns)
174+
self.assertNotIn(self.creat_other_column('foo'), observed_df.columns)
171175

172176
def test_other_in_top_5(self):
173177
# Tests that, if "other" occurs in the top 5 categories, only this "other" feature gets
@@ -188,7 +192,7 @@ def test_other_in_top_5(self):
188192
self.assertFalse(test_series[test_series > 0].size > 2,
189193
msg=f'Actual size: {test_series[test_series > 0].size}')
190194
self.assertIn(
191-
'foo_other_CAPICE_value',
195+
self.creat_other_column('foo'),
192196
observed_df.columns
193197
)
194198

0 commit comments

Comments
 (0)