Skip to content

Commit fda8df4

Browse files
committed
Additional fixes, test
1 parent 0529064 commit fda8df4

File tree

6 files changed

+58
-41
lines changed

6 files changed

+58
-41
lines changed

cyclops/models/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -40,7 +40,7 @@
4040
register_model(name="sgd_regressor", model_type="static")(SGDRegressor)
4141
register_model("rf_classifier", model_type="static")(RandomForestClassifier)
4242
register_model("logistic_regression", model_type="static")(LogisticRegression)
43-
register_model("mlp", model_type="static")(MLPClassifier)
43+
register_model("mlp_classifier", model_type="static")(MLPClassifier)
4444
if XGBClassifier is not None:
4545
register_model("xgb_classifier", model_type="static")(XGBClassifier)
4646
if DenseNet is not None:

cyclops/models/catalog.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -224,7 +224,7 @@ def create_model(
224224
raise RuntimeError(_xgboost_unavailable_message)
225225
if model_name in ["densenet", "resnet"]:
226226
raise RuntimeError(_torchxrayvision_unavailable_message)
227-
if model_name in ["gru", "lstm", "mlp", "rnn"]:
227+
if model_name in ["gru", "lstm", "mlp_pt", "rnn"]:
228228
raise RuntimeError(_torch_unavailable_message)
229229
similar_keys_list: List[str] = get_close_matches(
230230
model_name,

cyclops/models/wrappers/sk_model.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -187,7 +187,7 @@ def find_best( # noqa: PLR0912, PLR0915
187187
if isinstance(X, (Dataset, DatasetDict)):
188188
if feature_columns is None:
189189
raise ValueError(
190-
"Missing target columns 'target_columns'. Please provide \
190+
"Missing target columns 'feature_columns'. Please provide \
191191
the name of feature columns when using a \
192192
Hugging Face dataset as the input.",
193193
)
Lines changed: 54 additions & 37 deletions
Original file line number | Diff line number | Diff line change
@@ -1,50 +1,67 @@
11
"""Tests for scikit-learn model wrapper."""
22

3-
import numpy as np
3+
import pandas as pd
4+
from datasets import Dataset
5+
from sklearn.datasets import load_diabetes
46

57
from cyclops.models import create_model
8+
from cyclops.models.wrappers import SKModel
69

710

811
def test_find_best_grid_search():
912
"""Test find_best method with grid search."""
10-
parameters = {"C": [1, 2, 3], "l1_ratio": [0.25, 0.5, 0.75]}
11-
X = np.array(
12-
[
13-
[1, 2, 3],
14-
[4, 5, 6],
15-
[7, 8, 9],
16-
[0, 0, 0],
17-
[1, 1, 1],
18-
[2, 2, 2],
19-
[3, 3, 3],
20-
[1, 3, 1],
21-
[2, 3, 2],
22-
[3, 3, 3],
23-
[1, 2, 1],
24-
[2, 2, 1],
25-
[3, 2, 1],
26-
[1, 1, 1],
27-
[2, 1, 1],
28-
[3, 1, 1],
29-
],
30-
)
31-
y = np.array([1, 2, 1, 1, 2, 1, 1, 2, 1, 1, 1, 2, 1, 2, 2, 2])
32-
feature_columns = ["feature1", "feature2", "feature3"]
33-
target_columns = ["target"]
34-
transforms = None
13+
parameters = {"C": [1], "l1_ratio": [0.5]}
14+
X, y = load_diabetes(return_X_y=True)
3515
metric = "accuracy"
3616
method = "grid"
3717

38-
model = create_model("logistic_regression")
18+
model = create_model("logistic_regression", penalty="elasticnet", solver="saga")
19+
best_estimator = model.find_best(
20+
parameters=parameters,
21+
X=X,
22+
y=y,
23+
metric=metric,
24+
method=method,
25+
)
26+
assert isinstance(best_estimator, SKModel)
27+
28+
29+
def test_find_best_random_search():
30+
"""Test find_best method with random search."""
31+
parameters = {"alpha": [0.001], "hidden_layer_sizes": [10]}
32+
X, y = load_diabetes(return_X_y=True)
33+
metric = "accuracy"
34+
method = "random"
35+
36+
model = create_model("mlp_classifier", early_stopping=True)
37+
best_estimator = model.find_best(
38+
parameters=parameters,
39+
X=X,
40+
y=y,
41+
metric=metric,
42+
method=method,
43+
)
44+
assert isinstance(best_estimator, SKModel)
45+
46+
47+
def test_find_best_hf_dataset_input():
48+
"""Test find_best method with huggingface dataset input."""
49+
parameters = {"alpha": [0.001], "hidden_layer_sizes": [10]}
50+
data = load_diabetes(as_frame=True)
51+
X, y = data["data"], data["target"]
52+
X_y = pd.concat([X, y], axis=1)
53+
features_names = data["feature_names"]
54+
dataset = Dataset.from_pandas(X_y)
55+
metric = "accuracy"
56+
method = "random"
57+
58+
model = create_model("mlp_classifier", early_stopping=True)
3959
best_estimator = model.find_best(
40-
parameters,
41-
X,
42-
y,
43-
feature_columns,
44-
target_columns,
45-
transforms,
46-
metric,
47-
method,
60+
parameters=parameters,
61+
X=dataset,
62+
metric=metric,
63+
method=method,
64+
feature_columns=features_names,
65+
target_columns="target",
4866
)
49-
assert best_estimator.l1_ratio == 0.25
50-
assert best_estimator.C == 1
67+
assert isinstance(best_estimator, SKModel)

tests/cyclops/tasks/test_classification.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -19,7 +19,7 @@ class TestBinaryTabularClassificationTask(TestCase):
1919

2020
def setUp(self):
2121
"""Set up for testing."""
22-
self.model_name = "mlp"
22+
self.model_name = "mlp_classifier"
2323
self.model = create_model(self.model_name)
2424
self.test_task = BinaryTabularClassificationTask(
2525
{self.model_name: self.model},

0 commit comments

Comments
 (0)