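Additional fixes and tests: rename the scikit-learn `mlp` model to `mlp_classifier`, update the `find_best` feature-columns error message, and expand the SKModel wrapper tests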

amrit110 · amrit110 · commit fda8df4cdc16 · 2023-12-15T16:55:55.000-05:00
diff --git a/cyclops/models/__init__.py b/cyclops/models/__init__.py
@@ -40,7 +40,7 @@
 register_model(name="sgd_regressor", model_type="static")(SGDRegressor)
 register_model("rf_classifier", model_type="static")(RandomForestClassifier)
 register_model("logistic_regression", model_type="static")(LogisticRegression)
-register_model("mlp", model_type="static")(MLPClassifier)
+register_model("mlp_classifier", model_type="static")(MLPClassifier)
 if XGBClassifier is not None:
     register_model("xgb_classifier", model_type="static")(XGBClassifier)
 if DenseNet is not None:
diff --git a/cyclops/models/catalog.py b/cyclops/models/catalog.py
@@ -224,7 +224,7 @@ def create_model(
             raise RuntimeError(_xgboost_unavailable_message)
         if model_name in ["densenet", "resnet"]:
             raise RuntimeError(_torchxrayvision_unavailable_message)
-        if model_name in ["gru", "lstm", "mlp", "rnn"]:
+        if model_name in ["gru", "lstm", "mlp_pt", "rnn"]:
             raise RuntimeError(_torch_unavailable_message)
         similar_keys_list: List[str] = get_close_matches(
             model_name,
diff --git a/cyclops/models/configs/mlp_classifier.yaml b/cyclops/models/configs/mlp_classifier.yaml
diff --git a/cyclops/models/wrappers/sk_model.py b/cyclops/models/wrappers/sk_model.py
@@ -187,7 +187,7 @@ def find_best(  # noqa: PLR0912, PLR0915
         if isinstance(X, (Dataset, DatasetDict)):
             if feature_columns is None:
                 raise ValueError(
-                    "Missing target columns 'target_columns'. Please provide \
+                    "Missing feature columns 'feature_columns'. Please provide \
                     the name of feature columns when using a \
                     Hugging Face dataset as the input.",
                 )
diff --git a/tests/cyclops/models/wrappers/test_sk_model.py b/tests/cyclops/models/wrappers/test_sk_model.py
@@ -1,50 +1,67 @@
 """Tests for scikit-learn model wrapper."""
 
-import numpy as np
+import pandas as pd
+from datasets import Dataset
+from sklearn.datasets import load_diabetes
 
 from cyclops.models import create_model
+from cyclops.models.wrappers import SKModel
 
 
 def test_find_best_grid_search():
     """Test find_best method with grid search."""
-    parameters = {"C": [1, 2, 3], "l1_ratio": [0.25, 0.5, 0.75]}
-    X = np.array(
-        [
-            [1, 2, 3],
-            [4, 5, 6],
-            [7, 8, 9],
-            [0, 0, 0],
-            [1, 1, 1],
-            [2, 2, 2],
-            [3, 3, 3],
-            [1, 3, 1],
-            [2, 3, 2],
-            [3, 3, 3],
-            [1, 2, 1],
-            [2, 2, 1],
-            [3, 2, 1],
-            [1, 1, 1],
-            [2, 1, 1],
-            [3, 1, 1],
-        ],
-    )
-    y = np.array([1, 2, 1, 1, 2, 1, 1, 2, 1, 1, 1, 2, 1, 2, 2, 2])
-    feature_columns = ["feature1", "feature2", "feature3"]
-    target_columns = ["target"]
-    transforms = None
+    parameters = {"C": [1], "l1_ratio": [0.5]}
+    X, y = load_diabetes(return_X_y=True)
     metric = "accuracy"
     method = "grid"
 
-    model = create_model("logistic_regression")
+    model = create_model("logistic_regression", penalty="elasticnet", solver="saga")
+    best_estimator = model.find_best(
+        parameters=parameters,
+        X=X,
+        y=y,
+        metric=metric,
+        method=method,
+    )
+    assert isinstance(best_estimator, SKModel)
+
+
+def test_find_best_random_search():
+    """Test find_best method with random search."""
+    parameters = {"alpha": [0.001], "hidden_layer_sizes": [10]}
+    X, y = load_diabetes(return_X_y=True)
+    metric = "accuracy"
+    method = "random"
+
+    model = create_model("mlp_classifier", early_stopping=True)
+    best_estimator = model.find_best(
+        parameters=parameters,
+        X=X,
+        y=y,
+        metric=metric,
+        method=method,
+    )
+    assert isinstance(best_estimator, SKModel)
+
+
+def test_find_best_hf_dataset_input():
+    """Test find_best method with huggingface dataset input."""
+    parameters = {"alpha": [0.001], "hidden_layer_sizes": [10]}
+    data = load_diabetes(as_frame=True)
+    X, y = data["data"], data["target"]
+    X_y = pd.concat([X, y], axis=1)
+    features_names = data["feature_names"]
+    dataset = Dataset.from_pandas(X_y)
+    metric = "accuracy"
+    method = "random"
+
+    model = create_model("mlp_classifier", early_stopping=True)
     best_estimator = model.find_best(
-        parameters,
-        X,
-        y,
-        feature_columns,
-        target_columns,
-        transforms,
-        metric,
-        method,
+        parameters=parameters,
+        X=dataset,
+        metric=metric,
+        method=method,
+        feature_columns=features_names,
+        target_columns="target",
     )
-    assert best_estimator.l1_ratio == 0.25
-    assert best_estimator.C == 1
+    assert isinstance(best_estimator, SKModel)
diff --git a/tests/cyclops/tasks/test_classification.py b/tests/cyclops/tasks/test_classification.py
@@ -19,7 +19,7 @@ class TestBinaryTabularClassificationTask(TestCase):
 
     def setUp(self):
         """Set up for testing."""
-        self.model_name = "mlp"
+        self.model_name = "mlp_classifier"
         self.model = create_model(self.model_name)
         self.test_task = BinaryTabularClassificationTask(
             {self.model_name: self.model},

Original file line number	Diff line number	Diff line change
`@@ -187,7 +187,7 @@ def find_best( # noqa: PLR0912, PLR0915`
`187`	`187`	`if isinstance(X, (Dataset, DatasetDict)):`
`188`	`188`	`if feature_columns is None:`
`189`	`189`	`raise ValueError(`
`190`		`- "Missing target columns 'target_columns'. Please provide \`
	`190`	`+ "Missing feature columns 'feature_columns'. Please provide \`
`191`	`191`	`the name of feature columns when using a \`
`192`	`192`	`Hugging Face dataset as the input.",`
`193`	`193`	`)`