From f55d35e8844700790dde575e2847debafee53ae7 Mon Sep 17 00:00:00 2001
From: godcallray <godcallray@gmail.com>
Date: Mon, 28 Nov 2022 23:24:28 -0500
Subject: [PATCH 01/12] ml

---
 flaml/ml.py                          | 11 +++++++----
 test/automl/test_notebook_example.py | 15 ++++++++-------
 2 files changed, 15 insertions(+), 11 deletions(-)

diff --git a/flaml/ml.py b/flaml/ml.py
index e521c32395..1590a5f237 100644
--- a/flaml/ml.py
+++ b/flaml/ml.py
@@ -52,6 +52,7 @@
     "mse",
     "accuracy",
     "roc_auc",
+    "roc_auc_weighted",
     "roc_auc_ovr",
     "roc_auc_ovo",
     "log_loss",
@@ -192,7 +193,7 @@ def metric_loss_score(
                 metric_name
                 + " is not an built-in sklearn metric and nlp is not installed. "
                 "Currently built-in sklearn metrics are: "
-                "r2, rmse, mae, mse, accuracy, roc_auc, roc_auc_ovr, roc_auc_ovo,"
+                "r2, rmse, mae, mse, accuracy, roc_auc, roc_auc_weighted, roc_auc_ovr, roc_auc_ovo,"
                 "log_loss, mape, f1, micro_f1, macro_f1, ap. "
                 "If the metric is an nlp metric, please pip install flaml[nlp] ",
                 "or pass a customized metric function to AutoML.fit(metric=func)",
@@ -203,7 +204,7 @@ def metric_loss_score(
             raise ValueError(
                 metric_name + " is neither an sklearn metric nor a huggingface metric. "
                 "Currently built-in sklearn metrics are: "
-                "r2, rmse, mae, mse, accuracy, roc_auc, roc_auc_ovr, roc_auc_ovo,"
+                "r2, rmse, mae, mse, accuracy, roc_auc, roc_auc_weighted, roc_auc_ovr, roc_auc_ovo,"
                 "log_loss, mape, f1, micro_f1, macro_f1, ap. "
                 "Currently built-in huggingface metrics are: "
                 + ", ".join(huggingface_metric_to_mode.keys())
@@ -268,6 +269,8 @@ def sklearn_metric_loss_score(
         score = 1.0 - accuracy_score(y_true, y_predict, sample_weight=sample_weight)
     elif metric_name == "roc_auc":
         score = 1.0 - roc_auc_score(y_true, y_predict, sample_weight=sample_weight)
+    elif metric_name == "roc_auc_weighted":
+        score = 1.0 - roc_auc_score(y_true, y_predict, average=weighted, sample_weight=sample_weight)
     elif metric_name == "roc_auc_ovr":
         score = 1.0 - roc_auc_score(
             y_true, y_predict, sample_weight=sample_weight, multi_class="ovr"
@@ -318,10 +321,10 @@ def sklearn_metric_loss_score(
 
 
 def get_y_pred(estimator, X, eval_metric, obj):
-    if eval_metric in ["roc_auc", "ap"] and "binary" in obj:
+    if eval_metric in ["roc_auc", "roc_auc_weighted", "ap"] and "binary" in obj:
         y_pred_classes = estimator.predict_proba(X)
         y_pred = y_pred_classes[:, 1] if y_pred_classes.ndim > 1 else y_pred_classes
-    elif eval_metric in ["log_loss", "roc_auc", "roc_auc_ovr", "roc_auc_ovo"]:
+    elif eval_metric in ["log_loss", "roc_auc", "roc_auc_weighted", "roc_auc_ovr", "roc_auc_ovo"]:
         y_pred = estimator.predict_proba(X)
     else:
         y_pred = estimator.predict(X)
diff --git a/test/automl/test_notebook_example.py b/test/automl/test_notebook_example.py
index 818f7af363..024d35b711 100644
--- a/test/automl/test_notebook_example.py
+++ b/test/automl/test_notebook_example.py
@@ -75,6 +75,9 @@ def test_automl(budget=5, dataset_format="dataframe", hpo_method=None):
     print(
         "roc_auc", "=", 1 - sklearn_metric_loss_score("roc_auc", y_pred_proba, y_test)
     )
+    print(
+        "roc_auc_weighted", "=", 1 - sklearn_metric_loss_score("roc_auc_weighted", y_pred_proba, y_test)
+    )
     print("log_loss", "=", sklearn_metric_loss_score("log_loss", y_pred_proba, y_test))
     if budget is None:
         assert accuracy >= 0.669, "the accuracy of flaml should be larger than 0.67"
@@ -108,7 +111,10 @@ def _test_nobudget():
 
 
 def test_mlflow():
-    # subprocess.check_call([sys.executable, "-m", "pip", "install", "mlflow"])
+    import subprocess
+    import sys
+
+    subprocess.check_call([sys.executable, "-m", "pip", "install", "mlflow"])
     import mlflow
     from flaml.data import load_openml_task
 
@@ -149,12 +155,9 @@ def test_mlflow():
         print(automl.predict_proba(X_test))
     except ImportError:
         pass
+    # subprocess.check_call([sys.executable, "-m", "pip", "uninstall", "mlflow"])
 
-
-def test_mlflow_iris():
     from sklearn.datasets import load_iris
-    import mlflow
-    from flaml import AutoML
 
     with mlflow.start_run():
         automl = AutoML()
@@ -167,8 +170,6 @@ def test_mlflow_iris():
         X_train, y_train = load_iris(return_X_y=True)
         automl.fit(X_train=X_train, y_train=y_train, **automl_settings)
 
-    # subprocess.check_call([sys.executable, "-m", "pip", "uninstall", "mlflow"])
-
 
 if __name__ == "__main__":
     test_automl(600)

From 6dd3c7f0c17f5b137086d75c813c67304a34ccb3 Mon Sep 17 00:00:00 2001
From: godcallray <godcallray@gmail.com>
Date: Tue, 29 Nov 2022 23:02:40 +0800
Subject: [PATCH 02/12] logger

---
 flaml/automl.py | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/flaml/automl.py b/flaml/automl.py
index 4e2772ed56..e1d74fb4c8 100644
--- a/flaml/automl.py
+++ b/flaml/automl.py
@@ -98,8 +98,10 @@ def valid_starting_point_one_dim(self, value_one_dim, domain_one_dim):
                 and renamed_type is float
             )
             if not (type_match and domain_one_dim.is_valid(value_one_dim)):
+                logger.error("Starting_points is not provided in the right format")
                 return False
         elif value_one_dim != domain_one_dim:
+            logger.error("Starting_points is not provided in the right format")
             return False
         return True
 
@@ -3396,6 +3398,13 @@ def _search_sequential(self):
                     state.best_config,
                     self.data_size_full,
                 )
+
+                if(self._trained_estimator.params[self._trained_estimaotr.ITER_HP]!=self.best_config[self._trained_estimaotr.ITER_HP]):
+                    logger.warning(
+                        "early stopping happened"
+                    )
+
+
                 logger.info(
                     "retrain {} for {:.1f}s".format(self._best_estimator, retrain_time)
                 )

From 71193c4a0a72d87c2ef986aa539abce7ca4fe43a Mon Sep 17 00:00:00 2001
From: godcallray <godcallray@gmail.com>
Date: Wed, 30 Nov 2022 10:31:53 +0800
Subject: [PATCH 03/12] backml

---
 flaml/ml.py                          | 11 ++++-------
 test/automl/test_notebook_example.py | 15 +++++++--------
 2 files changed, 11 insertions(+), 15 deletions(-)

diff --git a/flaml/ml.py b/flaml/ml.py
index 1590a5f237..e521c32395 100644
--- a/flaml/ml.py
+++ b/flaml/ml.py
@@ -52,7 +52,6 @@
     "mse",
     "accuracy",
     "roc_auc",
-    "roc_auc_weighted",
     "roc_auc_ovr",
     "roc_auc_ovo",
     "log_loss",
@@ -193,7 +192,7 @@ def metric_loss_score(
                 metric_name
                 + " is not an built-in sklearn metric and nlp is not installed. "
                 "Currently built-in sklearn metrics are: "
-                "r2, rmse, mae, mse, accuracy, roc_auc, roc_auc_weighted, roc_auc_ovr, roc_auc_ovo,"
+                "r2, rmse, mae, mse, accuracy, roc_auc, roc_auc_ovr, roc_auc_ovo,"
                 "log_loss, mape, f1, micro_f1, macro_f1, ap. "
                 "If the metric is an nlp metric, please pip install flaml[nlp] ",
                 "or pass a customized metric function to AutoML.fit(metric=func)",
@@ -204,7 +203,7 @@ def metric_loss_score(
             raise ValueError(
                 metric_name + " is neither an sklearn metric nor a huggingface metric. "
                 "Currently built-in sklearn metrics are: "
-                "r2, rmse, mae, mse, accuracy, roc_auc, roc_auc_weighted, roc_auc_ovr, roc_auc_ovo,"
+                "r2, rmse, mae, mse, accuracy, roc_auc, roc_auc_ovr, roc_auc_ovo,"
                 "log_loss, mape, f1, micro_f1, macro_f1, ap. "
                 "Currently built-in huggingface metrics are: "
                 + ", ".join(huggingface_metric_to_mode.keys())
@@ -269,8 +268,6 @@ def sklearn_metric_loss_score(
         score = 1.0 - accuracy_score(y_true, y_predict, sample_weight=sample_weight)
     elif metric_name == "roc_auc":
         score = 1.0 - roc_auc_score(y_true, y_predict, sample_weight=sample_weight)
-    elif metric_name == "roc_auc_weighted":
-        score = 1.0 - roc_auc_score(y_true, y_predict, average=weighted, sample_weight=sample_weight)
     elif metric_name == "roc_auc_ovr":
         score = 1.0 - roc_auc_score(
             y_true, y_predict, sample_weight=sample_weight, multi_class="ovr"
@@ -321,10 +318,10 @@ def sklearn_metric_loss_score(
 
 
 def get_y_pred(estimator, X, eval_metric, obj):
-    if eval_metric in ["roc_auc", "roc_auc_weighted", "ap"] and "binary" in obj:
+    if eval_metric in ["roc_auc", "ap"] and "binary" in obj:
         y_pred_classes = estimator.predict_proba(X)
         y_pred = y_pred_classes[:, 1] if y_pred_classes.ndim > 1 else y_pred_classes
-    elif eval_metric in ["log_loss", "roc_auc", "roc_auc_weighted", "roc_auc_ovr", "roc_auc_ovo"]:
+    elif eval_metric in ["log_loss", "roc_auc", "roc_auc_ovr", "roc_auc_ovo"]:
         y_pred = estimator.predict_proba(X)
     else:
         y_pred = estimator.predict(X)
diff --git a/test/automl/test_notebook_example.py b/test/automl/test_notebook_example.py
index 024d35b711..818f7af363 100644
--- a/test/automl/test_notebook_example.py
+++ b/test/automl/test_notebook_example.py
@@ -75,9 +75,6 @@ def test_automl(budget=5, dataset_format="dataframe", hpo_method=None):
     print(
         "roc_auc", "=", 1 - sklearn_metric_loss_score("roc_auc", y_pred_proba, y_test)
     )
-    print(
-        "roc_auc_weighted", "=", 1 - sklearn_metric_loss_score("roc_auc_weighted", y_pred_proba, y_test)
-    )
     print("log_loss", "=", sklearn_metric_loss_score("log_loss", y_pred_proba, y_test))
     if budget is None:
         assert accuracy >= 0.669, "the accuracy of flaml should be larger than 0.67"
@@ -111,10 +108,7 @@ def _test_nobudget():
 
 
 def test_mlflow():
-    import subprocess
-    import sys
-
-    subprocess.check_call([sys.executable, "-m", "pip", "install", "mlflow"])
+    # subprocess.check_call([sys.executable, "-m", "pip", "install", "mlflow"])
     import mlflow
     from flaml.data import load_openml_task
 
@@ -155,9 +149,12 @@ def test_mlflow():
         print(automl.predict_proba(X_test))
     except ImportError:
         pass
-    # subprocess.check_call([sys.executable, "-m", "pip", "uninstall", "mlflow"])
 
+
+def test_mlflow_iris():
     from sklearn.datasets import load_iris
+    import mlflow
+    from flaml import AutoML
 
     with mlflow.start_run():
         automl = AutoML()
@@ -170,6 +167,8 @@ def test_mlflow():
         X_train, y_train = load_iris(return_X_y=True)
         automl.fit(X_train=X_train, y_train=y_train, **automl_settings)
 
+    # subprocess.check_call([sys.executable, "-m", "pip", "uninstall", "mlflow"])
+
 
 if __name__ == "__main__":
     test_automl(600)

From 5f6c854a1ecd6f533c12d5f2e4dda5a89b1d0229 Mon Sep 17 00:00:00 2001
From: Qingyun Wu <qingyun.wu@psu.edu>
Date: Mon, 26 Dec 2022 14:16:19 -0800
Subject: [PATCH 04/12] Add warning if starting_points is not the right format

---
 flaml/automl/automl.py | 76 +++++++++++++++++++++++++++---------------
 1 file changed, 49 insertions(+), 27 deletions(-)

diff --git a/flaml/automl/automl.py b/flaml/automl/automl.py
index d446330e9c..23ff5ff064 100644
--- a/flaml/automl/automl.py
+++ b/flaml/automl/automl.py
@@ -2728,32 +2728,54 @@ def cv_score_agg_func(val_loss_folds, log_metrics_folds):
 
         _sample_size_from_starting_points = {}
         if isinstance(starting_points, dict):
-            for _estimator, _point_per_estimator in starting_points.items():
-                sample_size = (
-                    _point_per_estimator
-                    and isinstance(_point_per_estimator, dict)
-                    and _point_per_estimator.get("FLAML_sample_size")
+            _estimators_from_starting_points = starting_points.keys()
+            if not any(
+                i in self.estimator_list for i in _estimators_from_starting_points
+            ):
+                logger.warning(
+                    "The proivded starting_points does not contain relevant estimators as keys"
+                    " and is thus NOT used. Please check the format of starting_points."
                 )
-                if sample_size:
-                    _sample_size_from_starting_points[_estimator] = sample_size
-                elif _point_per_estimator and isinstance(_point_per_estimator, list):
-                    _sample_size_set = set(
-                        [
-                            config["FLAML_sample_size"]
-                            for config in _point_per_estimator
-                            if "FLAML_sample_size" in config
-                        ]
-                    )
-                    if _sample_size_set:
-                        _sample_size_from_starting_points[_estimator] = min(
-                            _sample_size_set
-                        )
-                    if len(_sample_size_set) > 1:
+            else:
+                for _estimator, _point_per_estimator in starting_points.items():
+
+                    if not isinstance(_point_per_estimator, dict) and not isinstance(
+                        _point_per_estimator, list
+                    ):
                         logger.warning(
-                            "Using the min FLAML_sample_size of all the provided starting points for estimator {}. (Provided FLAML_sample_size are: {})".format(
-                                _estimator, _sample_size_set
+                            "Starting_points for estimator {} is not provide in the format and is thus NOT used!"
+                            "When the starting_points is a dict, the keys are the name of the estimators, and the"
+                            "values should be hyperparamter configuration dicts or lists of hyperparamter configuration dicts".format(
+                                _estimator
                             )
                         )
+                    sample_size = (
+                        _point_per_estimator
+                        and isinstance(_point_per_estimator, dict)
+                        and _point_per_estimator.get("FLAML_sample_size")
+                    )
+                    if sample_size:
+                        _sample_size_from_starting_points[_estimator] = sample_size
+                    elif _point_per_estimator and isinstance(
+                        _point_per_estimator, list
+                    ):
+                        _sample_size_set = set(
+                            [
+                                config["FLAML_sample_size"]
+                                for config in _point_per_estimator
+                                if "FLAML_sample_size" in config
+                            ]
+                        )
+                        if _sample_size_set:
+                            _sample_size_from_starting_points[_estimator] = min(
+                                _sample_size_set
+                            )
+                        if len(_sample_size_set) > 1:
+                            logger.warning(
+                                "Using the min FLAML_sample_size of all the provided starting points for estimator {}. (Provided FLAML_sample_size are: {})".format(
+                                    _estimator, _sample_size_set
+                                )
+                            )
 
         if not sample and isinstance(starting_points, dict):
             assert (
@@ -3560,11 +3582,11 @@ def _search_sequential(self):
                     self.data_size_full,
                 )
 
-                if(self._trained_estimator.params[self._trained_estimaotr.ITER_HP]!=self.best_config[self._trained_estimaotr.ITER_HP]):
-                    logger.warning(
-                        "early stopping happened"
-                    )
-
+                if (
+                    self._trained_estimator.params[self._trained_estimaotr.ITER_HP]
+                    != self.best_config[self._trained_estimaotr.ITER_HP]
+                ):
+                    logger.warning("early stopping happened")
 
                 logger.info(
                     "retrain {} for {:.1f}s".format(self._best_estimator, retrain_time)

From 2e209d14c57cc6c1b1c84c3459a64db4a1fef977 Mon Sep 17 00:00:00 2001
From: Qingyun Wu <qingyun.wu@psu.edu>
Date: Mon, 26 Dec 2022 14:45:57 -0800
Subject: [PATCH 05/12] simplify code

---
 flaml/automl/automl.py | 156 +++++++++++++++++++----------------------
 1 file changed, 72 insertions(+), 84 deletions(-)

diff --git a/flaml/automl/automl.py b/flaml/automl/automl.py
index 23ff5ff064..ae2fdf4164 100644
--- a/flaml/automl/automl.py
+++ b/flaml/automl/automl.py
@@ -111,10 +111,8 @@ def valid_starting_point_one_dim(self, value_one_dim, domain_one_dim):
                 and renamed_type is float
             )
             if not (type_match and domain_one_dim.is_valid(value_one_dim)):
-                logger.error("Starting_points is not provided in the right format")
                 return False
         elif value_one_dim != domain_one_dim:
-            logger.error("Starting_points is not provided in the right format")
             return False
         return True
 
@@ -2726,56 +2724,86 @@ def cv_score_agg_func(val_loss_folds, log_metrics_folds):
         )
         self._auto_augment = auto_augment
 
-        _sample_size_from_starting_points = {}
+        if "auto" == estimator_list:
+            if self._state.task == "rank":
+                estimator_list = ["lgbm", "xgboost", "xgb_limitdepth"]
+            elif _is_nlp_task(self._state.task):
+                estimator_list = ["transformer"]
+            elif self._state.task == TS_FORECASTPANEL:
+                estimator_list = ["tft"]
+            else:
+                try:
+                    import catboost
+
+                    estimator_list = [
+                        "lgbm",
+                        "rf",
+                        "catboost",
+                        "xgboost",
+                        "extra_tree",
+                        "xgb_limitdepth",
+                    ]
+                except ImportError:
+                    estimator_list = [
+                        "lgbm",
+                        "rf",
+                        "xgboost",
+                        "extra_tree",
+                        "xgb_limitdepth",
+                    ]
+                if self._state.task in TS_FORECAST:
+                    # catboost is removed because it has a `name` parameter, making it incompatible with hcrystalball
+                    if "catboost" in estimator_list:
+                        estimator_list.remove("catboost")
+                    if self._state.task in TS_FORECASTREGRESSION:
+                        try:
+                            import prophet
+
+                            estimator_list += ["prophet", "arima", "sarimax"]
+                        except ImportError:
+                            estimator_list += ["arima", "sarimax"]
+                elif "regression" != self._state.task:
+                    estimator_list += ["lrl1"]
+
         if isinstance(starting_points, dict):
             _estimators_from_starting_points = starting_points.keys()
-            if not any(
-                i in self.estimator_list for i in _estimators_from_starting_points
-            ):
+            if not any(i in estimator_list for i in _estimators_from_starting_points):
                 logger.warning(
-                    "The proivded starting_points does not contain relevant estimators as keys"
-                    " and is thus NOT used. Please check the format of starting_points."
+                    "The proivded starting_points {} is removed as it does not contain relevant estimators as keys"
+                    " and is thus NOT used. Please check the required format of starting_points.".format(
+                        starting_points
+                    )
                 )
-            else:
-                for _estimator, _point_per_estimator in starting_points.items():
+                starting_points = {}
 
-                    if not isinstance(_point_per_estimator, dict) and not isinstance(
-                        _point_per_estimator, list
-                    ):
-                        logger.warning(
-                            "Starting_points for estimator {} is not provide in the format and is thus NOT used!"
-                            "When the starting_points is a dict, the keys are the name of the estimators, and the"
-                            "values should be hyperparamter configuration dicts or lists of hyperparamter configuration dicts".format(
-                                _estimator
-                            )
-                        )
-                    sample_size = (
-                        _point_per_estimator
-                        and isinstance(_point_per_estimator, dict)
-                        and _point_per_estimator.get("FLAML_sample_size")
+        _sample_size_from_starting_points = {}
+        if isinstance(starting_points, dict):
+            for _estimator, _point_per_estimator in starting_points.items():
+                sample_size = (
+                    _point_per_estimator
+                    and isinstance(_point_per_estimator, dict)
+                    and _point_per_estimator.get("FLAML_sample_size")
+                )
+                if sample_size:
+                    _sample_size_from_starting_points[_estimator] = sample_size
+                elif _point_per_estimator and isinstance(_point_per_estimator, list):
+                    _sample_size_set = set(
+                        [
+                            config["FLAML_sample_size"]
+                            for config in _point_per_estimator
+                            if "FLAML_sample_size" in config
+                        ]
                     )
-                    if sample_size:
-                        _sample_size_from_starting_points[_estimator] = sample_size
-                    elif _point_per_estimator and isinstance(
-                        _point_per_estimator, list
-                    ):
-                        _sample_size_set = set(
-                            [
-                                config["FLAML_sample_size"]
-                                for config in _point_per_estimator
-                                if "FLAML_sample_size" in config
-                            ]
+                    if _sample_size_set:
+                        _sample_size_from_starting_points[_estimator] = min(
+                            _sample_size_set
                         )
-                        if _sample_size_set:
-                            _sample_size_from_starting_points[_estimator] = min(
-                                _sample_size_set
-                            )
-                        if len(_sample_size_set) > 1:
-                            logger.warning(
-                                "Using the min FLAML_sample_size of all the provided starting points for estimator {}. (Provided FLAML_sample_size are: {})".format(
-                                    _estimator, _sample_size_set
-                                )
+                    if len(_sample_size_set) > 1:
+                        logger.warning(
+                            "Using the min FLAML_sample_size of all the provided starting points for estimator {}. (Provided FLAML_sample_size are: {})".format(
+                                _estimator, _sample_size_set
                             )
+                        )
 
         if not sample and isinstance(starting_points, dict):
             assert (
@@ -2867,46 +2895,6 @@ def is_to_reverse_metric(metric, task):
             error_metric = "customized metric"
         logger.info(f"Minimizing error metric: {error_metric}")
 
-        if "auto" == estimator_list:
-            if self._state.task == "rank":
-                estimator_list = ["lgbm", "xgboost", "xgb_limitdepth"]
-            elif _is_nlp_task(self._state.task):
-                estimator_list = ["transformer"]
-            elif self._state.task == TS_FORECASTPANEL:
-                estimator_list = ["tft"]
-            else:
-                try:
-                    import catboost
-
-                    estimator_list = [
-                        "lgbm",
-                        "rf",
-                        "catboost",
-                        "xgboost",
-                        "extra_tree",
-                        "xgb_limitdepth",
-                    ]
-                except ImportError:
-                    estimator_list = [
-                        "lgbm",
-                        "rf",
-                        "xgboost",
-                        "extra_tree",
-                        "xgb_limitdepth",
-                    ]
-                if self._state.task in TS_FORECAST:
-                    # catboost is removed because it has a `name` parameter, making it incompatible with hcrystalball
-                    if "catboost" in estimator_list:
-                        estimator_list.remove("catboost")
-                    if self._state.task in TS_FORECASTREGRESSION:
-                        try:
-                            import prophet
-
-                            estimator_list += ["prophet", "arima", "sarimax"]
-                        except ImportError:
-                            estimator_list += ["arima", "sarimax"]
-                elif "regression" != self._state.task:
-                    estimator_list += ["lrl1"]
         # When no search budget is specified
         if no_budget:
             max_iter = len(estimator_list)

From 1f3a4bb4754044cca772b0ae788ccb3040908361 Mon Sep 17 00:00:00 2001
From: Qingyun Wu <qingyun.wu@psu.edu>
Date: Mon, 26 Dec 2022 15:47:01 -0800
Subject: [PATCH 06/12] typo

---
 flaml/automl/automl.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/flaml/automl/automl.py b/flaml/automl/automl.py
index ae2fdf4164..9af1bb2afe 100644
--- a/flaml/automl/automl.py
+++ b/flaml/automl/automl.py
@@ -3571,8 +3571,8 @@ def _search_sequential(self):
                 )
 
                 if (
-                    self._trained_estimator.params[self._trained_estimaotr.ITER_HP]
-                    != self.best_config[self._trained_estimaotr.ITER_HP]
+                    self._trained_estimator.params[self._trained_estimator.ITER_HP]
+                    != self.best_config[self._trained_estimator.ITER_HP]
                 ):
                     logger.warning("early stopping happened")
 

From 5db2ca7f769be4b08918ac067e036b49eb16abc3 Mon Sep 17 00:00:00 2001
From: Qingyun Wu <qingyun.wu@psu.edu>
Date: Thu, 29 Dec 2022 21:05:09 -0800
Subject: [PATCH 07/12] add test for the warmstart warning

---
 test/automl/test_warmstart.py | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/test/automl/test_warmstart.py b/test/automl/test_warmstart.py
index 74cf8c37f2..d65176b516 100644
--- a/test/automl/test_warmstart.py
+++ b/test/automl/test_warmstart.py
@@ -219,6 +219,17 @@ def test_FLAML_sample_size_in_starting_points(self):
         except AssertionError:
             pass
 
+        # Here starting_points is passed in the wrong format: its keys are
+        # expected to be estimator names. fit() should therefore log a warning
+        # and discard the provided starting_points rather than use them.
+        automl5 = AutoML()
+        automl_settings["starting_points"] = automl3.best_config
+        automl5.fit(
+            X_train,
+            y_train,
+            **automl_settings,
+        )
+
 
 if __name__ == "__main__":
     unittest.main()

From 3fb9bd4a4e53792bdbb4aaf0fdebc384a49ab6f6 Mon Sep 17 00:00:00 2001
From: Qingyun Wu <qingyun.wu@psu.edu>
Date: Fri, 30 Dec 2022 13:46:48 -0800
Subject: [PATCH 08/12] revise warning condition

---
 flaml/automl/automl.py | 22 +++++++++++++++++-----
 1 file changed, 17 insertions(+), 5 deletions(-)

diff --git a/flaml/automl/automl.py b/flaml/automl/automl.py
index 9af1bb2afe..72b432ee7f 100644
--- a/flaml/automl/automl.py
+++ b/flaml/automl/automl.py
@@ -3569,12 +3569,24 @@ def _search_sequential(self):
                     state.best_config,
                     self.data_size_full,
                 )
-
-                if (
-                    self._trained_estimator.params[self._trained_estimator.ITER_HP]
-                    != self.best_config[self._trained_estimator.ITER_HP]
+                if getattr(self._trained_estimator, "params", {}) and getattr(
+                    self._trained_estimator, "ITER_HP", None
                 ):
-                    logger.warning("early stopping happened")
+                    _hp_trained_iter = self._trained_estimator.params.get(
+                        self._trained_estimator.ITER_HP
+                    )
+                    _best_config_iter = self.best_config.get(
+                        self._trained_estimator.ITER_HP
+                    )
+                    if _hp_trained_iter != _best_config_iter:
+                        logger.warning(
+                            "Early stopping happened when retraining a model with the best configuration."
+                            f" The best config's ITER_HP is {_best_config_iter}"
+                            f" and the actual ITER_HP used for retraining the model is {_hp_trained_iter}."
+                            " This early stopping happens because because flaml needs to do its best effort to"
+                            " retrain without violating the time budget when retrain_full is set to 'budget'. "
+                            " If this mismatch is not desired, please set retrain_full to True."
+                        )
 
                 logger.info(
                     "retrain {} for {:.1f}s".format(self._best_estimator, retrain_time)

From ba5f61fbfcf07eb2598279cd06ba6d066a6262fb Mon Sep 17 00:00:00 2001
From: Qingyun Wu <qingyun.wu@psu.edu>
Date: Fri, 30 Dec 2022 13:49:11 -0800
Subject: [PATCH 09/12] fix typo

---
 flaml/automl/automl.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/flaml/automl/automl.py b/flaml/automl/automl.py
index 72b432ee7f..74d270f2de 100644
--- a/flaml/automl/automl.py
+++ b/flaml/automl/automl.py
@@ -3583,7 +3583,7 @@ def _search_sequential(self):
                             "Early stopping happened when retraining a model with the best configuration."
                             f" The best config's ITER_HP is {_best_config_iter}"
                             f" and the actual ITER_HP used for retraining the model is {_hp_trained_iter}."
-                            " This early stopping happens because because flaml needs to do its best effort to"
+                            " This early stopping happens because flaml needs to do its best effort to"
                             " retrain without violating the time budget when retrain_full is set to 'budget'. "
                             " If this mismatch is not desired, please set retrain_full to True."
                         )

From 773c7ed79c9123f0ecb3288ae220c0a091076ced Mon Sep 17 00:00:00 2001
From: Qingyun Wu <qingyun.wu@psu.edu>
Date: Sun, 1 Jan 2023 10:13:10 -0500
Subject: [PATCH 10/12] Update flaml/automl/automl.py

Co-authored-by: Chi Wang <wang.chi@microsoft.com>
---
 flaml/automl/automl.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/flaml/automl/automl.py b/flaml/automl/automl.py
index 74d270f2de..0585043652 100644
--- a/flaml/automl/automl.py
+++ b/flaml/automl/automl.py
@@ -3581,8 +3581,8 @@ def _search_sequential(self):
                     if _hp_trained_iter != _best_config_iter:
                         logger.warning(
                             "Early stopping happened when retraining a model with the best configuration."
-                            f" The best config's ITER_HP is {_best_config_iter}"
-                            f" and the actual ITER_HP used for retraining the model is {_hp_trained_iter}."
+                            f" The best config's {self._trained_estimator.ITER_HP} is {_best_config_iter}"
+                            f" and the actual {self._trained_estimator.ITER_HP} used for retraining the model is {_hp_trained_iter}."
                             " This early stopping happens because flaml needs to do its best effort to"
                             " retrain without violating the time budget when retrain_full is set to 'budget'. "
                             " If this mismatch is not desired, please set retrain_full to True."

From af19cef4ad02c0c51de6ebbef0528677dfb1848f Mon Sep 17 00:00:00 2001
From: godcallray <godcallray@gmail.com>
Date: Mon, 2 Jan 2023 18:59:08 +0800
Subject: [PATCH 11/12] add_test

---
 test/automl/test_warmstart.py | 26 ++++++++++++++++++++++++++
 1 file changed, 26 insertions(+)

diff --git a/test/automl/test_warmstart.py b/test/automl/test_warmstart.py
index d65176b516..cf5b030140 100644
--- a/test/automl/test_warmstart.py
+++ b/test/automl/test_warmstart.py
@@ -231,5 +231,31 @@ def test_FLAML_sample_size_in_starting_points(self):
         )
 
 
+        automl5 = AutoML()
+        settings = {
+            "time_budget": 10,  # total running time in seconds
+            "metric": 'r2',  # primary metrics for regression can be chosen from: ['mae','mse','r2']
+            "estimator_list": ['lgbm'],  # list of ML learners; we tune lightgbm in this example
+            "task": 'regression',  # task type
+            "log_file_name": 'houses_experiment.log',  # flaml log file
+            "log_type": 'all',
+            "seed": 0,  # random seed
+        }
+        automl5.fit(X_train=X_train, y_train=y_train, **settings)
+        warm_start_config = automl5.best_config
+
+        # fitting again
+        settings = {
+            "time_budget": 10,  # total running time in seconds
+            "metric": 'r2',  # primary metrics for regression can be chosen from: ['mae','mse','r2']
+            "estimator_list": ['lgbm'],  # list of ML learners; we tune lightgbm in this example
+            "task": 'regression',  # task type
+            "log_file_name": 'houses_experiment2.log',  # flaml log file (changed name from last run)
+            "log_type": 'all',
+            "seed": 0,  # random seed
+        }
+        automl5.fit(X_train=X_train, y_train=y_train, **settings, starting_points=warm_start_config)
+
+
 if __name__ == "__main__":
     unittest.main()

From 72dd4baa5ea92ed6078a7a96d06e9ba69bda29ec Mon Sep 17 00:00:00 2001
From: godcallray <62365520+godcallray@users.noreply.github.com>
Date: Fri, 6 Jan 2023 02:15:23 -0500
Subject: [PATCH 12/12] Update test_warmstart.py

---
 test/automl/test_warmstart.py | 28 ----------------------------
 1 file changed, 28 deletions(-)

diff --git a/test/automl/test_warmstart.py b/test/automl/test_warmstart.py
index cf5b030140..8aaedb0289 100644
--- a/test/automl/test_warmstart.py
+++ b/test/automl/test_warmstart.py
@@ -229,33 +229,5 @@ def test_FLAML_sample_size_in_starting_points(self):
             y_train,
             **automl_settings,
         )
-
-
-        automl5 = AutoML()
-        settings = {
-            "time_budget": 10,  # total running time in seconds
-            "metric": 'r2',  # primary metrics for regression can be chosen from: ['mae','mse','r2']
-            "estimator_list": ['lgbm'],  # list of ML learners; we tune lightgbm in this example
-            "task": 'regression',  # task type
-            "log_file_name": 'houses_experiment.log',  # flaml log file
-            "log_type": 'all',
-            "seed": 0,  # random seed
-        }
-        automl5.fit(X_train=X_train, y_train=y_train, **settings)
-        warm_start_config = automl5.best_config
-
-        # fitting again
-        settings = {
-            "time_budget": 10,  # total running time in seconds
-            "metric": 'r2',  # primary metrics for regression can be chosen from: ['mae','mse','r2']
-            "estimator_list": ['lgbm'],  # list of ML learners; we tune lightgbm in this example
-            "task": 'regression',  # task type
-            "log_file_name": 'houses_experiment2.log',  # flaml log file (changed name from last run)
-            "log_type": 'all',
-            "seed": 0,  # random seed
-        }
-        automl5.fit(X_train=X_train, y_train=y_train, **settings, starting_points=warm_start_config)
-
-
 if __name__ == "__main__":
     unittest.main()