From f55d35e8844700790dde575e2847debafee53ae7 Mon Sep 17 00:00:00 2001 From: godcallray Date: Mon, 28 Nov 2022 23:24:28 -0500 Subject: [PATCH 01/12] ml --- flaml/ml.py | 11 +++++++---- test/automl/test_notebook_example.py | 15 ++++++++------- 2 files changed, 15 insertions(+), 11 deletions(-) diff --git a/flaml/ml.py b/flaml/ml.py index e521c32395..1590a5f237 100644 --- a/flaml/ml.py +++ b/flaml/ml.py @@ -52,6 +52,7 @@ "mse", "accuracy", "roc_auc", + "roc_auc_weighted", "roc_auc_ovr", "roc_auc_ovo", "log_loss", @@ -192,7 +193,7 @@ def metric_loss_score( metric_name + " is not an built-in sklearn metric and nlp is not installed. " "Currently built-in sklearn metrics are: " - "r2, rmse, mae, mse, accuracy, roc_auc, roc_auc_ovr, roc_auc_ovo," + "r2, rmse, mae, mse, accuracy, roc_auc, roc_auc_weighted, roc_auc_ovr, roc_auc_ovo," "log_loss, mape, f1, micro_f1, macro_f1, ap. " "If the metric is an nlp metric, please pip install flaml[nlp] ", "or pass a customized metric function to AutoML.fit(metric=func)", @@ -203,7 +204,7 @@ def metric_loss_score( raise ValueError( metric_name + " is neither an sklearn metric nor a huggingface metric. " "Currently built-in sklearn metrics are: " - "r2, rmse, mae, mse, accuracy, roc_auc, roc_auc_ovr, roc_auc_ovo," + "r2, rmse, mae, mse, accuracy, roc_auc, roc_auc_weighted, roc_auc_ovr, roc_auc_ovo," "log_loss, mape, f1, micro_f1, macro_f1, ap. " "Currently built-in huggingface metrics are: " + ", ".join(huggingface_metric_to_mode.keys()) @@ -268,6 +269,8 @@ def sklearn_metric_loss_score( score = 1.0 - accuracy_score(y_true, y_predict, sample_weight=sample_weight) elif metric_name == "roc_auc": score = 1.0 - roc_auc_score(y_true, y_predict, sample_weight=sample_weight) + elif metric_name == "roc_auc_weighted": + score = 1.0 - roc_auc_score(y_true, y_predict, average=weighted, sample_weight=sample_weight) elif metric_name == "roc_auc_ovr": score = 1.0 - roc_auc_score( y_true, y_predict, sample_weight=sample_weight, multi_class="ovr" @@ -318,10 +321,10 @@ def sklearn_metric_loss_score( def get_y_pred(estimator, X, eval_metric, obj): - if eval_metric in ["roc_auc", "ap"] and "binary" in obj: + if eval_metric in ["roc_auc", "roc_auc_weighted", "ap"] and "binary" in obj: y_pred_classes = estimator.predict_proba(X) y_pred = y_pred_classes[:, 1] if y_pred_classes.ndim > 1 else y_pred_classes - elif eval_metric in ["log_loss", "roc_auc", "roc_auc_ovr", "roc_auc_ovo"]: + elif eval_metric in ["log_loss", "roc_auc", "roc_auc_weighted", "roc_auc_ovr", "roc_auc_ovo"]: y_pred = estimator.predict_proba(X) else: y_pred = estimator.predict(X) diff --git a/test/automl/test_notebook_example.py b/test/automl/test_notebook_example.py index 818f7af363..024d35b711 100644 --- a/test/automl/test_notebook_example.py +++ b/test/automl/test_notebook_example.py @@ -75,6 +75,9 @@ def test_automl(budget=5, dataset_format="dataframe", hpo_method=None): print( "roc_auc", "=", 1 - sklearn_metric_loss_score("roc_auc", y_pred_proba, y_test) ) + print( + "roc_auc_weighted", "=", 1 - sklearn_metric_loss_score("roc_auc_weighted", y_pred_proba, y_test) + ) print("log_loss", "=", sklearn_metric_loss_score("log_loss", y_pred_proba, y_test)) if budget is None: assert accuracy >= 0.669, "the accuracy of flaml should be larger than 0.67" @@ -108,7 +111,10 @@ def _test_nobudget(): def test_mlflow(): - # subprocess.check_call([sys.executable, "-m", "pip", "install", "mlflow"]) + import subprocess + import sys + + subprocess.check_call([sys.executable, "-m", "pip", "install", "mlflow"]) import mlflow from flaml.data import load_openml_task @@ -149,12 +155,9 @@ def test_mlflow(): print(automl.predict_proba(X_test)) except ImportError: pass + # subprocess.check_call([sys.executable, "-m", "pip", "uninstall", "mlflow"]) - -def test_mlflow_iris(): from sklearn.datasets import load_iris - import mlflow - from flaml import AutoML with mlflow.start_run(): automl = AutoML() @@ -167,8 +170,6 @@ def test_mlflow_iris(): X_train, y_train = load_iris(return_X_y=True) automl.fit(X_train=X_train, y_train=y_train, **automl_settings) - # subprocess.check_call([sys.executable, "-m", "pip", "uninstall", "mlflow"]) - if __name__ == "__main__": test_automl(600) From 6dd3c7f0c17f5b137086d75c813c67304a34ccb3 Mon Sep 17 00:00:00 2001 From: godcallray Date: Tue, 29 Nov 2022 23:02:40 +0800 Subject: [PATCH 02/12] logger --- flaml/automl.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/flaml/automl.py b/flaml/automl.py index 4e2772ed56..e1d74fb4c8 100644 --- a/flaml/automl.py +++ b/flaml/automl.py @@ -98,8 +98,10 @@ def valid_starting_point_one_dim(self, value_one_dim, domain_one_dim): and renamed_type is float ) if not (type_match and domain_one_dim.is_valid(value_one_dim)): + logger.error("Starting_points is not provided in the right format") return False elif value_one_dim != domain_one_dim: + logger.error("Starting_points is not provided in the right format") return False return True @@ -3396,6 +3398,13 @@ def _search_sequential(self): state.best_config, self.data_size_full, ) + + if(self._trained_estimator.params[self._trained_estimaotr.ITER_HP]!=self.best_config[self._trained_estimaotr.ITER_HP]): + logger.warning( + "early stopping happened" + ) + + logger.info( "retrain {} for {:.1f}s".format(self._best_estimator, retrain_time) ) From 71193c4a0a72d87c2ef986aa539abce7ca4fe43a Mon Sep 17 00:00:00 2001 From: godcallray Date: Wed, 30 Nov 2022 10:31:53 +0800 Subject: [PATCH 03/12] backml --- flaml/ml.py | 11 ++++------- test/automl/test_notebook_example.py | 15 +++++++-------- 2 files changed, 11 insertions(+), 15 deletions(-) diff --git a/flaml/ml.py b/flaml/ml.py index 1590a5f237..e521c32395 100644 --- a/flaml/ml.py +++ b/flaml/ml.py @@ -52,7 +52,6 @@ "mse", "accuracy", "roc_auc", - "roc_auc_weighted", "roc_auc_ovr", "roc_auc_ovo", "log_loss", @@ -193,7 +192,7 @@ def metric_loss_score( metric_name + " is not an built-in sklearn metric and nlp is not installed. " "Currently built-in sklearn metrics are: " - "r2, rmse, mae, mse, accuracy, roc_auc, roc_auc_weighted, roc_auc_ovr, roc_auc_ovo," + "r2, rmse, mae, mse, accuracy, roc_auc, roc_auc_ovr, roc_auc_ovo," "log_loss, mape, f1, micro_f1, macro_f1, ap. " "If the metric is an nlp metric, please pip install flaml[nlp] ", "or pass a customized metric function to AutoML.fit(metric=func)", @@ -204,7 +203,7 @@ def metric_loss_score( raise ValueError( metric_name + " is neither an sklearn metric nor a huggingface metric. " "Currently built-in sklearn metrics are: " - "r2, rmse, mae, mse, accuracy, roc_auc, roc_auc_weighted, roc_auc_ovr, roc_auc_ovo," + "r2, rmse, mae, mse, accuracy, roc_auc, roc_auc_ovr, roc_auc_ovo," "log_loss, mape, f1, micro_f1, macro_f1, ap. " "Currently built-in huggingface metrics are: " + ", ".join(huggingface_metric_to_mode.keys()) @@ -269,8 +268,6 @@ def sklearn_metric_loss_score( score = 1.0 - accuracy_score(y_true, y_predict, sample_weight=sample_weight) elif metric_name == "roc_auc": score = 1.0 - roc_auc_score(y_true, y_predict, sample_weight=sample_weight) - elif metric_name == "roc_auc_weighted": - score = 1.0 - roc_auc_score(y_true, y_predict, average=weighted, sample_weight=sample_weight) elif metric_name == "roc_auc_ovr": score = 1.0 - roc_auc_score( y_true, y_predict, sample_weight=sample_weight, multi_class="ovr" @@ -321,10 +318,10 @@ def sklearn_metric_loss_score( def get_y_pred(estimator, X, eval_metric, obj): - if eval_metric in ["roc_auc", "roc_auc_weighted", "ap"] and "binary" in obj: + if eval_metric in ["roc_auc", "ap"] and "binary" in obj: y_pred_classes = estimator.predict_proba(X) y_pred = y_pred_classes[:, 1] if y_pred_classes.ndim > 1 else y_pred_classes - elif eval_metric in ["log_loss", "roc_auc", "roc_auc_weighted", "roc_auc_ovr", "roc_auc_ovo"]: + elif eval_metric in ["log_loss", "roc_auc", "roc_auc_ovr", "roc_auc_ovo"]: y_pred = estimator.predict_proba(X) else: y_pred = estimator.predict(X) diff --git a/test/automl/test_notebook_example.py b/test/automl/test_notebook_example.py index 024d35b711..818f7af363 100644 --- a/test/automl/test_notebook_example.py +++ b/test/automl/test_notebook_example.py @@ -75,9 +75,6 @@ def test_automl(budget=5, dataset_format="dataframe", hpo_method=None): print( "roc_auc", "=", 1 - sklearn_metric_loss_score("roc_auc", y_pred_proba, y_test) ) - print( - "roc_auc_weighted", "=", 1 - sklearn_metric_loss_score("roc_auc_weighted", y_pred_proba, y_test) - ) print("log_loss", "=", sklearn_metric_loss_score("log_loss", y_pred_proba, y_test)) if budget is None: assert accuracy >= 0.669, "the accuracy of flaml should be larger than 0.67" @@ -111,10 +108,7 @@ def _test_nobudget(): def test_mlflow(): - import subprocess - import sys - - subprocess.check_call([sys.executable, "-m", "pip", "install", "mlflow"]) + # subprocess.check_call([sys.executable, "-m", "pip", "install", "mlflow"]) import mlflow from flaml.data import load_openml_task @@ -155,9 +149,12 @@ def test_mlflow(): print(automl.predict_proba(X_test)) except ImportError: pass - # subprocess.check_call([sys.executable, "-m", "pip", "uninstall", "mlflow"]) + +def test_mlflow_iris(): from sklearn.datasets import load_iris + import mlflow + from flaml import AutoML with mlflow.start_run(): automl = AutoML() @@ -170,6 +167,8 @@ def test_mlflow(): X_train, y_train = load_iris(return_X_y=True) automl.fit(X_train=X_train, y_train=y_train, **automl_settings) + # subprocess.check_call([sys.executable, "-m", "pip", "uninstall", "mlflow"]) + if __name__ == "__main__": test_automl(600) From 5f6c854a1ecd6f533c12d5f2e4dda5a89b1d0229 Mon Sep 17 00:00:00 2001 From: Qingyun Wu Date: Mon, 26 Dec 2022 14:16:19 -0800 Subject: [PATCH 04/12] Add warning if starting_points is not the right format --- flaml/automl/automl.py | 76 +++++++++++++++++++++++++++--------------- 1 file changed, 49 insertions(+), 27 deletions(-) diff --git a/flaml/automl/automl.py b/flaml/automl/automl.py index d446330e9c..23ff5ff064 100644 --- a/flaml/automl/automl.py +++ b/flaml/automl/automl.py @@ -2728,32 +2728,54 @@ def cv_score_agg_func(val_loss_folds, log_metrics_folds): _sample_size_from_starting_points = {} if isinstance(starting_points, dict): - for _estimator, _point_per_estimator in starting_points.items(): - sample_size = ( - _point_per_estimator - and isinstance(_point_per_estimator, dict) - and _point_per_estimator.get("FLAML_sample_size") + _estimators_from_starting_points = starting_points.keys() + if not any( + i in self.estimator_list for i in _estimators_from_starting_points + ): + logger.warning( + "The proivded starting_points does not contain relevant estimators as keys" + " and is thus NOT used. Please check the format of starting_points." ) - if sample_size: - _sample_size_from_starting_points[_estimator] = sample_size - elif _point_per_estimator and isinstance(_point_per_estimator, list): - _sample_size_set = set( - [ - config["FLAML_sample_size"] - for config in _point_per_estimator - if "FLAML_sample_size" in config - ] - ) - if _sample_size_set: - _sample_size_from_starting_points[_estimator] = min( - _sample_size_set - ) - if len(_sample_size_set) > 1: + else: + for _estimator, _point_per_estimator in starting_points.items(): + + if not isinstance(_point_per_estimator, dict) and not isinstance( + _point_per_estimator, list + ): logger.warning( - "Using the min FLAML_sample_size of all the provided starting points for estimator {}. (Provided FLAML_sample_size are: {})".format( - _estimator, _sample_size_set + "Starting_points for estimator {} is not provide in the format and is thus NOT used!" + "When the starting_points is a dict, the keys are the name of the estimators, and the" + "values should be hyperparamter configuration dicts or lists of hyperparamter configuration dicts".format( + _estimator ) ) + sample_size = ( + _point_per_estimator + and isinstance(_point_per_estimator, dict) + and _point_per_estimator.get("FLAML_sample_size") + ) + if sample_size: + _sample_size_from_starting_points[_estimator] = sample_size + elif _point_per_estimator and isinstance( + _point_per_estimator, list + ): + _sample_size_set = set( + [ + config["FLAML_sample_size"] + for config in _point_per_estimator + if "FLAML_sample_size" in config + ] + ) + if _sample_size_set: + _sample_size_from_starting_points[_estimator] = min( + _sample_size_set + ) + if len(_sample_size_set) > 1: + logger.warning( + "Using the min FLAML_sample_size of all the provided starting points for estimator {}. (Provided FLAML_sample_size are: {})".format( + _estimator, _sample_size_set + ) + ) if not sample and isinstance(starting_points, dict): assert ( @@ -3560,11 +3582,11 @@ def _search_sequential(self): self.data_size_full, ) - if(self._trained_estimator.params[self._trained_estimaotr.ITER_HP]!=self.best_config[self._trained_estimaotr.ITER_HP]): - logger.warning( - "early stopping happened" - ) - + if ( + self._trained_estimator.params[self._trained_estimaotr.ITER_HP] + != self.best_config[self._trained_estimaotr.ITER_HP] + ): + logger.warning("early stopping happened") logger.info( "retrain {} for {:.1f}s".format(self._best_estimator, retrain_time) From 2e209d14c57cc6c1b1c84c3459a64db4a1fef977 Mon Sep 17 00:00:00 2001 From: Qingyun Wu Date: Mon, 26 Dec 2022 14:45:57 -0800 Subject: [PATCH 05/12] simplify code --- flaml/automl/automl.py | 156 +++++++++++++++++++---------------------- 1 file changed, 72 insertions(+), 84 deletions(-) diff --git a/flaml/automl/automl.py b/flaml/automl/automl.py index 23ff5ff064..ae2fdf4164 100644 --- a/flaml/automl/automl.py +++ b/flaml/automl/automl.py @@ -111,10 +111,8 @@ def valid_starting_point_one_dim(self, value_one_dim, domain_one_dim): and renamed_type is float ) if not (type_match and domain_one_dim.is_valid(value_one_dim)): - logger.error("Starting_points is not provided in the right format") return False elif value_one_dim != domain_one_dim: - logger.error("Starting_points is not provided in the right format") return False return True @@ -2726,56 +2724,86 @@ def cv_score_agg_func(val_loss_folds, log_metrics_folds): ) self._auto_augment = auto_augment - _sample_size_from_starting_points = {} + if "auto" == estimator_list: + if self._state.task == "rank": + estimator_list = ["lgbm", "xgboost", "xgb_limitdepth"] + elif _is_nlp_task(self._state.task): + estimator_list = ["transformer"] + elif self._state.task == TS_FORECASTPANEL: + estimator_list = ["tft"] + else: + try: + import catboost + + estimator_list = [ + "lgbm", + "rf", + "catboost", + "xgboost", + "extra_tree", + "xgb_limitdepth", + ] + except ImportError: + estimator_list = [ + "lgbm", + "rf", + "xgboost", + "extra_tree", + "xgb_limitdepth", + ] + if self._state.task in TS_FORECAST: + # catboost is removed because it has a `name` parameter, making it incompatible with hcrystalball + if "catboost" in estimator_list: + estimator_list.remove("catboost") + if self._state.task in TS_FORECASTREGRESSION: + try: + import prophet + + estimator_list += ["prophet", "arima", "sarimax"] + except ImportError: + estimator_list += ["arima", "sarimax"] + elif "regression" != self._state.task: + estimator_list += ["lrl1"] + if isinstance(starting_points, dict): _estimators_from_starting_points = starting_points.keys() - if not any( - i in self.estimator_list for i in _estimators_from_starting_points - ): + if not any(i in estimator_list for i in _estimators_from_starting_points): logger.warning( - "The proivded starting_points does not contain relevant estimators as keys" - " and is thus NOT used. Please check the format of starting_points." + "The proivded starting_points {} is removed as it does not contain relevant estimators as keys" + " and is thus NOT used. Please check the required format of starting_points.".format( + starting_points + ) ) - else: - for _estimator, _point_per_estimator in starting_points.items(): + starting_points = {} - if not isinstance(_point_per_estimator, dict) and not isinstance( - _point_per_estimator, list - ): - logger.warning( - "Starting_points for estimator {} is not provide in the format and is thus NOT used!" - "When the starting_points is a dict, the keys are the name of the estimators, and the" - "values should be hyperparamter configuration dicts or lists of hyperparamter configuration dicts".format( - _estimator - ) - ) - sample_size = ( - _point_per_estimator - and isinstance(_point_per_estimator, dict) - and _point_per_estimator.get("FLAML_sample_size") + _sample_size_from_starting_points = {} + if isinstance(starting_points, dict): + for _estimator, _point_per_estimator in starting_points.items(): + sample_size = ( + _point_per_estimator + and isinstance(_point_per_estimator, dict) + and _point_per_estimator.get("FLAML_sample_size") + ) + if sample_size: + _sample_size_from_starting_points[_estimator] = sample_size + elif _point_per_estimator and isinstance(_point_per_estimator, list): + _sample_size_set = set( + [ + config["FLAML_sample_size"] + for config in _point_per_estimator + if "FLAML_sample_size" in config + ] ) - if sample_size: - _sample_size_from_starting_points[_estimator] = sample_size - elif _point_per_estimator and isinstance( - _point_per_estimator, list - ): - _sample_size_set = set( - [ - config["FLAML_sample_size"] - for config in _point_per_estimator - if "FLAML_sample_size" in config - ] + if _sample_size_set: + _sample_size_from_starting_points[_estimator] = min( + _sample_size_set ) - if _sample_size_set: - _sample_size_from_starting_points[_estimator] = min( - _sample_size_set - ) - if len(_sample_size_set) > 1: - logger.warning( - "Using the min FLAML_sample_size of all the provided starting points for estimator {}. (Provided FLAML_sample_size are: {})".format( - _estimator, _sample_size_set - ) + if len(_sample_size_set) > 1: + logger.warning( + "Using the min FLAML_sample_size of all the provided starting points for estimator {}. (Provided FLAML_sample_size are: {})".format( + _estimator, _sample_size_set ) + ) if not sample and isinstance(starting_points, dict): assert ( @@ -2867,46 +2895,6 @@ def is_to_reverse_metric(metric, task): error_metric = "customized metric" logger.info(f"Minimizing error metric: {error_metric}") - if "auto" == estimator_list: - if self._state.task == "rank": - estimator_list = ["lgbm", "xgboost", "xgb_limitdepth"] - elif _is_nlp_task(self._state.task): - estimator_list = ["transformer"] - elif self._state.task == TS_FORECASTPANEL: - estimator_list = ["tft"] - else: - try: - import catboost - - estimator_list = [ - "lgbm", - "rf", - "catboost", - "xgboost", - "extra_tree", - "xgb_limitdepth", - ] - except ImportError: - estimator_list = [ - "lgbm", - "rf", - "xgboost", - "extra_tree", - "xgb_limitdepth", - ] - if self._state.task in TS_FORECAST: - # catboost is removed because it has a `name` parameter, making it incompatible with hcrystalball - if "catboost" in estimator_list: - estimator_list.remove("catboost") - if self._state.task in TS_FORECASTREGRESSION: - try: - import prophet - - estimator_list += ["prophet", "arima", "sarimax"] - except ImportError: - estimator_list += ["arima", "sarimax"] - elif "regression" != self._state.task: - estimator_list += ["lrl1"] # When no search budget is specified if no_budget: max_iter = len(estimator_list) From 1f3a4bb4754044cca772b0ae788ccb3040908361 Mon Sep 17 00:00:00 2001 From: Qingyun Wu Date: Mon, 26 Dec 2022 15:47:01 -0800 Subject: [PATCH 06/12] typo --- flaml/automl/automl.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/flaml/automl/automl.py b/flaml/automl/automl.py index ae2fdf4164..9af1bb2afe 100644 --- a/flaml/automl/automl.py +++ b/flaml/automl/automl.py @@ -3571,8 +3571,8 @@ def _search_sequential(self): ) if ( - self._trained_estimator.params[self._trained_estimaotr.ITER_HP] - != self.best_config[self._trained_estimaotr.ITER_HP] + self._trained_estimator.params[self._trained_estimator.ITER_HP] + != self.best_config[self._trained_estimator.ITER_HP] ): logger.warning("early stopping happened") From 5db2ca7f769be4b08918ac067e036b49eb16abc3 Mon Sep 17 00:00:00 2001 From: Qingyun Wu Date: Thu, 29 Dec 2022 21:05:09 -0800 Subject: [PATCH 07/12] add test for the warmstart warning --- test/automl/test_warmstart.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/test/automl/test_warmstart.py b/test/automl/test_warmstart.py index 74cf8c37f2..d65176b516 100644 --- a/test/automl/test_warmstart.py +++ b/test/automl/test_warmstart.py @@ -219,6 +219,17 @@ def test_FLAML_sample_size_in_starting_points(self): except AssertionError: pass + # In the following test case, the starting_points is not provided in the + # right format and thus we expect a warning for removing the provided + # starting_points when the fit function is called + automl5 = AutoML() + automl_settings["starting_points"] = automl3.best_config + automl5.fit( + X_train, + y_train, + **automl_settings, + ) + if __name__ == "__main__": unittest.main() From 3fb9bd4a4e53792bdbb4aaf0fdebc384a49ab6f6 Mon Sep 17 00:00:00 2001 From: Qingyun Wu Date: Fri, 30 Dec 2022 13:46:48 -0800 Subject: [PATCH 08/12] revise warning condition --- flaml/automl/automl.py | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/flaml/automl/automl.py b/flaml/automl/automl.py index 9af1bb2afe..72b432ee7f 100644 --- a/flaml/automl/automl.py +++ b/flaml/automl/automl.py @@ -3569,12 +3569,24 @@ def _search_sequential(self): state.best_config, self.data_size_full, ) - - if ( - self._trained_estimator.params[self._trained_estimator.ITER_HP] - != self.best_config[self._trained_estimator.ITER_HP] + if getattr(self._trained_estimator, "params", {}) and getattr( + self._trained_estimator, "ITER_HP", None ): - logger.warning("early stopping happened") + _hp_trained_iter = self._trained_estimator.params.get( + self._trained_estimator.ITER_HP + ) + _best_config_iter = self.best_config.get( + self._trained_estimator.ITER_HP + ) + if _hp_trained_iter != _best_config_iter: + logger.warning( + "Early stopping happened when retraining a model with the best configuration." + f" The best config's ITER_HP is {_best_config_iter}" + f" and the actual ITER_HP used for retraining the model is {_hp_trained_iter}." + " This early stopping happens because because flaml needs to do its best effort to" + " retrain without violating the time budget when retrain_full is set to 'budget'. " + " If this mismatch is not desired, please set retrain_full to True." + ) logger.info( "retrain {} for {:.1f}s".format(self._best_estimator, retrain_time) From ba5f61fbfcf07eb2598279cd06ba6d066a6262fb Mon Sep 17 00:00:00 2001 From: Qingyun Wu Date: Fri, 30 Dec 2022 13:49:11 -0800 Subject: [PATCH 09/12] fix typo --- flaml/automl/automl.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flaml/automl/automl.py b/flaml/automl/automl.py index 72b432ee7f..74d270f2de 100644 --- a/flaml/automl/automl.py +++ b/flaml/automl/automl.py @@ -3583,7 +3583,7 @@ def _search_sequential(self): "Early stopping happened when retraining a model with the best configuration." f" The best config's ITER_HP is {_best_config_iter}" f" and the actual ITER_HP used for retraining the model is {_hp_trained_iter}." - " This early stopping happens because because flaml needs to do its best effort to" + " This early stopping happens because flaml needs to do its best effort to" " retrain without violating the time budget when retrain_full is set to 'budget'. " " If this mismatch is not desired, please set retrain_full to True." ) From 773c7ed79c9123f0ecb3288ae220c0a091076ced Mon Sep 17 00:00:00 2001 From: Qingyun Wu Date: Sun, 1 Jan 2023 10:13:10 -0500 Subject: [PATCH 10/12] Update flaml/automl/automl.py Co-authored-by: Chi Wang --- flaml/automl/automl.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/flaml/automl/automl.py b/flaml/automl/automl.py index 74d270f2de..0585043652 100644 --- a/flaml/automl/automl.py +++ b/flaml/automl/automl.py @@ -3581,8 +3581,8 @@ def _search_sequential(self): if _hp_trained_iter != _best_config_iter: logger.warning( "Early stopping happened when retraining a model with the best configuration." - f" The best config's ITER_HP is {_best_config_iter}" - f" and the actual ITER_HP used for retraining the model is {_hp_trained_iter}." + f" The best config's {self._trained_estimator.ITER_HP} is {_best_config_iter}" + f" and the actual {self._trained_estimator.ITER_HP} used for retraining the model is {_hp_trained_iter}." " This early stopping happens because flaml needs to do its best effort to" " retrain without violating the time budget when retrain_full is set to 'budget'. " " If this mismatch is not desired, please set retrain_full to True." From af19cef4ad02c0c51de6ebbef0528677dfb1848f Mon Sep 17 00:00:00 2001 From: godcallray Date: Mon, 2 Jan 2023 18:59:08 +0800 Subject: [PATCH 11/12] add_test --- test/automl/test_warmstart.py | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/test/automl/test_warmstart.py b/test/automl/test_warmstart.py index d65176b516..cf5b030140 100644 --- a/test/automl/test_warmstart.py +++ b/test/automl/test_warmstart.py @@ -231,5 +231,31 @@ def test_FLAML_sample_size_in_starting_points(self): ) + automl5 = AutoML() + settings = { + "time_budget": 10, # total running time in seconds + "metric": 'r2', # primary metrics for regression can be chosen from: ['mae','mse','r2'] + "estimator_list": ['lgbm'], # list of ML learners; we tune lightgbm in this example + "task": 'regression', # task type + "log_file_name": 'houses_experiment.log', # flaml log file + "log_type": 'all', + "seed": 0, # random seed + } + automl5.fit(X_train=X_train, y_train=y_train, **settings) + warm_start_config = automl5.best_config + + # fitting again + settings = { + "time_budget": 10, # total running time in seconds + "metric": 'r2', # primary metrics for regression can be chosen from: ['mae','mse','r2'] + "estimator_list": ['lgbm'], # list of ML learners; we tune lightgbm in this example + "task": 'regression', # task type + "log_file_name": 'houses_experiment2.log', # flaml log file (changed name from last run) + "log_type": 'all', + "seed": 0, # random seed + } + automl5.fit(X_train=X_train, y_train=y_train, **settings, starting_points=warm_start_config) + + if __name__ == "__main__": unittest.main() From 72dd4baa5ea92ed6078a7a96d06e9ba69bda29ec Mon Sep 17 00:00:00 2001 From: godcallray <62365520+godcallray@users.noreply.github.com> Date: Fri, 6 Jan 2023 02:15:23 -0500 Subject: [PATCH 12/12] Update test_warmstart.py --- test/automl/test_warmstart.py | 28 ---------------------------- 1 file changed, 28 deletions(-) diff --git a/test/automl/test_warmstart.py b/test/automl/test_warmstart.py index cf5b030140..8aaedb0289 100644 --- a/test/automl/test_warmstart.py +++ b/test/automl/test_warmstart.py @@ -229,33 +229,5 @@ def test_FLAML_sample_size_in_starting_points(self): y_train, **automl_settings, ) - - - automl5 = AutoML() - settings = { - "time_budget": 10, # total running time in seconds - "metric": 'r2', # primary metrics for regression can be chosen from: ['mae','mse','r2'] - "estimator_list": ['lgbm'], # list of ML learners; we tune lightgbm in this example - "task": 'regression', # task type - "log_file_name": 'houses_experiment.log', # flaml log file - "log_type": 'all', - "seed": 0, # random seed - } - automl5.fit(X_train=X_train, y_train=y_train, **settings) - warm_start_config = automl5.best_config - - # fitting again - settings = { - "time_budget": 10, # total running time in seconds - "metric": 'r2', # primary metrics for regression can be chosen from: ['mae','mse','r2'] - "estimator_list": ['lgbm'], # list of ML learners; we tune lightgbm in this example - "task": 'regression', # task type - "log_file_name": 'houses_experiment2.log', # flaml log file (changed name from last run) - "log_type": 'all', - "seed": 0, # random seed - } - automl5.fit(X_train=X_train, y_train=y_train, **settings, starting_points=warm_start_config) - - if __name__ == "__main__": unittest.main()