Skip to content

Commit 857bb02

Browse files
Merge pull request #332 from ZmeiGorynych/ray
Add support for Ray in running trials, fix Pandas warning
2 parents 2302262 + 185dcc8 commit 857bb02

File tree

10 files changed

+456
-246
lines changed

10 files changed

+456
-246
lines changed

causaltune/models/regression.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ class ElasticNetEstimator(SKLearnEstimator):
1717
ITER_HP = "max_iter"
1818

1919
@classmethod
20-
def search_space(cls, data_size, task="regresssion", **params):
20+
def search_space(cls, data_size, task="regression", **params):
2121
return {
2222
"alpha": {
2323
"domain": tune.loguniform(lower=0.0001, upper=1.0),

causaltune/optimiser.py

Lines changed: 47 additions & 79 deletions
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,6 @@
1717

1818
from econml.inference import BootstrapInference
1919

20-
from joblib import Parallel, delayed
21-
2220
from causaltune.search.params import SimpleParamService
2321
from causaltune.score.scoring import Scorer, metrics_to_minimize
2422
from causaltune.utils import treatment_is_multivalue
@@ -34,13 +32,9 @@
3432
from causaltune.dataset_processor import CausalityDatasetProcessor
3533
from causaltune.models.passthrough import feature_filter
3634

37-
# tune.run = run
38-
3935

4036
# Patched from sklearn.linear_model._base to adjust rtol and atol values
41-
def _check_precomputed_gram_matrix(
42-
X, precompute, X_offset, X_scale, rtol=1e-4, atol=1e-2
43-
):
37+
def _check_precomputed_gram_matrix(X, precompute, X_offset, X_scale, rtol=1e-4, atol=1e-2):
4438
n_features = X.shape[1]
4539
f1 = n_features // 2
4640
f2 = min(f1 + 1, n_features - 1)
@@ -177,24 +171,17 @@ def __init__(
177171
self._settings["tuner"]["time_budget_s"] = time_budget
178172
self._settings["tuner"]["num_samples"] = num_samples
179173
self._settings["tuner"]["verbose"] = verbose
180-
self._settings["tuner"][
181-
"use_ray"
182-
] = use_ray # requires ray to be installed via pip install flaml[ray]
183174
self._settings["tuner"]["resources_per_trial"] = (
184175
resources_per_trial if resources_per_trial is not None else {"cpu": 0.5}
185176
)
186177
self._settings["try_init_configs"] = try_init_configs
187-
self._settings["include_experimental_estimators"] = (
188-
include_experimental_estimators
189-
)
178+
self._settings["include_experimental_estimators"] = include_experimental_estimators
190179

191180
# params for FLAML on component models:
192181
self._settings["component_models"] = {}
193182
self._settings["component_models"]["task"] = components_task
194183
self._settings["component_models"]["verbose"] = components_verbose
195-
self._settings["component_models"][
196-
"pred_time_limit"
197-
] = components_pred_time_limit
184+
self._settings["component_models"]["pred_time_limit"] = components_pred_time_limit
198185
self._settings["component_models"]["n_jobs"] = components_njobs
199186
self._settings["component_models"]["time_budget"] = components_time_budget
200187
self._settings["component_models"]["eval_method"] = "holdout"
@@ -221,6 +208,7 @@ def __init__(
221208
self.causal_model = None
222209
self.identified_estimand = None
223210
self.problem = None
211+
self.use_ray = use_ray
224212
# properties that are used to resume fits (warm start)
225213
self.resume_scores = []
226214
self.resume_cfg = []
@@ -239,9 +227,7 @@ def init_propensity_model(self, propensity_model: str):
239227
self.propensity_model = AutoML(
240228
**{**self._settings["component_models"], "task": "classification"}
241229
)
242-
elif hasattr(propensity_model, "fit") and hasattr(
243-
propensity_model, "predict_proba"
244-
):
230+
elif hasattr(propensity_model, "fit") and hasattr(propensity_model, "predict_proba"):
245231
self.propensity_model = propensity_model
246232
else:
247233
raise ValueError(
@@ -266,9 +252,7 @@ def init_outcome_model(self, outcome_model):
266252
# The current default behavior
267253
return self.auto_outcome_model()
268254
else:
269-
raise ValueError(
270-
'outcome_model valid values are None, "auto", or an estimator object'
271-
)
255+
raise ValueError('outcome_model valid values are None, "auto", or an estimator object')
272256

273257
def auto_outcome_model(self):
274258
data = self.data
@@ -303,6 +287,7 @@ def fit(
303287
preprocess: bool = False,
304288
encoder_type: Optional[str] = None,
305289
encoder_outcome: Optional[str] = None,
290+
use_ray: Optional[bool] = None,
306291
):
307292
"""Performs AutoML on list of causal inference estimators
308293
- If estimator has a search space specified in its parameters, HPO is performed on the whole model.
@@ -326,6 +311,9 @@ def fit(
326311
Returns:
327312
None
328313
"""
314+
if use_ray is not None:
315+
self.use_ray = use_ray
316+
329317
if outcome is None and isinstance(data, CausalityDataset):
330318
outcome = data.outcomes[0]
331319

@@ -344,19 +332,15 @@ def fit(
344332
if preprocess:
345333
data = copy.deepcopy(data)
346334
self.dataset_processor = CausalityDatasetProcessor()
347-
self.dataset_processor.fit(
348-
data, encoder_type=encoder_type, outcome=encoder_outcome
349-
)
335+
self.dataset_processor.fit(data, encoder_type=encoder_type, outcome=encoder_outcome)
350336
data = self.dataset_processor.transform(data)
351337
else:
352338
self.dataset_processor = None
353339

354340
self.data = data
355341
treatment_values = data.treatment_values
356342

357-
assert (
358-
len(treatment_values) > 1
359-
), "Treatment must take at least 2 values, eg 0 and 1!"
343+
assert len(treatment_values) > 1, "Treatment must take at least 2 values, eg 0 and 1!"
360344

361345
self._control_value = treatment_values[0]
362346
self._treatment_values = list(treatment_values[1:])
@@ -378,8 +362,8 @@ def fit(
378362

379363
self.init_propensity_model(self._settings["propensity_model"])
380364

381-
self.identified_estimand: IdentifiedEstimand = (
382-
self.causal_model.identify_effect(proceed_when_unidentifiable=True)
365+
self.identified_estimand: IdentifiedEstimand = self.causal_model.identify_effect(
366+
proceed_when_unidentifiable=True
383367
)
384368

385369
if bool(self.identified_estimand.estimands["iv"]) and bool(data.instruments):
@@ -450,9 +434,7 @@ def fit(
450434
and self._settings["tuner"]["num_samples"] == -1
451435
):
452436
self._settings["tuner"]["time_budget_s"] = (
453-
2.5
454-
* len(self.estimator_list)
455-
* self._settings["component_models"]["time_budget"]
437+
2.5 * len(self.estimator_list) * self._settings["component_models"]["time_budget"]
456438
)
457439

458440
cmtb = self._settings["component_models"]["time_budget"]
@@ -485,9 +467,7 @@ def fit(
485467
# )
486468
# )
487469

488-
search_space = self.cfg.search_space(
489-
self.estimator_list, data_size=data.data.shape
490-
)
470+
search_space = self.cfg.search_space(self.estimator_list, data_size=data.data.shape)
491471
init_cfg = (
492472
self.cfg.default_configs(self.estimator_list, data_size=data.data.shape)
493473
if self._settings["try_init_configs"]
@@ -507,14 +487,12 @@ def fit(
507487
self._tune_with_config,
508488
search_space,
509489
metric=self.metric,
490+
# use_ray=self.use_ray,
510491
cost_attr="evaluation_cost",
511-
points_to_evaluate=(
512-
init_cfg if len(self.resume_cfg) == 0 else self.resume_cfg
513-
),
514-
evaluated_rewards=(
515-
[] if len(self.resume_scores) == 0 else self.resume_scores
516-
),
492+
points_to_evaluate=(init_cfg if len(self.resume_cfg) == 0 else self.resume_cfg),
493+
evaluated_rewards=([] if len(self.resume_scores) == 0 else self.resume_scores),
517494
mode=("min" if self.metric in metrics_to_minimize() else "max"),
495+
# resources_per_trial= {"cpu": 1} if self.use_ray else None,
518496
low_cost_partial_config={},
519497
**self._settings["tuner"],
520498
)
@@ -529,12 +507,8 @@ def fit(
529507
self._tune_with_config,
530508
search_space,
531509
metric=self.metric,
532-
points_to_evaluate=(
533-
init_cfg if len(self.resume_cfg) == 0 else self.resume_cfg
534-
),
535-
evaluated_rewards=(
536-
[] if len(self.resume_scores) == 0 else self.resume_scores
537-
),
510+
points_to_evaluate=(init_cfg if len(self.resume_cfg) == 0 else self.resume_cfg),
511+
evaluated_rewards=([] if len(self.resume_scores) == 0 else self.resume_scores),
538512
mode=("min" if self.metric in metrics_to_minimize() else "max"),
539513
low_cost_partial_config={},
540514
**self._settings["tuner"],
@@ -568,18 +542,25 @@ def _tune_with_config(self, config: dict) -> dict:
568542
Returns:
569543
(dict): values of metrics after optimisation
570544
"""
571-
estimates = Parallel(n_jobs=2, backend="threading")(
572-
delayed(self._estimate_effect)(config) for i in range(1)
573-
)[0]
545+
from causaltune.remote import remote_exec
546+
547+
if self.use_ray:
548+
# flaml.tune handles the interaction with Ray itself
549+
# estimates = self._estimate_effect(config)
550+
estimates = remote_exec(CausalTune._estimate_effect, (self, config), self.use_ray)
551+
else:
552+
estimates = remote_exec(CausalTune._estimate_effect, (self, config), self.use_ray)
553+
554+
# Parallel(n_jobs=2, backend="threading")(
555+
# delayed(self._estimate_effect)(config) for i in range(1)
556+
# ))[0]
574557

575558
if "exception" not in estimates:
576559
est_name = estimates["estimator_name"]
577560
current_score = estimates[self.metric]
578561

579562
estimates["optimization_score"] = current_score
580-
estimates["evaluation_cost"] = (
581-
1e8 # will be overwritten for successful runs
582-
)
563+
estimates["evaluation_cost"] = 1e8 # will be overwritten for successful runs
583564

584565
# Initialize best_score if this is the first estimator for this name
585566
if est_name not in self._best_estimators:
@@ -611,22 +592,19 @@ def _tune_with_config(self, config: dict) -> dict:
611592
"codec",
612593
"policy_risk",
613594
]:
614-
is_better = (
615-
np.isfinite(current_score) and current_score < best_score
616-
) or (np.isinf(best_score) and np.isfinite(current_score))
595+
is_better = (np.isfinite(current_score) and current_score < best_score) or (
596+
np.isinf(best_score) and np.isfinite(current_score)
597+
)
617598
else:
618-
is_better = (
619-
np.isfinite(current_score) and current_score > best_score
620-
) or (np.isinf(best_score) and np.isfinite(current_score))
599+
is_better = (np.isfinite(current_score) and current_score > best_score) or (
600+
np.isinf(best_score) and np.isfinite(current_score)
601+
)
621602

622603
# Store the estimator if we're storing all, if it's better, or if it's the first valid (non-inf) estimator
623604
if (
624605
self._settings["store_all"]
625606
or is_better
626-
or (
627-
self._best_estimators[est_name][1] is None
628-
and np.isfinite(current_score)
629-
)
607+
or (self._best_estimators[est_name][1] is None and np.isfinite(current_score))
630608
):
631609
self._best_estimators[est_name] = (
632610
current_score,
@@ -658,9 +636,7 @@ def _estimate_effect(self, config):
658636
# Do we need an object property for this, instead of a local var?
659637
self.estimator_name = config["estimator"]["estimator_name"]
660638
outcome_model = self.init_outcome_model(self._settings["outcome_model"])
661-
method_params = self.cfg.method_params(
662-
config, outcome_model, self.propensity_model
663-
)
639+
method_params = self.cfg.method_params(config, outcome_model, self.propensity_model)
664640

665641
try: #
666642
# This calls the causal model's estimate_effect method
@@ -697,9 +673,7 @@ def _estimate_effect(self, config):
697673
}
698674

699675
def _compute_metrics(self, estimator, df: pd.DataFrame) -> dict:
700-
return self.scorer.make_scores(
701-
estimator, df, self.metrics_to_report, r_scorer=None
702-
)
676+
return self.scorer.make_scores(estimator, df, self.metrics_to_report, r_scorer=None)
703677

704678
def score_dataset(self, df: pd.DataFrame, dataset_name: str):
705679
"""
@@ -714,13 +688,9 @@ def score_dataset(self, df: pd.DataFrame, dataset_name: str):
714688
"""
715689
for scr in self.scores.values():
716690
if scr["estimator"] is None:
717-
warnings.warn(
718-
"Skipping scoring for estimator %s", scr["estimator_name"]
719-
)
691+
warnings.warn("Skipping scoring for estimator %s", scr["estimator_name"])
720692
else:
721-
scr["scores"][dataset_name] = self._compute_metrics(
722-
scr["estimator"], df
723-
)
693+
scr["scores"][dataset_name] = self._compute_metrics(scr["estimator"], df)
724694

725695
@property
726696
def best_estimator(self) -> str:
@@ -793,9 +763,7 @@ def effect(self, df, *args, **kwargs):
793763
"""
794764
return self.model.effect(df, *args, **kwargs)
795765

796-
def predict(
797-
self, cd: CausalityDataset, preprocess: Optional[bool] = False, *args, **kwargs
798-
):
766+
def predict(self, cd: CausalityDataset, preprocess: Optional[bool] = False, *args, **kwargs):
799767
"""Heterogeneous Treatment Effects for data CausalityDataset
800768
801769
Args:

causaltune/remote.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
def remote_exec(function, args, use_ray=False):
    """Execute *function* with positional *args* and return its result.

    Args:
        function: the callable to run.
        args: tuple of positional arguments passed to ``function``.
        use_ray: when True, dispatch the call as a Ray remote task and
            block on the result; when False, run it via a joblib
            threading pool.

    Returns:
        Whatever ``function(*args)`` returns.
    """
    if not use_ray:
        from joblib import Parallel, delayed

        # A single task pushed through a small threading pool; unwrap the
        # one-element result list.
        runner = Parallel(n_jobs=2, backend="threading")
        results = runner(delayed(function)(*args) for _ in range(1))
        return results[0]

    import ray

    # Wrap the callable as a Ray task and block until its result is ready.
    remote_fn = ray.remote(function)
    return ray.get(remote_fn.remote(*args))

0 commit comments

Comments
 (0)