17
17
18
18
from econml .inference import BootstrapInference
19
19
20
- from joblib import Parallel , delayed
21
-
22
20
from causaltune .search .params import SimpleParamService
23
21
from causaltune .score .scoring import Scorer , metrics_to_minimize
24
22
from causaltune .utils import treatment_is_multivalue
34
32
from causaltune .dataset_processor import CausalityDatasetProcessor
35
33
from causaltune .models .passthrough import feature_filter
36
34
37
- # tune.run = run
38
-
39
35
40
36
# Patched from sklearn.linear_model._base to adjust rtol and atol values
41
- def _check_precomputed_gram_matrix (
42
- X , precompute , X_offset , X_scale , rtol = 1e-4 , atol = 1e-2
43
- ):
37
+ def _check_precomputed_gram_matrix (X , precompute , X_offset , X_scale , rtol = 1e-4 , atol = 1e-2 ):
44
38
n_features = X .shape [1 ]
45
39
f1 = n_features // 2
46
40
f2 = min (f1 + 1 , n_features - 1 )
@@ -177,24 +171,17 @@ def __init__(
177
171
self ._settings ["tuner" ]["time_budget_s" ] = time_budget
178
172
self ._settings ["tuner" ]["num_samples" ] = num_samples
179
173
self ._settings ["tuner" ]["verbose" ] = verbose
180
- self ._settings ["tuner" ][
181
- "use_ray"
182
- ] = use_ray # requires ray to be installed via pip install flaml[ray]
183
174
self ._settings ["tuner" ]["resources_per_trial" ] = (
184
175
resources_per_trial if resources_per_trial is not None else {"cpu" : 0.5 }
185
176
)
186
177
self ._settings ["try_init_configs" ] = try_init_configs
187
- self ._settings ["include_experimental_estimators" ] = (
188
- include_experimental_estimators
189
- )
178
+ self ._settings ["include_experimental_estimators" ] = include_experimental_estimators
190
179
191
180
# params for FLAML on component models:
192
181
self ._settings ["component_models" ] = {}
193
182
self ._settings ["component_models" ]["task" ] = components_task
194
183
self ._settings ["component_models" ]["verbose" ] = components_verbose
195
- self ._settings ["component_models" ][
196
- "pred_time_limit"
197
- ] = components_pred_time_limit
184
+ self ._settings ["component_models" ]["pred_time_limit" ] = components_pred_time_limit
198
185
self ._settings ["component_models" ]["n_jobs" ] = components_njobs
199
186
self ._settings ["component_models" ]["time_budget" ] = components_time_budget
200
187
self ._settings ["component_models" ]["eval_method" ] = "holdout"
@@ -221,6 +208,7 @@ def __init__(
221
208
self .causal_model = None
222
209
self .identified_estimand = None
223
210
self .problem = None
211
+ self .use_ray = use_ray
224
212
# properties that are used to resume fits (warm start)
225
213
self .resume_scores = []
226
214
self .resume_cfg = []
@@ -239,9 +227,7 @@ def init_propensity_model(self, propensity_model: str):
239
227
self .propensity_model = AutoML (
240
228
** {** self ._settings ["component_models" ], "task" : "classification" }
241
229
)
242
- elif hasattr (propensity_model , "fit" ) and hasattr (
243
- propensity_model , "predict_proba"
244
- ):
230
+ elif hasattr (propensity_model , "fit" ) and hasattr (propensity_model , "predict_proba" ):
245
231
self .propensity_model = propensity_model
246
232
else :
247
233
raise ValueError (
@@ -266,9 +252,7 @@ def init_outcome_model(self, outcome_model):
266
252
# The current default behavior
267
253
return self .auto_outcome_model ()
268
254
else :
269
- raise ValueError (
270
- 'outcome_model valid values are None, "auto", or an estimator object'
271
- )
255
+ raise ValueError ('outcome_model valid values are None, "auto", or an estimator object' )
272
256
273
257
def auto_outcome_model (self ):
274
258
data = self .data
@@ -303,6 +287,7 @@ def fit(
303
287
preprocess : bool = False ,
304
288
encoder_type : Optional [str ] = None ,
305
289
encoder_outcome : Optional [str ] = None ,
290
+ use_ray : Optional [bool ] = None ,
306
291
):
307
292
"""Performs AutoML on list of causal inference estimators
308
293
- If estimator has a search space specified in its parameters, HPO is performed on the whole model.
@@ -326,6 +311,9 @@ def fit(
326
311
Returns:
327
312
None
328
313
"""
314
+ if use_ray is not None :
315
+ self .use_ray = use_ray
316
+
329
317
if outcome is None and isinstance (data , CausalityDataset ):
330
318
outcome = data .outcomes [0 ]
331
319
@@ -344,19 +332,15 @@ def fit(
344
332
if preprocess :
345
333
data = copy .deepcopy (data )
346
334
self .dataset_processor = CausalityDatasetProcessor ()
347
- self .dataset_processor .fit (
348
- data , encoder_type = encoder_type , outcome = encoder_outcome
349
- )
335
+ self .dataset_processor .fit (data , encoder_type = encoder_type , outcome = encoder_outcome )
350
336
data = self .dataset_processor .transform (data )
351
337
else :
352
338
self .dataset_processor = None
353
339
354
340
self .data = data
355
341
treatment_values = data .treatment_values
356
342
357
- assert (
358
- len (treatment_values ) > 1
359
- ), "Treatment must take at least 2 values, eg 0 and 1!"
343
+ assert len (treatment_values ) > 1 , "Treatment must take at least 2 values, eg 0 and 1!"
360
344
361
345
self ._control_value = treatment_values [0 ]
362
346
self ._treatment_values = list (treatment_values [1 :])
@@ -378,8 +362,8 @@ def fit(
378
362
379
363
self .init_propensity_model (self ._settings ["propensity_model" ])
380
364
381
- self .identified_estimand : IdentifiedEstimand = (
382
- self . causal_model . identify_effect ( proceed_when_unidentifiable = True )
365
+ self .identified_estimand : IdentifiedEstimand = self . causal_model . identify_effect (
366
+ proceed_when_unidentifiable = True
383
367
)
384
368
385
369
if bool (self .identified_estimand .estimands ["iv" ]) and bool (data .instruments ):
@@ -450,9 +434,7 @@ def fit(
450
434
and self ._settings ["tuner" ]["num_samples" ] == - 1
451
435
):
452
436
self ._settings ["tuner" ]["time_budget_s" ] = (
453
- 2.5
454
- * len (self .estimator_list )
455
- * self ._settings ["component_models" ]["time_budget" ]
437
+ 2.5 * len (self .estimator_list ) * self ._settings ["component_models" ]["time_budget" ]
456
438
)
457
439
458
440
cmtb = self ._settings ["component_models" ]["time_budget" ]
@@ -485,9 +467,7 @@ def fit(
485
467
# )
486
468
# )
487
469
488
- search_space = self .cfg .search_space (
489
- self .estimator_list , data_size = data .data .shape
490
- )
470
+ search_space = self .cfg .search_space (self .estimator_list , data_size = data .data .shape )
491
471
init_cfg = (
492
472
self .cfg .default_configs (self .estimator_list , data_size = data .data .shape )
493
473
if self ._settings ["try_init_configs" ]
@@ -507,14 +487,12 @@ def fit(
507
487
self ._tune_with_config ,
508
488
search_space ,
509
489
metric = self .metric ,
490
+ # use_ray=self.use_ray,
510
491
cost_attr = "evaluation_cost" ,
511
- points_to_evaluate = (
512
- init_cfg if len (self .resume_cfg ) == 0 else self .resume_cfg
513
- ),
514
- evaluated_rewards = (
515
- [] if len (self .resume_scores ) == 0 else self .resume_scores
516
- ),
492
+ points_to_evaluate = (init_cfg if len (self .resume_cfg ) == 0 else self .resume_cfg ),
493
+ evaluated_rewards = ([] if len (self .resume_scores ) == 0 else self .resume_scores ),
517
494
mode = ("min" if self .metric in metrics_to_minimize () else "max" ),
495
+ # resources_per_trial= {"cpu": 1} if self.use_ray else None,
518
496
low_cost_partial_config = {},
519
497
** self ._settings ["tuner" ],
520
498
)
@@ -529,12 +507,8 @@ def fit(
529
507
self ._tune_with_config ,
530
508
search_space ,
531
509
metric = self .metric ,
532
- points_to_evaluate = (
533
- init_cfg if len (self .resume_cfg ) == 0 else self .resume_cfg
534
- ),
535
- evaluated_rewards = (
536
- [] if len (self .resume_scores ) == 0 else self .resume_scores
537
- ),
510
+ points_to_evaluate = (init_cfg if len (self .resume_cfg ) == 0 else self .resume_cfg ),
511
+ evaluated_rewards = ([] if len (self .resume_scores ) == 0 else self .resume_scores ),
538
512
mode = ("min" if self .metric in metrics_to_minimize () else "max" ),
539
513
low_cost_partial_config = {},
540
514
** self ._settings ["tuner" ],
@@ -568,18 +542,25 @@ def _tune_with_config(self, config: dict) -> dict:
568
542
Returns:
569
543
(dict): values of metrics after optimisation
570
544
"""
571
- estimates = Parallel (n_jobs = 2 , backend = "threading" )(
572
- delayed (self ._estimate_effect )(config ) for i in range (1 )
573
- )[0 ]
545
+ from causaltune .remote import remote_exec
546
+
547
+ if self .use_ray :
548
+ # flaml.tune handles the interaction with Ray itself
549
+ # estimates = self._estimate_effect(config)
550
+ estimates = remote_exec (CausalTune ._estimate_effect , (self , config ), self .use_ray )
551
+ else :
552
+ estimates = remote_exec (CausalTune ._estimate_effect , (self , config ), self .use_ray )
553
+
554
+ # Parallel(n_jobs=2, backend="threading")(
555
+ # delayed(self._estimate_effect)(config) for i in range(1)
556
+ # ))[0]
574
557
575
558
if "exception" not in estimates :
576
559
est_name = estimates ["estimator_name" ]
577
560
current_score = estimates [self .metric ]
578
561
579
562
estimates ["optimization_score" ] = current_score
580
- estimates ["evaluation_cost" ] = (
581
- 1e8 # will be overwritten for successful runs
582
- )
563
+ estimates ["evaluation_cost" ] = 1e8 # will be overwritten for successful runs
583
564
584
565
# Initialize best_score if this is the first estimator for this name
585
566
if est_name not in self ._best_estimators :
@@ -611,22 +592,19 @@ def _tune_with_config(self, config: dict) -> dict:
611
592
"codec" ,
612
593
"policy_risk" ,
613
594
]:
614
- is_better = (
615
- np .isfinite ( current_score ) and current_score < best_score
616
- ) or ( np . isinf ( best_score ) and np . isfinite ( current_score ))
595
+ is_better = (np . isfinite ( current_score ) and current_score < best_score ) or (
596
+ np .isinf ( best_score ) and np . isfinite ( current_score )
597
+ )
617
598
else :
618
- is_better = (
619
- np .isfinite ( current_score ) and current_score > best_score
620
- ) or ( np . isinf ( best_score ) and np . isfinite ( current_score ))
599
+ is_better = (np . isfinite ( current_score ) and current_score > best_score ) or (
600
+ np .isinf ( best_score ) and np . isfinite ( current_score )
601
+ )
621
602
622
603
# Store the estimator if we're storing all, if it's better, or if it's the first valid (non-inf) estimator
623
604
if (
624
605
self ._settings ["store_all" ]
625
606
or is_better
626
- or (
627
- self ._best_estimators [est_name ][1 ] is None
628
- and np .isfinite (current_score )
629
- )
607
+ or (self ._best_estimators [est_name ][1 ] is None and np .isfinite (current_score ))
630
608
):
631
609
self ._best_estimators [est_name ] = (
632
610
current_score ,
@@ -658,9 +636,7 @@ def _estimate_effect(self, config):
658
636
# Do we need an object property for this, instead of a local var?
659
637
self .estimator_name = config ["estimator" ]["estimator_name" ]
660
638
outcome_model = self .init_outcome_model (self ._settings ["outcome_model" ])
661
- method_params = self .cfg .method_params (
662
- config , outcome_model , self .propensity_model
663
- )
639
+ method_params = self .cfg .method_params (config , outcome_model , self .propensity_model )
664
640
665
641
try : #
666
642
# This calls the causal model's estimate_effect method
@@ -697,9 +673,7 @@ def _estimate_effect(self, config):
697
673
}
698
674
699
675
def _compute_metrics (self , estimator , df : pd .DataFrame ) -> dict :
700
- return self .scorer .make_scores (
701
- estimator , df , self .metrics_to_report , r_scorer = None
702
- )
676
+ return self .scorer .make_scores (estimator , df , self .metrics_to_report , r_scorer = None )
703
677
704
678
def score_dataset (self , df : pd .DataFrame , dataset_name : str ):
705
679
"""
@@ -714,13 +688,9 @@ def score_dataset(self, df: pd.DataFrame, dataset_name: str):
714
688
"""
715
689
for scr in self .scores .values ():
716
690
if scr ["estimator" ] is None :
717
- warnings .warn (
718
- "Skipping scoring for estimator %s" , scr ["estimator_name" ]
719
- )
691
+ warnings .warn ("Skipping scoring for estimator %s" , scr ["estimator_name" ])
720
692
else :
721
- scr ["scores" ][dataset_name ] = self ._compute_metrics (
722
- scr ["estimator" ], df
723
- )
693
+ scr ["scores" ][dataset_name ] = self ._compute_metrics (scr ["estimator" ], df )
724
694
725
695
@property
726
696
def best_estimator (self ) -> str :
@@ -793,9 +763,7 @@ def effect(self, df, *args, **kwargs):
793
763
"""
794
764
return self .model .effect (df , * args , ** kwargs )
795
765
796
- def predict (
797
- self , cd : CausalityDataset , preprocess : Optional [bool ] = False , * args , ** kwargs
798
- ):
766
+ def predict (self , cd : CausalityDataset , preprocess : Optional [bool ] = False , * args , ** kwargs ):
799
767
"""Heterogeneous Treatment Effects for data CausalityDataset
800
768
801
769
Args:
0 commit comments