clinical_trial_functions.py
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import regex as re
from copy import deepcopy
from sklearn.ensemble import GradientBoostingClassifier, RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
import seaborn as sns
import sys
import pipeline_utilities as pu
import clinical_trial_classes as ctc
import data_loading_functions as dlf
import matplotlib.pyplot as plt
import random
from lifelines.plotting import add_at_risk_counts
import itertools
from sklearn.experimental import enable_iterative_imputer
from sklearn.impute import IterativeImputer, SimpleImputer
from sklearn.neighbors import KNeighborsClassifier
from lifelines import KaplanMeierFitter
from lifelines.statistics import logrank_test
import miceforest as mf
def pairwise_log_rank(clinical_trials, km_analyses, med_list=None):
"""Perform pairwise log-rank tests between all medications (and cohorts) in clinical_trials"""
    if med_list is None:
med_list = list(clinical_trials.keys())
#construct pairwise comparisons
pairs = list(itertools.combinations([med for med in list(clinical_trials.keys()) if med in med_list], 2))
    #for each pair, perform the log-rank test and print the raw and Bonferroni-adjusted p-values
all_results = {}
for pair in pairs:
all_results[pair] = logrank_test(
durations_A=km_analyses[pair[0]].durations,
durations_B=km_analyses[pair[1]].durations,
event_observed_A=km_analyses[pair[0]].event_observed,
event_observed_B=km_analyses[pair[1]].event_observed,
weights_A=km_analyses[pair[0]].weights,
weights_B=km_analyses[pair[1]].weights,
)
print(f"Comparison between {pair}: p = {all_results[pair].p_value}, Adj. p = {all_results[pair].p_value * len(pairs)}")
return all_results
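#Hedged usage sketch (the helper below and its argument names are assumptions for illustration, not part
#of the original pipeline): once the Kaplan-Meier models are fit, the returned dictionary of test results
#can be filtered on the same Bonferroni-adjusted p-values that pairwise_log_rank prints.
def _example_filter_significant_pairs(clinical_trials, km_analyses, alpha=0.05):
    """Illustrative sketch only: return the medication pairs whose Bonferroni-adjusted p-value is below alpha"""
    results = pairwise_log_rank(clinical_trials, km_analyses)
    n_tests = len(results)
    return {pair: res.p_value for pair, res in results.items() if res.p_value * n_tests < alpha}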
def enforce_positivity_on_subjects(trial_confounders, clinical_trials, trial_tables, enforce_IPTW, enforce_IPCW):
"""Make sure all subjects have positivity in propensity scores by removing outliers"""
for med in trial_confounders:
if enforce_IPTW:
#get propensity scores of in and out of trial subjects
in_trial_scores = trial_confounders[med].loc[trial_confounders[med].in_trial == 1].propensity_IPTW
out_trial_scores = trial_confounders[med].loc[trial_confounders[med].in_trial == 0].propensity_IPTW
#calculate the range of the propensity scores
in_trial_range = [np.min(in_trial_scores), np.max(in_trial_scores)]
out_trial_range = [np.min(out_trial_scores), np.max(out_trial_scores)]
#enforce a range between the maximum min, and minimum max
enforced_range_IPTW = [np.max([in_trial_range[0], out_trial_range[0]]), np.min([in_trial_range[1], out_trial_range[1]])]
trial_confounders[med] = trial_confounders[med].loc[(trial_confounders[med].propensity_IPTW >= enforced_range_IPTW[0]) & (trial_confounders[med].propensity_IPTW <= enforced_range_IPTW[1])]
clinical_trials[med].cohort = [subject for subject in clinical_trials[med].cohort if (subject.propensity_score['IPTW'] >= enforced_range_IPTW[0] and subject.propensity_score['IPTW'] <= enforced_range_IPTW[1])]
trial_tables[med] = trial_tables[med].loc[trial_tables[med].MRN.isin([subject.patient.pat_id for subject in clinical_trials[med].cohort])]
if enforce_IPCW:
#get propensity scores of censored and uncensored in-trial subjects
censored_scores = trial_confounders[med].loc[(trial_confounders[med].in_trial == 1) & (trial_confounders[med].is_censored == 1)].propensity_IPCW
uncensored_scores = trial_confounders[med].loc[(trial_confounders[med].in_trial == 1) & (trial_confounders[med].is_censored == 0)].propensity_IPCW
#calculate the range of the propensity scores
censored_range = [np.min(censored_scores), np.max(censored_scores)]
uncensored_range = [np.min(uncensored_scores), np.max(uncensored_scores)]
#enforce a range between the maximum min, and minimum max
enforced_range_IPCW = [np.max([censored_range[0], uncensored_range[0]]), np.min([censored_range[1], uncensored_range[1]])]
trial_confounders[med].loc[trial_confounders[med].in_trial == 1] = trial_confounders[med].loc[(trial_confounders[med].in_trial == 1) & (trial_confounders[med].propensity_IPCW >= enforced_range_IPCW[0]) & (trial_confounders[med].propensity_IPCW <= enforced_range_IPCW[1])]
trial_confounders[med] = trial_confounders[med].dropna(how='all')
clinical_trials[med].cohort = [subject for subject in clinical_trials[med].cohort if (subject.propensity_score['IPCW'] >= enforced_range_IPCW[0] and subject.propensity_score['IPCW'] <= enforced_range_IPCW[1])]
trial_tables[med] = trial_tables[med].loc[trial_tables[med].MRN.isin([subject.patient.pat_id for subject in clinical_trials[med].cohort])]
return trial_confounders, clinical_trials, trial_tables
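#Illustrative sketch (a toy helper, not part of the original pipeline): the positivity rule above keeps
#only subjects whose propensity scores fall inside the overlap of the two groups' score ranges, i.e.
#between the larger of the two minimums and the smaller of the two maximums.
def _example_common_support_range(in_trial_scores, out_trial_scores):
    """Return the [max-of-minimums, min-of-maximums] interval used to trim propensity scores"""
    lower = np.max([np.min(in_trial_scores), np.min(out_trial_scores)])
    upper = np.min([np.max(in_trial_scores), np.max(out_trial_scores)])
    return [lower, upper]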
def calculate_propensity_scores(clinical_trials, use_IPTW, use_IPCW, trial_tables, max_duration_days):
"""Calculate propensity scores using covariates. IPTW uses Gradient Boosted Models, IPCW uses Logistic Regression"""
#for each trial, gather the trial cohort, and the non-trial cohort (all other patients) confounders into a table
gb_models = {}
all_trial_confounders = {}
for trial_med in clinical_trials:
#get the confounders of patients in the trial
trial_confounders = []
for subject in clinical_trials[trial_med].cohort:
#add their trial information
trial_confounders.append(deepcopy(subject.confounders))
trial_confounders[-1]['in_trial'] = 1
trial_confounders[-1]['MRN'] = subject.patient.pat_id
#add their censoring information
#patient is censored if in trial_tables, they do not have the event before max_duration_days and they don't have any post_trial_visits
patient_trial_table = trial_tables[trial_med].loc[trial_tables[trial_med].MRN == subject.patient.pat_id]
if len(patient_trial_table) > 1:
                raise ValueError(f"Multiple trial table rows found for patient {subject.patient.pat_id} in the {trial_med} trial")
trial_confounders[-1]['is_censored'] = (bool(patient_trial_table.event.iloc[0]) == False) and (patient_trial_table.duration.iloc[0] < max_duration_days) # and (len(subject.post_trial_visits) < 1)
#get the non-trial confounders
nontrial_confounders = []
for nontrial_med in clinical_trials:
if nontrial_med == trial_med:
continue
for subject in clinical_trials[nontrial_med].cohort:
nontrial_confounders.append(deepcopy(subject.confounders))
nontrial_confounders[-1]['in_trial'] = 0
nontrial_confounders[-1]['MRN'] = subject.patient.pat_id
#patient is censored if in trial_tables, they do not have the event before max_duration_days and they don't have any post_trial_visits
patient_nontrial_table = trial_tables[nontrial_med].loc[trial_tables[nontrial_med].MRN == subject.patient.pat_id]
if len(patient_nontrial_table) > 1:
                    raise ValueError(f"Multiple trial table rows found for patient {subject.patient.pat_id} in the {nontrial_med} trial")
nontrial_confounders[-1]['is_censored'] = (bool(patient_nontrial_table.event.iloc[0]) == False) and (patient_nontrial_table.duration.iloc[0] < max_duration_days) # and (len(subject.post_trial_visits) < 1)
# nontrial_confounders[-1]['is_censored'] = np.nan
#convert to DF
trial_confounders = pd.DataFrame(trial_confounders)
nontrial_confounders = pd.DataFrame(nontrial_confounders)
#concatenate and then drop missing values
all_confounders = pd.concat([trial_confounders, nontrial_confounders], ignore_index=True)
#create datasets for propensity scores for IPTW
x_iptw = all_confounders.drop(['in_trial', 'MRN', 'is_censored'], axis=1)
y_iptw = all_confounders.in_trial
#create datasets for propensity scores for IPCW
x_ipcw = trial_confounders.drop(['in_trial', 'MRN', 'is_censored'], axis=1)
y_ipcw = trial_confounders.is_censored
gb_models[trial_med] = {}
#calculate propensity scores for IPTW
if use_IPTW:
gbm_IPTW = GradientBoostingClassifier().fit(x_iptw, y_iptw)
all_confounders['propensity_IPTW'] = gbm_IPTW.predict_proba(x_iptw)[:,1]
#collect the gbm_IPTW
gb_models[trial_med]['IPTW'] = gbm_IPTW
        #calculate propensity scores for IPCW
        if use_IPCW:
            #the censoring model is fit on in-trial subjects only, but scores are predicted for every
            #row of all_confounders (x_iptw) so the new column lines up; only the in-trial scores are
            #used downstream
            gbm_IPCW = LogisticRegression().fit(x_ipcw, y_ipcw)
            all_confounders['propensity_IPCW'] = gbm_IPCW.predict_proba(x_iptw)[:,1]
#collect the gbm_IPCW
gb_models[trial_med]['IPCW'] = gbm_IPCW
#calculate final propensity scores
if use_IPTW and use_IPCW:
all_confounders['propensity'] = all_confounders.apply(lambda x: x.propensity_IPTW * x.propensity_IPCW, axis=1)
elif use_IPTW:
all_confounders['propensity'] = all_confounders.propensity_IPTW
elif use_IPCW:
all_confounders['propensity'] = all_confounders.propensity_IPCW
#add propensity scores to subjects
for i in range(len(clinical_trials[trial_med].cohort)):
#double check the MRNs line up
if clinical_trials[trial_med].cohort[i].patient.pat_id != all_confounders.loc[i].MRN:
                raise ValueError("MRN mismatch between the trial cohort and the confounder table")
clinical_trials[trial_med].cohort[i].propensity_score['overall'] = all_confounders.loc[i].propensity
if use_IPTW:
clinical_trials[trial_med].cohort[i].propensity_score['IPTW'] = all_confounders.loc[i].propensity_IPTW
if use_IPCW:
clinical_trials[trial_med].cohort[i].propensity_score['IPCW'] = all_confounders.loc[i].propensity_IPCW
all_trial_confounders[trial_med] = all_confounders
return gb_models, all_trial_confounders
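#Hedged usage sketch (variable names and the 730-day follow-up cap are assumptions for illustration):
#calculate_propensity_scores returns, per medication, the fitted model(s) and a confounder table with the
#propensity columns appended; summarizing the in-trial score distribution is a quick check before weighting.
def _example_inspect_propensities(clinical_trials, trial_tables, max_duration_days=730):
    """Illustrative sketch only: fit IPTW and IPCW models and summarize the in-trial IPTW scores"""
    gb_models, trial_confounders = calculate_propensity_scores(clinical_trials, True, True, trial_tables, max_duration_days)
    return {med: confounders.loc[confounders.in_trial == 1].propensity_IPTW.describe()
            for med, confounders in trial_confounders.items()}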
def get_confounder_importances(gb_models, clinical_trials, propensity_set=None):
"""Get the weights of each covariate in the gradient boosted model to see their impact on treatment propensity"""
proto_importance = []
for med in gb_models:
        if propensity_set is None:
            #this branch assumes gb_models maps each medication directly to a single fitted model
            importances = gb_models[med].feature_importances_
features = list(clinical_trials[med].cohort[0].confounders.keys())
proto_importance.append({features[i]:importances[i] for i in range(len(importances))})
else:
if isinstance(gb_models[med][propensity_set], LogisticRegression):
importances = gb_models[med][propensity_set].coef_.ravel()
else:
importances = gb_models[med][propensity_set].feature_importances_
features = list(clinical_trials[med].cohort[0].confounders.keys())
proto_importance.append({features[i]:importances[i] for i in range(len(importances))})
return pd.DataFrame(proto_importance)
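#Hedged usage sketch: get_confounder_importances returns one row per medication and one column per
#confounder, so averaging over medications gives a rough overall ranking (the averaging step is an
#illustration choice here, not something this module does).
def _example_rank_confounders(gb_models, clinical_trials):
    """Illustrative sketch only: rank confounders by their mean IPTW model importance across medications"""
    importances = get_confounder_importances(gb_models, clinical_trials, propensity_set='IPTW')
    return importances.mean(axis=0).sort_values(ascending=False)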
def plot_iptw_positivity(trial_confounders, main_save_dir=None):
"""Plots the propensity score distributions for IPTW weighting for each ASM in the trial"""
for med in trial_confounders:
fig, axs = plt.subplots(1, 2, figsize=(10,3))
in_trial_scores = trial_confounders[med].loc[trial_confounders[med].in_trial == 1]['propensity_IPTW']
out_trial_scores = trial_confounders[med].loc[trial_confounders[med].in_trial == 0]['propensity_IPTW']
axs[0].hist(in_trial_scores, bins=np.linspace(0, 1, 26), alpha = 0.33, density=True)
axs[0].hist(out_trial_scores, bins=np.linspace(0, 1, 26), alpha = 0.33, density=True)
axs[0].legend(["In trial", "Out of trial"])
axs[0].set_xlabel("Propensity Score")
axs[1].boxplot([in_trial_scores, out_trial_scores])
axs[1].set_xticklabels(['In trial', 'Out of Trial'])
axs[1].set_ylabel("Propensity Score")
fig.suptitle(f"{med} IPTW")
if main_save_dir is not None:
plt.savefig(f"{main_save_dir}/{med}_IPTW_Positivity.png", dpi=600, bbox_inches='tight')
plt.savefig(f"{main_save_dir}/{med}_IPTW_Positivity.pdf", dpi=600, bbox_inches='tight')
plt.show()
def plot_ipcw_positivity(trial_confounders, main_save_dir=None):
"""Plots the propensity score distributions for PICW weighting for each ASM in the trial"""
for med in trial_confounders:
fig, axs = plt.subplots(1, 2, figsize=(10,3))
is_censored_scores = trial_confounders[med].loc[(trial_confounders[med].in_trial == 1) & (trial_confounders[med].is_censored == 1)]['propensity_IPCW']
is_uncensored_scores = trial_confounders[med].loc[(trial_confounders[med].in_trial == 1) & (trial_confounders[med].is_censored == 0)]['propensity_IPCW']
axs[0].hist(is_censored_scores, bins=np.linspace(0, 1, 26), alpha = 0.33, density=True)
axs[0].hist(is_uncensored_scores, bins=np.linspace(0, 1, 26), alpha = 0.33, density=True)
axs[0].legend(["Is Censored", "Is not Censored"])
axs[0].set_xlabel("Propensity Score")
axs[1].boxplot([is_censored_scores, is_uncensored_scores])
axs[1].set_xticklabels(["Is Censored", "Is not Censored"])
axs[1].set_ylabel("Propensity Score")
fig.suptitle(f"{med} IPCW")
if main_save_dir is not None:
plt.savefig(f"{main_save_dir}/{med}_IPCW_Positivity.png", dpi=600, bbox_inches='tight')
plt.savefig(f"{main_save_dir}/{med}_IPCW_Positivity.pdf", dpi=600, bbox_inches='tight')
plt.show()
def smd(mean_1, mean_2, var_1, var_2):
"""Formula for standard mean difference"""
return np.abs(mean_1 - mean_2) / np.sqrt((var_1 + var_2)/2)
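#Worked example (toy numbers, illustrative only): with group means 0.50 and 0.30 and variances 0.25 and
#0.21, the standardized mean difference is |0.50 - 0.30| / sqrt((0.25 + 0.21) / 2), roughly 0.42, which
#would sit above the conventional 0.2 imbalance threshold drawn in the balance plots below.
def _example_smd_value():
    """Sanity-check the SMD formula on toy numbers; returns approximately 0.417"""
    return smd(0.50, 0.30, 0.25, 0.21)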
def smd_series(mean_1, mean_2, var_1, var_2):
"""Calculates standard mean difference, but for a pandas series"""
return pd.Series({confound:smd(mean_1[confound], mean_2[confound], var_1[confound], var_2[confound]) for confound in var_1.index})
def weighted_var(weights, values, weighted_mean):
    """Calculates weighted variance as the weighted average of squared deviations from the weighted mean"""
    numerator = np.sum(weights*(values-weighted_mean)**2)
    denominator = np.sum(weights)
    return numerator/denominator
def weighted_var_series(weighted_confounders):
"""Calculates weighted variance for a pandas series"""
weighted_mean = weighted_confounders.sum()/weighted_confounders.weight.sum()
return pd.Series({confound:weighted_var(weighted_confounders.weight, weighted_confounders[confound], weighted_mean[confound]) for confound in weighted_confounders.columns})
def plot_pre_post_weighting_bias_balance_iptw(trial_confounders, void=True, main_save_dir=None):
"""Plots the bias balance before and after IPTW weighting"""
unweighted_balances = {}
weighted_balances = {}
#for each med, calculate the weighted and unweighted balances
for med in trial_confounders:
        #get the weights: inverse propensity for in-trial subjects, inverse of (1 - propensity) for
        #out-of-trial subjects; the list concatenation below relies on in-trial rows preceding
        #out-of-trial rows in trial_confounders[med]
        in_trial_weights = truncate_IP_weights(1/trial_confounders[med].loc[trial_confounders[med].in_trial==1]['propensity_IPTW'])
        out_trial_weights = truncate_IP_weights(1/(1 - trial_confounders[med].loc[trial_confounders[med].in_trial==0]['propensity_IPTW']))
        weights = in_trial_weights + out_trial_weights
#get the unweighted and weighted values of the covariates
drop_columns = ['MRN', 'propensity', 'is_censored']
if 'propensity_IPTW' in trial_confounders[med].columns:
drop_columns.append('propensity_IPTW')
if 'propensity_IPCW' in trial_confounders[med].columns:
drop_columns.append('propensity_IPCW')
unweighted_confounders = trial_confounders[med].drop(drop_columns, axis=1)
weighted_confounders = deepcopy(unweighted_confounders).mul(weights, axis=0)
weighted_confounders['weight'] = weights
#calculate means
unweighted_in_trial_mean = unweighted_confounders.loc[unweighted_confounders.in_trial > 0].mean()
unweighted_out_trial_mean = unweighted_confounders.loc[unweighted_confounders.in_trial == 0].mean()
weighted_in_trial_mean = weighted_confounders.loc[weighted_confounders.in_trial > 0].sum()/weighted_confounders.loc[weighted_confounders.in_trial > 0].weight.sum()
weighted_out_trial_mean = weighted_confounders.loc[weighted_confounders.in_trial == 0].sum()/weighted_confounders.loc[weighted_confounders.in_trial == 0].weight.sum()
#calculate variances of pooled sample
unweighted_in_trial_var = unweighted_confounders.loc[unweighted_confounders.in_trial > 0].var()
unweighted_out_trial_var = unweighted_confounders.loc[unweighted_confounders.in_trial == 0].var()
weighted_in_trial_var = weighted_var_series(weighted_confounders.loc[weighted_confounders.in_trial > 0])
weighted_out_trial_var = weighted_var_series(weighted_confounders.loc[weighted_confounders.in_trial == 0])
#calculate standardized mean differences
unweighted_balances[med] = smd_series(unweighted_in_trial_mean, unweighted_out_trial_mean, unweighted_in_trial_var, unweighted_out_trial_var).drop(['in_trial'])
weighted_balances[med] = smd_series(weighted_in_trial_mean, weighted_out_trial_mean, weighted_in_trial_var, weighted_out_trial_var).drop(['in_trial', 'weight'])
#plot balances
for med in unweighted_balances:
plt.figure()
plt.ylabel("Standardized Bias")
plt.scatter(range(len(unweighted_balances[med])), unweighted_balances[med], alpha=0.5)
plt.scatter(range(len(weighted_balances[med])), weighted_balances[med], alpha=0.5)
plt.legend(["Unweighted", "Weighted"])
plt.hlines(0.2, 0, len(unweighted_balances[med].index), colors='r', linestyles='dashed')
plt.hlines(0.1, 0, len(unweighted_balances[med].index), colors='g', linestyles='dashed')
plt.xticks(range(len(unweighted_balances[med])), labels=unweighted_balances[med].index, rotation=90)
plt.title(f"Bias Balance {med}, IPTW")
plt.ylim([0, 1])
if main_save_dir is not None:
plt.savefig(f"{main_save_dir}/{med}_IPTW_balance.png", dpi=600, bbox_inches='tight')
plt.savefig(f"{main_save_dir}/{med}_IPTW_balance.pdf", dpi=600, bbox_inches='tight')
plt.show()
if not void:
return unweighted_balances, weighted_balances
def plot_pre_post_weighting_bias_balance_ipcw(trial_confounders, void=True, main_save_dir=None):
"""Plots the bias balance before and after IPCW weighting"""
unweighted_balances = {}
weighted_balances = {}
#for each med, calculate the weighted and unweighted balances
for med in trial_confounders:
is_censored_weights = truncate_IP_weights(1/trial_confounders[med].loc[(trial_confounders[med].in_trial == 1) & (trial_confounders[med].is_censored == 1)]['propensity_IPCW'])
not_censored_weights = truncate_IP_weights(1/(1 - trial_confounders[med].loc[(trial_confounders[med].in_trial == 1) & (trial_confounders[med].is_censored == 0)]['propensity_IPCW']))
weights = is_censored_weights + not_censored_weights
#get the unweighted and weighted values of the covariates
drop_columns = ['MRN', 'propensity', 'in_trial']
if 'propensity_IPTW' in trial_confounders[med].columns:
drop_columns.append('propensity_IPTW')
if 'propensity_IPCW' in trial_confounders[med].columns:
drop_columns.append('propensity_IPCW')
unweighted_confounders = trial_confounders[med].loc[trial_confounders[med].in_trial == 1].drop(drop_columns, axis=1)
weighted_confounders = deepcopy(unweighted_confounders).mul(weights, axis=0)
weighted_confounders['weight'] = weights
#calculate means
unweighted_is_censored_mean = unweighted_confounders.loc[unweighted_confounders.is_censored > 0].mean()
unweighted_not_censored_mean = unweighted_confounders.loc[unweighted_confounders.is_censored == 0].mean()
weighted_is_censored_mean = weighted_confounders.loc[weighted_confounders.is_censored > 0].sum()/weighted_confounders.loc[weighted_confounders.is_censored > 0].weight.sum()
weighted_not_censored_mean = weighted_confounders.loc[weighted_confounders.is_censored == 0].sum()/weighted_confounders.loc[weighted_confounders.is_censored == 0].weight.sum()
#calculate variances of pooled sample
unweighted_is_censored_var = unweighted_confounders.loc[unweighted_confounders.is_censored > 0].var()
unweighted_not_censored_var = unweighted_confounders.loc[unweighted_confounders.is_censored == 0].var()
weighted_is_censored_var = weighted_var_series(weighted_confounders.loc[weighted_confounders.is_censored > 0])
weighted_not_censored_var = weighted_var_series(weighted_confounders.loc[weighted_confounders.is_censored == 0])
#calculate standardized mean differences
unweighted_balances[med] = smd_series(unweighted_is_censored_mean, unweighted_not_censored_mean, unweighted_is_censored_var, unweighted_not_censored_var).drop(['is_censored'])
weighted_balances[med] = smd_series(weighted_is_censored_mean, weighted_not_censored_mean, weighted_is_censored_var, weighted_not_censored_var).drop(['is_censored', 'weight'])
#plot balances
for med in unweighted_balances:
plt.figure()
plt.ylabel("Standardized Bias")
plt.scatter(range(len(unweighted_balances[med])), unweighted_balances[med], alpha=0.5)
plt.scatter(range(len(weighted_balances[med])), weighted_balances[med], alpha=0.5)
plt.legend(["Unweighted", "Weighted"])
plt.hlines(0.2, 0, len(unweighted_balances[med].index), colors='r', linestyles='dashed')
plt.hlines(0.1, 0, len(unweighted_balances[med].index), colors='g', linestyles='dashed')
plt.xticks(range(len(unweighted_balances[med])), labels=unweighted_balances[med].index, rotation=90)
plt.title(f"Bias Balance {med}, IPCW")
plt.ylim([0, 1])
if main_save_dir is not None:
plt.savefig(f"{main_save_dir}/{med}_IPCW_balance.png", dpi=600, bbox_inches='tight')
plt.savefig(f"{main_save_dir}/{med}_IPCW_balance.pdf", dpi=600, bbox_inches='tight')
plt.show()
if not void:
return unweighted_balances, weighted_balances
def truncate_IP_weights(proto_weights, lower_p=1, upper_p=99):
    """Truncates propensity score weights to lie between the lower_p and upper_p percentiles"""
    lower_bound = np.percentile(proto_weights, lower_p)
    upper_bound = np.percentile(proto_weights, upper_p)
    return [np.max([lower_bound, np.min([weight, upper_bound])]) for weight in proto_weights]
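#Illustrative sketch (toy numbers, not project data): a single near-zero propensity produces an extreme
#inverse-propensity weight, and percentile truncation clips it so that one subject cannot dominate the
#weighted Kaplan-Meier fit.
def _example_truncate_weights():
    """Show the effect of percentile truncation on a skewed inverse-propensity weight distribution"""
    propensities = np.array([0.01, 0.2, 0.4, 0.5, 0.6, 0.8, 0.95])
    raw_weights = 1 / propensities #the first weight is 100, far larger than the rest
    return truncate_IP_weights(raw_weights, lower_p=1, upper_p=99)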
def construct_survival_table_retention(clinical_trial, max_duration_days=None):
"""
We want to create a KM table, where:
the rows are patients
column 0 is how long the patient is in the curve
column 1 is if the patient had the event (stopped taking the new ASM)
Patients who do not have the event but stay until the end of their prescription are censored
the only outcome is if they stop the drug, regardless of the other drugs
Patients who have the event "die" at time column 0.
"""
#preallocate the table
trial_table = {'duration':np.full(len(clinical_trial.cohort), np.nan), 'event':np.full(len(clinical_trial.cohort), np.nan), 'MRN':np.full(len(clinical_trial.cohort), "", dtype=object)}
#for each subject
for i in range(len(clinical_trial.cohort)):
subject = clinical_trial.cohort[i]
#a patient stops taking the drug if they have visits after stopping the drug
#we do not care if they start a new drug or not
#get the time the patient was in the trial
trial_table['MRN'][i] = subject.patient.pat_id
trial_table['duration'][i] = subject.time_in_trial.days
#if there are post-trial visits, then the patient stopped taking the drug and therefore had the event
trial_table['event'][i] = (len(subject.post_trial_visits) > 0)
#censor patients who have data beyond max_duration days
        if max_duration_days is not None:
if trial_table['duration'][i] > max_duration_days:
trial_table['duration'][i] = max_duration_days
trial_table['event'][i] = False
return pd.DataFrame(trial_table)
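#Hedged usage sketch (the 365-day cutoff and the helper name are assumptions for illustration): the
#survival table built above can be summarized directly; this crude rate ignores censoring, so it is only
#a rough cross-check against the Kaplan-Meier estimate, not a replacement for it.
def _example_crude_one_year_stop_rate(trial_table):
    """Illustrative sketch only: crude proportion of subjects who stopped the drug within 365 days"""
    return ((trial_table.event > 0) & (trial_table.duration <= 365)).mean()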
def skmice_impute(confounder_df, metadata_cols):
"""MICE imputation, but with SKlearn's implementation"""
#we need to deepcopy everything first
confounder_df = deepcopy(confounder_df)
confounder_imputer = IterativeImputer(initial_strategy='most_frequent')
only_confounders = confounder_df.drop(metadata_cols, axis='columns')
only_metadata = confounder_df[metadata_cols]
imputed_confounders = confounder_imputer.fit_transform(only_confounders)
imputed_confounders = pd.DataFrame(imputed_confounders, columns=only_confounders.columns)
return pd.concat([imputed_confounders, only_metadata], axis='columns')
def mice_impute(confounder_df, metadata_cols):
"""Mice imputation, but using Mice Forest"""
kds = mf.ImputationKernel(confounder_df.drop(metadata_cols, axis='columns'), save_all_iterations=False)
kds.mice(5, verbose=False, num_threads=7)
imputed_confounders = kds.complete_data()
return pd.concat([confounder_df[metadata_cols], imputed_confounders], axis='columns')
def empirical_impute(confounder_df):
"""Replace missing values with False"""
#we need to deepcopy everything first
confounder_df = deepcopy(confounder_df)
    confounder_df['status_epilepticus'] = confounder_df['status_epilepticus'].fillna(False)
    confounder_df['febrile_history'] = confounder_df['febrile_history'].fillna(False)
    confounder_df['intellectual_disability'] = confounder_df['intellectual_disability'].fillna(False)
return confounder_df
def impute_confounders(confounder_df, metadata_cols, imputation_strategy):
"""Impute missing confounders with the specified imputation_strategy"""
if imputation_strategy is None:
pass
elif imputation_strategy.upper() == "MICE":
confounder_df = mice_impute(confounder_df, metadata_cols)
elif imputation_strategy.upper() == "SK_MICE":
confounder_df = skmice_impute(confounder_df, metadata_cols)
elif imputation_strategy.lower() == "empirical":
confounder_df = empirical_impute(confounder_df)
else:
raise ValueError("Error - unknown Imputation Method. Choose from 'Empirical', 'SK_MICE', or 'MICE'")
return confounder_df.dropna()
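#Hedged usage sketch (treating 'MRN' as the only metadata column is an assumption for illustration):
#metadata columns are excluded from imputation and re-attached afterwards, and any rows that still
#contain missing values after imputation are dropped.
def _example_impute_confounders(confounder_table):
    """Illustrative sketch only: run MICE-forest imputation while protecting the MRN column"""
    return impute_confounders(confounder_table, metadata_cols=['MRN'], imputation_strategy='MICE')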
def calculate_propensity_weights(clinical_trial):
"""Calculate weights from propensity scores, and truncate to be between 1st and 99th percentiles"""
weights = pd.DataFrame([{'MRN':subject.patient.pat_id, 'propensity':subject.propensity_score['overall']} for subject in clinical_trial.cohort])
weights['weight'] = truncate_IP_weights(1/weights.propensity)
return weights.drop(['propensity'], axis=1)
def retention_survival(all_agg_pats, med_list, minimum_prior_asms, maximum_prior_asms, min_cohort_size,
medication_start_regex, medication_end_regex, brand_to_generic, max_duration_days=None, shadow_propensities=False,
use_IPTW=False, use_IPCW=False, confounder_imputation='MICE', confounder_table=None, metadata_cols=None, enforce_positivity=True):
"""Conduct the retention clinical trial with the specified parameters"""
#initialize clinical trial objects and impute missing confounders
clinical_trials = {}
for med in med_list:
new_trial = ctc.ClinicalTrial(med,
outcome_measure='retention',
minimum_prior_asms=minimum_prior_asms,
maximum_prior_asms=maximum_prior_asms)
enroll_sub_stat = new_trial.enroll_subjects(all_agg_pats, medication_start_regex, medication_end_regex, brand_to_generic, debug=False)
if use_IPTW or use_IPCW or shadow_propensities:
imputed_confounder_table = impute_confounders(confounder_table, metadata_cols, confounder_imputation)
new_trial.cohort = [subject for subject in new_trial.cohort if subject.determine_confounders(imputed_confounder_table, metadata_cols) == 1]
if len(new_trial.cohort) >= min_cohort_size:
clinical_trials[med] = new_trial
#create survival tables
trial_tables = {}
for med in clinical_trials:
trial_tables[med] = construct_survival_table_retention(clinical_trials[med], max_duration_days)
#calculate propensity scores
if use_IPTW or use_IPCW:
gb_models, trial_confounders = calculate_propensity_scores(clinical_trials, use_IPTW, use_IPCW, trial_tables, max_duration_days)
if enforce_positivity:
trial_confounders, clinical_trials, trial_tables = enforce_positivity_on_subjects(trial_confounders, clinical_trials, trial_tables, enforce_IPTW=use_IPTW, enforce_IPCW=use_IPCW)
elif shadow_propensities:
gb_models, trial_confounders = calculate_propensity_scores(clinical_trials, True, True, trial_tables, max_duration_days)
if enforce_positivity:
trial_confounders, clinical_trials, trial_tables = enforce_positivity_on_subjects(trial_confounders, clinical_trials, trial_tables, enforce_IPTW=True, enforce_IPCW=True)
#for each trial, create and fit the KM models
km_analyses = {}
for med in clinical_trials:
km_model = KaplanMeierFitter()
if use_IPTW or use_IPCW:
#calculate weights, and truncate to be between 1st and 99th percentiles
weights = calculate_propensity_weights(clinical_trials[med])
trial_tables[med] = trial_tables[med].merge(weights, on='MRN', how='inner')
km_model.fit(trial_tables[med]['duration'], trial_tables[med]['event'], label=med, weights=trial_tables[med]['weight'])
else:
km_model.fit(trial_tables[med]['duration'], trial_tables[med]['event'], label=med)
km_analyses[med] = km_model
if use_IPTW or use_IPCW or shadow_propensities:
return clinical_trials, trial_tables, gb_models, trial_confounders, km_analyses
else:
return clinical_trials, trial_tables, km_analyses
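#Hedged end-to-end usage sketch: every parameter value below (prior-ASM bounds, cohort size, follow-up
#cap) is an assumption for illustration rather than a project default, and the inputs are assumed to
#come from the data-loading pipeline.
def _example_run_retention_trial(all_agg_pats, med_list, medication_start_regex, medication_end_regex, brand_to_generic):
    """Illustrative sketch only: run an unweighted retention trial and test all pairwise differences"""
    clinical_trials, trial_tables, km_analyses = retention_survival(
        all_agg_pats, med_list,
        minimum_prior_asms=0, maximum_prior_asms=2, min_cohort_size=50,
        medication_start_regex=medication_start_regex,
        medication_end_regex=medication_end_regex,
        brand_to_generic=brand_to_generic,
        max_duration_days=730)
    return clinical_trials, trial_tables, km_analyses, pairwise_log_rank(clinical_trials, km_analyses)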
def adjust_hasSz_by_elo(vis, trial_start_time):
"""if the algorithm thought the patient was having seizures, and a date of last seizure was recorded,
check when the last seizure was. If it was before the trial start time, then override the hasSz prediction"""
if (vis.hasSz == 1 or vis.hasSz == 2) and not np.any(pd.isnull(vis.elo)):
if vis.elo < trial_start_time:
return 0
return vis.hasSz
def construct_survival_table_szFreedom(clinical_trial, idk_behavior, max_duration_days=None):
"""
We want to create a KM table, where:
the rows are patients
column 0 is how long the patient is in the curve
column 1 is if the patient had the event (breakthrough seizure)
patients who do not have the event but stay the max length "survive"
patients who do not have the event but stay < max length "drop out"
patients who have the event "die" at time column 0.
"""
#preallocate the table
trial_table = {'duration':np.full(len(clinical_trial.cohort), np.nan), 'event':np.full(len(clinical_trial.cohort), np.nan), 'MRN':np.full(len(clinical_trial.cohort), "", dtype=object)}
#a container to store patients who have only visits with idk classifications
pat_idx_empty_by_idk = []
#for each subject
for i in range(len(clinical_trial.cohort)):
subject = clinical_trial.cohort[i]
#when did the patient join and leave the trial?
trial_start_time = subject.patient.medications[clinical_trial.name].start_date
trial_end_time = trial_start_time + subject.time_in_trial
#for each of their trial visits, track when they are seizurefree
subject_status = {'date':[], 'hasSz':[]}
for vis in subject.trial_visits:
subject_status['date'].append(vis.visit_date)
subject_status['hasSz'].append(adjust_hasSz_by_elo(vis, trial_start_time))
#if there are post_trial_visits
#and the first post_trial_visit is within 1 month of the trial_end_time, then also include that post_trial_visit
if len(subject.post_trial_visits) > 0:
if np.abs((subject.post_trial_visits[0].visit_date - trial_end_time).days) <= 30:
subject_status['date'].append(subject.post_trial_visits[0].visit_date)
subject_status['hasSz'].append(adjust_hasSz_by_elo(subject.post_trial_visits[0], trial_start_time))
#handle visits with an IDK classification
subject_status = pd.DataFrame(subject_status).replace(2, idk_behavior)
if pd.isnull(idk_behavior):
subject_status = subject_status.dropna(subset='hasSz')
if len(subject_status) < 1:
pat_idx_empty_by_idk.append(i)
continue
trial_table['MRN'][i] = subject.patient.pat_id
#if they started with seizures, they also have 0 duration and 1 event.
if subject_status['hasSz'].iloc[0] == 1:
#CHANGED-04162024: Instead of broadcasting only the first visit backwards to time 0 (worst case), we now assume the seizure happens on the visit date
#trial_table['duration'][i] = 0
trial_table['duration'][i] = (subject_status['date'].iloc[0] - trial_start_time).days
trial_table['event'][i] = True
#otherwise, the patient must be seizure free on the drug. Find out for how long.
else:
hasSz_idx = np.argwhere(subject_status['hasSz'].to_numpy() == 1).flatten()
#if there are hasSz after the szFree, then get the first index
if len(hasSz_idx) > 0:
next_hasSz_idx = np.min(hasSz_idx)
trial_table['duration'][i] = (subject_status['date'].iloc[next_hasSz_idx] - trial_start_time).days
trial_table['event'][i] = True
#otherwise the patient never had another seizure, and will be censored
else:
trial_table['duration'][i] = (subject_status['date'].iloc[-1] - trial_start_time).days
trial_table['event'][i] = False
#check if we've crossed the max duration and censor accordingly
if max_duration_days != None:
if trial_table['duration'][i] > max_duration_days:
trial_table['duration'][i] = max_duration_days
trial_table['event'][i] = False
#skip patients with IDK classifications who are still in the cohort
if pd.isnull(idk_behavior):
pat_idx_empty_by_idk = set(pat_idx_empty_by_idk)
clinical_trial.cohort = [pat for idx, pat in enumerate(clinical_trial.cohort) if idx not in pat_idx_empty_by_idk]
    return pd.DataFrame(trial_table).dropna()
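#Hedged usage sketch: idk_behavior controls how visits classified as "IDK" (code 2) are treated by the
#function above; reasonable choices, given how the code handles them, are 0 (treat as seizure-free),
#1 (treat as having seizures), or np.nan (drop those visits, and drop subjects left with no classifiable
#visits from the cohort). The 730-day cap below is an assumption for illustration.
def _example_szfreedom_tables(clinical_trials, max_duration_days=730):
    """Illustrative sketch only: build seizure-freedom survival tables treating IDK visits as missing"""
    return {med: construct_survival_table_szFreedom(clinical_trials[med], idk_behavior=np.nan, max_duration_days=max_duration_days)
            for med in clinical_trials}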
def sustained_freedom_survival(all_agg_pats, med_list, minimum_prior_asms, maximum_prior_asms, min_cohort_size, idk_behavior,
medication_start_regex, medication_end_regex, brand_to_generic, max_duration_days=None, shadow_propensities=False,
use_IPTW=False, use_IPCW=False, confounder_imputation='MICE', confounder_table=None, metadata_cols=None, enforce_positivity=True):
"""Conduct the seizure freedom clinical trials with the specified parameters"""
#set the trial up
clinical_trials = {}
for med in med_list:
new_trial = ctc.ClinicalTrial(med,
outcome_measure='sustained_freedom',
minimum_prior_asms=minimum_prior_asms,
maximum_prior_asms=maximum_prior_asms)
enroll_sub_stat = new_trial.enroll_subjects(all_agg_pats, medication_start_regex, medication_end_regex, brand_to_generic, debug=False)
if use_IPTW or use_IPCW or shadow_propensities:
imputed_confounder_table = impute_confounders(confounder_table, metadata_cols, confounder_imputation)
new_trial.cohort = [subject for subject in new_trial.cohort if subject.determine_confounders(imputed_confounder_table, metadata_cols) == 1]
if len(new_trial.cohort) >= min_cohort_size:
clinical_trials[med] = new_trial
#Create KM trial tables
trial_tables = {}
for med in clinical_trials:
trial_tables[med] = construct_survival_table_szFreedom(clinical_trials[med], idk_behavior, max_duration_days)
    #calculate propensity scores
if use_IPTW or use_IPCW:
gb_models, trial_confounders = calculate_propensity_scores(clinical_trials, use_IPTW, use_IPCW, trial_tables, max_duration_days)
if enforce_positivity:
trial_confounders, clinical_trials, trial_tables = enforce_positivity_on_subjects(trial_confounders, clinical_trials, trial_tables, enforce_IPTW=use_IPTW, enforce_IPCW=use_IPCW)
elif shadow_propensities:
gb_models, trial_confounders = calculate_propensity_scores(clinical_trials, True, True, trial_tables, max_duration_days)
if enforce_positivity:
trial_confounders, clinical_trials, trial_tables = enforce_positivity_on_subjects(trial_confounders, clinical_trials, trial_tables, enforce_IPTW=True, enforce_IPCW=True)
#for each trial, create and fit the KM analysis
km_analyses = {}
for med in clinical_trials:
km_model = KaplanMeierFitter()
if use_IPTW or use_IPCW:
#calculate weights, and truncate to be between 1st and 99th percentiles
weights = calculate_propensity_weights(clinical_trials[med])
trial_tables[med] = trial_tables[med].merge(weights, on='MRN', how='inner')
km_model.fit(trial_tables[med]['duration'], trial_tables[med]['event'], label=med, weights=trial_tables[med]['weight'])
else:
km_model.fit(trial_tables[med]['duration'], trial_tables[med]['event'], label=med)
km_analyses[med] = km_model
if use_IPTW or use_IPCW or shadow_propensities:
return clinical_trials, trial_tables, gb_models, trial_confounders, km_analyses
else:
return clinical_trials, trial_tables, km_analyses
def get_trial_statistics(clinical_trial):
"""Print basic information on the trial cohort"""
num_pats = {}
num_vis = {}
num_pres = {}
num_pat_years = {}
num_prior_asms_summary = []
epiType_counts = {}
for med in clinical_trial:
#calculate trial size values
num_pats[med] = len(clinical_trial[med].cohort)
num_vis[med] = np.sum([len(subject.patient.aggregate_visits) for subject in clinical_trial[med].cohort])
num_pres[med] = np.sum([len(subject.patient.medications[med].prescriptions) for subject in clinical_trial[med].cohort])
num_pat_years[med] = np.sum([(subject._sorted_visit_dates[-1] - subject._sorted_visit_dates[0]).days for subject in clinical_trial[med].cohort])/365
#now we count epilepsy types
epiType_counts[med] = {}
for subject in clinical_trial[med].cohort:
if subject.patient.epiType not in epiType_counts[med]:
epiType_counts[med][subject.patient.epiType] = 0
epiType_counts[med][subject.patient.epiType] += 1
#now we count how many prior ASMs there were
for subject in clinical_trial[med].cohort:
num_prior_asms_summary.append(len(subject.prior_asms))
print("Number of patients")
print(num_pats)
print(f"Total: {np.sum(list(num_pats.values()))}")
print()
print("Number of visits")
print(num_vis)
print(f"Total: {np.sum(list(num_vis.values()))}")
print()
print("Number of prescriptions")
print(num_pres)
print(f"Total: {np.sum(list(num_pres.values()))}")
print()
print("Total patient years")
print(num_pat_years)
print(f"Total: {np.sum(list(num_pat_years.values()))}")
print()
print("Epilepsy Types")
print(epiType_counts)
print()
print("Num prior ASMs")
print(f"Mean: {np.mean(num_prior_asms_summary)}")
print(f"Median: {np.median(num_prior_asms_summary)}")
print(f"Stddev: {np.std(num_prior_asms_summary)}")
def get_szFreq_stats(clinical_trial, cmap, figsize=(6,4), return_vals=False, max_sz=32, kde_plot=False):
"""
    Analyze the seizure frequency of the trial cohort before they were enrolled to see if there is possible confounding from differences in seizure frequency
Counts the number of seizure frequency values we get within 1 year of the starting trial date
Returns the distribution of these values
"""
all_szFreqs = {} #the raw values of the seizure frequencies
all_szFreq_cts = {} #the number of seizure frequency hits
all_median_szFreqs = {} #the median seizure frequencies
for med in clinical_trial:
all_szFreqs[med] = []
#for each subject
for subject in clinical_trial[med].cohort:
#find their trial start date
trial_start = subject.patient.medications[med].start_date
subject_szFreqs = []
#for each pre-trial visit, find those within 1 year of trial_start and add their pqfs
for vis in subject.pre_trial_visits:
                if np.abs((vis.visit_date - trial_start).days) <= 365 and not pd.isnull(vis.pqf):
if vis.pqf > 0:
subject_szFreqs.append(vis.pqf)
all_szFreqs[med].append(subject_szFreqs)
#once we have all of the szFreqs for each subject, we count how many szFreqs each one has
all_szFreq_cts[med] = [len(sub_szFreqs) for sub_szFreqs in all_szFreqs[med]]
        #and now we calculate the median seizure frequency for each subject, capped at max_sz seizures per month
all_median_szFreqs[med] = [np.min((np.median(sub_szFreqs), max_sz)) for sub_szFreqs in all_szFreqs[med]]
#now we plot number of szFreq values per patient
fig = plt.figure(figsize=figsize, dpi=300)
leg_el = []
for med in all_szFreq_cts:
if not kde_plot:
plt.hist(all_szFreq_cts[med], bins=range(0,15), density=True, color=cmap[med], alpha=0.33)
else:
sns.kdeplot(all_szFreq_cts[med], color=cmap[med])
leg_el.append(med)
plt.legend(leg_el)
plt.xlabel("Number of seizure frequency values within one year of trial start")
plt.ylabel("Proportion of patients")
plt.show()
#now we plot median of szFreq values per patient
fig = plt.figure(figsize=figsize, dpi=300)
leg_el = []
for med in all_median_szFreqs:
if not kde_plot:
plt.hist(all_median_szFreqs[med], bins=range(0,31), density=True, color=cmap[med], alpha=0.33)
else:
sns.kdeplot(all_median_szFreqs[med], color=cmap[med])
leg_el.append(med)
plt.legend(leg_el)
plt.xlabel("Median seizure frequency per patient within one year of trial start")
plt.ylabel("Proportion of patients")
plt.show()
if return_vals:
return all_szFreqs, all_szFreq_cts, all_median_szFreqs
def get_confounder_stats(clinical_trial):
    """Prints out frequencies of covariates in the trial cohort"""
    #map a printable label for each covariate to its key in the subject confounder dictionaries
    confounder_labels = {
        'GTCs': 'GTCS',
        'OSz': 'other_Sz_Types',
        'Status': 'status_epilepticus',
        'Febrile': 'febrile_history',
        'Intel': 'intellectual_disability',
        'Psych': 'has_psy_com',
        'Fam': 'family_history',
    }
    #collect the covariate values and cohort sizes for each medication
    pats_with_conf = {label: {med: [subject.confounders[key] for subject in clinical_trial[med].cohort]
                              for med in clinical_trial}
                      for label, key in confounder_labels.items()}
    num_pats = {med: len(clinical_trial[med].cohort) for med in clinical_trial}
    #print the per-medication and overall counts and proportions for each covariate
    for label in confounder_labels:
        print(f"Number of {label}")
        num_pats_with_conf = {med: np.sum(pats_with_conf[label][med]) for med in pats_with_conf[label]}
        perc_pats_with_conf = {med: np.mean(pats_with_conf[label][med]) for med in pats_with_conf[label]}
        print(f"Number of patients: {num_pats_with_conf}")
        print(f"% of patients: {perc_pats_with_conf}")
        print(f"Total number of patients: {np.sum(list(num_pats_with_conf.values()))}")
        print(f"Total % of patients: {np.sum(list(num_pats_with_conf.values()))/ np.sum(list(num_pats.values()))}")
        print()
def plot_KM_curve(clinical_trials, km_analyses, plt_elements, cmap, figsize=(8,6), save_path=None, ylim=[0,0.60], xlim=None):
"""Plot the Kaplan-Meier Curves from the Clinical Trial"""
plt.figure(figsize=figsize)
leg_el = []
#plot the curve
for med in clinical_trials:
kmf_plot = km_analyses[med].plot(ci_show=False, legend=False, c=cmap[med], linewidth=2)
leg_el.append(f"{med} - {len(clinical_trials[med].cohort)}")
#overlay just the censoring
for med in clinical_trials:
km_analyses[med].plot(show_censors=True, censor_styles={'ms':12, 'marker':'|', 'alpha':0.3}, ci_show=True, legend=False, linewidth=0, label='_nolegend_', c=cmap[med])
plt.xlabel(plt_elements['xlabel'])
plt.ylabel(plt_elements['ylabel'])
plt.xticks(ticks=plt_elements['xticks'], labels=plt_elements['xtick_labels'])
if xlim is not None:
plt.xlim(xlim)
plt.ylim(ylim)
plt.legend(leg_el)
add_at_risk_counts(*[km_analyses[med] for med in clinical_trials])
if save_path is not None:
plt.savefig(f"{save_path}.png", dpi=600, bbox_inches='tight')
plt.savefig(f"{save_path}.pdf", dpi=600, bbox_inches='tight')
plt.show()
all_results = pairwise_log_rank(clinical_trials, km_analyses)
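#Hedged usage sketch (axis labels, tick positions, and limits are assumptions for illustration):
#plt_elements must supply the x/y axis labels, the tick positions in days, and their labels used by
#plot_KM_curve, and cmap maps each medication name to a plot color.
def _example_plot_retention_curves(clinical_trials, km_analyses, cmap):
    """Illustrative sketch only: plot retention Kaplan-Meier curves with yearly ticks out to two years"""
    plt_elements = {'xlabel': 'Days since ASM start',
                    'ylabel': 'Proportion of patients retained on the ASM',
                    'xticks': [0, 365, 730],
                    'xtick_labels': ['0', '1 yr', '2 yr']}
    plot_KM_curve(clinical_trials, km_analyses, plt_elements, cmap, ylim=[0, 1], xlim=[0, 730])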