From b9226c17962a293e6978d74c7f5550b9937527f8 Mon Sep 17 00:00:00 2001 From: landonjhaller17 <94189973+landonjhaller17@users.noreply.github.com> Date: Thu, 14 Jul 2022 12:37:38 -0700 Subject: [PATCH] Update SAS_ LassoIndUnionNumPredCstat_BS500.sas --- SAS_ LassoIndUnionNumPredCstat_BS500.sas | 104 ++++++++++++++++++----- 1 file changed, 82 insertions(+), 22 deletions(-) diff --git a/SAS_ LassoIndUnionNumPredCstat_BS500.sas b/SAS_ LassoIndUnionNumPredCstat_BS500.sas index 9c17d52..773a73b 100644 --- a/SAS_ LassoIndUnionNumPredCstat_BS500.sas +++ b/SAS_ LassoIndUnionNumPredCstat_BS500.sas @@ -3,6 +3,8 @@ *Purpose: Compute summary statistics for number of predictors and C-statistic for LASSO Individual and Union Methods using 500 bootstrap samples ; *Statisticians: Grisell Diaz-Ramirez and Siqi Gan ; *Finished: 2021.03.01 ; +*Modified: 2022.07.09 ; +*Reason modified: Recompute C-stat-Original as described below ; ***********************************************************************************************************************************************************************************; /*Check system options specified at SAS invocation*/ @@ -466,7 +468,31 @@ proc delete data=bsample2 ctablesim_union ctablesim_ind; run; quit; /****************************************************************************************************************************************************/ -/*** Fit each of the BS model (obtained using BS data) on the original case study data and calculate their C-statistic (C-stat-Original) ***/ +/*** Fit each of the BS model (obtained using BS data) on the original case study data and calculate their C-statistic (C-stat-Original) + +In previous version, we used the variables selected in each bootstrap model to fit the model with these variables in the original data +This could be thought as bootstrap of the selection method. + +The general steps are: +1) Obtain final model and corresponding C-statistic on the case study data, namely C-statistic-apparent +2) Obtain final models of each bootstrap sample and compute the C-statistic of each bootstrap model, namely C-statistic-boot +3) Compute the C-statistic of each bootstrap model evaluated in the original case-study data, namely C-statistic-original + +A more correct way is: +"Freeze" the model obtained in 2 and obtain the C-statistic of each bootstrap model evaluated in the original data, this means: +3a) use the coefficient estimates from bootstrap model to obtain predictions in original data: + +Model 1: PHREG and STORE=item-store statement: requests that the fitted model be saved to an item store +Model 2: PLM restore=item-store created in 1) + SCORE data=&DATAorig out=BSOUT predicted: score (Linear predictor) new observations based on the item store that was created in Model1 + +3b) use predictions in 3a) as covariate in model fitted in orginal data +Model 3: PHREG with MODEL statement and NOFIT option and ROC statement using PRED=predicted + This gives the same C-statistic as having the MODEL statement with covariate predicted + +These updates do not cause any significant changes in our results + +***/ data originaldata; set originaldata; @@ -483,6 +509,8 @@ options noquotelenmax; /*supress warning: THE QUOTED STRING CURRENTLY BEING PROC ods select none; /*to create output data sets through the ODS OUTPUT statement and suppress the display of all output*/ *Define macro variables; +%let DATAorig=originaldata; +%let DATAboot=bsample3; %let NUMOUTCOMES=4; /*number of outcomes*/ %let ALLOUTCOME=status_adldepdth status_iadldifdth status_walkdepdth death; %let ALLTIME=time_adldepdth time_iadldifdth time_walkdepdth time2death; @@ -492,8 +520,16 @@ proc sql noprint; select max(replicate) format 3. into :S from union_intersect; %macro c_bs_ori(predictors=, common_pred=, model=); %do i=1 %to &S; - /*For each bs dataset define VARNAME as the union/intersect*/ + proc sql noprint; select (&i-1)*(nobs/&S)+1 into :fobs from dictionary.tables where libname='WORK' and memname='BSAMPLE2'; quit; /*create macro variable with the first id of ith bs dataset*/ + proc sql noprint; select &i*(nobs/&S) into :lobs from dictionary.tables where libname='WORK' and memname='BSAMPLE2'; quit; /*create macro variable with the last id of ith bs dataset*/ + + data bsample3; + set bsample2 (FIRSTOBS=&fobs OBS=&lobs); + run; + sasfile WORK.bsample3 load; + + /*For each bs dataset define VARNAME as the union*/ %if &common_pred=yes %then %do; data _null_; set union_intersect (keep=replicate &predictors); @@ -506,12 +542,23 @@ proc sql noprint; select max(replicate) format 3. into :S from union_intersect; %let TIME=%scan(&ALLTIME,&j); %let LABEL=%scan(&ALLLABEL,&j); - proc phreg data = originaldata CONCORDANCE=HARRELL; + /*Get bootstrap model fitted in bootstrap data */ + proc phreg data = &DATAboot; class &VARNAME; model &time*&outcome(0) = &VARNAME; - ods output CONCORDANCE=concord ; + store bootmodel; /*requests that the fitted model be saved to an item store */ run; - + /*Get linear predictions (predicted) using fitted model above in original data */ + proc plm restore=bootmodel; + score data=&DATAorig out=BSOUT predicted; /* score (Linear predictor) new observations based on the item store bootmodel that was created above*/ + run; + /*Using linear predictions "predicted" above compute the C-statistic-original*/ + proc phreg data = BSOUT CONCORDANCE=HARRELL; + class &VARNAME; + model &time*&outcome(0) = &VARNAME / nofit; + roc 'Original' pred=predicted; + ods output CONCORDANCE=concord; + run; data CTABLE_&label; set concord (keep= estimate rename=(estimate=cbs_ori_&label)); length VARINMODEL $1000; @@ -519,8 +566,7 @@ proc sql noprint; select max(replicate) format 3. into :S from union_intersect; VARINMODEL="&VARNAME"; run; - proc delete data=concord; run; quit; - + proc delete data=concord BSOUT; run; quit; %end; /*j loop*/ data ctable; @@ -541,27 +587,38 @@ proc sql noprint; select max(replicate) format 3. into :S from union_intersect; %let LABEL=%scan(&ALLLABEL,&j); %let PREDICTOR=%scan(&predictors,&j); + /*For each bs dataset define VARNAME as individual-model for each outcome*/ data _null_; set union_intersect (keep=replicate &PREDICTOR); where replicate=&i; call symputx ('VARNAME' , &PREDICTOR); run; - proc phreg data = originaldata CONCORDANCE=HARRELL; + /*Get bootstrap model fitted in bootstrap data */ + proc phreg data = &DATAboot; class &VARNAME; model &time*&outcome(0) = &VARNAME; - ods output CONCORDANCE=concord ; + store bootmodel; /*requests that the fitted model be saved to an item store */ run; - + /*Get linear predictions (predicted) using fitted model above in original data */ + proc plm restore=bootmodel; + score data=&DATAorig out=BSOUT predicted; /* score (Linear predictor) new observations based on the item store bootmodel that was created above*/ + run; + /*Using linear predictions "predicted" above compute the C-statistic-original*/ + proc phreg data = BSOUT CONCORDANCE=HARRELL; + class &VARNAME; + model &time*&outcome(0) = &VARNAME / nofit; + roc 'Original' pred=predicted; + ods output CONCORDANCE=concord; + run; data CTABLE_&label; set concord (keep= estimate rename=(estimate=cbs_ori_&label)); - length VARINMODEL_&label $1000; + length VARINMODEL $1000; replicate=&i; - VARINMODEL_&label="&VARNAME"; + VARINMODEL="&VARNAME"; run; - proc delete data=concord; run; quit; - + proc delete data=concord BSOUT; run; quit; %end; /*j loop*/ data ctable; @@ -574,6 +631,9 @@ proc sql noprint; select max(replicate) format 3. into :S from union_intersect; %end; /*&common pred DO*/ + sasfile WORK.bsample3 close; + proc delete data=bsample3; run; quit; + proc append base=ctable_ori_sim_&model data=ctable force; run; proc delete data=ctable; run; quit; @@ -588,15 +648,14 @@ proc sql noprint; select max(replicate) format 3. into :S from union_intersect; %c_bs_ori (predictors=VARINMODEL_adl VARINMODEL_iadl VARINMODEL_walk VARINMODEL_death, common_pred=no, model=ind); %PUT ======MONITORING: %SYSFUNC(DATE(),YYMMDD10.), %LEFT(%SYSFUNC(TIME(),HHMM8.))======; /* -======MONITORING: 2020-10-07, 15:39====== -======MONITORING: 2020-10-07, 15:55====== -======MONITORING: 2020-10-07, 16:09====== +======MONITORING: 2022-06-22, 10:51====== +======MONITORING: 2022-06-22, 11:30====== +======MONITORING: 2022-06-22, 12:09====== */ *Save permanent datasets; -data outdata.ctable_ori_sim_union; set ctable_ori_sim_union; run; -data outdata.ctable_ori_sim_ind; set ctable_ori_sim_ind; run; - +data outdata2.ctable_ori_sim_union; set ctable_ori_sim_union; run; +data outdata2.ctable_ori_sim_ind; set ctable_ori_sim_ind; run; /*** Calculate degree of optimism: Optimism= Average (Absolute difference: C-stat-BS- C-stat-Original) across 500 BS @@ -608,7 +667,7 @@ To compute the average Optimism for the 3 outcomes: 3-) For each BS: Compute Absolute difference: C-stat-BS-avg - C-stat-Original-avg 4-) Compute Average (Absolute difference: C-stat-BS-avg - C-stat-original-avg) across 500 BS -The corrected C-stat final models = C-stat of original sample (without Wolbers approximation) – degree of optimism (using Wolbers approximation). +The corrected C-stat final models = C-stat of original sample (without Wolbers approximation) – degree of optimism (using Wolbers approximation). ***/ %let S=500; @@ -720,7 +779,7 @@ on the bootstrap-based optimism correction methods. arXiv preprint arXiv:2005.01 /*Method description: Algorithm 1 (Location-shifted bootstrap confidence interval) 1. For a multivariable prediction model, let theta_hat_app be the apparent predictive measure for the derivation population and - let theta_hat be the optimism-corrected predictive measure obtained from the Harrell’s bias correction, 0.632, or 0.632+ method. + let theta_hat be the optimism-corrected predictive measure obtained from the Harrell’s bias correction, 0.632, or 0.632+ method. 2. In the computational processes of theta_hat, we can obtain a bootstrap estimate of the sampling distribution of theta_hat_app from the B bootstrap samples. Compute the bootstrap confidence interval of theta_app from the bootstrap distribution, (theta_hat_app_L, theta_hat_app_U); for the 95% confidence interval, they are typically calculated by the 2.5th and 97.5th percentiles of the bootstrap distribution. @@ -852,3 +911,4 @@ RUN; +