-
Notifications
You must be signed in to change notification settings - Fork 2
/
11a.SAS_SimSce3corigTrain.sas
182 lines (152 loc) · 9.03 KB
/
11a.SAS_SimSce3corigTrain.sas
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
***********************************************************************************************************************************************************************************;
*Program: 11a.SAS_SimSce3corigTrain ;
*Purpose: Import training simulated data sets under Scenario 3 (Full method) with case-study censoring from R and obtain some general statistics ;
*Statistician: Grisell Diaz-Ramirez ;
*Started: 2020.11.25 ;
*Finished: 2021.01.28 ;
***********************************************************************************************************************************************************************************;
/* Library that holds the original data set (savedata.originaldata). */
libname savedata "path";
/* Library that receives the stacked simulated training data set. */
libname outtrain "path";

/*********************************************************************** Prepare simulated and original datasets to get stats *********************************************************************************/

/* Keep the log short while the data sets are built:
     NONOTES  - suppresses NOTE messages in the log;
     NOSOURCE - suppresses the listing of SAS statements in the log, so that
                only source lines that contain errors are written to it. */
options nonotes nosource;
/*---------------------------------------------------------------------------------------------
  Macro   : impdata
  Purpose : Import the CSV files with the simulations done in R by the "sim.survdata"
            function and stack them into one SAS data set sorted by newid.

  Parameters
    inputdata=   File-name stem. File number i is read from "path/<inputdata><i>.csv".
    rep=         Number of CSV files to read (i = 1 to rep).
    outputdata=  Name of the stacked output data set (one- or two-level name).
    simsperfile= Offset step added to the simulation id of each file:
                 sim = sim + simsperfile*(i-1). Default 10, the value that was previously
                 hard-coded. Presumably each file numbers its simulations 1..simsperfile, so
                 the offset makes sim unique across files - confirm against the R export.
    replace=     YES (default): delete an existing &outputdata before appending, so that
                 re-running the program does not duplicate rows in a permanent library.
                 NO: keep appending to whatever is already in &outputdata.

  Notes
    - The WORK data set "temp" is overwritten and deleted on every iteration.
    - PROC APPEND uses FORCE, so variables that differ in length/type between files are
      appended anyway (longer character values are truncated to the base length).
    - NOTE(review): PROC IMPORT guesses variable types/lengths from the first rows of each
      file; consider GUESSINGROWS= if character values may be longer further down the file.
---------------------------------------------------------------------------------------------*/
%macro impdata (inputdata=, rep=, outputdata=, simsperfile=10, replace=YES);
  /* Keep the loop index local so it cannot overwrite a macro variable named i elsewhere. */
  %local i;

  /* Start from an empty base: PROC APPEND would otherwise add to a stale copy. */
  %if %upcase(&replace)=YES and %sysfunc(exist(&outputdata)) %then %do;
    proc delete data=&outputdata; run;
  %end;

  %do i=1 %to &rep;
    PROC IMPORT OUT= temp
      DATAFILE= "path/&inputdata.&i..csv"
      DBMS=CSV REPLACE;
      GETNAMES=YES;
      DATAROW=2;
    RUN;

    /* Shift the simulation id of file i by simsperfile*(i-1). */
    data temp (rename=(sim2=sim));
      set temp;
      sim2=sim+&simsperfile*(&i-1);
      drop sim;
    run;

    proc append base=&outputdata data=temp force; run;
    proc delete data=temp; run; quit;
  %end;

  proc sort data=&outputdata; by newid; run;
%mend impdata;
/* Stack the 50 CSV files into outtrain.sim500trainFullcorig (sorted by newid). */
%impdata(outputdata=outtrain.sim500trainFullcorig,
         inputdata=sim10trainFullcorig_20210109_,
         rep=50);
/* Expected size: N(5531)*Sim(500)*4=11,062,000 observations and 7 variables. */

/* Attach the covariates of the original data set to every simulated record.
   The observed time/status variables are dropped from the original data so that
   only the simulated outcomes remain in simdata2. */
data simdata2;
  merge
    savedata.originaldata (drop=time_adldepdth    status_adldepdth
                                time_iadldifdth   status_iadldifdth
                                time_walkdepdth   status_walkdepdth
                                time2death        death)
    outtrain.sim500trainFullcorig
  ;
  by newid;
run;
/* Expected size: N(5531)*Sim(500)*4=11,062,000 observations and 56+6=62 variables. */

proc contents data=simdata2;
run;
/* QC of the merged simulated data */

/* Cross-tabulation of outcome, status and failed (missing values included). */
proc freq data=simdata2;
  tables outcome*status*failed / missing list;
run;

/* Distribution of time and y within each outcome. */
proc means data=simdata2 n nmiss min max p25 median mean p75;
  class outcome;
  var time y;
run;

/* Number of records per simulation and outcome. */
proc freq data=simdata2;
  tables sim*outcome / missing list;
run;
/* Modify the original data set to apply the Wolbers modification to competing-risk
   regression: for ADL, IADL and WALK, records with status 2 are recoded to status 0
   and their time is set to 15.0278689.
   NOTE(review): 15.0278689 is presumably the maximum follow-up time - confirm. */
data finaldata;
  set savedata.originaldata;

  if status_adldepdth eq 2 then do;
    time_adldepdth   = 15.0278689;
    status_adldepdth = 0;
  end;

  if status_iadldifdth eq 2 then do;
    time_iadldifdth   = 15.0278689;
    status_iadldifdth = 0;
  end;

  if status_walkdepdth eq 2 then do;
    time_walkdepdth   = 15.0278689;
    status_walkdepdth = 0;
  end;
run;
/* 5531 observations and 64 variables. */

/* Copy of the original data tagged as simulation 0 and sorted by sim and newid.
   NOTE(review): this copy is read from savedata.originaldata (not finaldata) and is
   not referenced again in this program. */
data finaldata2;
  set savedata.originaldata;
  sim = 0;
run;

proc sort data=finaldata2;
  by sim newid;
run;
/* Reshape the simulated data from long (one record per outcome, outcomes stacked) to
   wide (one record per sim and newid, outcomes side by side).
   Each PROC SORT below selects the records of one outcome, renames time/status to the
   outcome-specific names and writes the result ordered by sim and newid. */

/* ADL: 2,765,500 observations and 4 variables. */
proc sort data=simdata2 (keep=sim newid outcome time status
                         rename=(time=time_adldepdth status=status_adldepdth))
          out=adl (drop=outcome);
  where outcome="adl";
  by sim newid;
run;

/* IADL: 2,765,500 observations and 4 variables. */
proc sort data=simdata2 (keep=sim newid outcome time status
                         rename=(time=time_iadldifdth status=status_iadldifdth))
          out=iadl (drop=outcome);
  where outcome="iadl";
  by sim newid;
run;

/* Walk: 2,765,500 observations and 4 variables. */
proc sort data=simdata2 (keep=sim newid outcome time status
                         rename=(time=time_walkdepdth status=status_walkdepdth))
          out=walk (drop=outcome);
  where outcome="walk";
  by sim newid;
run;

/* Death: 2,765,500 observations and 4 variables. */
proc sort data=simdata2 (keep=sim newid outcome time status
                         rename=(time=time2death status=death))
          out=death (drop=outcome);
  where outcome="death";
  by sim newid;
run;

/* Put the four outcomes side by side: 2,765,500 observations and 10 variables. */
data simdata3;
  merge adl iadl walk death;
  by sim newid;
run;
/********************************************************************* Compare stats of simulated datasets vs original dataset *****************************************************************************/
/* Writes an RTF report that puts, outcome by outcome, the simulated data (simdata2) next to
   the original data with (finaldata) and without (savedata.originaldata) the Wolbers
   modification. The repeated comparisons are produced by two local helper macros; the log
   options switched off at the top of the program are restored at the end. */

ods select all; /*to print results below*/
%let SIM=500; /*number of simulations*/
title;

/* Helper: status frequencies of one outcome, simulated vs original.
     outcome= value of OUTCOME in simdata2 (also printed in upper case in the heading)
     origvar= status variable of that outcome in the original data sets
     wolbers= Y to include the Wolbers-modified data set (finaldata), N to skip it */
%macro _cmpfreq(outcome=, origvar=, wolbers=Y);
  ods startpage=now;
  ods text= "Simulated datasets (S=&SIM and original censoring) %upcase(&outcome)";
  proc freq data=simdata2 ; table status; where outcome="&outcome"; run;
  %if %upcase(&wolbers)=Y %then %do;
    ods text= "Original dataset with Wolber modification";
    proc freq data=finaldata; table &origvar; run;
  %end;
  ods text= "Original dataset without Wolber modification";
  proc freq data=savedata.originaldata; table &origvar; run;
%mend _cmpfreq;

/* Helper: time statistics by status of one outcome, simulated vs original.
     outcome=   value of OUTCOME in simdata2 (also printed in upper case in the heading)
     timevar=   time variable of that outcome in the original data sets
     statusvar= status variable of that outcome in the original data sets
     wolbers=   Y to include the Wolbers-modified data set (finaldata), N to skip it */
%macro _cmpmeans(outcome=, timevar=, statusvar=, wolbers=Y);
  ods startpage=now;
  ods text= "Simulated datasets (S=&SIM and original censoring) %upcase(&outcome)";
  proc means data=simdata2 n min max mean std median p25 p75; var time; class status; where outcome="&outcome"; run;
  %if %upcase(&wolbers)=Y %then %do;
    ods text= "Original dataset with Wolber modification";
    proc means data=finaldata n min max mean std median p25 p75; var &timevar; class &statusvar; run;
  %end;
  ods text= "Original dataset without Wolber modification";
  proc means data=savedata.originaldata n min max mean std median p25 p75; var &timevar; class &statusvar; run;
%mend _cmpmeans;

ods rtf file='path\Results_StatsSimTrainFullcorigVsOriginal.rtf' startpage=no;

/* Number of ids per simulation vs the original data set. */
ods text= "Simulated datasets (S=&SIM and original censoring)";
proc means data=simdata2 N min max mean std; var newid; class sim; run;
ods text= "Original dataset without Wolber modification";
proc means data=savedata.originaldata N min max mean std; var newid; run;

/* Status distribution per outcome (death has no Wolbers-modified counterpart). */
%_cmpfreq(outcome=adl,   origvar=status_adldepdth);
%_cmpfreq(outcome=iadl,  origvar=status_iadldifdth);
%_cmpfreq(outcome=walk,  origvar=status_walkdepdth);
%_cmpfreq(outcome=death, origvar=death, wolbers=N);

/* Time distribution by status per outcome. */
%_cmpmeans(outcome=adl,   timevar=time_adldepdth,  statusvar=status_adldepdth);
%_cmpmeans(outcome=iadl,  timevar=time_iadldifdth, statusvar=status_iadldifdth);
%_cmpmeans(outcome=walk,  timevar=time_walkdepdth, statusvar=status_walkdepdth);
%_cmpmeans(outcome=death, timevar=time2death,      statusvar=death, wolbers=N);

/* Overall time distribution per outcome. */
ods startpage=now;
ods text= "Simulated datasets (S=&SIM and original censoring)";
proc means data=simdata2 n min max mean std median p25 p75; var time; class outcome; run;
ods text= "Original dataset with Wolber modification";
proc means data=finaldata n min max mean std median p25 p75 nolabels; var time_adldepdth time2death time_iadldifdth time_walkdepdth ; run;
ods text= "Original dataset without Wolber modification";
proc means data=savedata.originaldata n min max mean std median p25 p75 nolabels; var time_adldepdth time2death time_iadldifdth time_walkdepdth ; run;

ods rtf close;

/* Restore the log options that were switched off at the top of the program. */
options source notes;