# This script fits a Gaussian process (GP) to the data after dropping input X5
setwd('~/Documents/STAT-Project/') # change to the appropriate path
set.seed(150)
source("dace.sim.r")
source("sampler.r")
#---------------------Parameters------------------------
filename = 'data.csv'
N = 40 # no. of training samples
maxIter = 20000 # max. iterations of the Gibbs/MCMC sampler
burn = 10000 # no. of initial samples to discard (burn-in)
se = 10 # keep one sample every se iterations (thinning)
x.drop = 5 # input column to drop (X5); don't change it
F.drop = c(3,5) # columns of the transformed design matrix to drop
#-----------------------------------------------------
df = read.csv(filename,header = TRUE)
colnames(df) = c('X1','X2','X3','X4','X5','Y','S')
X = df[1:N,1:5] # inputs
Y = df[1:N,6] # response (stress)
# normalize the inputs to [0,1]
xmax = apply(X,2,max)
xmin = apply(X,2,min)
X = input.normalize(X,xmax,xmin)
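# input.normalize is defined in dace.sim.r; a minimal sketch of the assumed
# behavior (column-wise rescaling to [0,1]) would be:
# input.normalize <- function(X, xmax, xmin)
#   sweep(sweep(X, 2, xmin, "-"), 2, xmax - xmin, "/")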
F = input.transform(X) #non-linear transformation
# drop the X5 input and the corresponding transformed columns
F = F[,-F.drop]
X = X[,-x.drop]
#F = X
F = cbind(rep(1,nrow(F)),F) # prepend an intercept column
# Choose prior parameters
# Do a linear fit of Y = F %*% beta to center the prior on beta
fitbeta = lm(Y~.,data=data.frame(Y=Y,F=F))
# F already includes an intercept column, so lm() fits a redundant intercept;
# drop the collinear (NA) coefficient so beta.mu matches F's columns
beta.mu = fitbeta$coefficients[-2]
beta.prec = 1/(0.5*5)^2 # prior precision for beta (prior sd = 2.5)
Ye = as.vector(F%*%beta.mu)
# precision (lambda) ~ Gamma(a,b)
lambda.a = 2
lambda.b = 0.5
# correlation (rho) ~ Beta(a,b)
rho.a = c(5,2,5,2)
rho.b = c(1,5,1,2)
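# For reference, the implied prior means are rho.a/(rho.a+rho.b),
# i.e. about 0.83, 0.29, 0.83, 0.50 for rho1..rho4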
# Pairwise distance matrices |x_i - x_j| for each input dimension
l1=list(m1=abs(outer(X[,1],X[,1],"-")))
l2=list(m2=abs(outer(X[,2],X[,2],"-")))
l3=list(m3=abs(outer(X[,3],X[,3],"-")))
l4=list(m4=abs(outer(X[,4],X[,4],"-")))
l.design=list(l1=l1,l2=l2,l3=l3,l4=l4)
##-------------Fit the data----------------------
pi = list(az=lambda.a,bz=lambda.b,rhoa=rho.a,rhob=rho.b,
beta_mean=beta.mu,beta_prec=beta.prec,F=F) #prior parameters
init=list(rho0=rho.a/(rho.a+rho.b),lambda0=1/var(Ye),beta0=beta.mu) # initial guess
nit= list(maxIter=maxIter, burn=burn, se=se)
mh = list(rr=0.07) # Metropolis-Hastings tuning parameter (see sampler.r)
est = regress(Y,l.design,nit,pi,mh,init)
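# est is assumed (from its use below) to hold the retained posterior draws:
# est$rhoz (correlations), est$betaz (regression coefficients), est$lambdaz (precision)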
##---------------Predict-------------------------
Xpred = df[(N+1):50,1:5] # hold-out inputs for validation
#Xpred = df[50,1:5]
Xpred = input.normalize(Xpred,xmax,xmin)
F0 = input.transform(Xpred)
# drop the X5 input and the corresponding transformed columns
Xpred = Xpred[,-x.drop]
F0 = F0[,-F.drop]
Xall = rbind(X,Xpred)
F0 = cbind(rep(1,nrow(F0)),F0) # prepend an intercept column
l1=list(m1=abs(outer(Xall[,1],Xall[,1],"-")))
l2=list(m2=abs(outer(Xall[,2],Xall[,2],"-")))
l3=list(m3=abs(outer(Xall[,3],Xall[,3],"-")))
l4=list(m4=abs(outer(Xall[,4],Xall[,4],"-")))
l.v=list(l1=l1,l2=l2,l3=l3,l4=l4)
Ypred = df[(N+1):50,6]
pred = predict(l.v,est,F,F0,eps=1e-6)
pred.error = sum((pred$mean-Ypred)^2)/length(Ypred) # mean squared prediction error
cat('\nPrediction error (MSE) =',pred.error)
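# On the scale of Y, the root mean squared error is just the square root of the MSE:
cat('\nPrediction error (RMSE) =',sqrt(pred.error))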
##---------------Plot results-------------------------
# posterior predictive draws, posterior mean, and a 95% interval (mean +/- 1.96 sd)
par(mfrow=c(2,2))
for(k in 1:4){
  ind = order(Xpred[,k])
  plot(Xpred[,k],Ypred,col='red',pch=20,xlab=sprintf('X%d',k),ylab='Y')
  for(i in 1:nrow(pred$Y)){
    lines(Xpred[ind,k],pred$Y[i,ind],col='gray',lwd=0.25)
  }
  lines(Xpred[ind,k],pred$mean[ind],col='blue',lwd=1)
  lines(Xpred[ind,k],pred$mean[ind]-1.96*pred$sd[ind],lwd=0.75,col="black")
  lines(Xpred[ind,k],pred$mean[ind]+1.96*pred$sd[ind],lwd=0.75,col="black")
  points(Xpred[,k],Ypred,col='red',pch=20)
}
# draws of Gibbs/MCMC sampler
# plot 1000 random draws (else the plot gets messy); sorted so trace plots read left to right
ind.draws = sort(sample(length(est$lambdaz),1000))
# rho
# display the posterior mean
est.rhoz.mean = apply(est$rhoz,2,mean)
cat('\nestimated rho =',est.rhoz.mean)
# plot the draws
par(mfrow=c(2,2))
for(k in 1:4){
  plot(est$rhoz[ind.draws,k],type='l',xlab='draws',ylab=sprintf('rho%d',k))
}
# plot the auto-correlation of samples of rho_i
par(mfrow=c(2,2))
for(k in 1:ncol(est$rhoz)){
  acf(est$rhoz[,k],main=sprintf('rho%d',k))
}
# beta
# display the posterior mean
est.betaz.mean = apply(est$betaz,2,mean)
cat('\nestimated beta =',est.betaz.mean)
# plot the draws
par(mfrow=c(2,2))
for(k in 1:ncol(est$betaz)){
  plot(est$betaz[ind.draws,k],type='l',xlab='draws',ylab=sprintf('beta%d',k))
}
# plot the auto-correlation of samples of beta_i
par(mfrow=c(2,2))
for(k in 1:ncol(est$betaz)){
  acf(est$betaz[,k],main=sprintf('beta%d',k))
}
# lambda
# display the posterior mean
est.lambdaz.mean = mean(est$lambdaz)
cat('\nestimated lambda =',est.lambdaz.mean)
# plot the posterior samples and the autocorrelation of lambda
par(mfrow=c(2,1))
plot(est$lambdaz[ind.draws],type='l',xlab='draws',ylab='lambda')
acf(est$lambdaz,main='lambda')
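# Optionally, mixing can be checked with the effective sample size from the
# coda package (uncomment if coda is installed):
# library(coda)
# cat('\nESS(lambda) =', effectiveSize(est$lambdaz))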
# empirical posterior density of samples from Gibbs/MCMC
# rho
par(mfrow=c(2,2))
for(k in 1:ncol(est$rhoz)){
  boxplot(est$rhoz[,k],xlab=sprintf('rho%d',k))
}
# beta
par(mfrow=c(2,2))
for(k in 1:ncol(est$betaz)){
  boxplot(est$betaz[,k],xlab=sprintf('beta%d',k))
}
# lambda
par(mfrow=c(1,1))
boxplot(est$lambdaz,xlab='lambda')
##---------------Sensitivity Analysis------------------
library(sensitivity)
gp_yhat <- function(pred.loc){
  # pred.loc must already be normalized to [0,1] (4 columns; X5 dropped)
  colnames(pred.loc) = colnames(X)
  Xall = rbind(X,pred.loc)
  l1=list(m1=abs(outer(Xall[,1],Xall[,1],"-")))
  l2=list(m2=abs(outer(Xall[,2],Xall[,2],"-")))
  l3=list(m3=abs(outer(Xall[,3],Xall[,3],"-")))
  l4=list(m4=abs(outer(Xall[,4],Xall[,4],"-")))
  l.v=list(l1=l1,l2=l2,l3=l3,l4=l4)
  # append a placeholder X5 column (zeros) so input.transform receives all five inputs
  pred.loc = cbind(pred.loc,0)
  F0 = input.transform(pred.loc)
  F0 = F0[,-F.drop]
  F0 = cbind(rep(1,nrow(F0)),F0) # prepend an intercept column
  pred = predict(l.v,est,F,F0,eps=1e-6)
  return(pred$mean)
}
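# Example call with a hypothetical input: gp_yhat expects rows of 4 inputs
# already normalized to [0,1], e.g.
# gp_yhat(matrix(0.5, nrow=1, ncol=4))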
set.seed(200)
n.mc = 20 # Monte Carlo sample size for the Sobol designs (kept separate from the training N)
X1=data.frame(matrix(runif(n.mc*4),ncol=4))
X2=data.frame(matrix(runif(n.mc*4),ncol=4))
si.S = sobolEff(model=gp_yhat,X1=X1,X2=X2,order=1,nboot=0) # first-order indices
si.TS = sobolEff(model=gp_yhat,X1=X1,X2=X2,order=0,nboot=0) # total-effect indices
#si.S2=sobolEff(model=gp_yhat,X1=X1,X2=X2,order=2,nboot=0)
cat('First-order sensitivity indices:\n')
print(si.S$S)
cat('Total sensitivity indices:\n')
print(si.TS$S)
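# A quick visual comparison of the two index sets (assumes the usual 'original'
# column that the sensitivity package stores in $S; uncomment to plot):
# barplot(rbind(si.S$S$original, si.TS$S$original), beside=TRUE,
#         names.arg=colnames(X1), legend.text=c('first-order','total'),
#         ylab='Sobol index')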