-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathLocal_Projection.R
450 lines (339 loc) · 18 KB
/
Local_Projection.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
############################################################################
#
# Replication of Metcalf and Stock (2020) The Macroeconomic Impact of
# Europe's Carbon Taxes
# Jeffrey Giddens
# Submitted September 30 2021
#
############################################################################
#### Description ####
# This is an R script that will execute Local Projections (Jordà 2005) and generate Impulse Response Functions (IRFs)
# using the method of Sims (1986). This is the same method used by Metcalf and Stock (2020)
#
# A run of the script will generate one IRF. The controls below in Part 2: Options will determine which IRF is generated
# Select the controls by commenting out the unwanted options or applying T/F or number desired
#
# data.csv will need to be loaded into the working environment
#
# There is a mistake in the code submitted via usb drive. To fix it, on a line before line 97, type c=""
#
############################################################################
############################################################################
#### Part 1: Setup ####
#set directory to where data files (data.csv) are, or load them in manually
#setwd("~/Documents/Thesis/final_code")
### Packages ###
#Install packages as necessary
#install.packages("lpirfs")
#library(lpirfs)
library(dplyr)
library(fastDummies)
library(lfe)
library(lmtest)
library(ggplot2)
### Load Data ###
### When running the script multiple times, start again here (reload the data) - it needs to rebuild the correct columns
#ctax_gdp_AERPP <- read.csv("ctax_gdp_AERPP.csv") #this loads the Metcalf dataset
#data = ctax_gdp_AERPP
data = read.csv("data.csv") #this dataset up to 2020 dataset includes all of the Metcalf dataset
#Reload the data for each run
############################################################################
#### Part 2: Options ####
### 1: Choose dependent variable - 6 options
y = "dlrgdp" #GDP through 2018
#y = "dlrgdp21" #GDP through 2020
#y = "dlemptot" #Employment through 2018
#y = "dlemptot21" #Employment through 2020
#y = "dlemission_trans" #Emissions from land transportation through 2019
#y = "dlemission_total" #Emissions from all GHGs through 2019
### 2 and 3: Choose tax series - 3 options
tax_series = "rater_LCU_USD18" #Metcalf tax series through 2018
#tax_series = "ctax_21_real" #tax rates from World Bank divided by GDP deflator
#tax_series = "ctax_21_raw" #nominal tax rates reported by World Bank
### Choose coverage share - 2 options
share = "share19" #use when using Metcalf tax series
#share = "share21" #use when using updated series #see Appendix A for differences
### 4: apply population weights or not
weighted = F #T for weighted. Weights the regressions by pop18 - country's population in 2018 in millions
### 5: cumulative IRF or regular
cumulative = F #T for cumulative IRF
### 6: Lags and leads
p = 6 #number of horizons to consider. 6 is default. y axes are scaled for p=6
lplags = 4 #number of lags in the model. 4 is the default used (lowest AIC)
### 7: Other adjustments below
# 1: Carbon tax path: lines 137-143
# The default is 40 for the full h horizons, with a coverage rate of 30%. Since this is a linear model, changing this only scales the results
# 2: Sample: lines 123-127
# The default is EU+ - 32 countries
# 3: y axis of the plot - it does not adjust automatically for specifications not in the thesis and may need to be changed manually. Lines 431-441
# Note: Figure 6.5, using the new carbon tax rate only up to 2018, involved adjusting a couple date parameters in the body of the code
#
############################################################################
#### Part 3 - Adjustments ####
### Generation of variables
c = ""
ydep = paste0(c,y)
data["tax_rate"] = data[tax_series]*data[share] #tax series times coverage share
#to run the model without multiplying by coverage share, comment out *data[share]
x = "tax_rate" #x is a string that identifies the 'independent' (tax) variable in the model
#identifies countries with rates over 10 or 20 in any year. Also can be used to apply any condition to all years of a country. Not used in most runs.
max_rate = data %>% group_by(ID) %>% slice(which.max(tax_rate))
CT20_countries = max_rate[max_rate["tax_rate"] >= 20,]
CT10sw_countries = max_rate[max_rate["tax_rate"] >= 10,]
for (i in 1:nrow(data)) {
data$CT20[i] = (ifelse(data$ID[i] %in% CT20_countries$ID, 1,0))
data$CT10sw[i]= (ifelse(data$ID[i] %in% CT10sw_countries$ID, 1,0))
}
data = transform(data, cnum = as.numeric(factor(ID)))
#data = select(data, cnum, year, row_ID, country, ID, share19, dlrgdp, dlemptot, dlemission, ctaxever, rater_LCU_USD18, rater_LCU_USD18sw, ctaxyear, EU, EU2, CT20, CT10sw)
#data = data[data$year <=2018,] #used to eliminate 2019,2020 #if encountering an error, make sure this is off
### Sample: Cutoff for large carbon tax countries or any specific country set #####
#data = data[data$CT10 == 1,] # works
smple = "EU+" #for most runs of the model
#smple = "CT10sw" #enter the column name of the sampling version here #"CT10sw for large carbon tax countries (7 of them)
#data = data[data[paste0(smple)] == 1,] #will keep only rows in the sample
#smple = "
data = tibble::rowid_to_column(data, "sample_row_ID")
lpleads = p
pp1 = p+1
irfno = 0 # this is here identify which model run when setting up a loop to do multiples.
#nc = length(unique(data$cnum))
#for degrees of freedom
rateinit = 40
#counterfactual $40/ton CO2. Model is linear so scaling it just scales the result.
swfac = .3
# 30% coverage on emissions counterfactual. Just scales the tax rate
xpath = matrix(rateinit, pp1, 1)
#counterfactual carbon tax path. Default with pp1=7 is 40 for h=0:6
#xpath = matrix(c(40,50,60,70,80,90,100))
est = "LP" #only using LP for this analysis
irfno = irfno+1
theta11 = matrix(data = 1,nrow = pp1, ncol=1)
#will be populated with the Beta on carbon tax at t=0 from regression of carbon tax h periods forward on carbon tax 0:4 lags back, dependent variable 1:4 lags, Fixed effects
#The rate that carbon tax today predicts carbon tax in the future (endogenous/non shock part of the tax)
b99 = matrix(0,pp1,1)
#will be populated with the Beta on carbon tax at t=0 from regression of dependent variable h periods forward on carbon tax 0:4 lags back, dependent variable 1:4 lags, Fixed effects
#the rate that carbon tax today predicts dependent variable at h periods hence
#data1 = data
#data = data1 #restore
############################################################################
#### Part 4: Generating lags, leads, and dummy variables
### Loop for making 4 lags and p leads for the two variables (each lag/lead is a column)
#insert variable list to make them all
#,"dlemission"
# ydep and x are set above, for example "dlrgdp", "dlemptot", "rater_LCU_USD18sw", "dlemission_trans", "dlemission_total"
for (z in c(ydep, x)) {
print(z)
for (i in 0:lplags) {
print(i)
data = data %>% group_by(cnum) %>% mutate(name = dplyr::lag(get(z),i, default = NA))
colnames(data)[ncol(data)] <- paste0("lag_",z,i)
}
for (j in 0:lpleads) {
print(j)
data = data %>% group_by(cnum) %>% mutate(name = dplyr::lead(get(z),j, default = NA))
colnames(data)[ncol(data)] <- paste0("lead_",z,j)
}
}
data = dummy_cols(data, select_columns = 'ID') #make dummies. The code usually uses these columns for the fixed effects. The last regression is an felm and can split cnum and year automatically
data = dummy_cols(data, select_columns = "year")
#data2 = data #backup
#data = data2 #restore
#
#
#
############################################################################
#### Part 5 - Executing Local Projections ####
### Loop over the h horizons to run regressions and save the estimates
#h=0 #for testing
for (h in 0:p) {
hp1 = h+1
###Model 1 - dependent variable at t+h on 0:4 lags carbon tax, 1:4 lags dependent variable, fixed effects
# Stata: reg F`h'.d`y' L(0/`lplags').`x' L(1/`lplags').d`y' $`controls' i.cnum, r
# used to populate b99, residuals used in Model 3
xnam1 = paste0("lag_",x, 0:lplags) #which tax lags to use
xnam2 = paste0("lag_",y, 1:lplags) #which dependent variable lags to use
controls_year = paste0("year_", 1985:if(tax_series=="rater_LCU_USD18"){2018}else{2020}) #which year effects to use. if statement for whether continuing to 2018 or 2020
controls_ID = paste0(colnames(data[,grepl("ID_", colnames(data))])) #which country effects to use
fmla = as.formula(paste0("lead_",ydep, h, " ~ ", paste(c(xnam1, xnam2, controls_year, controls_ID), collapse = "+")))
#model formula
if(weighted==T){
model1 = lm(fmla, data, na.action = na.exclude, weights=pop18)}else{
model1 = lm(fmla, data, na.action = na.exclude)
} #linear regression #sorry for the messy - just adds the weights if a weighted estimation is called
model1sum = summary(model1)
assign(paste0("model_lead_",y,h), model1) #rename the model
#coeftest(model1, vcov = vcovHC(model1, "HC1"))
# gets the same std. errors as in stata robust
# https://stats.stackexchange.com/questions/117052/replicating-statas-robust-option-in-r
b99[hp1] = model1$coefficients[2] #populates b99
k = model1sum$df[1] - 1
#number of variables k=69 at h=0 and drops by 1 with increasing h as 1 less year effect is linearly independent each time (R drops multicollinear effects automatically)
# columnA is a 1 for each row used in the regression (sample size changes with increasing h, but this setup uses the full sample size from each h to calculate standard errors instead of using only observations available for all h)
columnA = data$sample_row_ID %in% rownames(model1$model)
data[paste0("smpl",h)] = columnA #saving columnA
#Model 1 residuals
data$resid <- NA
data$resid[columnA] <- model1$residuals
colnames(data)[colnames(data) == "resid"] <- paste0("e",h)
###Model 2 - carbon tax at t on 1:4 lags carbon tax, 1:4 lags dependent variable, fixed effects
#Stata: reg `x' L(1/`lplags').`x' L(1/`lplags').d`y' $`controls' i.cnum if smpl`h', r
# Residuals used in Model 3
xnam3 = paste0("lag_",x, 1:lplags)
xnam4 = paste0("lag_",y, 1:lplags)
#controls_year = paste0("year_", 1985:2018) #same as above
#controls_ID = paste0(colnames(data[,grepl("ID_", colnames(data))])) #same as above
fmla2 = as.formula(paste0(x, " ~ ", paste(c(xnam3, xnam4, controls_year, controls_ID), collapse = "+")))
sample = data[[paste0("smpl",h)]]
#double bracket for atomic vector not list
if(weighted==T){
model2 = lm(fmla2, data, na.action = na.exclude, subset = sample, weights=pop18)}else{
model2 = lm(fmla2, data, na.action = na.exclude, subset = sample)
} #glm, family="gaussian"
#Model 2 residuals
data$resid <- NA
data$resid[columnA] <- model2$residuals
colnames(data)[colnames(data) == "resid"] <- paste0("etax",h)
###Model 3 - calculation of zz`h'- joint residual calculation based on residuals of first 2 models
#zz`h' are used to calculate vcov matrix v99 (variance of b99) - variance of the estimates
# joint
# formula : (n/(n-1)) * (e`h' * etax`h' / Var) / sqrt(n-k)
data$zz = NA
data$zz = (sum(columnA) / (sum(columnA) - 1)) * (data[paste0("e",h)] * data[paste0("etax",h)] / drop(var(data[paste0("etax",h)],na.rm=T))) / sqrt(sum(columnA) - k)
colnames(data)[colnames(data) == "zz"] <- paste0("zz",h)
###Model 4 - carbon tax at t+h on 0:4 lags carbon tax, 1:4 lags dependent variable, fixed effects
# used to populate theta11, the rate that carbon tax today predicts carbon tax in the future (endogenous/non shock part of the tax)
#Stata: areg F`h'.`x' L(0/`lplags').`x' L(1/`lplags').d`y' $`controls', absorb(cnum) vce(r)
#theta11[1] = 1 - carbon tax at t=0 predicts carbon tax at t=0 with beta 1
if (h>0) {
print(h) #just keeping track of calculation speed
#h = 1 #testing
xnam5 = paste0("lag_",x, 0:lplags)
xnam6 = paste0("lag_",y, 1:lplags)
#controls_year = paste0("year_", 1985:2018) #same as above
#controls_ID = paste0(colnames(data[,grepl("ID_", colnames(data))])) #same as above
fmla4 = as.formula(paste0("lead_",x, h, " ~ ", paste(c(xnam5, xnam6, controls_year), collapse = "+"), " | ", "cnum")) #paste(controls_ID, collapse = "+")
# fmla5 = as.formula(paste0("lead_",x, h, " ~ ", paste(c(xnam5, xnam6, controls_year, controls_ID), collapse = "+")))
if(weighted==T){
model4 = felm(fmla4, data, weights = data$pop18[sample], subset = sample)}else{
model4 = felm(fmla4, data)
}#, na.action = na.exclude)
#or lm() - both are very close
#coeftest(model4, vcov = vcovHC(model3, "HC1"))
summary(model4, robust=T) # these are now exact
theta11[h+1,1] = model4$coefficients[1]
} #end of if statement
} #end of loop over horizons h
############################################################################
#### Part 6: Generating the IRF and VIRF ####
###
###Build v99 - vcov matrix of b99 (estimates of carbon tax on dep. variable h periods forward)
v99 = diag(p+1)
#v99
i=0
j = 1
for (i in 0:p) {
print(i)
v99[i+1,i+1] = var(data[paste0("zz",i)], na.rm=T)
ip1 = i+1
for (j in ip1:p){
v99[i+1,j+1] = cov(data[paste0("zz",i)], data[paste0("zz",j)], use="complete.obs")
v99[j+1,i+1] = cov(data[paste0("zz",i)], data[paste0("zz",j)], use="complete.obs")
}
}
v99
###Build B - lower triangular of theta11 expanded
#tk explain more if able
B = diag(pp1)
for (h in 1:p) {
for (i in 1:h) {
B[h+1,i] = theta11[h-i+2,1]
}
}
B
#epsx is e* in Metcalf - path of shocks to dependent variable needed to produce the counterfactual tax increase
epsx = solve(B) %*% xpath %*% swfac
epsx
###Build shockmat - upper triangular (use transpose for lower triangular) of shocks (epsx expanded) applied to the estimates b99 and variances v99
shockmat = diag(pp1)
for (i in 1:pp1) {
for (j in i:pp1) {
shockmat[i,j] = epsx[j-i+1,1]
}
}
shockmat
###irf is the point estimates of the IRF
###virf is the vcov matrix of the IRF
###seirf is the standard errors on the diagonal (used as the errors in the IRF)
irf = t(shockmat) %*% b99
virf = t(shockmat) %*% v99 %*% shockmat
seirf = sqrt(diag(virf))
############################################################################
#### Part 7 - Generating the IRF plot ####
###plotmat sets up the values to graph point estimates with error bars at 1 standard dev (67% CI) and 1.96 st. dev (95%CI)
plotmat = as.data.frame(cbind(0:p,irf, irf+seirf, irf + 1.96*seirf, irf-seirf, irf- 1.96*seirf))
colnames(plotmat) = c("period", "irf", "upper_irf", "upper_irfx2", "lower_irf", "lower_irfx2")
###Cumulative plotmat - adds the point estimates sequentially and adds the variances
# Variance calculations run slighlty wider than Metcalf, as this may not be their definition of cumulative IRF
if (cumulative == T){
cumirf=c()
cumirf[1] = irf[1]
for (i in 2:pp1) {
cumirf[i]=sum(cumirf[i-1]+irf[i])
}
cumvarianceirf = c()
cumvarianceirf[1] = virf[1,1]
#cumvarianceirf[2] = virf[1,1]+virf[2,2]+2*virf[1,2]
for (i in 2:pp1){
cumvarianceirf[i] = sum(virf[1:i,1:i])
# var(a+b) = var(a) + var(b) - 2cov(ab)
}
cumseirf = sqrt(cumvarianceirf)
cumplotmat = as.data.frame(cbind(0:p, cumirf, cumirf+cumseirf, cumirf+1.96*cumseirf, cumirf-cumseirf, cumirf-1.96*cumseirf))
colnames(cumplotmat) = c("period", "irf", "upper_irf", "upper_irfx2", "lower_irf", "lower_irfx2")
} #end of cumulative if statement
#####################
######## PLOT #######
#This is a long block because of all the if statements that insert the correct text and axes based on which graph it's producing
plot <-
ggplot(get(paste0(if(cumulative==T){"cum"},"plotmat")), aes(x=period, y=irf, ymin=lower_irf, ymax=upper_irf, xmin = 0)) +
geom_hline(yintercept = 0, color="black") +
geom_ribbon(fill="red", alpha=0.4) +
geom_ribbon(
aes(
x=period,
ymin = lower_irfx2,
ymax = upper_irfx2
),
fill = "red",
alpha = 0.2
) +
geom_line(color="red") +
theme_bw() +
ggtitle(paste0(if(cumulative==T){"Cumulative i"}else{"I"}, "mpulse response function for $40 carbon tax increase: ",est),
subtitle = paste0("Carbon tax rate ", if(tax_series=="ctax_21_raw"){"(nominal)"}else{"(real, 2018 USD)"
}, " wtd by coverage share ","\n Dep. vble: ",y,"; Sample: ",smple,"; ","Date range: ",
if(y=="dlemission_trans" | y=="dlemission_total"){"1996-"}else{"1985-"
},if(tax_series=="rater_LCU_USD18"){2018
}else if(y=="dlemission_trans" | y=="dlemission_total"){2019}else{2020}))+
ylab("Percentage points")+
xlab(paste0("Years after implementation \n 67% and 95% confidence bands. Includes ",lplags," lags of all regressors.")) +
scale_x_discrete(limits=(0:p)) +
theme(plot.title = element_text(size = 13, hjust=0.5), axis.title.y = element_text(size=11), panel.border = element_blank(), panel.grid.major = element_blank(),
panel.grid.minor = element_blank(), axis.line = element_line(colour = "black")) +
if(cumulative==T & (y=="dlemission_trans")){
scale_y_continuous(breaks = c(seq(-100,100,by=20)), n.breaks = 17, limits = c(-100,100))
}else if(cumulative==T & (y=="dlemission_total")){
scale_y_continuous(breaks = c(seq(-30,30,by=10)), n.breaks = 17, limits = c(-30,30))
}else if(cumulative==F & y=="dlemission_trans"){
scale_y_continuous(breaks = c(seq(-100,100,by=20)), n.breaks = 17, limits = c(-101,100))
}else if(cumulative==F & (y=="dlemission_total")){
scale_y_continuous(breaks = c(seq(-30,30,by=10)), n.breaks = 17, limits = c(-30,30))
}else if(cumulative==T){
scale_y_continuous(breaks = c(seq(-8,8,by=2)), n.breaks = 17, limits = c(-8,8))
}else{ylim(c(-4,4))
}
plot
############################################################################
#Data and Code Source (Basis for this project):
#https://www.openicpsr.org/openicpsr/project/120535/version/V1/view;jsessionid=50F86AA74FE95A5057594DA28B417345?path=/openicpsr/120535/fcr:versions/V1/readme_AERPP.txt&type=file