forked from yimengyin16/PenSimMacro_Data
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathExplore_RevGSP.R
533 lines (402 loc) · 16.8 KB
/
Explore_RevGSP.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
# This script explore the relationship between GDP and gov revenue and does exploratory modeling
#**********************************************************************
# Notes ####
#**********************************************************************
# What to model:
# 1. GSP growth and state total tax revenue growth
# 2. GSP growth and personal income tax growth
# 3. GSP growth and general sales tax growth
# 4. GSP grwoth and non-PIT-general sales tax growth: who cyclical it is
# 5. Construct stylized facts
# Other relationships to examine:
# 1. GSP growth and total own soure revenue growth,
# 2. GSP growth and total tax revenue growth
# 3. GSP growth and personal income tax growth
# 4. GSP growth and sales tax growth
# 5. GSP growth and property tax growth
# 6. GSP growth and corporate income tax growth
# 7. GSP growth and growth of total tax revenue minus PIT and sales
# 8. national GDP growth and GSP growth
# Notes on real and nominal variables
# - results for nominal variables for 1995-2015 can be compared with Pew results
# - GDP/GSP are modeled in real terms in the macro model, so the GDP-revenue relationship should be
# modeld using real term. But note that pension contributions are modeled in nominal terms, which
# must be compared against nominal gov revenue.
# - Real revenue variables are in 2015 dollar and adjustment factor for inflation is the same across
# all states, gov levels and types of tax, while real GSPs are in 2009 dollar and adjustment factors
# for inflation differ across states.
# How sales taxes are divided between state and local governments:
# https://taxfoundation.org/state-and-local-sales-tax-rates-in-2017/
# Five states do not have statewide sales taxes: Alaska, Delaware, Montana, New Hampshire, and Oregon.
# Of these, Alaska and Montana allow localities to charge local sales taxes.
#**********************************************************************
# Packages ####
#**********************************************************************
library(tidyverse)
library(broom)
library(readxl)
library(magrittr)
library(stringr)
library(forcats)
library(grid)
library(gridExtra)
library(scales)
library(knitr)
# packages for time series modeling
library(astsa) # companion package
library(TSA) # companion package; arimax: flexible transfer function model
library(tseries) #
library(forecast) # Arima
library(MSwM)
library(TTR)
#library(MSBVAR)
# packages for ts
library(zoo)
library(xts)
library(timetk)
library(tidyquant)
library(lubridate)
library(feather)
library(psych) # describe
options(tibble.print_max = 60, tibble.print_min = 60)
# check tidyquant, timetk, sweep (broom ), tibbletime
# Intro to zoo cran.r-project.org/web/packages/zoo/vignettes/zoo-quickref.pdf
# sweep: http://www.business-science.io/code-tools/2017/07/09/sweep-0-1-0.html
#**********************************************************************
# Global settings and tools ####
#**********************************************************************
dir_data_raw <- "data_raw/"
dir_data_out <- "data_out/"
# NBER recession periods, post-WWII
recessionPeriods <-
matrix(c(
1953+2/4, 1954+2/4,
1957+3/4, 1958+2/4,
1960+2/4, 1961+1/4,
1969+4/4, 1970+4/4,
1973+4/4, 1975+1/4,
1980+1/4, 1980+3/4,
1981+3/4, 1982+4/4,
1990+3/4, 1991+1/4,
2001+1/4, 2001+4/4,
2007+4/4, 2009+2/4
) , ncol = 2, byrow = T) %>%
as.data.frame() %>%
rename(peak = V1,
trough = V2) %>%
mutate(peak = peak - 1/4,
trough = trough - 1/4)
get_logReturn <- function(x){
if(any(x <= 0, na.rm = TRUE)) stop("Nagative value(s)")
log(x/lag(x))
}
#**********************************************************************
# Loading Data ####
#**********************************************************************
# Loading saved data
load(paste0(dir_data_out, "data_RevGSP.RData"))
load(paste0(dir_data_out, "dataAll.RData"))
# df_RevGSP loaded
# Variables in df_RevGSP
# Indices: state, state_abb, year
# GSP variables
# - RGSP_SIC: From BEA, 1963-1997, based on SIC
# - RGSP_NAICS: From BEA, 1997-2016, based on NAICS
# - NGSP: From FRED, 1997-2016
# Tax and revenue variables: 1977-2015
# - suffix for gov levels:
# - local
# - state
# - SL: state and local
# - suffix for real or nominal: real / nom
#
# - variables:
# urban code Var name Var description
# 'R01', 'rev_tot', 'Total revenue',
# 'R02', 'rev_tot_ownSrc', 'Total Rev-Own Sources',
# 'R03', 'rev_gen', 'General Revenue',
# 'R04', 'rev_gen_ownSrc', 'Gen Rev-Own Sources',
# 'R05', 'tax_tot', 'Total Taxes',
# 'R06', 'tax_property', 'Property Taxes',
# 'R08', 'tax_sales_tot', 'Tot Sales & Gr Rec Tax',
# 'R09', 'tax_sales_gen', 'Total Gen Sales Tax (T09)',
# 'R10', 'tax_sales_select', 'Total select Sales Tax',
# 'R27', 'tax_indivIncome', 'Individual Income Tax (T40)',
# 'R28', 'tax_corpIncome', 'Corp Net Income Tax',
# 'R36', 'chgs_Misc', 'Tot Chgs and Misc Rev'
# - with missing values
#**********************************************************************
# Creating variables ####
#**********************************************************************
# Relationship to examine:
# 1. GSP growth and total own soure revenue growth,
# 2. GSP growth and total tax revenue growth
# 3. GSP growth and personal income tax growth
# 4. GSP growth and sales tax growth
# 5. GSP growth and property tax growth
# 6. GSP growth and corporate income tax growth
# 7. GSP growth and growth of total tax revenue minus PIT and sales
# price index variables, just in case
df_inflation <-
df_dataAll_y %>% select(year, CPIU_SA_FRED, CPIU_NA_FRED, CPIc_SA_FRED, Inflation_Index)
df_inflation
# Vars in real terms.
df_real <-
df_RevGSP %>%
select(state_abb, year,
RGSP_SIC,
RGSP_NAICS,
rev_gen_ownSrc_real_SL,
tax_tot_real_SL,
tax_indivIncome_real_SL,
tax_sales_tot_real_SL,
tax_property_real_SL,
tax_corpIncome_real_SL) %>%
filter(state_abb %in% df_us_states$state_abb)
df_real
df_nominal <-
df_RevGSP %>%
select(state_abb, year,
RGSP_SIC,
RGSP_NAICS,
rev_gen_ownSrc_nom_state,
rev_gen_ownSrc_nom_local,
rev_gen_ownSrc_nom_SL,
tax_tot_nom_SL,
tax_indivIncome_nom_SL,
tax_sales_tot_nom_SL,
tax_property_nom_SL,
tax_corpIncome_nom_SL) %>%
filter(state_abb %in% df_us_states$state_abb)
df_dl_real <-
df_real %>%
#filter(year>1978) %>%
group_by(state_abb) %>%
mutate_at(vars(-year), funs(100 * log(./lag(.))))
df_dl_nominal <-
df_nominal %>%
#filter(year>1978) %>%
group_by(state_abb) %>%
mutate_at(vars(-year), funs(100 * log(./lag(.))))
# df1 <-
# df_RevGSP %>%
# select(state_abb, year,
# rev_gen_ownSrc_nom_state,
# rev_gen_ownSrc_nom_local,
# rev_gen_ownSrc_nom_SL,
#
# tax_tot_nom_state,
# tax_tot_nom_local,
# tax_tot_nom_SL,
#
#
# tax_indivIncome_nom_state,
# tax_indivIncome_nom_local,
# tax_indivIncome_nom_SL,
#
# tax_sales_tot_nom_state,
# tax_sales_tot_nom_local,
# tax_sales_tot_nom_SL,
#
# tax_property_nom_state,
# tax_property_nom_local,
# tax_property_nom_SL,
#
# tax_corpIncome_nom_state,
# tax_corpIncome_nom_local,
# tax_corpIncome_nom_SL) %>%
# filter(state_abb %in% df_us_states$state_abb)
#
#*******************************************************************************
# 1. Revenue structure: General own source revenue and total tax revneu ####
#*******************************************************************************
# Question 1: Comparing own source revenue and Tax revenue
# Variables: rev_gen_ownSrc_nom_XX, tax_tot_nom_XX
df <-
df_RevGSP %>%
filter(year>=1977) %>%
select(state_abb, year,
rev_gen_ownSrc_nom_SL,
rev_gen_ownSrc_nom_state,
rev_gen_ownSrc_nom_local,
tax_tot_nom_SL,
tax_tot_nom_state,
tax_tot_nom_local
) %>%
mutate(tax_pct_SL = 100 * tax_tot_nom_SL / rev_gen_ownSrc_nom_SL,
tax_pct_state = 100 * tax_tot_nom_state / rev_gen_ownSrc_nom_state,
tax_pct_local = 100 * tax_tot_nom_local / rev_gen_ownSrc_nom_local)
# National level: tax as % of general own-source reveneu.
df_US <-
df %>%
filter(state_abb %in% "US") %>%
select(state_abb, year,
tax_pct_SL, tax_pct_state, tax_pct_local)
df_US %>% filter(year %in% c(1977, 1990, 2000, 2010, 2015))
# tax as % of general own-source revenue: quantiles across states, all years.
df_pct <-
df %>%
group_by(year) %>%
summarise(
tax_pct_SL_p10 = quantile(tax_pct_SL, 0.10, na.rm = T),
tax_pct_SL_p25 = quantile(tax_pct_SL, 0.25, na.rm = T),
tax_pct_SL_p50 = quantile(tax_pct_SL, 0.50, na.rm = T),
tax_pct_SL_p75 = quantile(tax_pct_SL, 0.75, na.rm = T),
tax_pct_SL_p90 = quantile(tax_pct_SL, 0.90, na.rm = T),
tax_pct_state_p10 = quantile(tax_pct_state, 0.10, na.rm = T),
tax_pct_state_p25 = quantile(tax_pct_state, 0.25, na.rm = T),
tax_pct_state_p50 = quantile(tax_pct_state, 0.50, na.rm = T),
tax_pct_state_p75 = quantile(tax_pct_state, 0.75, na.rm = T),
tax_pct_state_p90 = quantile(tax_pct_state, 0.90, na.rm = T),
tax_pct_local_p10 = quantile(tax_pct_local, 0.10, na.rm = T),
tax_pct_local_p25 = quantile(tax_pct_local, 0.25, na.rm = T),
tax_pct_local_p50 = quantile(tax_pct_local, 0.50, na.rm = T),
tax_pct_local_p75 = quantile(tax_pct_local, 0.75, na.rm = T),
tax_pct_local_p90 = quantile(tax_pct_local, 0.90, na.rm = T)
)
# tax as % of general own-source revenue: ordered from high to low, for year 2015
df_arrange <-
df %>% select(state_abb, year, tax_pct_SL, tax_pct_state, tax_pct_local) %>%
filter(year == 2015) %>%
arrange(tax_pct_SL)
df_arrange
# tax as % of general own-source revenue: a single state
df %>% select(state_abb, year, tax_pct_SL, tax_pct_state, tax_pct_local) %>%
filter(state_abb == "AK")
## Observations:
# 1. The share of tax revnue in general own source revenue has been decreasing over the past 40 years.
# For the national total of state and local, the share of tax decreased from about 80% to 70%.
# In 2015, tax as a % of state revenue (73%) is higher than tax as % for local revenue (65%).
# 2. For the total state and local revenue, the share of tax revenue in general own-source revenue generally ranges from
# 60% ~ 80%.
# 3. Tax revenue in AK accouts for a very small share of own source revenue (35% in 2015), especially at the state level (20% in 2015).
# The tax share in AK was not so low in 1977, it declined drastically in 1970s and 1980s, mainly due to the drop at state level. The share of tax
# at the state level also fluctuate a lot.
## TODO: make box-whisker plots
#*******************************************************************************
# 2. Revenue structure: share of income tax, sales tax, and property tax ####
#*******************************************************************************
## Structure of state tax revenue
df_state <-
df_RevGSP %>%
filter(year >= 1977) %>%
select(state_abb, year,
tax_tot_nom_state,
tax_indivIncome_nom_state,
tax_sales_gen_nom_state,
tax_property_nom_state,
tax_corpIncome_nom_state) %>%
mutate(indivIncome_pct = 100 * tax_indivIncome_nom_state/tax_tot_nom_state,
sales_pct = 100 * tax_sales_gen_nom_state/tax_tot_nom_state,
property_pct = 100 * tax_property_nom_state/tax_tot_nom_state,
corpIncome_pct = 100 * tax_corpIncome_nom_state/tax_tot_nom_state,
PITsales_pct = indivIncome_pct + sales_pct) %>%
select(state_abb, year,
indivIncome_pct,
sales_pct,
property_pct,
corpIncome_pct,
PITsales_pct)
## for US and specific states
df_state %>%
filter(state_abb %in% "US")
# For US aggregate:
# The share of sales tax is quite stable, ~50%
# The share of personal income taxes has increased by half, from 25% in 1977 to 37% in 2015
# The share of property tax is around 1.5%~2.5%.
# The share of corp income tax decreased from 9% to 5%
## Sales+PIT as a % of total tax revenue.
df_state %>%
filter(year == 2015) %>%
arrange(desc(PITsales_pct))
# Sales + PIT accounts for 70%+ of tax revenue (most above 80%),
# States with sales + PIT less than 70% of tax rev are: AK(30%), WY(42%), NH(43%), ND(43%), DE(47%), VT(57%), MT(63%)
# Year 2015: ordered by share of individual income tax
df_state %>%
filter(year == 2015) %>%
arrange(desc(indivIncome_pct))
# Top Five in 2015: OR(69%), VA(58%), NY(56%), MA(54%), CA(52%)
# states with % PIT < 5%: NH, TN, AK(0), FL, NV, SD, TX WA, WY
# Year 2015: ordered by share of sales tax
df_state %>%
filter(year == 2015) %>%
arrange(desc(sales_pct))
# Top five in 2015: TX(86.5%), SD(82%), FL(82%), NV(80%), WA(79%)
# Lowest five in 2015: OR(14%), DE(14%), MT(21%), AK(30%), NY(31%) [NH 39%, why]
# Five states do not have statewide sales taxes: Alaska, Delaware, Montana, New Hampshire, and Oregon.
# Of these, Alaska and Montana allow localities to charge local sales taxes.
## Plotting % of sales against % of PIT
df_state %>%
filter(year == 2015, !is.na(state_abb)) %>%
ggplot(aes(x = indivIncome_pct, y = sales_pct, label = state_abb)) +
geom_point() +
geom_text(nudge_x = 1, size = 2)
#*******************************************************************************
# 3. How do tax revenues change over time ####
# and how are they affected economic conditions
#*******************************************************************************
# National level, examine how total tax revenue, sales tax revenue, and income tax (state level)
# revenue change over time, and how they are compared against economic conditions
df_RevGSP %<>%
left_join(df_dataAll_y %>%
select(year, gdp_g = GDP_growth_FRED))
# National level: GDP growth (real), state PIT, and state sales tax (nominal)
df_RevGSP %>%
select(state_abb, year,
gdp_g,
tax_indivIncome_nom_state,
tax_sales_gen_nom_state) %>%
filter(state_abb == "US", year %in% 1977:2015) %>%
mutate_at(vars(-state_abb, -year, -gdp_g), funs(100 * log(./lag(.)))) %>%
gather(var, value, -state_abb, -year) %>%
ggplot(aes(x = year, y = value, color = var)) + theme_bw() +
geom_line() +
geom_hline(yintercept = 0, linetype = 2) +
scale_color_manual(values = c("darkgray", "green", "blue"))
# Observations:
# State PIT growth is more volatile than sales tax
# Tax income, especially income tax, has become more responsive to business cycle
# The correlation between tax revenue and GDP growth are more prominent in the two recent recessions.
# Questions is: will the high correlatoin last? Pew report uses 1995-2015 data (only uses NAICS GSP) to estimate the
# the correlation, which only capture the high correlation period.
# State level: GDP growth (real), state PIT, and state sales tax (nominal)
state_select <- "CA"
df_RevGSP %>%
select(state_abb, year,
gdp_g,
tax_indivIncome_nom_state,
tax_sales_gen_nom_state) %>%
filter(state_abb == state_select, year %in% 1977:2015) %>%
mutate_at(vars(-state_abb, -year, -gdp_g), funs(100 * log(./lag(.)))) %>%
gather(var, value, -state_abb, -year) %>%
ggplot(aes(x = year, y = value, color = var)) + theme_bw() +
geom_line() +
geom_hline(yintercept = 0, linetype = 2) +
scale_color_manual(values = c("darkgray", "green", "blue"))
# GDP growth (real), real estate and corp income tax (nominal)
df_RevGSP %>%
select(state_abb, year,
gdp_g,
tax_property_nom_SL,
tax_corpIncome_nom_SL) %>%
filter(state_abb == "US", year %in% 1977:2015) %>%
mutate_at(vars(-state_abb, -year, -gdp_g), funs(100 * log(./lag(.)))) %>%
gather(var, value, -state_abb, -year) %>%
ggplot(aes(x = year, y = value, color = var)) + theme_bw() +
geom_line() +
geom_hline(yintercept = 0, linetype = 2) +
scale_color_manual(values = c("darkgray", "green", "blue"))
# Corp income tax are volatile, but its share is small
# The correlation between property tax and gdp is weak.
df_RevGSP %>%
select(state_abb, year,
gdp_g,
rev_gen_ownSrc_nom_state,
tax_tot_nom_state) %>%
mutate(ownSrc_nontax = rev_gen_ownSrc_nom_state - tax_tot_nom_state) %>%
filter(state_abb == "US", year %in% 1977:2015) %>%
mutate_at(vars(-state_abb, -year, -gdp_g), funs(100 * log(./lag(.)))) %>%
gather(var, value, -state_abb, -year) %>%
ggplot(aes(x = year, y = value, color = var)) + theme_bw() +
geom_line() +
geom_hline(yintercept = 0, linetype = 2)