-
Notifications
You must be signed in to change notification settings - Fork 0
/
NC_analysis.do
131 lines (101 loc) · 3.93 KB
/
NC_analysis.do
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
* Work from the repository's data folder so the relative saves below land there
cd "/Users/nabarun/Documents/GitHub/covidnc/data"

// ---- Google app check-in data: load, rescale, keep newest North Carolina rows ----
clear
import delimited "/Users/nabarun/Documents/GitHub/covidnc/data/export-2020-04-05.csv"
drop v1

* Rescale every numeric column from a proportion to a percent.
* This runs before the date variable is created, so the date is not rescaled.
ds, has(type numeric)
foreach col of varlist `r(varlist)' {
    replace `col' = `col' * 100
}

* Rename the sub-unit column to county and turn the report date string
* (first 10 characters, year-month-day) into a Stata daily date
rename subunit_name county
order county, after(report_date)
replace report_date = substr(report_date, 1, 10)
generate googledate = date(report_date, "YMD")
format googledate %td
order googledate, first
drop report_date

* Keep only the most recent report date found in the file, then only the
* North Carolina rows; the unit* columns are dropped once the filter is applied
summarize googledate
local newest = r(max)
local latest : display %td `newest'
display "Keeping only records in Google mobility scrape from `latest'"
keep if googledate == `newest'
keep if unit_name == "North Carolina"
drop unit*

* Saved for the later county-level merge
save ncgoogle_mobility, replace
// ---- Cell tower mobility data: download and restrict to North Carolina counties ----
clear
import delimited "https://raw.githubusercontent.com/descarteslabs/DL-COVID-19/master/DL-us-mobility-daterow.csv"

* Rows with an empty admin2 are state aggregates; remove them
drop if admin2 == ""

* Limit to North Carolina, then drop the geography columns that are no longer used
keep if admin1 == "North Carolina"
drop admin1 admin_level country_code
rename admin2 county

* Swap the year-month-day string for a Stata daily date of the same name,
* formatted as %td and moved to the first column
generate date_td = date(date, "YMD")
drop date
rename date_td date
format date %td
order date, first

save nc_mobility, replace
* Note data start and end dates for graphs.
* Both locals are formatted from the r() results of the single summarize call.
summarize date
local latest : display %td r(max)
display "`latest'"
local earliest : display %td r(min)
* Fixed macro quoting: the previous `'earliest' expanded an empty macro and
* printed the literal text earliest' instead of the date
display "`earliest'"

* Basic graphs: variable labels double as axis titles
label variable m50 "Median daily distance traveled (km)"
* NOTE(review): this label says "% reduction", while the graph note below describes
* a Feb 17 to March 7 baseline; confirm the wording against the data provider's
* definition of m50_index before publishing graphs.
label variable m50_index "Median % reduction in mobility since March 7, 2020"
label variable date "Dates: `earliest' to `latest'"

* Disabled graph commands, kept for reference.
* NOTE(review): the export filenames look swapped relative to the plotted variable
* (m50_index -> nc_mobility_km.png, m50 -> nc_mobility_change.png); verify before re-enabling.
* line m50_index date, by(county) note("Baseline: Feb 17 to March 7, 2020")
* graph export "/Users/nabarun/Documents/GitHub/covidnc/docs/nc_mobility_km.png", as(png) name("Graph") replace
* line m50 date, by(county)
* graph export "/Users/nabarun/Documents/GitHub/covidnc/docs/nc_mobility_change.png", as(png) name("Graph") replace

* Short county name: county with the first " County" occurrence removed
generate shortcounty = regexr(county, " County", "")

* twoway lowess m50_index date, by(county)
* Summarize each county over its three most recent weekday observations

* Number the weekday rows within each county (data sorted by county and date);
* dow() is 0 for Sunday and 6 for Saturday, so weekend rows are left missing
bysort county (date): egen weekdays = seq() if !inlist(dow(date), 0, 6)

* Highest weekday number per county, i.e. the count of weekday rows
by county: egen lastweekday = max(weekdays)

* Marker for the window: weekday rows numbered within 2 of the county's last one
generate byte in_last3 = (weekdays < .) & (weekdays >= lastweekday - 2)

* Window statistics; they are filled in only on the marked rows
by county: egen last3_m50       = mean(m50)       if in_last3
by county: egen last3_sample    = total(samples)  if in_last3
by county: egen last3_pctchange = mean(m50_index) if in_last3

* One row per county: (max) picks up the window values and the latest date,
* (sum) totals samples over every row; all other variables are discarded
collapse (max) last3_m50 last3_pctchange last3_sample date (sum) samples, by(county fips)

label variable last3_m50 "Median km traveled (last 3 weekdays)"
label variable last3_sample "Number of cell trace samples during last 3 weekdays"
label variable last3_pctchange "% change in median mobility since baseline"
note last3_pctchange: Baseline 17Feb to 07Mar; % change since then until last 3 weekdays
* Attach the Google mobility columns by county.
* keep(1 3) retains master-only and matched rows, so counties present only in
* the Google file are not added; no _merge variable is created.
merge 1:1 county using ncgoogle_mobility, keep(1 3) nogenerate

* Sample size: total of the per-county 3-weekday sample counts, and its daily average
quietly summarize last3_sample
local sample3 = r(sum)
display "Total samples in last 3 days: `sample3'"
display "Samples per day:"
display `sample3' / 3

* Merge in RUCC data (currently disabled)
* merge 1:1 fips using rucc, keep(1) nogen
/*
** IN PROGRESS
program define quintile
xtile temp = trend, nq(5)
gen iso5=.
replace iso5=1 if temp==5
replace iso5=2 if temp==4
replace iso5=3 if temp==3
replace iso5=4 if temp==2
replace iso5=5 if temp==1
order iso5, a(trend)
la var iso5 "Social Distancing: Lowest (1) to Highest (5)"
drop temp
end
*/
// Save file for export and codebook
* Write the dataset in memory to CSV, overwriting any previous export
export delimited using "/Users/nabarun/Documents/GitHub/covidnc/data/nc_cell_tower_data_collapsed.csv", replace

* Dataset-level note; it is printed by the codebook's notes option below
note: Dataset for COVID-19 mobility analysis. Dataset created by N. Dasgupta.

* Write the codebook to a text file in the current working directory.
* A dedicated named log is used so this step neither fails when the user
* already has an unnamed log open nor closes that log. Any stale copy of the
* named log left over from an interrupted run is closed first.
capture log close codebook
log using "Codebook.txt", text replace name(codebook)
codebook, header notes
log close codebook