-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathIPEDS12MoEnrl02to18.do
195 lines (163 loc) · 7.98 KB
/
IPEDS12MoEnrl02to18.do
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
// Start from a clean session: empty memory, no --more-- pauses, and a
// cleared Results window.
clear all
set more off
cls
// Use this code to download, build, and save to the local computer
// data from the 12-MONTH ENROLLMENT (EFFY) survey at the US DOE's
// Integrated Postsecondary Education Data System.
// Dec 2019: Adam Ross Nelson - Updated to include 2018 datafiles.
// May/2019: Adam Ross Nelson - Updated to include 2017 datafiles.
// Jan/2018: Naiya Patel - Edited/cleaned up code using rename function.
// Oct/2017: Adam Ross Nelson - Updated to include 2016 datafiles.
// Oct/2017: Adam Ross Nelson - Updated to use sshnd file picker.
// Oct/2017: Adam Ross Nelson - GitHub ReBuild.
// Apr/2017: Adam Ross Nelson - Initial build.
/*#############################################################################
Maintained/more information at:
https://github.com/adamrossnelson/StataIPEDSAll
##############################################################################*/
// Utilizes packaged version of sshnd (interactive file picker).
// Stable 1.0 version of sshnd documentation available at:
// https://github.com/adamrossnelson/sshnd/tree/1.0
// Run the sshnd interactive file picker straight from GitHub.
do https://raw.githubusercontent.com/adamrossnelson/sshnd/master/sshnd.do

// Close stray log files; -capture- keeps this from failing when none is open.
capture log close

// Append to the log file established by sshnd (path held in global loggbl).
log using "$loggbl", append

// Spacer for the output: two carriage-return/line-feed pairs, shown later
// with -di `sp'-.
local sp char(13) char(10) char(13) char(10)

// Enforce version compatibility.
version 13

// Confirm working directory.
display c(pwd)
// ---------------------------------------------------------------------------
// One pass per survey year (2002-2018):
//   1. download and unzip the NCES data file and its labelling -do- file,
//   2. import the CSV and run the (patched) NCES -do- file to label it,
//   3. harmonize variable names across years,
//   4. split the long file into ALL / UGR / GRA level files,
//   5. merge the three level files into one wide file keyed on unitid.
// Relies on the local `sp' (output spacer) defined before this loop.
// ---------------------------------------------------------------------------
forvalues yindex = 2002 / 2018 {
    // Stata 13 introduced support for copy to work with https.
    // Use command -update all- if Stata 13 and copy returns an error.
    // Explicit destination names plus -replace- allow the script to be
    // re-run in a folder that already holds an earlier download; without
    // -replace- the copy aborts when the zip file is already present.
    copy "https://nces.ed.gov/ipeds/datacenter/data/EFFY`yindex'_Data_Stata.zip" ///
        "EFFY`yindex'_Data_Stata.zip", replace
    unzipfile EFFY`yindex'_Data_Stata.zip, replace
    copy "https://nces.ed.gov/ipeds/datacenter/data/EFFY`yindex'_Stata.zip" ///
        "EFFY`yindex'_Stata.zip", replace
    unzipfile EFFY`yindex'_Stata.zip, replace

    // The NCES provided -do- files have some lines that need to be removed
    // before we can call them from the master -do- file. The first
    // occurrence of "insheet" and every occurrence of "save" are commented
    // out, then the file is overwritten in place.
    scalar fcontents = fileread("effy`yindex'.do")
    scalar fcontents = subinstr(fcontents, "insheet", "// insheet", 1)
    scalar fcontents = subinstr(fcontents, "save", "// save", .)
    scalar byteswritten = filewrite("effy`yindex'.do", fcontents, 1)

    // Naming conventions changed. Manage evolving name conventions.
    // 2002 to 2006 there are no revised survey data files.
    // Most recent year often does not have revised survey file.
    if inlist(`yindex', 2002, 2003, 2004, 2005, 2006, 2018) {
        import delimited effy`yindex'_data_stata.csv, clear
    }
    else {
        import delimited effy`yindex'_rv_data_stata.csv, clear
    }

    // Run the NCES provided do files.
    di "QUIET RUN OF effy`yindex'.do"       // Provide user information for log file.
    qui do effy`yindex'.do                  // Quietly run NCES provided do files.

    // Drop the imputation fields and lstudy, which is redundant with effylev.
    drop x*                                 // Remove imputation flags, reduce filesize.
    drop lstudy                             // lstudy not consistent. Remove to simplify.

    // Make adjustments for changes in variable names.
    if `yindex' < 2008 {
        rename fyrace01 efynralm            // Nonresident alien men
        rename fyrace02 efynralw            // Nonresident alien women
        rename fyrace13 efyunknm            // Race/ethnicity unknown men
        rename fyrace14 efyunknw            // Race/ethnicity unknown women
        rename fyrace15 efytotlm            // Grand total men
        rename fyrace16 efytotlw            // Grand total women
        rename fyrace17 efynralt            // Nonresident alien total
        rename fyrace23 efyunknt            // Race/ethnicity unknown total
        rename fyrace24 efytotlt            // Grand total
    }

    // Save a copy of the effy file with all three levels for later reference.
    saveold "effy`yindex'_data_stata.dta", version(13) replace
    di `sp'                                 // Spacer for the output.

    /*######################################################################
         This block of code reshapes the data. reshape (command)
         might be a better approach. Advantage of not using reshape
         is opportunity to modify data labels for better
         identification of variables with -desc-, -codebook-.
    #######################################################################*/
    // Each student level is prepared the same way, so the work is done in
    // one loop. The effylev code and the level tag are paired by position:
    //   1 -> all (ALL students), 2 -> ugr (Undergraduate), 4 -> gra (Graduate).
    // The same code drives both the log message and the -keep-, so the
    // message always reports the value actually used (the graduate message
    // previously said 3 while the filter used 4).
    local levcodes 1 2 4
    local levtags  all ugr gra
    forvalues k = 1/3 {
        local code : word `k' of `levcodes'
        local tag  : word `k' of `levtags'
        local TAG = upper("`tag'")

        di " Keeping only effylev == `code' to prepare the `TAG' version of this file"
        // Every level starts from the saved all-levels file for this year.
        use effy`yindex'_data_stata.dta, clear
        keep if effylev == `code'

        // Suffix every variable with the level tag, then restore the merge
        // key name and drop the now-constant level indicator.
        rename * *`tag'
        rename unitid`tag' unitid
        drop effylev*

        // Add the level tag as a prefix to all of the variable labels, which
        // makes identifying the level in wide format more intuitive.
        // Compound quotes keep labels that contain a double quote intact.
        foreach varname of varlist _all {
            local templab : variable label `varname'
            label variable `varname' `"`TAG' `templab'"'
        }
        // Restores the unitid variable label to default.
        label variable unitid "Unique identification number of the institution"
        saveold "effy`yindex'_data_stata.`tag'.dta", version(13) replace
    }
    di `sp'                                 // Spacer for the output.
    /*######################################################################
         Note: End of code block used to reshape.
    #######################################################################*/

    // Rebuild the effy data in wide format.
    di " Rebuilding the WIDE version of this file"
    use effy`yindex'_data_stata.all.dta, clear
    merge 1:1 unitid using "effy`yindex'_data_stata.ugr.dta", nogenerate
    merge 1:1 unitid using "effy`yindex'_data_stata.gra.dta", nogenerate
    gen int isYr = `yindex'                 // Add the isYr index for later merge.
    order isYr, after(unitid)               // Order isYr after unitid, easier browsing.
    saveold "effy`yindex'_wide_data_stata.dta", version(13) replace
    di `sp'
}
// Build the multi-year panel data set: open the most recent wide file as the
// base, then stack each earlier year beneath it, newest to oldest.
use effy2018_wide_data_stata.dta, clear
forvalues panelyr = 2017(-1)2002 {
    append using effy`panelyr'_wide_data_stata.dta, force
}
// Correct duplicate value labels. Stata itself tolerates them, but they cause
// errors when the data are used in other settings (e.g. Python Pandas).
// -labelbook, length(12)- leaves in r(nuniq) the value labels whose text is
// not unique within 12 characters; -numlabel ..., add- then prefixes those
// labels with their numeric values. Keep these three statements in this
// order: the last one consumes results left behind by the first.
// Documentation: https://stackoverflow.com/a/46038793/9572143
quietly labelbook, length(12)
quietly return list, all
quietly numlabel `r(nuniq)', add
// Step up one directory level, shrink storage types, and stamp the dataset
// with provenance information before writing the finished panel.
cd ..
compress
label data "PanelBuildInfo: https://github.com/adamrossnelson/StataIPEDSAll/tree/master"
notes _dta: "PanelBuildInfo: https://github.com/adamrossnelson/StataIPEDSAll/tree/master"
notes _dta: "Panel built on `c(current_date)'"

// Write the panel to the path held in global dtagbl, in Stata 13 format.
saveold "$dtagbl", version(13) replace

// Completion banner. -noisily- forces the lines to show even when the
// do-file is being run quietly.
noisily display "#####################################################################"
noisily display ""
noisily display " Saved $dtagbl"
noisily display ""
noisily display "######################################################################"

log close