This repository has been archived by the owner on Jan 25, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathremedia.ado
executable file
·318 lines (238 loc) · 8.8 KB
/
remedia.ado
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
*! Version 1.2.0 Ishmail Azindoo Baako (IPA) Mar 18, 2017
/* This program copies SurveyCTO (scto) media files from the data source folder to a new folder
and renames them using the variables specified.
Please note the following for the current version of remedia:
1. Uses values, not value labels.
2. Can sort media files into up to three levels of subfolders specified with BY.
3. Uses the id and enumerator variables to name the new media file.
* Version 1.1.0 handles issues of missing data in the local storage
* Version 1.2.0 handles different formats in media files, e.g. audio files in 3gpp and m4a in an audio audit var
*/
// Request version-13 interpretation; capture keeps the file loading even if the version command fails
cap version 13
/*
remedia: copy the media files named in a string variable from a source folder
to a destination folder, renaming each copy to
	<id>_<enumerator><ext>				when id is unique
	<id>_<enumerator>_<resolve><ext>	when id is duplicated

Arguments
	varname			string variable holding the media file name ("media\<file>")
	[if] [in]		restrict the observations considered
	by(varlist)		optional, up to 3 variables; one level of subfolder per variable
	id(varname)		id variable used in the new file name
	enumerator()	enumerator variable used in the new file name
	from(string)	existing folder holding the downloaded media files
	to(string)		existing folder that receives the renamed copies
	resolve()		variable appended to the name when id is not unique

Errors
	2000	no observations, or the media variable is entirely missing
	198		id is not unique and resolve() was not specified
	459		id and resolve() together do not uniquely identify observations
	601		from() or to() folder does not exist
	103		more than three variables in by()

The data in memory are preserved on entry and restored on exit. Files that
already exist at the destination are left untouched and are not counted.
*/
program define remedia
	syntax varname(string) /*media variable*/ ///
		[if] [in], /*Conditions*/ ///
		[BY(varlist)] /*Variables for sorting media files*/ ///
		id(varname) /*ID variable*/ ///
		ENUMerator(varname) /*Enumerator Variable*/ ///
		from(string) /*Data Source Folder*/ ///
		to(string) /*Media Destination Folder*/ ///
		[RESOlve(varname)] /*Required if id var has dups*/

	qui {
		tempvar uid_dup fname mf_valid mf_miss ext

		/***********************************************************************
		Check syntax, source and destination folders
		***********************************************************************/

		// Check that dataset has observations
		if (_N==0) {
			noi di as err "No Observation"
			error 2000
		}

		// Check that media variable contains at least one non missing value
		cap assert mi(`varlist')
		if !_rc {
			noi di as err "`varlist' has all missing values"
			error 2000
		}

		// Preserve original data set; everything below works on a scratch copy
		preserve

		// Apply [in] and [if] condition
		if !mi("`in'") {
			keep `in'
		}
		if !mi("`if'") {
			keep `if'
		}

		// Drop observation without media
		drop if mi(`varlist')

		// Check that id variable is unique
		cap isid `id'
		if _rc == 459 {
			noi di in red "{p} Warning: The variable `id' does not uniquely identify observations with non missing values for `varlist'{smcl}"
			// uid_dup > 0 marks observations whose id occurs more than once
			duplicates tag `id', gen (`uid_dup')
			if !mi("`resolve'") {
				cap isid `id' `resolve'
				if !_rc {
					noi di "{p} Media files with duplicate `id' will be resolved using the variable `resolve'. {smcl}"
					// resolve values end up in file names, so strip path separators
					decap `resolve'
				}
				else {
					noi di as err "{p} resolve error: The variables `id' and `resolve' do not uniquely identify observations with non missing values for `varlist'. {smcl}"
					exit 459
				}
			}
			// Stop and insist the user specifies option resolve
			else {
				noi di as err "resolve option required"
				exit 198
			}
		}
		else {
			gen `uid_dup' = 0
		}

		// Check that source and destination folders exist.
		// direxists() is used instead of confirming "<dir>/nul", which only works on Windows.
		foreach path in "`from'" "`to'" {
			mata: st_local("dir_ok", strofreal(direxists(st_local("path"))))
			if !`dir_ok' {
				// noi is required: we are inside a quietly block
				noi di in red "Folder `path' not found"
				exit 601
			}
		}

		// Check that the variables in by are 3 or less
		if !mi("`by'") {
			local bycount: word count `by'
			if `bycount' > 3 {
				noi di in red "Too many variables specified with option by, Max is 3"
				exit 103
			}

			foreach fvar of local by {
				// Check that the by() vars have no missing values and warn user if otherwise
				cap assert !mi(`fvar') if !mi(`varlist')
				if _rc {
					noi di in red "Warning: `fvar' has missing values. Some Media Files will be logged to wrong folders"
				}
				// by() values become folder names, so strip path separators
				decap `fvar'
			}
		}

		noi di in green "This might take a while, please wait ..."

		/***********************************************************************
		Create Destination subfolders using the varlist in option BY
		***********************************************************************/

		// fname holds, per observation, the folder its media file is copied into
		gen `fname' = ""

		// Set the folder destination path as destination for media if by() is not specified
		if mi("`by'") {
			replace `fname' = "`to'"
		}
		// Else build one folder level per by() variable
		else {
			// Folder names are compared as strings, so convert every by() variable once
			foreach fvar of local by {
				tostring `fvar', force replace
			}

			local fvar1: word 1 of `by'
			levelsof `fvar1', local (fvar1_levels)
			foreach level of local fvar1_levels {
				remedia_mkdir `"`to'/`level'"'
				replace `fname' = "`to'/`level'" if `fvar1' == "`level'"

				// Create subfolders from var 2 in by()
				if `bycount' >= 2 {
					local fvar2: word 2 of `by'
					levelsof `fvar2' if `fvar1' == "`level'", local (fvar2_levels)
					foreach level2 of local fvar2_levels {
						remedia_mkdir `"`to'/`level'/`level2'"'
						replace `fname' = "`to'/`level'/`level2'" if `fvar2' == "`level2'" & `fvar1' == "`level'"

						// Create subfolders from var3 in by()
						if `bycount' == 3 {
							local fvar3: word 3 of `by'
							levelsof `fvar3' if `fvar2' == "`level2'" & `fvar1' == "`level'", local (fvar3_levels)
							foreach level3 of local fvar3_levels {
								remedia_mkdir `"`to'/`level'/`level2'/`level3'"'
								replace `fname' = "`to'/`level'/`level2'/`level3'" if `fvar3' == "`level3'" & ///
									`fvar2' == "`level2'" & `fvar1' == "`level'"
							}
						}
					}
				}
			}
		}

		/*******************************************************************
		Log Media Files into respective folders
		*******************************************************************/

		// Only values starting with "media" are treated as copyable files
		gen `mf_valid' = substr(`varlist', 1, 5) == "media"

		// remove the "media\" prefix from the media name
		replace `varlist' = subinstr(`varlist', "media\", "", .)

		// ext holds everything from the last "." onwards (the dot is included)
		gen `ext' = substr(`varlist', -(strpos(reverse(`varlist'), ".")), .)

		local mf_track 0		// files copied under a unique id
		local uid_track 0		// files copied under a duplicated id
		gen int `mf_miss' = 0	// 1 when the source file was not found in from()

		local N = _N
		forval mf = 1/`N' {
			local mf_copy = `mf_valid'[`mf']
			if `mf_copy' == 1 {
				local mf_id = `id'[`mf']
				local mf_enum = `enumerator'[`mf']
				local mf_sav = `fname'[`mf']
				local mf_file = `varlist'[`mf']
				local mf_dup = `uid_dup'[`mf']
				local mf_ext = `ext'[`mf']

				// Destination name; duplicated ids get the resolve value appended
				if `mf_dup' == 0 {
					local mf_target "`mf_sav'/`mf_id'_`mf_enum'`mf_ext'"
				}
				else {
					local uid_key = `resolve'[`mf']
					local mf_target "`mf_sav'/`mf_id'_`mf_enum'_`uid_key'`mf_ext'"
				}

				// Copy only when the destination file is not already there
				cap confirm file "`mf_target'"
				if _rc == 601 {
					cap copy "`from'/`mf_file'" "`mf_target'", replace
					local copy_rc = _rc
					if `copy_rc' == 601 {
						// Flag by observation number: works for string and numeric id alike
						replace `mf_miss' = 1 in `mf'
					}
					else if `copy_rc' == 0 {
						if `mf_dup' == 0 {
							local ++mf_track
						}
						else {
							local ++uid_track
						}
					}
					else {
						// Any other failure is reported rather than silently skipped
						noi di in red "Warning: could not copy `mf_file' (return code `copy_rc')"
					}
				}
			}
		}

		noi di in green "`mf_track' Media Files Logged"
		if `uid_track' > 0 {
			noi di "`uid_track' Media Files Duplicate on `id'. Differences resolved using variable `resolve'"
		}

		// Keep a copy so both reports below can drop observations independently
		tempfile mf_temp
		save `mf_temp', replace

		// Report values that do not start with "media"
		drop if `mf_valid'
		if _N > 0 {
			noi di as result _N as text in red " missing media file(s) from SurveyCTO server"
			noi di "id" _column(20) "enumerator"
			local N = _N
			forval mf = 1/`N' {
				local mf_id = `id'[`mf']
				local mf_enum = `enumerator'[`mf']
				noi di "`mf_id'" _column(20) "`mf_enum'"
			}
		}

		// Report files that were expected in from() but not found there
		use `mf_temp', clear
		drop if !`mf_miss' | mi(`mf_miss')
		if _N > 0 {
			noi di as result _N as text in red " missing media files from directory (`from')"
			noi di "id" _column(20) "enumerator"
			local N = _N
			forval mf = 1/`N' {
				local mf_id = `id'[`mf']
				local mf_enum = `enumerator'[`mf']
				noi di "`mf_id'" _column(20) "`mf_enum'"
			}
		}

		restore
	}
end

// Private helper: create folder `dir' unless it already exists.
program remedia_mkdir
	args dir
	mata: st_local("dir_found", strofreal(direxists(st_local("dir"))))
	if !`dir_found' {
		mkdir "`dir'"
	}
end
// decap varname
// Replaces every "/", "\" and ":" in a string variable with "_".
// Variables that are not string are left untouched.
program decap
	qui {
		args target

		// Nothing to do unless the variable is a string variable
		cap confirm string var `target'
		if _rc {
			exit
		}

		// Apply the three substitutions in one pass, in the order "/", "\", ":"
		replace `target' = subinstr(`target', "/", "_", .)
		replace `target' = subinstr(subinstr(`target', "\", "_", .), ":", "_", .)
	}
end