-
Notifications
You must be signed in to change notification settings - Fork 62
/
Do to SMCL.do
563 lines (459 loc) · 14.9 KB
/
Do to SMCL.do
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
/*
Purpose: Convert a pseudo-SMCL file to a SMCL file and do-file. Pseudo-SMCL is like
SMCL, with the following differences:
1. Pseudo-SMCL looks more like a do-file than a SMCL file, although it may
contain SMCL directives. Commands are converted to clickable {stata}
directives and comments are converted to text. Paragraph directives are
rare in pseudo-SMCL: the conversion handles most of that by converting
whitespace in the pseudo-SMCL file.
- Loops and other blocks are converted to text, but there's a clickable
"Click here to execute" below them that executes them. The blocks' code is
stored in a do-file associated with the resulting SMCL file.
2. There are more directives. They are specified through `subinstr'.
*/
version 10
/*
Hard-coded settings for interactive debugging (disabled):
c postk
loc module Concepts/Saved results
loc infile Pseudo-SMCL/`module'.do
loc smclfile SMCL/`module'.smcl
loc dofile Do/`module'.do
loc replace 1
*/
********************************************************************************
********************************PARSE PARAMETERS********************************
********************************************************************************
* Settings are parsed from the command-line arguments only when none of the
* setting locals is already filled in; otherwise the existing locals are used
* and the unset on/off switches default to 0.
if "`infile'`smclfile'`dofile'`trim'`viewsmcl'`viewdo'`replace'`preserve'" == "" & !`:length local subinstr' {
	* Put a comma in front of the arguments so -syntax- reads them as options.
	mata: st_local("0", ", " + st_local("0"))
	syntax, infile(string) smclfile(string) dofile(string) [trim(string) subinstr(string asis) viewsmcl viewdo replace preserve]
	* Turn each on/off option into a 0/1 indicator.
	foreach flag in viewsmcl viewdo replace preserve {
		local `flag' = ("``flag''" != "")
	}
}
else {
	foreach flag in viewsmcl viewdo replace preserve {
		if "``flag''" == "" {
			local `flag' 0
		}
	}
}
* The input must exist, the outputs must carry the right extensions, and the
* input may not double as one of the outputs.
confirm file "`infile'"
assert regexm("`smclfile'", "\.smcl$")
assert regexm("`dofile'", "\.do$")
assert !inlist("`infile'", "`smclfile'", "`dofile'")
foreach flag in viewsmcl viewdo replace preserve {
	assert inlist(``flag'', 0, 1)
}
* Without -replace-, refuse to overwrite existing output files.
if !`replace' {
	confirm new file "`smclfile'"
	confirm new file "`dofile'"
}
* -trim()- accepts any combination of the words commands and comments.
local ok commands comments
local trimcmds 0
local trimcomments 0
foreach what of local trim {
	if !`:list what in ok' {
		display as error "invalid option trim()"
		exit 198
	}
	if "`what'" == "commands" {
		local trimcmds 1
	}
	else if "`what'" == "comments" {
		local trimcomments 1
	}
}
* Pilfered from -cfout2-
* -subinstr()-
* Parse option subinstr() into two parallel lists, subfrom and subto. The loop
* expects quoted strings in the pattern "from" = "to", with a backslash
* between consecutive pairs; anything else sets syntaxerr.
* Keep a copy of the raw option text in a global.
glo subinstr "`subinstr'"
loc syntaxerr 0
loc len : length loc subinstr
* Each pass consumes one pair from the front of subinstr.
while `len' {
* "dq" for "double quotes"
* Take the next token, parsing on the equals sign; dq is 1 only if the token
* was enclosed in quotes, and an unquoted token is a syntax error.
gettoken from subinstr : subinstr, p("=") qed(dq)
if !`dq' ///
loc syntaxerr 1
* "cq" for "compound quotes"
* cq counts the double-quote characters inside the token. A token that
* contains any is appended to the list in compound quotes, otherwise in
* simple quotes. The counts are collected in subfromcq.
loc temp : subinstr loc from `"""' "", count(loc cq)
loc subfromcq `subfromcq' `cq'
if `cq' ///
loc subfrom "`subfrom' `"`from'"'"
else ///
loc subfrom "`subfrom' "`from'""
* "thisao" for "this assignment operator"
* The token after the from-string must be exactly the equals sign.
gettoken thisao subinstr : subinstr, p("=")
loc ao "="
if !`:list thisao == ao' ///
loc syntaxerr 1
* Take the replacement string, parsing on the backslash; it must be quoted
* as well. It is stored the same way as the from-string.
gettoken to subinstr : subinstr, p("\") qed(dq)
if !`dq' ///
loc syntaxerr 1
loc temp : subinstr loc to `"""' "", count(loc cq)
loc subtocq `subtocq' `cq'
if `cq' ///
loc subto "`subto' `"`to'"'"
else ///
loc subto "`subto' "`to'""
* thisbs holds the pair separator: it may be empty (nothing left to parse),
* otherwise it must be a single backslash.
gettoken thisbs subinstr : subinstr, p("\")
loc bs \
if `:length loc thisbs' & !`:list thisbs == bs' ///
loc syntaxerr 1
* Stop at the first error; otherwise measure what is left to parse.
if `syntaxerr' ///
continue, break
else ///
loc len : length loc subinstr
}
if `syntaxerr' {
di as err "option subinstr() invalid"
ex 198
}
* Copy the parsed lists into globals as well.
glo subfrom "`subfrom'"
glo subto "`subto'"
********************************************************************************
**********************************IMPORT FILE***********************************
* Variables created: line, ws, indent
********************************************************************************
if `preserve' {
	preserve
}
tempname fh
file open `fh' using "`infile'", read
clear
generate line = ""
* Load the input file into string variable line, one observation per read.
* The flag forces the first pass; after that the loop continues until the
* preceding read reported end-of-file, so the result of the final read is
* appended as an observation too.
local firstpass 1
while `firstpass' | !r(eof) {
	file read `fh' line
	set obs `=_N + 1'
	replace line = `"`macval(line)'"' in L
	local firstpass 0
}
file close `fh'
/*
* -subinstr()-
forv i = 1/`:list sizeof subfrom' {
foreach str in from to {
loc `str' : word `i' of `sub`str''
loc `str'cq : word `i' of `sub`str'cq'
if ``str'cq' ///
loc `str' "`"``str''"'"
else ///
loc `str' ""``str''""
}
qui replace line = subinstr(line, `from', `to', .)
}*/
* -subinstr()-
* Apply the substitutions parsed from option subinstr() to every line, then
* split lines at the {O} directive.
* Guard: stop if line already has the string type named by c(maxstrvarlen).
assert "`:type line'" != "str`c(maxstrvarlen)'"
* NOTE(review): 244 is presumably the string-variable width limit of the
* targeted Stata version - confirm before changing.
recast str244 line
mata:
// X holds the text of variable line for every observation.
X = st_sdata(., "line")
// Parallel lists: element i of subfrom is replaced by element i of subto.
subfrom = tokens(st_local("subfrom"))
subto = tokens(st_local("subto"))
// Replace every occurrence, one pair at a time, in list order.
for (i = 1; i <= cols(subfrom); i++) {
X = subinstr(X, subfrom[i], subto[i], .)
}
end
mata:
// X carries over from the Mata section above.
// "pstr" for "parse string"
pstr = "{O} "
// If no line contains pstr, just store the substituted text back. Otherwise
// break each line into one observation per piece, keeping the pieces of a
// line together and in their original order.
if (!any(strpos(X, pstr)))
st_sstore(., "line", X)
else {
// split collects the pieces; sort collects (source row, piece number) keys.
split = J(0, 1, "")
sort = J(0, 2, .)
n = 1::rows(X)
// Terminate every line with pstr so that its last piece is delimited too.
X = X :+ pstr
pstrlen = strlen(pstr)
// Largest number of pstr occurrences found in any single line.
max = max((strlen(X) - strlen(subinstr(X, pstr, "", .))) / pstrlen)
for(i = 1; i <= max; i++) {
// Extract the next piece from X.
// pos is 0 for rows with no pstr left; select() drops those rows.
pos = strpos(X, pstr)
piece = select(substr(X, 1, pos :- 1), pos)
// Add the piece to split.
split = split \ piece
// Remove the piece from X.
X = substr(X, pos :+ pstrlen, .)
// Sort order of piece
rows = rows(piece)
sort = sort \ select(n, pos), J(rows(piece), 1, i)
}
// Reorder the pieces by source row, then by piece number.
_collate(split, order(sort, 1..2))
// Grow the dataset to one observation per piece and store the pieces.
st_addobs(rows(split) - st_nobs())
st_sstore(., "line", split)
}
end
* Sanity checks left over from debugging.
assert line != "XXX"
count if strpos(strlower(line), "other data checks")
local n1 = r(N)
count if strpos(strlower(line), "problem set") & _n < 20
local n2 = r(N)
if `n1' & `n2' & !regexm(strlower("`infile'"), "alternative|working") {
	* The -stop- that used to follow is commented out, so this only
	* displays a blank line.
	display //stop
}
compress line
assert "`:type line'" != "str`c(maxstrvarlen)'"
* isstata is 1 from a STATACMD marker line onward and 0 from a MATACMD marker
* line onward; lines before the first marker count as Stata. The marker lines
* themselves are then removed.
generate isstata = cond(line == "STATACMD", 1, cond(line == "MATACMD", 0, .))
replace isstata = isstata[_n - 1] if missing(isstata)
replace isstata = 1 if missing(isstata)
drop if inlist(line, "STATACMD", "MATACMD")
* Tabs become spaces.
replace line = subinstr(line, char(9), " ", .)
* Keep the leading whitespace of each line in variable ws.
generate ws = regexs(0) if regexm(line, "^ *")
* Strip leading and trailing whitespace from the line itself.
replace line = trim(line)
capture program drop indent
* indent varlist, generate(newvar)
*
* Purpose: create string variable newvar holding, for each observation, the
* SMCL paragraph directive described by three numeric variables. varlist must
* name exactly three numeric variables; their values are the three numbers of
* a {p # # #} directive. Where the three numbers match one of the shortcut
* directives tabulated below, the shortcut is stored instead.
*
* Errors: exits with code 198 if any of the three variables holds a missing,
* negative, or noninteger value; -confirm new variable- fails if newvar
* already exists.
program indent
	syntax varlist(min=3 max=3 numeric), Generate(string)

	* Each variable must hold nonmissing, nonnegative integers.
	foreach var of local varlist {
		quietly count if `var' != floor(`var') | `var' < 0 | missing(`var')
		if r(N) {
			display as error "varlist: `var': " ///
				"nonmissing, nonnegative integer variable expected"
			exit 198
		}
	}

	confirm new variable `generate'

	gettoken p1var varlist : varlist
	gettoken p2var varlist : varlist
	gettoken p3var varlist : varlist

	* Default: the general directive spelled out from the three numbers.
	generate `generate' = "{p " + strofreal(`p1var') + " " + ///
		strofreal(`p2var') + " " + strofreal(`p3var') + "}"

	* Shortcut directives, each followed by the three numbers it stands for.
	#delimit ;
	local p
		{pstd} 4 4 2
		{psee} 4 13 2
		{phang} 4 8 2
		{pmore} 8 8 2
		{phang2} 8 12 2
		{pmore2} 12 12 2
		{phang3} 12 16 2
		{pmore3} 16 16 2
	;
	#delimit cr
	assert mod(`:list sizeof p', 4) == 0

	* Walk the table four tokens at a time: directive, then its three numbers.
	while `:list sizeof p' {
		gettoken direc p : p
		forvalues i = 1/3 {
			gettoken p`i' p : p
			confirm integer number `p`i''
		}
		* Write to the variable requested in generate(), not to a variable
		* that merely happens to be named indent.
		quietly replace `generate' = "`direc'" ///
			if `p1var' == `p1' & `p2var' == `p2' & `p3var' == `p3'
	}
end
* Build the paragraph directive for each line in variable indent: the first
* two numbers are the width of the leading whitespace plus 4, the third is 2.
generate p1 = strlen(ws) + 4
generate p2 = strlen(ws) + 4
generate p3 = 2
indent p1 p2 p3, generate(indent)
********************************************************************************
************************************COMMENTS************************************
* Variables created: comment
********************************************************************************
* comment is 0 for lines that are not comments, 1 for lines that start with
* the star comment indicator, and 2 for lines inside a slash-star block
* comment. Double-slash comment indicators aren't accounted for.
gen comment = regexm(line, "^\*")
* Block comments: a line is in a block comment if it opens one, or if the
* previous line was in one and did not close it.
gen incomment = regexm(line, "^/\*")
gen endcomment = regexm(line, "\*/$")
replace incomment = 1 if incomment[_n - 1] == 1 & !endcomment[_n - 1]
replace comment = 2 if incomment
drop incomment endcomment
* Remove comment indicators.
replace line = regexr(line, "^\*", "") if comment == 1
replace line = regexr(regexr(line, "^/\*", ""), "\*/$", "") if comment == 2
replace line = trim(line)
* -trim()-
* Either collapse internal whitespace, or preserve it by turning each run of
* two or more spaces into a {space N} directive.
if `trimcomments' {
	replace line = itrim(line) if comment
}
else {
	* Only runs of at least two spaces are converted. The {space N} directive
	* itself contains a single space, so testing for a single space would
	* match the text just inserted and the loop would never end.
	count if strpos(line, char(32) + char(32)) & comment
	while r(N) {
		* Each pass converts the first remaining run in every comment line.
		gen spaces = regexs(0) if regexm(line, " [ ]+") & comment
		replace line = subinstr(line, spaces, "{space " + strofreal(strlen(spaces)) + "}", 1) if spaces != "" & comment
		drop spaces
		count if strpos(line, char(32) + char(32)) & comment
	}
}
* Indent comments.
* toindent1: comment lines with real text, i.e. not blank, not made up only
* of {...} and {marker} directives, and not starting with the {* directive.
gen toindent1 = comment & !regexm(line, "^[`=char(13)'`=char(10)']*$") & ///
!regexm(line, "^({\.\.\.})?({marker [^}]+})?({\.\.\.})?$") & substr(line, 1, 3) != "{* "
* toindent2: the line does not already begin with a paragraph-type directive,
* and it does not continue a {pstd} paragraph begun on the previous line.
gen toindent2 = !regexm(line, "^[`=char(13)'`=char(10)']*{(hline|p|pstd|pmore[23]?|(p|p2colset) [0-9 ]+)}") & ///
(!toindent1[_n - 1] | indent != "{pstd}")
//(!comment[_n - 1] | regexm(line[_n - 1], "^[`=char(13)'`=char(10)']*$") | indent != "{pstd}")
gen toindent = toindent1 & toindent2
replace line = indent + line if toindent
* A differently indented line that follows comment text gets a paragraph of
* its own, after which the standard paragraph mode is restored.
replace line = "{p_end}" + line + "{p_end}{pstd}" if toindent & toindent1[_n - 1] & indent != "{pstd}"
//cou if strpos(line, "XXXXX")
//if r(N) stop
********************************************************************************
************************************COMMANDS************************************
* Variables created: cmd
* Note: Not dropping other variables for debugging purposes. They won't appear
* after this section.
********************************************************************************
* cmd flags nonblank lines that are not comments.
gen cmd = !comment & line != ""
* For commands the second paragraph number is 4 larger than the first; the
* paragraph directive in variable indent is rebuilt to reflect that.
replace p2 = p2 + 4 if cmd
drop indent
indent p1-p3, gen(indent)
* -trim()-
if `trimcmds' replace line = itrim(line) if cmd
* A block starts on a command line that ends in an open brace.
gen startblock = regexm(line, "{$") & cmd
* endblock is temporarily -1 on command lines that contain a close brace
* at the start of the line or after a character accepted by the bracket
* expression.
* NOTE(review): [^({BLOCK)] is a character class, not a group. It rejects a
* close brace preceded by any one of the characters ( { B L O C K ) - confirm
* this is the intended way to skip the {BLOCK} directive.
gen endblock = -(regexm(line, "(^|[^({BLOCK)])}") & cmd)
* The running sum of opens minus closes is the nesting depth after each line.
gen inblock = sum(startblock + endblock)
* After the next three commands all variables are 0 or 1, and embedded loops
* have startblock == 0 and endblock == 0: opens at depth greater than 1 and
* closes that leave the depth above 0 are cleared, then the sign of endblock
* is flipped.
replace startblock = 0 if inblock > 1
replace endblock = 0 if inblock
replace endblock = -endblock
* Final close braces
* The line holding the outermost close brace has depth 0; count it as part of
* the block, then reduce inblock to a 0/1 indicator.
replace inblock = 1 if !inblock & endblock
replace inblock = 1 if inblock
* linenum records the current line order.
gen linenum = _n
cap pr drop extendends
* extendends, start(varname) in(varname) end(varname)
*
* Purpose: widen flagged regions over adjacent nonblank lines. Nonblank lines
* directly before a line flagged in start() and nonblank lines directly after
* a line flagged in end() are pulled into the region: in() is set to 1 on
* them, and the start or end flag is moved to the outermost such line.
* The three options name existing variables, which are modified in place.
* The program also reads variables line and linenum from the data, and leaves
* the data sorted by linenum.
pr extendends
syntax, start(varname) in(varname) end(varname)
* Work in reverse line order, so that [_n - 1] is the following line and
* [_n + 1] is the preceding line.
gsort -linenum
tempvar newstart
gen `newstart' = 0
* Mark nonblank lines that directly precede a start line. Observations are
* processed in order, so the mark carries across a run of nonblank lines.
replace `newstart' = 1 if (`start'[_n - 1] == 1 | `newstart'[_n - 1] == 1) & line != ""
* Clear the start flag on lines whose preceding line is marked.
replace `start' = 0 if `newstart'[_n + 1] == 1
replace `in' = 1 if `newstart'
* Keep the mark only on the earliest line of each run.
replace `newstart' = 0 if `newstart'[_n + 1] == 1
sort linenum
replace `start' = 1 if `newstart'
* Same idea for ends, now in forward line order: mark nonblank lines that
* directly follow an end line and move the end flag to the last of them.
tempvar newend
gen `newend' = 0
replace `newend' = 1 if (`end'[_n - 1] == 1 | `newend'[_n - 1] == 1) & line != ""
replace `end' = 0 if `newend'[_n + 1] == 1
replace `in' = 1 if `newend'
replace `newend' = 0 if `newend'[_n + 1] == 1
replace `end' = 1 if `newend'
end
* Widen the brace blocks over adjacent nonblank lines.
extendends, start(startblock) in(inblock) end(endblock)

* #delimit regions: from a line switching the delimiter to a semicolon up to
* the line switching it back to cr. Each region is treated as a block.
gen startdelim = regexm(line, "^#(d|de|del|deli|delim|delimi|delimit) +;$") & cmd
gen delim = startdelim
gen enddelim = regexm(line, "^#(d|de|del|deli|delim|delimi|delimit) +cr;?$") & cmd
replace delim = 1 if delim[_n - 1] == 1 & !enddelim[_n - 1]
extendends, start(startdelim) in(delim) end(enddelim)
* Inside an existing block, a region may only start or end where the block does.
assert !(inblock & ((startdelim & !startblock) | (enddelim & !endblock)))
replace startblock = 1 if startdelim
replace inblock = 1 if delim
replace endblock = 1 if enddelim

* Joined lines: command lines ending in the triple-slash continuation, plus
* the line that completes the command. Each joined command is a block.
gen join = regexm(line, "///$") & cmd
* The neighbour is compared with 1 explicitly. At the first observation the
* previous value is missing, and a bare logical negation of missing is 0,
* which would leave a continuation that starts on the first line without a
* start flag.
gen startjoin = join & join[_n - 1] != 1
gen endjoin = !join & join[_n - 1] == 1
replace join = 1 if endjoin
extendends, start(startjoin) in(join) end(endjoin)
assert !(inblock & ((startjoin & !startblock) | (endjoin & !endblock)))
replace startblock = 1 if startjoin
replace inblock = 1 if join
replace endblock = 1 if endjoin

* "direc" for "{BLOCK} directive"
* Consecutive command lines that begin with {BLOCK} form one block.
gen direc = regexm(line, "^{BLOCK}") & cmd
* Same explicit comparison as above, in both directions: without it a run
* that begins on the first line gets no start flag, and a run that reaches
* the last line gets no end flag, so its block in the do-file is never
* closed and no link is written for it.
gen startdirec = direc & direc[_n - 1] != 1
gen enddirec = direc & direc[_n + 1] != 1
assert !(inblock & ((startdirec & !startblock) | (enddirec & !endblock)))
replace startblock = 1 if startdirec
replace inblock = 1 if direc
replace endblock = 1 if enddirec
* The directive itself is not part of the command text.
replace line = regexr(line, "^{BLOCK}", "") if cmd

* Number the blocks in order of appearance.
gen blocknum = sum(startblock)
* The do-file is written only if the input contains at least one block. It
* holds one branch per block, selected by the block number passed as the
* first argument.
count if startblock
if r(N) {
* Write do-file.
if "`:type line'" == "str`c(maxstrvarlen)'" {
di as err "line too long"
ex 198
}
file open `fh' using "`dofile'", w replace
* Header of the generated do-file: it reads the block number into local
* example, remembers whether trace was on and switches it off, and defines
* local mata as the text that opens a Mata block.
#d ;
file w `fh'
"args example" _n(2)
`"loc trace = c(trace) == "on""' _n
"if " _char(96) "trace' set trace off" _n(2)
"local mata mata:" _n(2)
;
#d cr
forv i = 1/`=_N' {
if inblock[`i'] {
* A change in block number marks the first line of a new block.
if blocknum[`i'] != blocknum[`i' - 1] {
* Open -if- block.
* Blocks numbered above 1 are chained with -else if-.
file w `fh' (cond(blocknum[`i'] > 1, "else ", "")) "if " ///
_char(96) "example' == " (blocknum[`i']) " {" _n
* -set trace on-
file w `fh' _tab "if " _char(96) "trace' set trace on" _n
* Begin -noisily-.
* Stata blocks are wrapped in braces; Mata blocks are opened through the
* local mata defined in the header. The language of the block is taken
* from its first line and reused when the block is closed.
file w `fh' _tab "noi "
loc isstata = isstata[`i']
if `isstata' ///
file w `fh' "{"
else ///
file w `fh' _char(96) "mata'"
file w `fh' _n(2)
}
* Write the line with its original leading whitespace.
loc line = line[`i']
file w `fh' `"`=ws[`i']'`macval(line)'"' _n
if endblock[`i'] {
* End -noisily-.
file w `fh' _n _tab (cond(`isstata', "}", "end")) _n
* -set trace off-
file w `fh' _tab "if " _char(96) "trace' set trace off" _n
* Close -if- block.
file w `fh' "}" _n
}
}
}
file close `fh'
}
* Bold blocks.
replace line = "{cmd}" + line + "{txt}" if inblock & line != ""
* -trim()-
* spaceline is the display version of each command. Unless commands are
* trimmed, every run of two or more spaces in it becomes a {space N}
* directive so that the spacing survives in the viewer.
gen spaceline = line if cmd
if !`trimcmds' {
	* Only runs of at least two spaces are converted. The {space N} directive
	* itself contains a single space, so testing for a single space would
	* match the text just inserted and the loop would never end.
	count if strpos(spaceline, char(32) + char(32))
	while r(N) {
		* Each pass converts the first remaining run in every command.
		gen spaces = regexs(0) if regexm(spaceline, " [ ]+") & cmd
		replace spaceline = subinstr(spaceline, spaces, "{space " + strofreal(strlen(spaces)) + "}", 1) ///
		if spaces != "" & cmd
		drop spaces
		count if strpos(spaceline, char(32) + char(32))
	}
}
* Enclose commands outside blocks in {stata} directives,
* adding indent and internal spaces.
* When the display version equals the command, the plain form of the
* directive is used; otherwise the display version is given as link text.
gen nointernal = line == spaceline
replace line = indent + "{bf:{" + cond(isstata, "stata", "matacmd") + ///
" `" + `"""' + line + `"""' + "'}}{p_end}" ///
if cmd & !inblock & nointernal
replace line = indent + "{bf:{" + cond(isstata, "stata", "matacmd") + ///
" `" + `"""' + line + `"""' + "':" + spaceline + "}}{p_end}" ///
if cmd & !inblock & !nointernal
drop nointernal
* Add indent and internal spaces to comments to blocks.
* NOTE(review): spaceline is filled in for command lines only, so a nonblank
* line inside a block that is not a command is reduced to its indent plus
* {p_end} here - confirm that this is intended.
replace line = indent + spaceline + "{p_end}" if inblock & line != ""
drop spaceline
* Close the paragraph of comment text that is directly followed by a command.
replace line = line + "{p_end}" if comment & (toindent | toindent1[_n - 1]) & cmd[_n + 1] == 1
* Add "Click here to execute" after blocks.
* Each block-ending line gets three blank copies after it.
expand 4 if endblock, gen(copy)
replace line = "" if copy
* So "Click here to execute" is not written multiple times
replace endblock = 0 if copy
* So "Click here to execute" is not indented
replace indent = "{pstd}" if copy
sort linenum copy
* The link goes on the second copy, leaving a blank line between the block
* and the link. It runs the do-file with the block number as argument.
replace line = indent + "{stata `" + `""run "`dofile'" "' + strofreal(blocknum) + `"""' + "':Click here to execute.}" ///
if endblock[_n - 2] == 1
* Drop blank copies that would be followed by another blank line.
drop if copy & line == "" & line[_n + 1] == ""
drop copy
********************************************************************************
***********************************FINISH UP************************************
********************************************************************************
* Write .smcl file.
* Refuse to write if line has the string type named by c(maxstrvarlen).
if "`:type line'" == "str`c(maxstrvarlen)'" {
	display as error "line too long"
	exit 198
}
file open `fh' using "`smclfile'", write replace
file write `fh' "{smcl}" _n "{txt}{...}" _n
* One output line per observation; no newline after the last one.
local nrows = _N
forvalues row = 1/`nrows' {
	local text = line[`row']
	if `row' < `nrows' {
		file write `fh' `"`macval(text)'"' _n
	}
	else {
		file write `fh' `"`macval(text)'"'
	}
}
file close `fh'
* Optionally open the results.
if `viewsmcl' {
	view "`smclfile'"
}
if `viewdo' {
	doedit "`dofile'"
}