diff --git a/src/ado_files/iebaltab.ado b/src/ado_files/iebaltab.ado index b752efdf..b10f5ba8 100644 --- a/src/ado_files/iebaltab.ado +++ b/src/ado_files/iebaltab.ado @@ -1,8 +1,8 @@ -*! version 5.4 15DEC2017 DIME Analytics lcardosodeandrad@worldbank.org - +*! version 5.5 26APR2018 DIME Analytics lcardosodeandrad@worldbank.org + capture program drop iebaltab program iebaltab - + syntax varlist(numeric) [if] [in], /// /// /*Group variable*/ /// @@ -31,7 +31,7 @@ BALMISS(string) /// BALMISSReg(string) /// COVMISS(string) /// - COVMISSReg(string) /// + COVMISSReg(string) /// MISSMINmean(numlist min=1 max=1 >0) /// weight(string) /// /// @@ -41,8 +41,11 @@ FNOOBS /// /// /*Output display*/ /// + NOTtest /// + NORMDiff /// PTtest /// PFtest /// + FEQTest /// PBoth /// STDev /// STARlevels(numlist descending min=3 max=3 >0 <1) /// @@ -65,245 +68,254 @@ SAVEBRowse /// REPLACE /// ] - - - ***************TODO*************** - * 1. Add number of clusters - + + ********POTENTIAL UPDATES********* *1. Implement option for bootstrap - + ********HELPFILE TODO********* *1. 
Explain difference in se between group by itself and the standard errors used in the t-test - - preserve + + preserve qui { - + *Set minimum version for this command version 11 - + *Remove observations excluded by if and in if ("`if'`in'"!="") { keep `if' `in' - } - + } + if 1 { /*********************************************** ************************************************ - + Set initial constants - + ************************************************* ************************************************/ - + *Create local for balance vars with more descriptive name local balancevars `varlist' - + ** Column Options - + *Is option control() used: - if "`control'" == "" local CONTROL_USED = 0 - if "`control'" != "" local CONTROL_USED = 1 - + if "`control'" == "" local CONTROL_USED = 0 + if "`control'" != "" local CONTROL_USED = 1 + *Is option order() used: - if "`order'" == "" local ORDER_USED = 0 - if "`order'" != "" local ORDER_USED = 1 - + if "`order'" == "" local ORDER_USED = 0 + if "`order'" != "" local ORDER_USED = 1 + *Is option grpcodes used: - if "`grpcodes'" == "" local NOGRPLABEL_USED = 0 - if "`grpcodes'" != "" local NOGRPLABEL_USED = 1 - + if "`grpcodes'" == "" local NOGRPLABEL_USED = 0 + if "`grpcodes'" != "" local NOGRPLABEL_USED = 1 + *Is option nolabel used: - if "`grplabels'" == "" local GRPLABEL_USED = 0 - if "`grplabels'" != "" local GRPLABEL_USED = 1 - + if "`grplabels'" == "" local GRPLABEL_USED = 0 + if "`grplabels'" != "" local GRPLABEL_USED = 1 + *Is option total() used: - if "`total'" == "" local TOTAL_USED = 0 - if "`total'" != "" local TOTAL_USED = 1 - + if "`total'" == "" local TOTAL_USED = 0 + if "`total'" != "" local TOTAL_USED = 1 + *Is option totallable() used: - if "`totallabel'" == "" local TOTALLABEL_USED = 0 - if "`totallabel'" != "" local TOTALLABEL_USED = 1 - - - ** Row Options - + if "`totallabel'" == "" local TOTALLABEL_USED = 0 + if "`totallabel'" != "" local TOTALLABEL_USED = 1 + + + ** Row Options + *Is option total() used: - 
if "`rowvarlabels'" == "" local ROWVARLABEL_USED = 0 - if "`rowvarlabels'" != "" local ROWVARLABEL_USED = 1 - + if "`rowvarlabels'" == "" local ROWVARLABEL_USED = 0 + if "`rowvarlabels'" != "" local ROWVARLABEL_USED = 1 + *Is option totallable() used: - if "`rowlabels'" == "" local ROWLABEL_USED = 0 - if "`rowlabels'" != "" local ROWLABEL_USED = 1 - + if "`rowlabels'" == "" local ROWLABEL_USED = 0 + if "`rowlabels'" != "" local ROWLABEL_USED = 1 + *Is option totallable() used: if "`onenrow'" != "" local onerow = "onerow" //Old name still supported for backward compatibility - if "`onerow'" == "" local ONEROW_USED = 0 - if "`onerow'" != "" local ONEROW_USED = 1 - - - + if "`onerow'" == "" local ONEROW_USED = 0 + if "`onerow'" != "" local ONEROW_USED = 1 + + + ** Stats Options - + *Is option ftest used: - if "`ftest'" == "" local FTEST_USED = 0 - if "`ftest'" != "" local FTEST_USED = 1 + if "`ftest'" == "" local FTEST_USED = 0 + if "`ftest'" != "" local FTEST_USED = 1 *Is option fmiss used: - if "`fmissok'" == "" local F_MISS_OK = 0 - if "`fmissok'" != "" local F_MISS_OK = 1 - + if "`fmissok'" == "" local F_MISS_OK = 0 + if "`fmissok'" != "" local F_MISS_OK = 1 + *Is option fnoobs used: - if "`fnoobs'" == "" local F_NO_OBS = 0 - if "`fnoobs'" != "" local F_NO_OBS = 1 - + if "`fnoobs'" == "" local F_NO_OBS = 0 + if "`fnoobs'" != "" local F_NO_OBS = 1 + *Is option fixedeffect() used: - if "`fixedeffect'" == "" local FIX_EFFECT_USED = 0 - if "`fixedeffect'" != "" local FIX_EFFECT_USED = 1 - + if "`fixedeffect'" == "" local FIX_EFFECT_USED = 0 + if "`fixedeffect'" != "" local FIX_EFFECT_USED = 1 + *Is option covariates() used: - if "`covariates'" == "" local COVARIATES_USED = 0 - if "`covariates'" != "" local COVARIATES_USED = 1 - + if "`covariates'" == "" local COVARIATES_USED = 0 + if "`covariates'" != "" local COVARIATES_USED = 1 + *Is option covarmissok used: - if "`covarmissok'" == "" local COVARMISSOK_USED = 0 - if "`covarmissok'" != "" local COVARMISSOK_USED = 1 - 
+ if "`covarmissok'" == "" local COVARMISSOK_USED = 0 + if "`covarmissok'" != "" local COVARMISSOK_USED = 1 + *Is option cluster() used: - if "`vce'" == "" local VCE_USED = 0 - if "`vce'" != "" local VCE_USED = 1 - + if "`vce'" == "" local VCE_USED = 0 + if "`vce'" != "" local VCE_USED = 1 + *Is option balmiss() used: - if "`balmiss'" == "" local BALMISS_USED = 0 - if "`balmiss'" != "" local BALMISS_USED = 1 - + if "`balmiss'" == "" local BALMISS_USED = 0 + if "`balmiss'" != "" local BALMISS_USED = 1 + *Is option missreg() used: - if "`balmissreg'" == "" local BALMISSREG_USED = 0 - if "`balmissreg'" != "" local BALMISSREG_USED = 1 - - *Is option covmiss() used: - if "`covmiss'" == "" local COVMISS_USED = 0 - if "`covmiss'" != "" local COVMISS_USED = 1 - + if "`balmissreg'" == "" local BALMISSREG_USED = 0 + if "`balmissreg'" != "" local BALMISSREG_USED = 1 + + *Is option covmiss() used: + if "`covmiss'" == "" local COVMISS_USED = 0 + if "`covmiss'" != "" local COVMISS_USED = 1 + *Is option covmissreg() used: - if "`covmissreg'" == "" local COVMISSREG_USED = 0 - if "`covmissreg'" != "" local COVMISSREG_USED = 1 - + if "`covmissreg'" == "" local COVMISSREG_USED = 0 + if "`covmissreg'" != "" local COVMISSREG_USED = 1 + *Is option missminmean() used: - if "`missminmean'" == "" local MISSMINMEAN_USED = 0 - if "`missminmean'" != "" local MISSMINMEAN_USED = 1 - + if "`missminmean'" == "" local MISSMINMEAN_USED = 0 + if "`missminmean'" != "" local MISSMINMEAN_USED = 1 + *Is option starlevels() used: - if "`starlevels'" == "" local STARLEVEL_USED = 0 - if "`starlevels'" != "" local STARLEVEL_USED = 1 - + if "`starlevels'" == "" local STARLEVEL_USED = 0 + if "`starlevels'" != "" local STARLEVEL_USED = 1 + *Is option starsnoadd used: - if "`starsnoadd'" == "" local STARSNOADD_USED = 0 - if "`starsnoadd'" != "" local STARSNOADD_USED = 1 - + if "`starsnoadd'" == "" local STARSNOADD_USED = 0 + if "`starsnoadd'" != "" local STARSNOADD_USED = 1 + + *Is option nottest used: + if 
"`nottest'" == "" local TTEST_USED = 1 + if "`nottest'" != "" local TTEST_USED = 0 + *Is option pttest() used: - if "`pttest'" == "" local PTTEST_USED = 0 - if "`pttest'" != "" local PTTEST_USED = 1 - + if "`pttest'" == "" local PTTEST_USED = 0 + if "`pttest'" != "" local PTTEST_USED = 1 + *Is option pftest() used: - if "`pftest'" == "" local PFTEST_USED = 0 - if "`pftest'" != "" local PFTEST_USED = 1 - + if "`pftest'" == "" local PFTEST_USED = 0 + if "`pftest'" != "" local PFTEST_USED = 1 + *Is option pboth() used: - if "`pboth'" == "" local PBOTH_USED = 0 - if "`pboth'" != "" local PBOTH_USED = 1 + if "`pboth'" == "" local PBOTH_USED = 0 + if "`pboth'" != "" local PBOTH_USED = 1 if `PBOTH_USED' == 1 local PTTEST_USED = 1 if `PBOTH_USED' == 1 local PFTEST_USED = 1 *Is option pftest() used: - if "`stdev'" == "" local STDEV_USED = 0 - if "`stdev'" != "" local STDEV_USED = 1 - + if "`stdev'" == "" local STDEV_USED = 0 + if "`stdev'" != "" local STDEV_USED = 1 + *Is option weight() used: - if "`weight'" == "" local WEIGHT_USED = 0 - if "`weight'" != "" local WEIGHT_USED = 1 - - - ** Output Options - + if "`weight'" == "" local WEIGHT_USED = 0 + if "`weight'" != "" local WEIGHT_USED = 1 + + *Is option feqtest() user: + if "`feqtest'" == "" local FEQTEST_USED = 0 + if "`feqtest'" != "" local FEQTEST_USED = 1 + + *Is option normdiff() used: + if "`normdiff'" == "" local NORMDIFF_USED = 0 + if "`normdiff'" != "" local NORMDIFF_USED = 1 + + + ** Output Options + *Is option format() used: - if "`format'" == "" local FORMAT_USED = 0 - if "`format'" != "" local FORMAT_USED = 1 - + if "`format'" == "" local FORMAT_USED = 0 + if "`format'" != "" local FORMAT_USED = 1 + *Is option save() used: - if "`save'" == "" local SAVE_USED = 0 - if "`save'" != "" local SAVE_USED = 1 - + if "`save'" == "" local SAVE_USED = 0 + if "`save'" != "" local SAVE_USED = 1 + *Is option savetex() used: - if "`savetex'" == "" local SAVE_TEX_USED = 0 - if "`savetex'" != "" local SAVE_TEX_USED = 1 - + 
if "`savetex'" == "" local SAVE_TEX_USED = 0 + if "`savetex'" != "" local SAVE_TEX_USED = 1 + *Is option texnotewidth() used: if "`texnotewidth'" == "" local NOTEWIDTH_USED = 0 if "`texnotewidth'" != "" local NOTEWIDTH_USED = 1 - + *Is option texnotewidth() used: if "`texcaption'" == "" local CAPTION_USED = 0 if "`texcaption'" != "" local CAPTION_USED = 1 - + *Is option texnotewidth() used: if "`texlabel'" == "" local LABEL_USED = 0 if "`texlabel'" != "" local LABEL_USED = 1 - + *Is option texdocument() used: if "`texdocument'" == "" local TEXDOC_USED = 0 if "`texdocument'" != "" local TEXDOC_USED = 1 - + *Is option texlinespace() used: if "`texvspace'" == "" local TEXVSPACE_USED = 0 if "`texvspace'" != "" local TEXVSPACE_USED = 1 - + *Is option texcolwidth() used: if "`texcolwidth'" == "" local TEXCOLWIDTH_USED = 0 if "`texcolwidth'" != "" local TEXCOLWIDTH_USED = 1 - + *Is option browse() used: - if "`browse'" == "" local BROWSE_USED = 0 - if "`browse'" != "" local BROWSE_USED = 1 + if "`browse'" == "" local BROWSE_USED = 0 + if "`browse'" != "" local BROWSE_USED = 1 *Is option restore() used: - if "`savebrowse'" == "" local SAVE_BROWSE_USED = 0 - if "`savebrowse'" != "" local SAVE_BROWSE_USED = 1 + if "`savebrowse'" == "" local SAVE_BROWSE_USED = 0 + if "`savebrowse'" != "" local SAVE_BROWSE_USED = 1 *Is option restore() used: - if "`replace'" == "" local REPLACE_USED = 0 - if "`replace'" != "" local REPLACE_USED = 1 + if "`replace'" == "" local REPLACE_USED = 0 + if "`replace'" != "" local REPLACE_USED = 1 *Is option tablenote() used: - if "`tblnote'" == "" local NOTE_USED = 0 - if "`tblnote'" != "" local NOTE_USED = 1 - + if "`tblnote'" == "" local NOTE_USED = 0 + if "`tblnote'" != "" local NOTE_USED = 1 + *Is option notecombine() used: - if "`notecombine'" == "" local NOTECOMBINE_USED = 0 - if "`notecombine'" != "" local NOTECOMBINE_USED = 1 - + if "`notecombine'" == "" local NOTECOMBINE_USED = 0 + if "`notecombine'" != "" local NOTECOMBINE_USED = 1 + *Is 
option notablenote() used: - if "`tblnonote'" == "" local NONOTE_USED = 0 - if "`tblnonote'" != "" local NONOTE_USED = 1 - - + if "`tblnonote'" == "" local NONOTE_USED = 0 + if "`tblnonote'" != "" local NONOTE_USED = 1 + + /*********************************************** ************************************************ - + Prepare a list of group variables - + ************************************************* - ************************************************/ - - cap confirm numeric variable `grpvar' - + ************************************************/ + + cap confirm numeric variable `grpvar' + if _rc != 0 { - + *Test for commands not allowed if grpvar is a string variable - + if `CONTROL_USED' == 1 { di as error "{pstd}The option control() can only be used if variable {it:`grpvar'} is a numeric variable. Use {help encode} to generate a numeric version of variable {it:`grpvar'}. It is best practice to store all categorical variables as labeled numeric variables.{p_end}" error 198 @@ -320,54 +332,54 @@ qui { di as error "{pstd}The option grplabels() can only be used if variable {it:`grpvar'} is a numeric variable. Use {help encode} to generate a numeric version of variable {it:`grpvar'}. It is best practice to store all categorical variables as labeled numeric variables.{p_end}" error 198 } - - *Generate a encoded tempvar version of grpvar - tempvar grpvar_code + + *Generate a encoded tempvar version of grpvar + tempvar grpvar_code encode `grpvar' , gen(`grpvar_code') - + *replace the grpvar local so that it uses the tempvar instead local grpvar `grpvar_code' - + } - + *Remove observations with a missing value in grpvar() drop if `grpvar' >= . 
- + *Create a local of all codes in group variable levelsof `grpvar', local(GRP_CODE_LEVELS) - + *Saving the name of the value label of the grpvar() local GRPVAR_VALUE_LABEL : value label `grpvar' - + *Counting how many levels there are in groupvar local GRPVAR_NUM_GROUPS : word count `GRP_CODE_LEVELS' - + *Static dummy for grpvar() has no label if "`GRPVAR_VALUE_LABEL'" == "" local GRPVAR_HAS_VALUE_LABEL = 0 if "`GRPVAR_VALUE_LABEL'" != "" local GRPVAR_HAS_VALUE_LABEL = 1 - + *Number of columns for Latex local NUM_COL_GRP_TOT = `GRPVAR_NUM_GROUPS' + `TOTAL_USED' - + /*********************************************** ************************************************/ - + *Testing that options to iebaltab is correctly specified and make initial operations based on these commands - + /************************************************* - ************************************************/ - + ************************************************/ + ** Group Options - - cap confirm numeric variable `grpvar' + + cap confirm numeric variable `grpvar' if _rc != 0 { noi display as error "{phang}The variable listed in grpvar(`grpvar') is not a numeric variable. See {help encode} for options on how to make a categorical string variable into a categorical numeric variable{p_end}" error 108 - } + } else { - ** Testing that groupvar is a categorical variable. Int() rounds to + ** Testing that groupvar is a categorical variable. Int() rounds to * integer, and if any values are non-integers then (int(`grpvar') == `grpvar) is * not true cap assert ( int(`grpvar') == `grpvar' ) @@ -377,156 +389,156 @@ qui { error 109 } } - - + + ** Column Options - + ** If control() or order() is used, then the levels specified in those * options need to exist in the groupvar - + local control_correct : list control in GRP_CODE_LEVELS if `control_correct' == 0 { noi display as error "{phang}The code listed in control(`control') is not used in grpvar(`grpvar'). 
See tabulation of `grpvar' below:" noi tab `grpvar', nol error 197 } - + local order_correct : list order in GRP_CODE_LEVELS if `order_correct' == 0 { noi display as error "{phang}One or more codes listed in order(`order') are not used in grpvar(`grpvar'). See tabulation of `grpvar' below:" noi tab `grpvar', nol error 197 } - + if `GRPLABEL_USED' == 1 { - + local col_labels_to_tokenize `grplabels' - + while "`col_labels_to_tokenize'" != "" { - + *Parsing code and label pair gettoken codeAndLabel col_labels_to_tokenize : col_labels_to_tokenize, parse("@") - + *Splitting code and label gettoken code label : codeAndLabel - - + + *** Codes - + *Checking that code exist in grpvar and store it local code_correct : list code in GRP_CODE_LEVELS if `code_correct' == 0 { noi display as error "{phang}Code [`code'] listed in grplabels(`grplabels') is not used in grpvar(`grpvar'). See tabulation of `grpvar' below:" noi tab `grpvar', nol error 198 - } - + } + *Storing the code in local to be used later local grpLabelCodes `"`grpLabelCodes' "`code'" "' - - + + *** Labels - + *Removing leadning or trailing spaces local label = trim("`label'") - + *Testing that no label is missing if "`label'" == "" { noi display as error "{phang}For code [`code'] listed in grplabels(`grplabels') you have not specified any label. Labels are requried for all codes listed in grplabels(). 
See tabulation of `grpvar' below:" noi tab `grpvar', nol - error 198 + error 198 } - + *Storing the label in local to be used later local grpLabelLables `"`grpLabelLables' "`label'" "' - - + + *Parse char is not removed by gettoken local col_labels_to_tokenize = subinstr("`col_labels_to_tokenize'" ,"@","",1) } } - + if `ROWLABEL_USED' { - + *** Test the validity for the rowlabel input - + *Create a local with the rowlabel input to be tokenized local row_labels_to_tokenize `rowlabels' - + while "`row_labels_to_tokenize'" != "" { - + *Parsing name and label pair gettoken nameAndLabel row_labels_to_tokenize : row_labels_to_tokenize, parse("@") - + *Splitting name and label gettoken name label : nameAndLabel - + *** Variable names - + *Checking that the variables used in rowlabels() are included in the table local name_correct : list name in balancevars if `name_correct' == 0 { noi display as error "{phang}Variable [`name'] listed in rowlabels(`rowlabels') is not found among the variables included in the balance table." error 111 - } - + } + *Storing the code in local to be used later local rowLabelNames `"`rowLabelNames' "`name'" "' - - + + *** Variable labels - + *Removing leading or trailing spaces local label = trim("`label'") - + *Testing that no label is missing if "`label'" == "" { noi display as error "{phang}For variable [`name'] listed in rowlabels(`rowlabels') you have not specified any label. Labels are requried for all variables listed in rowlabels(). The variable name itself will be used for any variables omitted from rowlabels(). 
See also option {help dmtab:rowvarlabels}" noi tab `grpvar', nol - error 198 + error 198 } - + *Storing the label in local to be used later local rowLabelLabels `"`rowLabelLabels' "`label'" "' - + *Parse char is not removed by gettoken local row_labels_to_tokenize = subinstr("`row_labels_to_tokenize'" ,"@","",1) } } - + if `TOTALLABEL_USED' & !`TOTAL_USED' { - + *Error for totallabel() incorrectly applied noi display as error "{phang}Option totallabel() may only be used together with the option total" error 197 - } - - - ** Stats Options + } + + + ** Stats Options local SHOW_NCLUSTER 0 - + if `VCE_USED' == 1 { - + local vce_nocomma = subinstr("`vce'", "," , " ", 1) - + tokenize "`vce_nocomma'" local vce_type `1' - + if "`vce_type'" == "robust" { - + *Robust is allowed and not other tests needed } else if "`vce_type'" == "cluster" { - + *Create a local for displaying number of clusters local SHOW_NCLUSTER 1 - + local cluster_var `2' - + cap confirm variable `cluster_var' - + if _rc { - + *Error for vce(cluster) incorrectly applied noi display as error "{phang}The cluster variable in vce(`vce') does not exist or is invalid for any other reason. See {help vce_option :help vce_option} for more information. " error _rc @@ -534,26 +546,26 @@ qui { } } else if "`vce_type'" == "bootstrap" { - + *bootstrap is allowed and not other tests needed. Error checking is more comlex, add tests here in the future. } else { - + *Error for vce() incorrectly applied noi display as error "{phang}The vce type `vce_type' in vce(`vce') is not allowed. Only robust, cluster and bootstrap is allowed. See {help vce_option :help vce_option} for more information." error 198 - + } } - + if `STARSNOADD_USED' == 0 { - + *Allow user defined p-values for stars or set the default values if `STARLEVEL_USED' == 1 { - + *Tokenize the string with the p-values entered by the user. 
The value entered are tested in syntax tokenize "`starlevels'" - + *Set user defined levels for 1, 2 and 3 stars local p1star `1' local p2star `2' @@ -565,7 +577,7 @@ qui { local p2star .05 local p3star .01 } - + ** Create locals with the values expressed * as percentages for the note to the table local p1star_percent = `p1star' * 100 @@ -573,12 +585,12 @@ qui { local p3star_percent = `p3star' * 100 } else { - - *Options starsomitt is used. No stars will be displayed. By setting + + *Options starsomitt is used. No stars will be displayed. By setting *these locals to nothing the loop adding stars will not be iterated - local p1star + local p1star local p2star - local p3star + local p3star } *Error for starlevels incorrectly used together with starsnoadd @@ -586,112 +598,133 @@ qui { *Error for starlevels and starsnoadd incorrectly used together noi display as error "{phang}Option starlevels() may not be used in combination with option starsnoadd" error 197 - } - - - + } + + + *Error for miss incorrectly used together with missreg if `BALMISS_USED' & `BALMISSREG_USED' { *Error for balmiss and balmissreg incorrectly used together noi display as error "{phang}Option balmiss() may not be used in combination with option balmissreg()" error 197 - } - + } + if `COVMISS_USED' & `COVMISSREG_USED' { *Error for covmiss and covmissreg incorrectly used together noi display as error "{phang}Option covmiss() may not be used in combination with option covmissreg()" error 197 - } - + } + + if !`TTEST_USED' { + if `PTTEST_USED' { + *Error for nottest and pttest incorrectly used together + noi display as error "{phang}Option pttest may not be used in combination with option nottest" + error 197 + } + if `PBOTH_USED' { + *Error for nottest and pboth incorrectly used together + noi display as error "{phang}Option pboth may not be used in combination with option nottest" + error 197 + } + } + + if `FTEST_USED' & !`TTEST_USED' & !`NORMDIFF_USED' { + *Error for F-test used, but not t-test 
of normalized difference: + *no columns are created for F-test to be displayed + noi di as error "{phang}Option ftest may not only be used if either t-tests or normalized differences are used. F-test for joing significance of balance variables will not be displayed. In order to display it, either use option normdiff or remove option nottest.{p_end}" + local FTEST_USED = 0 + + } + *Testing input in these for options. See function at the end of this command if `BALMISS_USED' == 1 iereplacestringtest "balmiss" "`balmiss'" if `BALMISSREG_USED' == 1 iereplacestringtest "balmissreg" "`balmissreg'" if `COVMISS_USED' == 1 iereplacestringtest "covmiss" "`covmiss'" if `COVMISSREG_USED' == 1 iereplacestringtest "covmissreg" "`covmissreg'" - - + + if `FIX_EFFECT_USED' == 1 { - + cap assert `fixedeffect' < . if _rc == 9 { - + noi display as error "{phang}The variable in fixedeffect(`fixedeffect') is missing for some observations. This would cause observations to be dropped in the estimation regressions. 
See tabulation of `fixedeffect' below:{p_end}" noi tab `fixedeffect', m error 109 } - + } - + * test covariate variables if `COVARIATES_USED' == 1 { - + foreach covar of local covariates { - + *Create option string - local replaceoptions - + local replaceoptions + *Sopecify differently based on all missing or only regualr missing if `COVMISS_USED' local replaceoptions `" `replaceoptions' replacetype("`covmiss'") "' if `COVMISSREG_USED' local replaceoptions `" `replaceoptions' replacetype("`covmissreg'") regonly "' - + *Add group variable if the replace type is group mean if "`covmiss'" == "groupmean" local replaceoptions `" `replaceoptions' groupvar(`grpvar') groupcodes("`GRP_CODE_LEVELS'") "' if "`covmissreg'" == "groupmean" local replaceoptions `" `replaceoptions' groupvar(`grpvar') groupcodes("`GRP_CODE_LEVELS'") "' - + *Set the minimum number of observations to allow means to be set from if `MISSMINMEAN_USED' == 1 local replaceoptions `" `replaceoptions' minobsmean(`missminmean') "' if `MISSMINMEAN_USED' == 0 local replaceoptions `" `replaceoptions' minobsmean(10) "' - + *Excute the command. Code is found at the bottom of this ado file - if (`COVMISS_USED' | `COVMISSREG_USED') iereplacemiss `covancevar', `replaceoptions' - + if (`COVMISS_USED' | `COVMISSREG_USED') iereplacemiss `covancevar', `replaceoptions' + if `COVARMISSOK_USED' != 1 { - + cap assert `covar' < . if _rc == 9 { - + noi display as error "{phang}The variable `covar' specified in covariates() has missing values for one or several observations. This would cause observations to be dropped in the estimation regressions. To allow for observations to be dropped see option covarmissok and to make the command treat missing values as zero see option covmiss() and covmissreg(). 
Click {stata tab `covar' `if' `in', m} to see the missing values.{p_end}" error 109 } } } } - + if `WEIGHT_USED' == 1 { - + * Parsing weight options gettoken weight_type weight_var : weight, parse("=") - + * Parsing keeps the separating character local weight_var : subinstr local weight_var "=" "" - + * Test is weight type specified is valie local weight_options "fweights pweights aweights iweights fw freq weight pw aw iw" - + if `:list weight_type in weight_options' == 0 { - + noi display as error "{phang} The option `weight_type' specified in weight() is not a valid weight option. Weight options are: fweights, fw, freq, weight, pweights, pw, aweights, aw, iweights, and iw. {p_end}" error 198 - + } - + * Test is weight variable specified if valid capture confirm variable `weight_var' - + if _rc { - + noi display as error "{phang} The option `weight_var' specified in weight() is not a variable. {p_end}" error 198 - } + } } - - - + + + ** Output Options - + ** If the format option is specified, then test if there is a valid format specified if `FORMAT_USED' == 1 { - + ** Creating a numeric mock variable that we attempt to apply the format * to. This allows us to piggy back on Stata's internal testing to be * sure that the format specified is at least one of the valid numeric @@ -699,24 +732,24 @@ qui { tempvar formattest gen `formattest' = 1 cap format `formattest' `format' - + if _rc == 120 { - + di as error "{phang}The format specified in format(`format') is not a valid Stata format. See {help format} for a list of valid Stata formats. This command only accept the f, fc, g, gc and e format.{p_end}" error 120 - } + } else if _rc != 0 { - + di as error "{phang}Something unexpected happened related to the option format(`format'). Make sure that the format you specified is a valid format. See {help format} for a list of valid Stata formats. 
If this problem remains, please report this error to kbjarkefur@worldbank.org.{p_end}" error _rc } else { ** We know here that the format is one of the numeric formats that Stata allows - + local fomrmatAllowed 0 - local charLast = substr("`format'", -1,.) - local char2Last = substr("`format'", -2,.) - + local charLast = substr("`format'", -1,.) + local char2Last = substr("`format'", -2,.) + if "`charLast'" == "f" | "`charLast'" == "e" { local fomrmatAllowed 1 } @@ -724,9 +757,9 @@ qui { if "`char2Last'" == "tg" { *format tg not allowed. all other valid formats ending on g are allowed local fomrmatAllowed 0 - } + } else { - + *Formats that end in g that is not tg can only be g which is allowed. local fomrmatAllowed 1 } @@ -735,11 +768,11 @@ qui { if "`char2Last'" != "gc" & "`char2Last'" != "fc" { *format ends on c but is neither fc nor gc local fomrmatAllowed 0 - } + } else { - + *Formats that end in c that are either fc or gc are allowed. - local fomrmatAllowed 1 + local fomrmatAllowed 1 } } else { @@ -752,369 +785,369 @@ qui { } *If format passed all tests, store it in the local used for display formats local diformat = "`format'" - } + } } else { *Default value if fomramt not specified local diformat = "%9.3f" } - + *Error for tblnonote incorrectly used together with notecombine if `NOTECOMBINE_USED' & `NONOTE_USED' { - + *Error for tblnonote incorrectly used together with notecombine noi display as error "{phang}Option tblnonote may not be used in combination with option notecombine" error 197 } - + if `SAVE_USED' | `SAVE_TEX_USED' { if `SAVE_USED' { - **Find the last . in the file path and assume that - * the file extension is what follows. If a file path has a . then + **Find the last . in the file path and assume that + * the file extension is what follows. If a file path has a . then * the file extension must be explicitly specified by the user. 
- + *Copy the full file path to the file suffix local local file_suffix = "`save'" - + ** Find index for where the file type suffix start local dot_index = strpos("`file_suffix'",".") - + *If no dot then no file extension if `dot_index' == 0 local file_suffix "" - - **If there is one or many . in the file path than loop over + + **If there is one or many . in the file path than loop over * the file path until we have found the last one. while `dot_index' > 0 { - + *Extract the file index local file_suffix = substr("`file_suffix'", `dot_index' + 1, .) - + *Find index for where the file type suffix start local dot_index = strpos("`file_suffix'",".") } - + *If no file format suffix is specified, use the default .xlsx if "`file_suffix'" == "" { - + local save `"`save'.xlsx"' } - + *If a file format suffix is specified make sure that it is one of the two allowed. else if !("`file_suffix'" == "xls" | "`file_suffix'" == "xlsx") { - + noi display as error "{phang}The file format specified in save(`save') is other than .xls or .xlsx. Only those two formats are allowed. If no format is specified .xlsx is the default. If you have a . in your file path, for example in a folder name, then you must specify the file extension .xls or .xlsx.{p_end}" error 198 } } if `SAVE_TEX_USED' { - - **Find the last . in the file path and assume that - * the file extension is what follows. If a file path has a . then + + **Find the last . in the file path and assume that + * the file extension is what follows. If a file path has a . then * the file extension must be explicitly specified by the user. - + *Copy the full file path to the file suffix local local tex_file_suffix = "`savetex'" - + ** Find index for where the file type suffix start local tex_dot_index = strpos("`tex_file_suffix'",".") - + *If no dot then no file extension if `tex_dot_index' == 0 local tex_file_suffix "" - - **If there is one or many . in the file path than loop over + + **If there is one or many . 
in the file path than loop over * the file path until we have found the last one. while `tex_dot_index' > 0 { - + *Extract the file index local tex_file_suffix = substr("`tex_file_suffix'", `tex_dot_index' + 1, .) - + *Find index for where the file type suffix start local tex_dot_index = strpos("`tex_file_suffix'",".") } *If no file format suffix is specified, use the default .tex if "`tex_file_suffix'" == "" { - + local savetex `"`savetex'.tex"' } - + *If a file format suffix is specified make sure that it is one of the two allowed. else if !("`tex_file_suffix'" == "tex" | "`tex_file_suffix'" == "txt") { - + noi display as error "{phang}The file format specified in savetex(`savetex') is other than .tex or .txt. Only those two formats are allowed. If no format is specified .tex is the default. If you have a . in your file path, for example in a folder name, then you must specify the file extension .tex or .txt.{p_end}" error 198 } - + if `CAPTION_USED' { - + * Make sure special characters are displayed correctly local texcaption : subinstr local texcaption "%" "\%" , all local texcaption : subinstr local texcaption "_" "\_" , all local texcaption : subinstr local texcaption "&" "\&" , all - + } } - + } else if `SAVE_BROWSE_USED' { - + noi display as error "{phang}Option savepreserve may only be used in combination with option save(){p_end}" error 198 } - + * Check tex options if `SAVE_TEX_USED' { - + * Note width must be positive if `NOTEWIDTH_USED' { - + if `texnotewidth' <= 0 { - + noi display as error `"{phang}The value specified in texnotewidth(`texnotewidth') is non-positive. Only positive numbers are allowed. 
For more information, {net "from http://en.wikibooks.org/wiki/LaTeX/Lengths.smcl":check LaTeX lengths manual}.{p_end}"' error 198 } } - + * Tex label must be a single word if `LABEL_USED' { - + local label_words : word count `texlabel' - + if `label_words' != 1 { - + noi display as error `"{phang}The value specified in texlabel(`texlabel') is not allowed. For more information, {browse "https://en.wikibooks.org/wiki/LaTeX/Labels_and_Cross-referencing":check LaTeX labels manual}.{p_end}"' error 198 } - + } - + if (`LABEL_USED' | `CAPTION_USED') { - + if `TEXDOC_USED' == 0 { - + noi display as error "{phang}Options texlabel and texcaption may only be used in combination with option texdocument {p_end}" error 198 } } - + if `TEXCOLWIDTH_USED' { - + * Test if width unit is correctly specified local texcolwidth_unit = substr("`texcolwidth'",-2,2) if !inlist("`texcolwidth_unit'","cm","mm","pt","in","ex","em") { noi display as error `"{phang}Option texcolwidth is incorrectly specified. Column width unit must be one of "cm", "mm", "pt", "in", "ex" or "em". For more information, {browse "https://en.wikibooks.org/wiki/LaTeX/Lengths":check LaTeX lengths manual}.{p_end}"' error 198 } - + * Test if width value is correctly specified local texcolwidth_value = subinstr("`texcolwidth'","`texcolwidth_unit'","",.) - capture confirm number `texcolwidth_value' + capture confirm number `texcolwidth_value' if _rc & inlist("`texcolwidth_unit'","cm","mm","pt","in","ex","em") { noi display as error "{phang}Option texcolwidth is incorrectly specified. Column width value must be numeric. See {help iebaltab:iebaltab help}. {p_end}" error 198 - } + } } - + if `TEXVSPACE_USED' { - + * Test if width unit is correctly specified local vspace_unit = substr("`texvspace'",-2,2) if !inlist("`vspace_unit'","cm","mm","pt","in","ex","em") { noi display as error `"{phang}Option texvspace is incorrectly specified. Vertical space unit must be one of "cm", "mm", "pt", "in", "ex" or "em". 
For more information, {browse "https://en.wikibooks.org/wiki/LaTeX/Lengths":check LaTeX lengths manual}.{p_end}"' error 198 } - + * Test if width value is correctly specified local vspace_value = subinstr("`texvspace'","`vspace_unit'","",.) - capture confirm number `vspace_value' + capture confirm number `vspace_value' if _rc & inlist("`vspace_unit'","cm","mm","pt","in","ex","em") { noi display as error "{phang}Option texvspace is incorrectly specified. Vertical space value must be numeric. See {help iebaltab:iebaltab help}. {p_end}" error 198 - } + } } } - + * Error for incorrectly using tex options else if `NOTEWIDTH_USED' | `LABEL_USED' | `CAPTION_USED' | `TEXDOC_USED' | `TEXVSPACE_USED' | `TEXCOLWIDTH_USED' { - + noi display as error "{phang}Options texnotewidth, texdocument, texlabel, texcaption, texvspace and texcolwidth may only be used in combination with option savetex(){p_end}" error 198 - + } - + *At least one of save and browse may be used if (`SAVE_USED' + `BROWSE_USED' + `SAVE_TEX_USED' < 1) { - + *Error for incorrectly using both save() and browse noi display as error "{phang}Either option save() or option savetex() or option browse must be used. Note that option browse drops all data in memory and it is not possible to restore it afterwards. Use preserve/restore, tempfiles or save data to disk before using the otion browse." - error + error } /*********************************************** ************************************************/ - - + + *Manage order in levels of grpvar() - + /************************************************* - ************************************************/ - - - *Changed to value used in control() if control() is - *specified but order() is not, + ************************************************/ + + + *Changed to value used in control() if control() is + *specified but order() is not, if !`ORDER_USED' & `CONTROL_USED' local order `control' - + *Unless changed above, either as specified in order(). 
*if order() is not specified then the all levels in *numeric order is stored in order_code_rest to be. local order_code_rest : list GRP_CODE_LEVELS - order - - - *The final order is compiled. order_code_rest + + + *The final order is compiled. order_code_rest *is ordered numerically. If order has no yet - *been defined (if niether order() or control() - * was used) then order will be exactly like + *been defined (if niether order() or control() + * was used) then order will be exactly like * order_code_rest local ORDER_OF_GROUPS `order' `order_code_rest' - + /*********************************************** ************************************************/ - + *Manage lables to be used for the groups in groupvar /************************************************* - ************************************************/ - + ************************************************/ + *Local that will store the final labels. These labels will be stored in the in final order of groups in groupvar - local grpLabels_final "" - + local grpLabels_final "" + *Loop over all groups in the final order foreach groupCode of local ORDER_OF_GROUPS { - + *Test if this code has a manually defined group label local grpLabelPos : list posof "`groupCode'" in grpLabelCodes - + *If index is not zero then manual label is defined, use it if `grpLabelPos' != 0 { *Getting the manually defined label corresponding to this code local group_label : word `grpLabelPos' of `grpLabelLables' - + *Storing the label to be used for this group code. 
local grpLabels_final `" `grpLabels_final' "`group_label'" "' } - + ** No manually defined label, test if group var has value labels or if user * has specified that value labels should not be used else if `NOGRPLABEL_USED' | !`GRPVAR_HAS_VALUE_LABEL' { - + *Not using value labels, simply using the group code as the label in the final table local grpLabels_final `" `grpLabels_final' "`groupCode'" "' - + } - + *No defined group label but value labels exist and may be used - else { - + else { + *Get the value label corresponding to this code local gprVar_valueLabel : label `GRPVAR_VALUE_LABEL' `groupCode' - + *Storing the value label used for grpvar corresponding to the group code. local grpLabels_final `" `grpLabels_final' "`gprVar_valueLabel'" "' - + } } - - - - + + + + /*********************************************** ************************************************/ - + *Manage labels to be used as rowtitles /************************************************* ************************************************/ - + local rowLabelsFinal "" - + foreach balancevar of local balancevars { - - ** Test if this variable has a manually defined rowlable. If + + ** Test if this variable has a manually defined rowlable. 
If * rowlabels() was not specified, then this local will be empty * and generate index 0 for all variables local rowLabPos : list posof "`balancevar'" in rowLabelNames - + if `rowLabPos' != 0 { - + *Getting the manually defined label corresponding to this code local row_label : word `rowLabPos' of `rowLabelLabels' - + *Store the label in local to be used later local rowLabels_final `" `rowLabels_final' "`row_label'" "' - + } *Use variable label if option is specified else if `ROWVARLABEL_USED' { - + *Get the variable label used for this variable local var_label : variable label `balancevar' - + *Remove leading or trailing spaces local var_label = trim("`var_label'") - + *Make sure varlabel is not empty if "`var_label'" != "" { - + *Store the label in local to be used later local rowLabels_final `" `rowLabels_final' "`var_label'" "' } *If var lable empty, use var name instead else { - + *Store the label in local to be used later local rowLabels_final `" `rowLabels_final' "`balancevar'" "' } } *Otherwise use the variable name else { - + *Store the label in local to be used later local rowLabels_final `" `rowLabels_final' "`balancevar'" "' } } - + } /*********************************************** ************************************************/ - + *Creating title rows - + /************************************************* ************************************************/ - - - - ** The titles consist of three rows across all + + + + ** The titles consist of three rows across all * columns of the table. Each row is one local local titlerow1 "" local titlerow2 "" local titlerow3 `""Variable""' - - - ** The titles consist of three rows across all + + + ** The titles consist of three rows across all * columns of the table. 
Each row is one local local texrow1 "" local texrow2 "" local texrow3 `"Variable"' - + ** Set titlw SE if standard errors are used (default) * or SD if standard deviation is used local variance_type "SE" if `STDEV_USED' == 1 local variance_type "SD" - + *Prepare title for column showing onle N or N and cluster if "`vce_type'" != "cluster" { local N_title "N" @@ -1122,237 +1155,231 @@ qui { else { local N_title "N/[Clusters]" } - + ********************************************* *Generating titles for each value of groupvar - - ** Tempvar corresponding that will store the final + + ** Tempvar corresponding that will store the final * order for the group the observation belongs to tempvar groupOrder gen `groupOrder' = . - + *Loop over the number of groups forvalues groupOrderNum = 1/`GRPVAR_NUM_GROUPS' { - + *Get the code and label corresponding to the group local groupLabel : word `groupOrderNum' of `grpLabels_final' local groupCode : word `groupOrderNum' of `ORDER_OF_GROUPS' - + * Make sure special characters are displayed correctly local texGroupLabel : subinstr local groupLabel "%" "\%" , all local texGroupLabel : subinstr local texGroupLabel "_" "\_" , all local texGroupLabel : subinstr local texGroupLabel "&" "\&" , all local texGroupLabel : subinstr local texGroupLabel "\$" "\\\\\\\$" , all - + *Prepare a row to store onerow values for each group if `ONEROW_USED' == 1 local onerow_`groupOrderNum' "" - + *Assign the group order to observations that belong to this group replace `groupOrder' = `groupOrderNum' if `grpvar' == `groupCode' - + *Create one more column for N if N is displayesd in column instead of row if `ONEROW_USED' == 0 { - + local titlerow1 `"`titlerow1' _tab "" _tab " (`groupOrderNum') " "' local titlerow2 `"`titlerow2' _tab "" _tab "`groupLabel'" "' local titlerow3 `"`titlerow3' _tab "`N_title'" _tab "Mean/`variance_type'" "' - - + + local texrow1 `"`texrow1' & \multicolumn{2}{c}{(`groupOrderNum')} "' local texrow2 `"`texrow2' & 
\multicolumn{2}{c}{`texGroupLabel'} "' local texrow3 `"`texrow3' & `N_title' & Mean/`variance_type' "' - + } else { - + local titlerow1 `"`titlerow1' _tab " (`groupOrderNum') " "' local titlerow2 `"`titlerow2' _tab "`groupLabel'" "' local titlerow3 `"`titlerow3' _tab "Mean/`variance_type'" "' - + local texrow1 `"`texrow1' & (`groupOrderNum') "' local texrow2 `"`texrow2' & `texGroupLabel' "' local texrow3 `"`texrow3' & Mean/`variance_type' "' - + } - + } *********************************************************** - *Generating titles for sample total if total() is specified - + *Generating titles for sample total if total() is specified + if `TOTAL_USED' { - + *Add one more column group local totalColNumber = `GRPVAR_NUM_GROUPS' + 1 - + *If onerow used, then add a local to store the total num obs if `ONEROW_USED' == 1 local onerow_tot "" - + local tot_label Total if `TOTALLABEL_USED' local tot_label `totallabel' - + * Make sure special characters are displayed correctly local tex_tot_label : subinstr local tot_label "%" "\%" , all local tex_tot_label : subinstr local tex_tot_label "_" "\_" , all local tex_tot_label : subinstr local tex_tot_label "&" "\&" , all local tex_tot_label : subinstr local tex_tot_label "\$" "\\\\\$" , all - + *Create one more column for N if N is displayesd in column instead of row if `ONEROW_USED' == 0 { - + local titlerow1 `"`titlerow1' _tab "" _tab " (`totalColNumber') " "' local titlerow2 `"`titlerow2' _tab "" _tab "`tot_label'" "' local titlerow3 `"`titlerow3' _tab "`N_title'" _tab "Mean/`variance_type'" "' - + local texrow1 `"`texrow1' & \multicolumn{2}{c}{(`totalColNumber')}"' local texrow2 `"`texrow2' & \multicolumn{2}{c}{`tex_tot_label'} "' local texrow3 `"`texrow3' & `N_title' & Mean/`variance_type' "' } else { - + local titlerow1 `"`titlerow1' _tab " (`totalColNumber') " "' local titlerow2 `"`titlerow2' _tab "`tot_label'" "' - local titlerow3 `"`titlerow3' _tab "Mean/`variance_type'" "' - + local titlerow3 `"`titlerow3' _tab 
"Mean/`variance_type'" "' + local texrow1 `"`texrow1' & (`totalColNumber') "' local texrow2 `"`texrow2' & `tex_tot_label' "' - local texrow3 `"`texrow3' & Mean/`variance_type' "' - + local texrow3 `"`texrow3' & Mean/`variance_type' "' + } } ************************************************ *Generating titles for each test of diff in mean - - local ttest_pairs "" - - if `CONTROL_USED' { - - *Get the order of the control group - local ctrlGrpPos : list posof "`control'" in ORDER_OF_GROUPS - - *The t-tests will only be between control and each of the other groups - forvalues second_ttest_group = 1/`GRPVAR_NUM_GROUPS' { - - *Include all groups apart from the control group itself - if `second_ttest_group' != `ctrlGrpPos' { - - *Adding title rows for the t-test. - local titlerow1 `"`titlerow1' _tab "t-test""' - local titlerow2 `"`titlerow2' _tab "(`ctrlGrpPos') - (`second_ttest_group')""' - + + if `TTEST_USED' | `NORMDIFF_USED' { + + if `CONTROL_USED' { + + iecontrolheader "`control'" "`ORDER_OF_GROUPS'" "`GRPVAR_NUM_GROUPS'" /// + `TTEST_USED' `PTTEST_USED' `NORMDIFF_USED' /// + `" `titlerow1' "' `" `titlerow2' "' `" `titlerow3' "' `" `texrow3' "' + } + else { + + ienocontrolheader "`GRPVAR_NUM_GROUPS'" /// + `TTEST_USED' `PTTEST_USED' `NORMDIFF_USED' /// + `" `titlerow1' "' `" `titlerow2' "' `" `titlerow3' "' `" `texrow3' "' + + } + + local titlerow1 `"`r(titlerow1)'"' + local titlerow2 `"`r(titlerow2)'"' + local titlerow3 `"`r(titlerow3)'"' + local texrow3 `"`r(texrow3)'"' + local ttest_pairs `"`r(ttest_pairs)'"' + + + local testPairCount : list sizeof ttest_pairs + + if `testPairCount' > 0 { + + if `TTEST_USED' { + local texrow1 `" `texrow1' & \multicolumn{`testPairCount'}{c}{T-test} "' + if `PTTEST_USED' == 1 { - local titlerow3 `"`titlerow3' _tab "p-value""' + local texrow2 `"`texrow2' & \multicolumn{`testPairCount'}{c}{P-value} "' } else { - local titlerow3 `"`titlerow3' _tab "Difference""' + local texrow2 `"`texrow2' & \multicolumn{`testPairCount'}{c}{Difference} 
"' } - - local texrow3 `" `texrow3' & (`ctrlGrpPos')-(`second_ttest_group') "' - - *Storing a local of all the test pairs - local ttest_pairs "`ttest_pairs' `ctrlGrpPos'_`second_ttest_group'" } - } - } - else { - - *The t-tests will be all cominations of groups - forvalues first_ttest_group = 1/`GRPVAR_NUM_GROUPS' { - - ** To guarantee that all combination of groups are included - * but no duplicates are possible, start next loop one integer - * higher than the first group - local nextPossGroup = `first_ttest_group' + 1 - - forvalues second_ttest_group = `nextPossGroup'/`GRPVAR_NUM_GROUPS' { - - *Adding title rows for the t-test. - local titlerow1 `"`titlerow1' _tab "t-test""' - local titlerow2 `"`titlerow2' _tab "(`first_ttest_group')-(`second_ttest_group')""' - - if `PTTEST_USED' == 1 { - local titlerow3 `"`titlerow3' _tab "p-value""' - } - else { - local titlerow3 `"`titlerow3' _tab "Difference""' - } - - local texrow3 `" `texrow3' & (`first_ttest_group')-(`second_ttest_group') "' - - *Storing a local of all the test pairs - local ttest_pairs "`ttest_pairs' `first_ttest_group'_`second_ttest_group'" + if `NORMDIFF_USED' { + local texrow1 `"`texrow1' & \multicolumn{`testPairCount'}{c}{Normalized} "' + local texrow2 `"`texrow2' & \multicolumn{`testPairCount'}{c}{difference} "' } } + *texrow3 created in loop above } - - - local testPairCount : list sizeof ttest_pairs - - if `testPairCount' > 0 { - - local texrow1 `" `texrow1' & \multicolumn{`testPairCount'}{c}{T-test} "' - - if `PTTEST_USED' == 1 { - local texrow2 `"`texrow2' & \multicolumn{`testPairCount'}{c}{P-value} "' - } - else { - local texrow2 `"`texrow2' & \multicolumn{`testPairCount'}{c}{Difference} "' - } + + ************************************************ + *Add column for F-test of joint equality + + if `FEQTEST_USED' { + + local titlerow1 `"`titlerow1' _tab "F-test""' + local titlerow2 `"`titlerow2' _tab "for joint""' + local titlerow3 `"`titlerow3' _tab "orthogonality""' + + local texrow1 `" `texrow1' 
& \multicolumn{1}{c}{F-test} "' + local texrow2 `" `texrow2' & \multicolumn{1}{c}{for joint}"' + local texrow3 `" `texrow3' & \multicolumn{1}{c}{orthogonality}"' } - *texrow3 created in loop above - - + **************************** *Writing titles to textfile - + *Create a temporary textfile tempname textname tempfile textfile - - *Write the title rows defined above + + *Write the title rows defined above cap file close `textname' file open `textname' using "`textfile'", text write replace file write `textname' /// `titlerow1' _n /// `titlerow2' _n /// - `titlerow3' _n + `titlerow3' _n file close `textname' - + ******************** *texfile - + *Count number of columns in table if `TEXCOLWIDTH_USED' == 0 local colstring l else local colstring p{`texcolwidth'} - + forvalues repeat = 1/`NUM_COL_GRP_TOT' { - + *Add at least one column per group and for total if used local colstring "`colstring'c" - *Add another columns if N is displyaed in column and not row - if `ONEROW_USED' == 0 { + *Add another column if N is displyaed in column and not row + if !`ONEROW_USED'{ local colstring "`colstring'c" } } *Add one column per test pair - forvalues repeat = 1/`testPairCount' { - local colstring `colstring'c + if `TTEST_USED' { + forvalues repeat = 1/`testPairCount' { + local colstring "`colstring'c" + } + } + + *Add another column if F-test for equality of means is included + if `FEQTEST_USED'{ + local colstring "`colstring'c" + } + + *Add another column if normalized difference is included + if `NORMDIFF_USED'{ + forvalues repeat = 1/`testPairCount' { + local colstring "`colstring'c" + } } - + *Create a temporary texfile tempname texname tempfile texfile - + ****Write texheader if full document option was selected *Everyhting here is the tex headers capture file close `texname' if `TEXDOC_USED' { - + file open `texname' using "`texfile'", text write replace file write `texname' /// "%%% Table created in Stata by iebaltab (https://github.com/worldbank/ietoolkit)" _n /// @@ 
-1362,70 +1389,70 @@ qui { "% ----- Preamble " _n /// "\usepackage[utf8]{inputenc}" _n /// "\usepackage{adjustbox}" _n - + file write `texname' /// "% ----- End of preamble " _n /// "" _n /// " \begin{document}" _n /// "" _n /// - "\begin{table}[!htbp]" _n /// + "\begin{table}[!htbp]" _n /// "\centering" _n - + * Write tex caption if specified if `CAPTION_USED' { - + file write `texname' `"\caption{`texcaption'}"' _n - + } - + * Write tex label if specified if `LABEL_USED' { - + file write `texname' `"\label{`texlabel'}"' _n - + } - + file write `texname' "\begin{adjustbox}{max width=\textwidth}" _n file close `texname' - - } + + } file open `texname' using "`texfile'", text write append file write `texname' /// "\begin{tabular}{@{\extracolsep{5pt}}`colstring'}" _n /// - "\\[-1.8ex]\hline \hline" _n + "\\[-1.8ex]\hline \hline \\[-1.8ex]" _n file close `texname' - *Write the title rows defined above + *Write the title rows defined above capture file close `texname' file open `texname' using "`texfile'", text write append - + file write `texname' /// "`texrow1' \\" _n /// "`texrow2' \\" _n /// - "`texrow3' \\ \hline " _n + "`texrow3' \\ \hline \\[-1.8ex] " _n file close `texname' - + /*********************************************** ***********************************************/ - - *Running the regression for the t-test + + *Running the regression for the t-test *for each variable in varlist - + /************************************************ - ************************************************/ - + ************************************************/ + *** Setting default values or specified values for fixed effects and clusters - + ********************************** *Preparing fixed effect option - + if !`FIX_EFFECT_USED' { - + ** If a fixed effect var is not specified, * then a constant fixed effect is here generated. 
- * A constent fixed effect leaves the areg + * A constent fixed effect leaves the areg * unaffected tempvar fixedeffect gen `fixedeffect' = 1 @@ -1433,120 +1460,119 @@ qui { ********************************** *Preparing cluster option - + if `VCE_USED' { - + ** The varname for cluster is * prepared to be put in the areg * options local error_estm vce(`vce') - + } - + ********************************** *Preparing weight option - + local weight_option "" - + if `WEIGHT_USED' { - + ** The varname for weight is prepared to be put in the reg options local weight_option "[`weight_type' = `weight_var']" - + } - - + + ** Create locals that control the warning table - + *Mean test warnings - local warn_means_num 0 - local warn_means_strlen 0 - - + local warn_means_num 0 + local warn_ftest_num 0 + *Joint test warnings local warn_joint_novar_num 0 local warn_joint_lovar_num 0 - local warn_joint_robus_num 0 - - + local warn_joint_robus_num 0 + + *** Create columns with means and sd for this row - + local tex_line_space 0pt - + foreach balancevar in `balancevars' { - + *Get the rowlabels prepared above one at the time gettoken row_label rowLabels_final : rowLabels_final - + *Start the tableRow string with the label defined - local tableRowUp `""`row_label'""' - local tableRowDo `" "' - + local tableRowUp `""`row_label'""' + local tableRowDo `" "' + *Make sure special characters in variable labels are displayed correctly local texrow_label : subinstr local row_label "%" "\%" , all local texrow_label : subinstr local texrow_label "_" "\_" , all local texrow_label : subinstr local texrow_label "[" "{[}" , all - local texrow_label : subinstr local texrow_label "&" "\&" , all + local texrow_label : subinstr local texrow_label "&" "\&" , all local texrow_label : subinstr local texrow_label "\$" "\\\\\\\$" , all - - local texRow `""`texrow_label'""' - + + local texRow `""`texrow_label'""' + *** Replacing missing value - - ** This option can be used to get a uniform N across all - * 
variables even if the variable is missing for some - * observations. When specifying this option, a dummy is - * created indicating all HHs that have a missing value + + ** This option can be used to get a uniform N across all + * variables even if the variable is missing for some + * observations. When specifying this option, a dummy is + * created indicating all HHs that have a missing value * for this variable. Missing values for the varaible is - * then set to zero, and in the areg used for testing the + * then set to zero, and in the areg used for testing the * differences in means, the dummy is included as a control. * Note that this will slightly distort the mean as well. - + *Create option string - local replaceoptions - + local replaceoptions + *Sopecify differently based on all missing or only regualr missing if `BALMISS_USED' local replaceoptions `" `replaceoptions' replacetype("`balmiss'") "' if `BALMISSREG_USED' local replaceoptions `" `replaceoptions' replacetype("`balmissreg'") regonly "' - + *Add group variable if the replace type is group mean if "`balmiss'" == "groupmean" local replaceoptions `" `replaceoptions' groupvar(`grpvar') groupcodes("`GRP_CODE_LEVELS'") "' if "`balmissreg'" == "groupmean" local replaceoptions `" `replaceoptions' groupvar(`grpvar') groupcodes("`GRP_CODE_LEVELS'") "' - + *Set the minimum number of observations to allow means to be set from if `MISSMINMEAN_USED' == 1 local replaceoptions `" `replaceoptions' minobsmean(`missminmean') "' if `MISSMINMEAN_USED' == 0 local replaceoptions `" `replaceoptions' minobsmean(10) "' - + *Excute the command. 
Code is found at the bottom of this ado file if (`BALMISS_USED' | `BALMISSREG_USED') iereplacemiss `balancevar', `replaceoptions' - + *** Run the regressions - + forvalues groupNumber = 1/`GRPVAR_NUM_GROUPS' { - - reg `balancevar' if `groupOrder' == `groupNumber' `weight_option', `error_estm' - + + reg `balancevar' if `groupOrder' == `groupNumber' `weight_option', `error_estm' + *Number of observation for this balancevar for this group local N_`groupNumber' = e(N) local N_`groupNumber' : display %9.0f `N_`groupNumber'' - + *If clusters used, number of clusters in this balance var for this group if "`vce_type'" == "cluster" { local N_clust_`groupNumber' = e(N_clust) - local N_clust_`groupNumber' : display %9.0f `N_clust_`groupNumber'' + local N_clust_`groupNumber' : display %9.0f `N_clust_`groupNumber'' local N_clust_`groupNumber' = trim("`N_clust_`groupNumber''") local N_clustex_`groupNumber' = "{[}`N_clust_`groupNumber'']" local N_clust_`groupNumber' = "[`N_clust_`groupNumber'']" } - + *Load values from matrices into scalars local mean_`groupNumber' = _b[_cons] local se_`groupNumber' = _se[_cons] - + local di_mean_`groupNumber' : display `diformat' `mean_`groupNumber'' - + *Display variation in Standard errors (default) or in Standard Deviations if `STDEV_USED' == 0 { *Format Standard Errors @@ -1558,15 +1584,15 @@ qui { *Format Standard Deviation local di_var_`groupNumber' : display `diformat' `sd_`groupNumber'' } - + *Remove leading zeros from excessive fomrat local N_`groupNumber' =trim("`N_`groupNumber''") local di_mean_`groupNumber' =trim("`di_mean_`groupNumber''") local di_var_`groupNumber' =trim("`di_var_`groupNumber''") - + *Test that N is the same for each group across all vars if `ONEROW_USED' == 0 { - + local tableRowUp `"`tableRowUp' _tab "`N_`groupNumber''" _tab "`di_mean_`groupNumber''" "' local tableRowDo `"`tableRowDo' _tab "`N_clust_`groupNumber''" _tab "[`di_var_`groupNumber'']" "' @@ -1575,7 +1601,7 @@ qui { } if `SHOW_NCLUSTER' == 1 { local 
texRow `"`texRow' " & \begin{tabular}[t]{@{}c@{}} `N_`groupNumber'' \\ `N_clustex_`groupNumber'' \end{tabular} & \begin{tabular}[t]{@{}c@{}} `di_mean_`groupNumber'' \\ (`di_var_`groupNumber'') \end{tabular}" "' - } + } } else { @@ -1586,62 +1612,62 @@ qui { } *If not, then check that the obs num is the same as before else if !(`onerow_`groupNumber'' == `N_`groupNumber'') { - + *option onerow not allowed if N is different noi display as error "{phang}The number of observations for `balancevar' is different compared to other balance variables within the same group. You can therefore not use the option onerow. Run the command without the option onerow to see which group does not have the same number of observations with non-missing values across all balance variables.{p_end}" error 198 } - - + + *If cluster is usedTest if the number of clusters is the same in each group accross all balance vars if "`vce_type'" == "cluster" { - + if "`oneclstrow_`groupNumber''" == "" { *Store the obs num local oneclstrow_`groupNumber' = `N_clust_`groupNumber'' } *If not, then check that the obs num is the same as before else if !(`oneclstrow_`groupNumber'' == `N_clust_`groupNumber'') { - + *option onerow not allowed if N is different noi display as error "{phang}The number of clusters for `balancevar' is differenet compared to other balance variables within the same group. You can therefore not use the option onerow. 
Run the command without the option onerow to see which group does not have the same number of clusters with non-missing values across all balance variables.{p_end}" error 198 } } - + *Either this is the first balance var or num obs are identical, so write columns local tableRowUp `"`tableRowUp' _tab "`di_mean_`groupNumber''" "' - local tableRowDo `"`tableRowDo' _tab "[`di_var_`groupNumber'']" "' - + local tableRowDo `"`tableRowDo' _tab "[`di_var_`groupNumber'']" "' + local texRow `"`texRow' " & \begin{tabular}[t]{@{}c@{}} `di_mean_`groupNumber'' \\ (`di_var_`groupNumber'') \end{tabular}" "' } - + } - + if `TOTAL_USED' { - + reg `balancevar' `weight_option', `error_estm' - + local N_tot = e(N) local N_tot : display %9.0f `N_tot' - + *If clusters used, number of clusters in this balance var for this group if "`vce_type'" == "cluster" { local N_clust_tot = e(N_clust) - local N_clust_tot : display %9.0f `N_clust_tot' - local N_clust_tot = trim("`N_clust_tot'") + local N_clust_tot : display %9.0f `N_clust_tot' + local N_clust_tot = trim("`N_clust_tot'") local N_clustex_tot = "{[}`N_clust_tot']" local N_clust_tot = "[`N_clust_tot']" } - - + + *Load values from matrices into scalars local mean_tot = _b[_cons] local se_tot = _se[_cons] - + local mean_tot : display `diformat' `mean_tot' - + *Display variation in Standard errors (default) or in Standard Deviations if `STDEV_USED' == 0 { *Format Standard Errors @@ -1652,22 +1678,22 @@ qui { local sd_tot = `se_tot' * sqrt(`N_tot') *Format Standard Deviation local var_tot : display `diformat' `sd_tot' - + } - + *Remove leading zeros from excessive fomrat local N_tot =trim("`N_tot'") local mean_tot =trim("`mean_tot'") local var_tot =trim("`var_tot'") - - + + *Test that N is the same for each group across all vars if `ONEROW_USED' == 0 { - + local tableRowUp `"`tableRowUp' _tab "`N_tot'" _tab "`mean_tot'" "' local tableRowDo `"`tableRowDo' _tab "`N_clust_tot'" _tab "[`var_tot']" "' - + if `SHOW_NCLUSTER' == 0 { local texRow 
`"`texRow' " & `N_tot' & \begin{tabular}[t]{@{}c@{}} `mean_tot' \\ (`var_tot') \end{tabular}" "' } @@ -1676,7 +1702,7 @@ qui { } } else { - + *Test if the first balance var if "`onerow_tot'" == "" { *Store the obs num @@ -1684,202 +1710,323 @@ qui { } *If not, then check that the obs num is the same as before else if !(`onerow_tot' == `N_tot') { - + *option onerow not allowed if N is different noi display as error "{phang}The number of observations for all groups are not the same for `balancevar' compare to at least one other balance variables. Run the command without the option onerow to see which group does not have the same number of observations with non-missing values across all balance variables. This happened in the total column which can be an indication of a serious bug. Please email this erro message to kbjarkefur@worldbank.org{p_end}" error 198 } - + *If cluster is usedTest if the number of clusters is the same in each group accross all balance vars if "`vce_type'" == "cluster" { - + if "`oneclstrow_tot'" == "" { *Store the obs num local oneclstrow_tot = `N_clust_tot' } *If not, then check that the obs num is the same as before else if !(`oneclstrow_tot' == `N_clust_tot') { - + *option onerow not allowed if N is different noi display as error "{phang}The number of clusters fora ll groups for `balancevar' is differenet compared to other balance variables. You can therefore not use the option onerow. 
Run the command without the option onerow to see which balance variable does not have the same number of clusters with non-missing values as the other balance variables.{p_end}" error 198 } } - - + + *Either this is the first balance var or num obs are identical, so write columns local tableRowUp `"`tableRowUp' _tab "`mean_tot'" "' - local tableRowDo `"`tableRowDo' _tab "[`var_tot']" "' - + local tableRowDo `"`tableRowDo' _tab "[`var_tot']" "' + local texRow `"`texRow' " & \begin{tabular}[t]{@{}c@{}} `mean_tot' \\ (`var_tot') \end{tabular}" "' } - } - - *** Create the columns with t-tests for this row - - - foreach ttest_pair of local ttest_pairs { - - *Create a local for each group in the test - *pair from the test_pair local created above - local undscr_pos = strpos("`ttest_pair'","_") - local first_group = substr("`ttest_pair'",1,`undscr_pos'-1) - local second_group = substr("`ttest_pair'", `undscr_pos'+1,.) - - *Create the local with the difference to be displayed in the table - local diff_`ttest_pair' = `mean_`first_group'' - `mean_`second_group'' //means from section above - - *Create a temporary varaible used as the dummy to indicate - *which observation is in the first and in the second group - *in the test pair. Since all other observations are mission, - *this variable also exculde all observations in neither of - *the groups from the test regression - tempvar tempvar_thisGroupInPair - gen `tempvar_thisGroupInPair' = . //default is missing, and obs not in this pair will remain missing - replace `tempvar_thisGroupInPair' = 0 if `groupOrder' == `first_group' - replace `tempvar_thisGroupInPair' = 1 if `groupOrder' == `second_group' - - *The command mean is used to test that there is variation - *in the balance var across these two groups. The regression - *that includes fixed effects and covariaties might run without - *error even if there is no variance across the two groups. 
The - *local varloc will determine if an error or a warning will be - *thrown or if the test results will be replaced with an "N/A". - if "`error_estm'" != "vce(robust)" local mean_error_estm `error_estm' //Robust not allowed in mean, but mean here is used to test something else - mean `balancevar', over(`tempvar_thisGroupInPair') `mean_error_estm' - mat var = e(V) - local varloc = max(var[1,1],var[2,2]) - - *This is the regression where we test differences. - reg `balancevar' `tempvar_thisGroupInPair' `covariates' i.`fixedeffect' `weight_option', `error_estm' - - - *Testing result and if valid, write to file with or without stars - if `varloc' == 0 { - - local warn_means_num = `warn_means_num' + 1 - local warn_means_strlen = max(`warn_means_strlen', strlen("`balancevar'")) - - local warn_means_test`warn_means_num' "(`first_group')-(`second_group')" - local warn_means_bvar`warn_means_num' "`balancevar'" - - local tableRowUp `" `tableRowUp' _tab "N/A" "' - local tableRowDo `" `tableRowDo' _tab " " "' - - local texRow `" `texRow' " & N/A" "' - - } - - else { - - *Perform the t-test and store p-value in pttest - test `tempvar_thisGroupInPair' - local pttest = r(p) - - - *If p-test option is used - if `PTTEST_USED' == 1 { - - local ttest_output = `pttest' - } - *Otherwise display differences - else { - - local ttest_output = `diff_`ttest_pair'' - } - - *Format the output - local ttest_output : display `diformat' `ttest_output' - - *Add stars - foreach ttest_p_level in `p1star' `p2star' `p3star' { - - if `pttest' < `ttest_p_level' local ttest_output "`ttest_output'*" - } - - *Print row - local tableRowUp `" `tableRowUp' _tab "`ttest_output'" "' - local tableRowDo `" `tableRowDo' _tab " " "' - - local texRow `" `texRow' " & `ttest_output'" "' - } } - - - *Write the row for this balance var to file. 
- file open `textname' using "`textfile'", text write append - file write `textname' /// - `tableRowUp' _n /// - `tableRowDo' _n - file close `textname' - file open `texname' using "`texfile'", text write append - file write `texname' /// - `texRow' " \rule{0pt}{`tex_line_space'}\\" _n - file close `texname' - - * We'll now add more space between the lines - if `TEXVSPACE_USED' == 0 local tex_line_space 3ex - else local tex_line_space `texvspace' - } + *** Create the columns with t-tests for this row - - - ***Write N row if onerow used - - if `ONEROW_USED' == 1 { - - *Variable column i.e. row title - local tableRowN `""N""' - local texRowN `"N"' + if `TTEST_USED' { + + foreach ttest_pair of local ttest_pairs { + + *Create a local for each group in the test + *pair from the test_pair local created above + local undscr_pos = strpos("`ttest_pair'","_") + local first_group = substr("`ttest_pair'",1,`undscr_pos'-1) + local second_group = substr("`ttest_pair'", `undscr_pos'+1,.) + + *Create the local with the difference to be displayed in the table + local diff_`ttest_pair' = `mean_`first_group'' - `mean_`second_group'' //means from section above + + *Create a temporary varaible used as the dummy to indicate + *which observation is in the first and in the second group + *in the test pair. Since all other observations are mission, + *this variable also exculde all observations in neither of + *the groups from the test regression + tempvar tempvar_thisGroupInPair + gen `tempvar_thisGroupInPair' = . //default is missing, and obs not in this pair will remain missing + replace `tempvar_thisGroupInPair' = 0 if `groupOrder' == `first_group' + replace `tempvar_thisGroupInPair' = 1 if `groupOrder' == `second_group' + + *The command mean is used to test that there is variation + *in the balance var across these two groups. The regression + *that includes fixed effects and covariaties might run without + *error even if there is no variance across the two groups. 
The + *local varloc will determine if an error or a warning will be + *thrown or if the test results will be replaced with an "N/A". + if "`error_estm'" != "vce(robust)" local mean_error_estm `error_estm' //Robust not allowed in mean, but mean here is used to test something else + mean `balancevar', over(`tempvar_thisGroupInPair') `mean_error_estm' + mat var = e(V) + local varloc = max(var[1,1],var[2,2]) + + *This is the regression where we test differences. + reg `balancevar' `tempvar_thisGroupInPair' `covariates' i.`fixedeffect' `weight_option', `error_estm' + + + *Testing result and if valid, write to file with or without stars + if `varloc' == 0 { + + local warn_means_num = `warn_means_num' + 1 + + local warn_means_name`warn_means_num' "t-test" + local warn_means_group`warn_means_num' "(`first_group')-(`second_group')" + local warn_means_bvar`warn_means_num' "`balancevar'" + + local tableRowUp `" `tableRowUp' _tab "N/A" "' + local tableRowDo `" `tableRowDo' _tab " " "' + + local texRow `" `texRow' " & N/A" "' + + } + + else { + + *Perform the t-test and store p-value in pttest + test `tempvar_thisGroupInPair' + local pttest = r(p) + + + *If p-test option is used + if `PTTEST_USED' == 1 { + + local ttest_output = `pttest' + } + *Otherwise display differences + else { + + local ttest_output = `diff_`ttest_pair'' + } + + *Format the output + local ttest_output : display `diformat' `ttest_output' + + *Add stars + foreach ttest_p_level in `p1star' `p2star' `p3star' { + + if `pttest' < `ttest_p_level' local ttest_output "`ttest_output'*" + } + + *Print row + local tableRowUp `" `tableRowUp' _tab "`ttest_output'" "' + local tableRowDo `" `tableRowDo' _tab " " "' + + local texRow `" `texRow' " & `ttest_output'" "' + } + } + } + + *** Create the columns with normalized difference for this row + + if `NORMDIFF_USED' { + + foreach normdiff_pair of local ttest_pairs { + + *Create a local for each group in the test + *pair from the test_pair local created above + local 
undscr_pos = strpos("`normdiff_pair'","_") + local first_group = substr("`normdiff_pair'",1,`undscr_pos'-1) + local second_group = substr("`normdiff_pair'", `undscr_pos'+1,.) + + *Create the local with the difference to be displayed in the table + local diff_`normdiff_pair' = `mean_`first_group'' - `mean_`second_group'' //means from section above + + *Calculate standard deviation for sample of interest + sum `balancevar' if inlist(`groupOrder',`first_group',`second_group') + + *Testing result and if valid, write to file with or without stars + if r(sd) == 0 { + + local warn_means_num = `warn_means_num' + 1 + + local warn_means_name`warn_means_num' "Norm diff" + local warn_means_group`warn_means_num' "(`first_group')-(`second_group')" + local warn_means_bvar`warn_means_num' "`balancevar'" + + local tableRowUp `" `tableRowUp' _tab "N/A" "' + local tableRowDo `" `tableRowDo' _tab " " "' + + local texRow `" `texRow' " & N/A" "' + + } + + else { + *Create the local with the normalized difference + local normdiff_`normdiff_pair' = `diff_`normdiff_pair''/r(sd) + + *Format the output + local normdiff_output : display `diformat' `normdiff_`normdiff_pair'' + + *Print row + local tableRowUp `" `tableRowUp' _tab "`normdiff_output'" "' + local tableRowDo `" `tableRowDo' _tab " " "' + + local texRow `" `texRow' " & `normdiff_output'" "' + } + } + } + + *** Create the columns with F-tests for this row + + if `FEQTEST_USED' { + + * Run regression + reg `balancevar' i.`grpvar' `covariates' i.`fixedeffect' `weight_option', `error_estm' + + * Calculate input for F-test: i. will drop the lowest value of + * grpvar, so we'll do the same + local 1st_level = strpos("`GRP_CODE_LEVELS'"," ") + 1 + local FEQTEST_CODE_LEVELS = substr("`GRP_CODE_LEVELS'",`1st_level',.) 
+ + * Calculate input for F-test: loop through levels to create input + local ftest_input "" + foreach grpCode of local FEQTEST_CODE_LEVELS { + local ftest_input = " `ftest_input' `grpCode'.`grpvar'=" + } + + test `ftest_input' 0 + local pfeqtest = r(p) + local ffeqtest = r(F) + + *Check if the test is valid. If not, print N/A and error message. + *Is yes, print test + if "`ffeqtest'" == "." { + + local warn_ftest_num = `warn_ftest_num' + 1 + + local warn_ftest_bvar`warn_ftest_num' "`balancevar'" + + local tableRowUp `" `tableRowUp' _tab "N/A" "' + local tableRowDo `" `tableRowDo' _tab " " "' + + local texRow `" `texRow' " & N/A" "' + } + + else { + + *Create the F-test output + + *If p-test option is used, display p-value + if `PFTEST_USED' { + + local feqtest_output = `pfeqtest' + } + *Otherwise display differences + else { + + local feqtest_output = `ffeqtest' + } + + + *Store f-value + local feqtest_output : display `diformat' `feqtest_output' + + *Adding stars + foreach feqtest_p_level in `p1star' `p2star' `p3star' { + + if `pfeqtest' < `feqtest_p_level' local feqtest_output `feqtest_output'* + } + + *Print row + local tableRowUp `" `tableRowUp' _tab "`feqtest_output'" "' + local tableRowDo `" `tableRowDo' _tab " " "' + + local texRow `" `texRow' " & `feqtest_output'" "' + } + } + + + *Write the row for this balance var to file. + file open `textname' using "`textfile'", text write append + file write `textname' /// + `tableRowUp' _n /// + `tableRowDo' _n + file close `textname' + + file open `texname' using "`texfile'", text write append + file write `texname' /// + `texRow' " \rule{0pt}{`tex_line_space'}\\" _n + file close `texname' + + * We'll now add more space between the lines + if `TEXVSPACE_USED' == 0 local tex_line_space 3ex + else local tex_line_space `texvspace' + } + + + + ***Write N row if onerow used + + if `ONEROW_USED' == 1 { + + *Variable column i.e. 
row title + local tableRowN `""N""' + local texRowN `"N"' + + local tableRowClstr `""Clusters""' + local texRowClstr `"Clusters"' + + *Loop over all groups + forvalues groupOrderNum = 1/`GRPVAR_NUM_GROUPS' { - local tableRowClstr `""Clusters""' - local texRowClstr `"Clusters"' - - *Loop over all groups - forvalues groupOrderNum = 1/`GRPVAR_NUM_GROUPS' { - *Prepare the row based on the numbers from above local tableRowN `" `tableRowN' _tab "`onerow_`groupOrderNum''" "' local texRowN `" `texRowN' & `onerow_`groupOrderNum'' "' - + local tableRowClstr `" `tableRowClstr' _tab "`oneclstrow_`groupOrderNum''" "' - local texRowClstr `" `texRowClstr' & `oneclstrow_`groupOrderNum'' "' - + local texRowClstr `" `texRowClstr' & `oneclstrow_`groupOrderNum'' "' + } if `TOTAL_USED' { - + *Prepare the row based on the numbers from above local tableRowN `" `tableRowN' _tab "`onerow_tot'" "' local texRowN `" `texRowN' & `onerow_tot' "' - + local tableRowClstr `" `tableRowClstr' _tab "`oneclstrow_tot'" "' - local texRowClstr `" `texRowClstr' & `oneclstrow_tot' "' + local texRowClstr `" `texRowClstr' & `oneclstrow_tot' "' } *Write the N prepared above file open `textname' using "`textfile'", text write append - file write `textname' `tableRowN' _n - if "`vce_type'" == "cluster" file write `textname' `tableRowClstr' _n + file write `textname' `tableRowN' _n + if "`vce_type'" == "cluster" file write `textname' `tableRowClstr' _n file close `textname' - + file open `texname' using "`texfile'", text write append - file write `texname' " `texRowN' \rule{0pt}{`tex_line_space'} \\" _n - if "`vce_type'" == "cluster" file write `texname' " `texRowClstr' \\" _n + file write `texname' " `texRowN' \rule{0pt}{`tex_line_space'} \\" _n + if "`vce_type'" == "cluster" file write `texname' " `texRowClstr' \\" _n file close `texname' } - + /*********************************************** ***********************************************/ - + *Running the regression for the F-tests - + 
/************************************************ ************************************************/ - if `FTEST_USED' == 1 { - + if `FTEST_USED' { + if `ONEROW_USED' == 0 { local ftestMulticol = 1 + (2*`NUM_COL_GRP_TOT') } @@ -1887,8 +2034,8 @@ qui { local ftestMulticol = 1 + `NUM_COL_GRP_TOT' } - - if `PFTEST_USED' == 1 { + + if `PFTEST_USED' { local Fstat_row `" "F-test of joint significance (p-value)" "' local Fstat_texrow `" "\multicolumn{`ftestMulticol'}{@{} l}{F-test of joint significance (p-value)}" "' } @@ -1896,21 +2043,21 @@ qui { local Fstat_row `" "F-test of joint significance (F-stat)" "' local Fstat_texrow `" "\multicolumn{`ftestMulticol'}{@{} l}{F-test of joint significance (F-stat)}" "' } - + local Fobs_row `" "F-test, number of observations" "' local Fobs_texrow `" "\multicolumn{`ftestMulticol'}{@{} l}{F-test, number of observations}" "' - + *Create empty cells for all the group columns forvalues groupIteration = 1/`GRPVAR_NUM_GROUPS' { - + local Fstat_row `" `Fstat_row' _tab "" "' local Fobs_row `" `Fobs_row' _tab "" "' - + *Add one more column if onerow is not used if `ONEROW_USED' == 0 { local Fstat_row `" `Fstat_row' _tab "" "' - local Fobs_row `" `Fobs_row' _tab "" "' - + local Fobs_row `" `Fobs_row' _tab "" "' + } } @@ -1918,52 +2065,52 @@ qui { if `TOTAL_USED' { local Fstat_row `" `Fstat_row' _tab "" "' local Fobs_row `" `Fobs_row' _tab "" "' - - + + *Add one more column if onerow is not used if `ONEROW_USED' == 0 { local Fstat_row `" `Fstat_row' _tab "" "' - local Fobs_row `" `Fobs_row' _tab "" "' - - } + local Fobs_row `" `Fobs_row' _tab "" "' + + } } *Local used to count number of f-test that trigered warnings local warn_joint_novar_num 0 local warn_joint_lovar_num 0 - local warn_joint_robus_num 0 + local warn_joint_robus_num 0 local fmiss_error 0 - + *Run the F-test on each pair foreach ttest_pair of local ttest_pairs { - - *Create a local for each group in the test - *pair from the test_pair local created above + + *Create a local for 
each group in the test + *pair from the test_pair local created above local undscr_pos = strpos("`ttest_pair'","_") local first_group = substr("`ttest_pair'",1,`undscr_pos'-1) local second_group = substr("`ttest_pair'", `undscr_pos'+1,.) - + *Create the local with the difference to be displayed in the table tempvar tempvar_thisGroupInPair miss gen `tempvar_thisGroupInPair' = . replace `tempvar_thisGroupInPair' = 0 if `groupOrder' == `first_group' - replace `tempvar_thisGroupInPair' = 1 if `groupOrder' == `second_group' - + replace `tempvar_thisGroupInPair' = 1 if `groupOrder' == `second_group' + ***** Testing if any obs have missing values in any og the varaibles used in the f_test - + *Loop over all balvars and seperate them with a comma local balvars_comma_seperated foreach balancevar of local balancevars { local balvars_comma_seperated `balvars_comma_seperated' , `balancevar' } *Remove the first comman before the first variable - local balvars_comma_seperated = subinstr("`balvars_comma_seperated'" ,",","",1) + local balvars_comma_seperated = subinstr("`balvars_comma_seperated'" ,",","",1) *Generate a variable equal to 1 if any balance var is missing gen `miss' = missing(`balvars_comma_seperated') if !missing(`tempvar_thisGroupInPair') - + *Count number obs in this test pair with non-missing values for all balance variables. - count if `miss' == 0 - + count if `miss' == 0 + if `r(N)' == 0 { noi di as error "{phang}F-test not possible. All observations are dropped from the f-test regression as no observation in the f-test between (`first_group')-(`second_group') has non-missing values in all balance variables. Disable the f-test option." error 2000 @@ -1972,121 +2119,121 @@ qui { noi di as error "{phang}F-test not possible. All but one observation are dropped from the f-test regression as only that one observation in the f-test between (`first_group')-(`second_group') has non-missing values in all balance variables. Disable the f-test option." 
error 2001 } - + *Count number obs in this test pair with missing value in at least one balance variable. count if `miss' == 1 - - + + if `r(N)' != 0 & `F_MISS_OK' == 0 { - local fmiss_error 1 //Used to throw error below + local fmiss_error 1 //Used to throw error below local fmiss_error_list `fmiss_error_list', (`first_group')-(`second_group') } - - ********** + + ********** * Run the regression for f-test reg `tempvar_thisGroupInPair' `balancevars' `covariates' i.`fixedeffect' `weight_option', `error_estm' - + *This F is calculated using fixed effects as well local reg_F "`e(F)'" local reg_F_N "`e(N)'" - + *Test all balance variables for joint significance cap testparm `balancevars' local test_F "`r(F)'" local test_F_p "`r(p)'" - - ********** - * Write to table - + + ********** + * Write to table + * No variance in either groups mean in any of the balance vars. F-test not possible to calculate if _rc == 111 { - + local warn_joint_novar_num = `warn_joint_novar_num' + 1 local warn_joint_novar`warn_joint_novar_num' "(`first_group')-(`second_group')" - + local Fstat_row `" `Fstat_row' _tab "N/A" "' local Fobs_row `" `Fobs_row' _tab "N/A" "' - + local Fstat_texrow `" `Fstat_texrow' " & N/A" "' local Fobs_texrow `" `Fobs_texrow' " & N/A" "' } - + * Collinearity between one balance variable and the dependent treatment dummy else if "`test_F'" == "." { - + local warn_joint_lovar_num = `warn_joint_lovar_num' + 1 local warn_joint_lovar`warn_joint_lovar_num' "(`first_group')-(`second_group')" - + local Fstat_row `" `Fstat_row' _tab "N/A" "' local Fobs_row `" `Fobs_row' _tab "N/A" "' - + local Fstat_texrow `" `Fstat_texrow' " & N/A" "' local Fobs_texrow `" `Fobs_texrow' " & N/A" "' } - + * F-test is incorreclty specified, error in this code else if _rc != 0 { noi di as error "F-test not valid. Please report this error to kbjarkefur@worldbank.org" error _rc } - + * F-tests possible to calculate else { - + * Robust singularity, see help file. Similar to overfitted model. 
Result possible but probably not reliable if "`reg_F'" == "." { - + local warn_joint_robus_num = `warn_joint_robus_num' + 1 local warn_joint_robus`warn_joint_robus_num' "(`first_group')-(`second_group')" - } - + } + *If p-test option is used - if `PFTEST_USED' == 1 { - + if `PFTEST_USED' { + local ftest_output = `test_F_p' } *Otherwise display differences else { - + local ftest_output = `test_F' - } - - + } + + *Store f-value local ftest_output : display `diformat' `ftest_output' local reg_F_N : display %9.0f `reg_F_N' - + *Adding stars foreach ftest_p_level in `p1star' `p2star' `p3star' { - + if `test_F_p' < `ftest_p_level' local ftest_output `ftest_output'* } - + *Store the f-stat value with stars to the f-stat row local Fstat_row `" `Fstat_row' _tab "`ftest_output'" "' local Fobs_row `" `Fobs_row' _tab "`reg_F_N'" "' - + local Fstat_texrow `" `Fstat_texrow' " & `ftest_output'" "' local Fobs_texrow `" `Fobs_texrow' " & `reg_F_N'" "' } } - + ******* * Throw missing values in f-test warning if `fmiss_error' == 1 { - + *Remove the first comman before the first variable - local fmiss_error_list = subinstr("`fmiss_error_list'" ,",","",1) - + local fmiss_error_list = subinstr("`fmiss_error_list'" ,",","",1) + noi di as error "{phang}F-test is possible but perhaps not advisable. Some observations have missing values in some of the balance variables and therfore dropped from the f-stat regression. This happened in the f-tests for the following group(s): [`fmiss_error_list']. Solve this by manually restricting the balance table using if or in, or disable the f-test, or by using option {help dmtab:balmiss()}. 
Suppress this error message by using option {help dmtab:fmissok}" error 416 } - - - + + + ******* * Write the f-test row to file - + file open `textname' using "`textfile'", text write append file write `textname' `Fstat_row' _n if !`F_NO_OBS' file write `textname' `Fobs_row' _n @@ -2100,50 +2247,68 @@ qui { file close `texname' } - + /*********************************************** ************************************************/ *Compile and display warnings (as opposed to errors in relation to t and f tests.) - + /************************************************* ************************************************/ - local anywarning = max(`warn_means_num' ,`warn_joint_novar_num', `warn_joint_lovar_num' ,`warn_joint_robus_num') + local anywarning = max(`warn_means_num',`warn_ftest_num',`warn_joint_novar_num', `warn_joint_lovar_num' ,`warn_joint_robus_num') local anywarning_F = max(`warn_joint_novar_num', `warn_joint_lovar_num' ,`warn_joint_robus_num') - + + + if `anywarning' > 0 { - + noi di as text "" noi di as error "{hline}" - noi di as error "{pstd}Stata issued one or more warnings in relation to the tests in this balance table. Read the warning(s) below carefully before using the values generated for this table.{p_end}" + noi di as error "{pstd}Stata issued one or more warnings in relation to the tests in this balance table. Read the warning(s) below carefully before using the values generated for this table.{p_end}" noi di as text "" - + if `warn_means_num' > 0 { - - noi di as text "{pmore}{bf:Difference-in-Means Tests:} The variance in both groups listed below is zero for the varaible indicated and a difference-in-means test between the two groups is therefore not valid. Tests are reported as N/A in the table.{p_end}" + + noi di as text "{pmore}{bf:Difference-in-Means Tests:} The variance in both groups listed below is zero for the variable indicated and a difference-in-means test between the two groups is therefore not valid. 
Tests are reported as N/A in the table.{p_end}" noi di as text "" - - noi di as text "{col 9}{c TLC}{hline 11}{c TT}{hline 36}{c TRC}" - noi di as text "{col 9}{c |}{col 13}Test{col 21}{c |}{col 24}Balance Variable{col 58}{c |}" - noi di as text "{col 9}{c LT}{hline 11}{c +}{hline 36}{c RT}" - + + noi di as text "{col 9}{c TLC}{hline 11}{c TT}{hline 12}{c TT}{hline 37}{c TRC}" + noi di as text "{col 9}{c |}{col 13}Test{col 21}{c |}{col 25}Group{col 34}{c |}{col 39}Balance Variable{col 72}{c |}" + noi di as text "{col 9}{c LT}{hline 11}{c +}{hline 12}{c +}{hline 37}{c RT}" + forvalues warn_num = 1/`warn_means_num' { - noi di as text "{col 9}{c |}{col 12}`warn_means_test`warn_num''{col 21}{c |}{col 24}`warn_means_bvar`warn_num''{col 58}{c |}" + noi di as text "{col 9}{c |}{col 11}`warn_means_name`warn_num''{col 21}{c |}{col 23}`warn_means_group`warn_num''{col 34}{c |}{col 37}`warn_means_bvar`warn_num''{col 72}{c |}" + } + noi di as text "{col 9}{c BLC}{hline 11}{c BT}{hline 12}{c BT}{hline 37}{c BRC}" + noi di as text "" + } + + if `warn_ftest_num' > 0 { + + noi di as text "{pmore}{bf:F-Test for Joint Orthogonality:} The variance all groups is zero for the varible indicated and a test of joint orthogonality for all groups is therefore not valid. Tests are reported as N/A in the table.{p_end}" + noi di as text "" + + noi di as text "{col 9}{c TLC}{hline 25}{c TRC}" + noi di as text "{col 9}{c |}{col 13} Balance Variable{col 35}{c |}" + noi di as text "{col 9}{c LT}{hline 25}{c RT}" + + forvalues warn_num = 1/`warn_ftest_num' { + noi di as text "{col 9}{c |}{col 12}`warn_ftest_bvar`warn_num''{col 35}{c |}" } - noi di as text "{col 9}{c BLC}{hline 11}{c BT}{hline 36}{c BRC}" + noi di as text "{col 9}{c BLC}{hline 25}{c BRC}" noi di as text "" } - + if `anywarning_F' > 0 { - noi di as text "{pmore}{bf:Joint Significance Tests:} F-tests are not possible to perform or unreliable. 
See below for details:{p_end}" + noi di as text "{pmore}{bf:Joint Significance Tests:} F-tests are not possible to perform or unreliable. See below for details:{p_end}" noi di as text "" - + if `warn_joint_novar_num' > 0 { - - noi di as text "{pmore}In the following tests, F-tests were not valid as all variables were omitted in the joint significance test due to colliniarity. Tests are reported as N/A in the table.{p_end}" + + noi di as text "{pmore}In the following tests, F-tests were not valid as all variables were omitted in the joint significance test due to colliniarity. Tests are reported as N/A in the table.{p_end}" noi di as text "" - + noi di as text "{col 9}{c TLC}{hline 12}{c TRC}" noi di as text "{col 9}{c |}{col 13}Test{col 22}{c |}" noi di as text "{col 9}{c LT}{hline 12}{c RT}" @@ -2155,10 +2320,10 @@ qui { noi di as text "" } if `warn_joint_lovar_num' > 0 { - - noi di as text "{pmore}In the following tests, F-tests are not valid as the variation in, and the covariation between, the balance variables is too close to zero in the joint test. This could be due to many reasons, but is usually due to a balance variable with high correlation with group dummy. Tests are reported as N/A in the table.{p_end}" + + noi di as text "{pmore}In the following tests, F-tests are not valid as the variation in, and the covariation between, the balance variables is too close to zero in the joint test. This could be due to many reasons, but is usually due to a balance variable with high correlation with group dummy. Tests are reported as N/A in the table.{p_end}" noi di as text "" - + noi di as text "{col 9}{c TLC}{hline 12}{c TRC}" noi di as text "{col 9}{c |}{col 13}Test{col 22}{c |}" noi di as text "{col 9}{c LT}{hline 12}{c RT}" @@ -2170,10 +2335,10 @@ qui { noi di as text "" } if `warn_joint_robus_num' > 0 { - - noi di as text "{pmore}In the following tests, F-tests are possible to calculate, but Stata issued a warning. 
Read more about this warning {help j_robustsingular:here}. Tests are reported with F-values and significance stars (if applicable), but these results might be unreliable.{p_end}" + + noi di as text "{pmore}In the following tests, F-tests are possible to calculate, but Stata issued a warning. Read more about this warning {help j_robustsingular:here}. Tests are reported with F-values and significance stars (if applicable), but these results might be unreliable.{p_end}" noi di as text "" - + noi di as text "{col 9}{c TLC}{hline 12}{c TRC}" noi di as text "{col 9}{c |}{col 13}Test{col 22}{c |}" noi di as text "{col 9}{c LT}{hline 12}{c RT}" @@ -2183,15 +2348,15 @@ qui { } noi di as text "{col 9}{c BLC}{hline 12}{c BRC}" noi di as text "" - } - } - - noi di as error "{pstd}Stata issued one or more warnings in relation to the tests in this balance table. Read the warning(s) above carefully before using the values generated for this table.{p_end}" + } + } + + noi di as error "{pstd}Stata issued one or more warnings in relation to the tests in this balance table. Read the warning(s) above carefully before using the values generated for this table.{p_end}" noi di as error "{hline}" noi di as text "" - + } - + /*********************************************** ************************************************/ @@ -2199,11 +2364,11 @@ qui { /************************************************* ************************************************/ - + * Prepare the covariate note. if `COVARIATES_USED' == 1 { local covars_comma = "" - + *Loop over all covariates and add a comma foreach covar of local covariates { if "`covars_comma'" == "" { @@ -2211,11 +2376,11 @@ qui { local one_covar 1 } else { - local covars_comma "`covar', `covars_comma'" + local covars_comma "`covar', `covars_comma'" local one_covar 0 } } - + * If only one covariate, remove and from local and make note singular, and if multiple covariates, make note plural. 
if `one_covar' == 1 { local covars_comma = subinstr("`covars_comma'" , "and ", "", .) @@ -2225,27 +2390,27 @@ qui { local covar_note "The covariate variables `covars_comma' are included in all estimation regressions. " } } - + *** Prepare the notes used below local fixed_note "Fixed effects using variable `fixedeffect' are included in all estimation regressions. " local stars_note "***, **, and * indicate significance at the `p3star_percent', `p2star_percent', and `p1star_percent' percent critical level. " - - if `PTTEST_USED' == 1 { - local ttest_note "The value displayed for t-tests are p-values. " + + if `PTTEST_USED' == 1 { + local ttest_note "The value displayed for t-tests are p-values. " } else { - local ttest_note "The value displayed for t-tests are the differences in the means across the groups. " + local ttest_note "The value displayed for t-tests are the differences in the means across the groups. " } - - if `PFTEST_USED' == 1 { - local ftest_note "The value displayed for F-tests are p-values. " + + if `PFTEST_USED' == 1 { + local ftest_note "The value displayed for F-tests are p-values. " } else { local ftest_note "The value displayed for F-tests are the F-statistics. " - } - + } + if `VCE_USED' == 1 { - + *Display variation in Standard errors (default) or in Standard Deviations if `STDEV_USED' == 0 { *Standard Errors string @@ -2254,55 +2419,55 @@ qui { else { *Standard Deviation string local variance_type_name "Standard deviations" - } - + } + if "`vce_type'" == "robust" local error_est_note "`variance_type_name' are robust. " if "`vce_type'" == "cluster" local error_est_note "`variance_type_name' are clustered at variable `cluster_var'. " if "`vce_type'" == "bootstrap" local error_est_note "`variance_type_name' are estimeated using bootstrap. 
" - } - + } + if `WEIGHT_USED' == 1 { - + local f_weights "fweights fw freq weight" local a_weights "aweights aw" local p_weights "pweights pw" local i_weights "iweights iw" - + if `:list weight_type in f_weights' local weight_type = "frequency" else if `:list weight_type in a_weights' local weight_type = "analytical" else if `:list weight_type in p_weights' local weight_type = "probability" else if `:list weight_type in i_weights' local weight_type = "importance" - + local weight_note "Observations are weighted using variable `weight_var' as `weight_type' weights." - - } - + } + + if `BALMISS_USED' == 1 | `BALMISSREG_USED' == 1 { - + if `BALMISS_USED' == 1 local balmiss_note "All missing values in balance variables are treated as zero." if `BALMISSREG_USED' == 1 local balmiss_note "Regular missing values in balance variables are treated as zero, {help missing:extended missing values} are still treated as missing." - + local BALMISS_USED = 1 } - - - + + + if `COVMISS_USED' == 1 | `COVMISSREG_USED' == 1 { - - if `COVMISS_USED' == 1 local covmiss_note "All missing values in covariate varaibles are treated as zero." - if `COVMISSREG_USED' == 1 local covmiss_note "Regular missing values in covariate varaibles are treated as zero, {help missing:extended missing values} are still treated as missing." - + + if `COVMISS_USED' == 1 local covmiss_note "All missing values in covariate variables are treated as zero." + if `COVMISSREG_USED' == 1 local covmiss_note "Regular missing values in covariate variables are treated as zero, {help missing:extended missing values} are still treated as missing." 
+ local COVMISS_USED = 1 } - - + + *** Write notes to file according to specificiation - + if `NOTECOMBINE_USED' == 1 { *Combine all notes used to one line - + *Delete the locals corresponding to options not used if `FTEST_USED' == 0 local ftest_note "" if `VCE_USED' == 0 local error_est_note "" @@ -2312,47 +2477,47 @@ qui { if `BALMISS_USED' == 0 local balmiss_note "" if `COVMISS_USED' == 0 local covmiss_note "" if `STARSNOADD_USED' == 1 local stars_note "" - + *Write to file file open `textname' using "`textfile'", text write append - + file write `textname' "`tblnote' `ttest_note'`ftest_note'`error_est_note'`fixed_note'`covar_note'`weight_note'`balmiss_note'`covmiss_note'`stars_note'" _n - + file close `textname' - + } else if `NONOTE_USED' == 1 { - + *Nonote used. Only add manually entered note - + file open `textname' using "`textfile'", text write append - + if `NOTE_USED' file write `textname' "`tblnote'" _n - + file close `textname' - + } else { - + file open `textname' using "`textfile'", text write append - + if `NOTE_USED' file write `textname' "`tblnote'" _n file write `textname' "`ttest_note'" _n if `FTEST_USED' file write `textname' "`ftest_note'" _n - if `VCE_USED' file write `textname' "`error_est_note'" _n + if `VCE_USED' file write `textname' "`error_est_note'" _n if `FIX_EFFECT_USED' file write `textname' "`fixed_note'" _n if `COVARIATES_USED' file write `textname' "`covar_note'" _n - if `BALMISS_USED' file write `textname' "`balmiss_note'" _n - if `COVMISS_USED' file write `textname' "`covmiss_note'" _n + if `BALMISS_USED' file write `textname' "`balmiss_note'" _n + if `COVMISS_USED' file write `textname' "`covmiss_note'" _n if !`STARSNOADD_USED' file write `textname' "`stars_note'" _n file close `textname' - + } - + *** Write tex footer - + *Latex is always combnote, so prep for that *Delete the locals corresponding to options not used if `FTEST_USED' == 0 local ftest_note "" @@ -2363,19 +2528,19 @@ qui { if `BALMISS_USED' == 0 local 
balmiss_note "" if `COVMISS_USED' == 0 local covmiss_note "" if `STARSNOADD_USED' == 1 local stars_note "" - + * Make sure variables with underscore in name are displayed correctly in the note local notes_list "tblnote error_est_note weight_note fixed_note covar_note" - + foreach note of local notes_list { - + local `note' : subinstr local `note' "_" "\_" , all local `note' : subinstr local `note' "%" "\%" , all local `note' : subinstr local `note' "&" "\&" , all local `note' : subinstr local `note' "\$" "\\\$" , all } - + *Calculate total number of columns if `TEXCOLWIDTH_USED' == 0 local totalColNo = strlen("`colstring'") else { @@ -2383,16 +2548,16 @@ qui { local nonLabelCols = substr("`colstring'",`colstrBracePos'+1,.) local totalColNo = strlen("`nonLabelCols'") +1 } - + *Set default tex note width (note width is a multiple of text width. *if none is manually specified, default is text width) if `NOTEWIDTH_USED' == 0 local texnotewidth = 1 - + file open `texname' using "`texfile'", text write append - + file write `texname' /// - "\hline \hline" _n - + "\hline \hline \\[-1.8ex]" _n + ** Write notes to file according to specificiation *If no automatic notes are used, write only manual notes if `NONOTE_USED' & `NOTE_USED' { @@ -2403,181 +2568,317 @@ qui { } else if ! `NONOTE_USED' { - + *Write to file file write `texname' /// "%%% This is the note. If it does not have the correct margins, edit text below to fit to table size." 
_n /// "\multicolumn{`totalColNo'}{@{}p{`texnotewidth'\textwidth}}" _n /// `"{\textit{Notes}: `tblnote' `ttest_note'`ftest_note'`error_est_note'`fixed_note'`covar_note'`weight_note'`balmiss_note'`covmiss_note'`stars_note'}"' _n } - - file write `texname' /// + + file write `texname' /// "\end{tabular}" _n - file close `texname' - - + file close `texname' + + if `TEXDOC_USED' { - + file open `texname' using "`texfile'", text write append - file write `texname' /// + file write `texname' /// "\end{adjustbox}" _n /// "\end{table}" _n /// "\end{document}" _n - + file close `texname' } - - + + /*********************************************** ************************************************/ *Export and restore data unless other specified - + /************************************************* ************************************************/ - + *Restore from orginial preserve at top of command restore - - - - + + + + if !( `BROWSE_USED' | `SAVE_BROWSE_USED' ) preserve - - + + ****************************************** *Load the text file with the data prepared - + *Insheet was replaced by import delimited by Stata 13 if c(version) < 13 { - + *For Stata 11 and 12 insheet using "`textfile'", tab clear } else { - + *For Stata 13 and more recent import delimited using "`textfile'", clear delimiters("\t") } - + ****************************************** - *Export the data according to user specification - + *Export the data according to user specification + *Export to excel format if `SAVE_USED' { - + export excel using `"`save'"', `replace' - + noi di as result `"{phang}Balance table saved to: {browse "`save'":`save'} "' } - + *Export to tex format if `SAVE_TEX_USED' { - + copy "`texfile'" `"`savetex'"', `replace' - + noi di as result `"{phang}Balance table saved to: {browse "`savetex'":`savetex'} "' } - - + + if !( `BROWSE_USED' | `SAVE_BROWSE_USED' ) restore - -} + +} end -*This function is used to test the input in the options -*that replace missing values. 
Only three strings are allowed +*This function is used to test the input in the options +*that replace missing values. Only three strings are allowed *as arguemnts cap program drop iereplacestringtest program define iereplacestringtest args optionname replacetypestring - + if !("`replacetypestring'" == "zero" | "`replacetypestring'" == "mean" | "`replacetypestring'" == "groupmean") { - + noi display as error "{phang}The string entered in option `optionname'(`replacetypestring') is not a valid replace type string. Only zero, mean and groupmean is allowed. See {help iebaltab:help iebaltab} for more details.{p_end}" error 198 } -end +end -*This function replaces zeros in balance variables +*This function replaces zeros in balance variables *or covariates according to the users specifications. cap program drop iereplacemiss program define iereplacemiss - + syntax varname, replacetype(string) [minobsmean(numlist) regonly groupvar(varname) groupcodes(string)] - + *Which missing values to change. Standard or extended. if "`regonly'" == "" { - local misstype "`varlist' >= ." + local misstype "`varlist' >= ." } else { - local misstype "`varlist' == ." + local misstype "`varlist' == ." } - - - *Set the minimum number of observations + + + *Set the minimum number of observations *a mean is allowed to be based on. if "`minobsmean'" == "" { local minobs 10 //10 is the default } else { *setting it to a user defiend value - local minobs `minobsmean' + local minobs `minobsmean' } - - - *Change the missing values accord + + + *Change the missing values accord *to the users specifications if "`replacetype'" == "zero" { - + *Missing is set to zero replace `varlist' = 0 if `misstype' - + } else if "`replacetype'" == "mean" { - + *Generate the mean for all observations in the table - sum `varlist' - + sum `varlist' + *Test that there are enough observations to base the mean on if `r(N)' < `minobs' { noi display as error "{phang}Not enough observations. 
There are less than `minobs' observations with a non missing value in `varlist'. Missing values can therefore not be set to the mean. Click {stata tab `varlist', missing} for detailed information.{p_end}" error 2001 } - + *Missing values are set to the mean replace `varlist' = `r(mean)' if `misstype' - + } else if "`replacetype'" == "groupmean" { - + *Loop over each group code foreach code of local groupcodes { - + *Generate the mean for all observations in the group sum `varlist' if `groupvar' == `code' - + *Test that there are enough observations to base the mean on if `r(N)' == 0 { - + noi display as error "{phang}No observations. All values are missing in variable `varlist' for group `code' in variable `groupvar' and missing values can therefore not be set to the group mean. Click {stata tab `varlist' if `groupvar' == `code', missing} for detailed information.{p_end}" error 2000 } if `r(N)' < `minobs' { - + noi display as error "{phang}Not enough observations. There are less than `minobs' observations in group `code' in variable `groupvar' with a non missing value in `varlist'. Missing values can therefore not be set to the group mean. 
Click {stata tab `varlist' if `groupvar' == `code', missing} for detailed information.{p_end}" error 2001 - + } - + *Missing values are set to the mean of the group replace `varlist' = `r(mean)' if `misstype' & `groupvar' == `code' } - + + } + +end + + +cap program drop iecontrolheader +program define iecontrolheader, rclass + + args control ORDER_OF_GROUPS GRPVAR_NUM_GROUPS TTEST_USED PTTEST_USED NORMDIFF_USED titlerow1 titlerow2 titlerow3 texrow3 + + local ttest_pairs "" + + *The t-tests will only be between control and each of the other groups + *Get the order of the control group + local ctrlGrpPos : list posof "`control'" in ORDER_OF_GROUPS + + *Storing a local of all the test pairs + forvalues second_ttest_group = 1/`GRPVAR_NUM_GROUPS' { + if `second_ttest_group' != `ctrlGrpPos' { + local ttest_pairs "`ttest_pairs' `ctrlGrpPos'_`second_ttest_group'" + } + } + + if `TTEST_USED' { + + forvalues second_ttest_group = 1/`GRPVAR_NUM_GROUPS' { + + *Include all groups apart from the control group itself + if `second_ttest_group' != `ctrlGrpPos' { + + *Adding title rows for the t-test. 
+ local titlerow1 `"`titlerow1' _tab "t-test""' + if `PTTEST_USED' local titlerow2 `"`titlerow2' _tab "p-value""' + else local titlerow2 `"`titlerow2' _tab "Difference""' + local titlerow3 `"`titlerow3' _tab "(`ctrlGrpPos')-(`second_ttest_group')""' + + local texrow3 `" `texrow3' & (`ctrlGrpPos')-(`second_ttest_group') "' + + } + } + } + + if `NORMDIFF_USED' { + + forvalues second_ttest_group = 1/`GRPVAR_NUM_GROUPS' { + + *Include all groups apart from the control group itself + if `second_ttest_group' != `ctrlGrpPos' { + + local titlerow1 `"`titlerow1' _tab "Normalized""' + local titlerow2 `"`titlerow2' _tab "difference""' + local titlerow3 `"`titlerow3' _tab "(`ctrlGrpPos')-(`second_ttest_group')""' + + local texrow3 `" `texrow3' & (`ctrlGrpPos')-(`second_ttest_group') "' + } } + } + + return local titlerow1 `"`titlerow1'"' + return local titlerow2 `"`titlerow2'"' + return local titlerow3 `"`titlerow3'"' + + return local texrow3 `"`texrow3'"' + + return local ttest_pairs `"`ttest_pairs'"' + +end + +cap program drop ienocontrolheader +program define ienocontrolheader, rclass + + args GRPVAR_NUM_GROUPS TTEST_USED PTTEST_USED NORMDIFF_USED titlerow1 titlerow2 titlerow3 texrow3 + + local ttest_pairs "" + + *The t-tests will be all cominations of groups + forvalues first_ttest_group = 1/`GRPVAR_NUM_GROUPS' { + + ** To guarantee that all combination of groups are included + * but no duplicates are possible, start next loop one integer + * higher than the first group + local nextPossGroup = `first_ttest_group' + 1 + + *Storing a local of all the test pairs + forvalues second_ttest_group = `nextPossGroup'/`GRPVAR_NUM_GROUPS' { + local ttest_pairs "`ttest_pairs' `first_ttest_group'_`second_ttest_group'" + } + } + + *Adding title rows for the t-test. 
+ if `TTEST_USED' { + forvalues first_ttest_group = 1/`GRPVAR_NUM_GROUPS' { + + ** To guarantee that all combination of groups are included + * but no duplicates are possible, start next loop one integer + * higher than the first group + local nextPossGroup = `first_ttest_group' + 1 + + forvalues second_ttest_group = `nextPossGroup'/`GRPVAR_NUM_GROUPS' { + + local titlerow1 `"`titlerow1' _tab "t-test""' + if `PTTEST_USED' local titlerow2 `"`titlerow2' _tab "p-value""' + else local titlerow2 `"`titlerow2' _tab "Difference""' + local titlerow3 `"`titlerow3' _tab "(`first_ttest_group')-(`second_ttest_group')""' + + local texrow3 `" `texrow3' & (`first_ttest_group')-(`second_ttest_group') "' + } + } + } + + *Adding title rows for the normalized differences. + if `NORMDIFF_USED' { + forvalues first_ttest_group = 1/`GRPVAR_NUM_GROUPS' { + + ** To guarantee that all combination of groups are included + * but no duplicates are possible, start next loop one integer + * higher than the first group + local nextPossGroup = `first_ttest_group' + 1 + + forvalues second_ttest_group = `nextPossGroup'/`GRPVAR_NUM_GROUPS' { + + local titlerow1 `"`titlerow1' _tab "Normalized""' + local titlerow2 `"`titlerow2' _tab "difference""' + local titlerow3 `"`titlerow3' _tab "(`first_ttest_group')-(`second_ttest_group')""' + + local texrow3 `" `texrow3' & (`first_ttest_group')-(`second_ttest_group') "' + } + } + } + + return local titlerow1 `"`titlerow1'"' + return local titlerow2 `"`titlerow2'"' + return local titlerow3 `"`titlerow3'"' + + return local texrow3 `"`texrow3'"' + + return local ttest_pairs `"`ttest_pairs'"' end diff --git a/src/ado_files/ieboilsave.ado b/src/ado_files/ieboilsave.ado index 21a4cb46..43beb589 100644 --- a/src/ado_files/ieboilsave.ado +++ b/src/ado_files/ieboilsave.ado @@ -1,169 +1,169 @@ -*! version 5.4 15DEC2017 DIME Analytics lcardosodeandrad@worldbank.org - - capture program drop ieboilsave +*! 
version 5.5 26APR2018 DIME Analytics lcardosodeandrad@worldbank.org + + capture program drop ieboilsave program ieboilsave , rclass - + syntax , IDVARname(varlist) [DIOUTput missingok tagnoname tagnohost] - + qui { - + preserve - + local origversion "`c(version)'" - + version 11.0 - + //Checking that only one id variable is listed if `:list sizeof idvarname' > 1 { - + noi di as error "{phang}Multiple ID variables in idvarname(`idvarname') are not allowed. While it is not always incorrect, it is bad practice, see {help ieboilsave##IDnotes:Notes on ID variables} for more details.{p_end}" noi di "" error 103 exit } - - + + /********************************* - + ID variables - + *********************************/ - + capture isid `idvarname' - + if _rc { - - + + //Test missing capture assert !missing(`idvarname') if _rc { - + count if missing(`idvarname') - + noi di as error "{phang}The ID variable `idvarname' is missing in `r(N)' observation(s). The ID variable needs to be fully identifying, meaning that no values can be a missing values (., .a, .b ... .z) or the empty string{p_end}" noi di "" } - + //Test duplicates tempvar iedup - + duplicates tag `idvarname', gen(`iedup') - + count if `iedup' != 0 - + if r(N) > 0 { - + sort `idvarname' - + noi di as error "{phang}To be uniquely identifying the ID variable should not have any duplicates. The ID variable `idvarname' has duplicate observations in the following values:{p_end}" noi list `idvarname' if `iedup' != 0 } noi di "" error 148 exit - } - - - + } + + + /********************************* - + Missing values - + *********************************/ - + if "`missingok'" == "" { - - local varsStandMiss - + + local varsStandMiss + ds, has(type numeric) - + foreach variable in `r(varlist)' { - + cap assert `variable' != . 
- + if _rc { - + local varsStandMiss `varsStandMiss' `variable' } } - + if `:list sizeof varsStandMiss' > 0 { - + noi di as error "{phang}There are `:list sizeof varsStandMiss' numeric variable(s) that contains the standard missing value (.) which is bad practice. A list of the variable(s) are stored in the local {cmd:r(standmissvars)}. Extended missing variables should be used. See {help ieboilsave} for more details.{p_end}" - + return local standmissvars "`varsStandMiss'" - + error 416 exit } } - - restore - + + restore + /********************************* - + Output success messages - - *********************************/ - + + *********************************/ + // ID - + //Store the name of idvar in data set char and in notes char _dta[ie_idvar] "`idvarname'" - + local idOut "The uniquely and fully identifying ID variable is `idvarname'. " - - - // Version + + + // Version char _dta[ie_version] "`origversion'" - + local versOut "This data set was created in Stata version `origversion'" - + // Date char _dta[ie_date] "`c(current_date)'" - + local dateOut " on `c(current_date)'." - - // Name - + + // Name + local nameOut "" local hostOut "" - + if "`tagnoname'" == "" { - + char _dta[ie_name] "`c(username)'" - + if "`tagnohost'" == "" { - + char _dta[ie_host] "`c(hostname)'" local hostOut ", by user `c(username)' using computer `c(hostname)'," } else { - + local nameOut ", by user `c(username)'," - + } } // Missing vars - + if "`missingok'" == "" { - + local missOut "There are no regular missing values in this data set" } else { - + local missOut "This data set was not tested for missing values" } - + char _dta[ie_boilsave] "ieboilsave ran successfully. 
`idOut'`versOut'`nameOut'`hostOut'`dateOut' `missOut'" - + if "`dioutput'" != "" { - + local outputSum : char _dta[ie_boilsave] noi di "" noi di "{phang}`outputSum'{p_end}" - + } - + } - end + end diff --git a/src/ado_files/ieboilstart.ado b/src/ado_files/ieboilstart.ado index 4dfcefd7..9f12d76b 100644 --- a/src/ado_files/ieboilstart.ado +++ b/src/ado_files/ieboilstart.ado @@ -1,116 +1,116 @@ -*! version 5.4 15DEC2017 DIME Analytics lcardosodeandrad@worldbank.org +*! version 5.5 26APR2018 DIME Analytics lcardosodeandrad@worldbank.org capture program drop ieboilstart program ieboilstart , rclass - + qui { - + syntax , Versionnumber(string) [noclear maxvar(numlist) matsize(numlist) Quietly veryquietly Custom(string) setmem(string) ] - + version 11.0 - + /********************************* - - Check user specifed version + + Check user specifed version and apply it is valid - - *********************************/ - + + *********************************/ + local stata_versions "11 11.0 11.1 11.2 12 12.0 12.1 13 13.0 13.1 14 14.0 14.1 14.2 15 15.0" - + if `:list versionnumber in stata_versions' == 0 { di as error "{phang}Only recent major releases are allowed. 
The releases currently allowed are:{break}`stata_versions'{p_end}" di "" error 198 exit - } - + } + *Return the value return local version "version `versionnumber'" - + *Set the version specfied in the command version `versionnumber' /********************************* - + Check settings related to older versions and Stata IC - - *********************************/ - + + *********************************/ + *Test that maxvar is not set in combination when using Stata IC if !(c(MP) == 1 | c(SE) == 1) & "`maxvar'" != "" { - + di as error "{phang}In Stata IC the maximum number of variables allowed is fixed at 2,047 and maxvar() is therefore not allowed.{p_end}" error 198 } - + if "`setmem'" != "" { - + if `versionnumber' >= 12 { - + di as error "{phang}Option setmem() is only allowed when setting the version number to 11. Setmem() is only applicable in Stata 11 or eralier, but those versions wont be able to run this file as the version number is set to 12 or higher.{p_end}" error 198 } - - - *Split the memory setting string into value and byte unit + + + *Split the memory setting string into value and byte unit local mem_strlen = strlen("`setmem'") -1 local mem_number = substr("`setmem'", 1 , `mem_strlen') local mem_bytetype = substr("`setmem'", -1 , 1) - - + + *Test that the number part is a number and a integer cap confirm integer number `mem_number' if _rc { - + di as error "{phang}The value in setmem(`setmem') must be a number (followed by either b, k, m or g). See {help memory} for details.{p_end}" error _rc } - + *Test that the byte type is not anything else than b, k, m or g if !`=inlist(lower("`mem_bytetype'"), "b", "k", "m", "g")' { - + di as error "{phang}The last character in setmem(`setmem') must be either b, k, m or g. See {help memory} for details.{p_end}"" error 7 - + } - } - else { - - *Default value is 50M. This is probably too little, but + } + else { + + *Default value is 50M. This is probably too little, but *it will safely work. 
And users can manually increase this local setmem "50M" - + } - - + + /********************************* - - Check input for maxvar and matsize - if specified, other wise set + + Check input for maxvar and matsize + if specified, other wise set maximum value allowed. - - *********************************/ - + + *********************************/ + *Setting maxvar requires a cleared memory. Therefore *maxvar() and noclear cannot be used at the same time. if "`maxvar'" != "" & "`clear'" != "" { - + di as error "{phang}It is not possible to set allowed maximum numbers of variables without clearing the data. noclear and maxvar() can therefore not be specified at the same time{p_end}" di "" error 198 exit - } - + } + foreach maxlocal in maxvar matsize { - - - *Set locals with the max and min values fox maxvar and matsize + + + *Set locals with the max and min values fox maxvar and matsize if "`maxlocal'" == "maxvar" { local max 32767 local min 2048 @@ -118,12 +118,12 @@ if "`maxlocal'" == "matsize" { local max 11000 local min 10 - } - + } + *Test if user set a value for this value if "``maxlocal''" != "" { - + *If user specified a value, test that it is between the min and the max if !(``maxlocal'' >= `min' & ``maxlocal'' <= `max') { @@ -135,74 +135,74 @@ exit } } - + else { *User did not specify value, use ieboilstart's defaults: - + if "`maxlocal'" == "maxvar" { - *Set maxvar to max value allowed as this is often an issue when working with large survey data + *Set maxvar to max value allowed as this is often an issue when working with large survey data local `maxlocal' `max' } if "`maxlocal'" == "matsize" { *Set to the default as the maximum is rarely requered. 
local `maxlocal' 400 - } + } } } - + /********************************* - + Set the settings - - *********************************/ - + + *********************************/ + local setDispLocal "{col 5}{ul:Settings set by this command:}" - + *Set basic memory limits if "`clear'" == "" { - + *Setting clear all local setDispLocal "`setDispLocal'{break}{col 5}clear all" - - **Setting maxvar not allowed in Stata IC. + + **Setting maxvar not allowed in Stata IC. if (c(MP) == 1 | c(SE) == 1) { - + *Setting set maxvar `maxvar' local setDispLocal "`setDispLocal'{break}{col 5}set maxvar {col 22}`maxvar'" } - } + } *Setting set matsize `matsize' local setDispLocal "`setDispLocal'{break}{col 5}set matsize {col 22}`matsize'" - - - **************** - *Memory settings - - *For compatibility with Stata 11 the do file includes and if/else - *condition testing version number. The memoroy settings introduced - *in Stata 12 will be applied to Stata version more recent than + + + **************** + *Memory settings + + *For compatibility with Stata 11 the do file includes and if/else + *condition testing version number. The memoroy settings introduced + *in Stata 12 will be applied to Stata version more recent than *Stata 11, and set memory will be applied to Stata 11. if c(stata_version) >= 12 { - + *Setting set niceness 5 local setDispLocal "`setDispLocal'{break}{col 5}set niceness{col 22}5" - + *These settings cannot be modified with data in memory if "`clear'" == "" { - + *Settings set min_memory 0 set max_memory . local setDispLocal "`setDispLocal'{break}{col 5}set min_memory {col 22}0{break}{col 5}set max_memory {col 22}." - + *Set segment size to the largest value allowed by the operative system - if c(bit) == 64 { + if c(bit) == 64 { *Setting set segmentsize 32m local setDispLocal "`setDispLocal'{break}{col 5}set segmentsize {col 22}32m" @@ -215,17 +215,17 @@ } } else { - - *If this dofile is generated in Stata 11 then only the old - *way of setting memory is included. 
This will be ignored by + + *If this dofile is generated in Stata 11 then only the old + *way of setting memory is included. This will be ignored by *more recent versios of Stata - + *Setting set memory `setmem' local setDispLocal "`setDispLocal'{break}{col 5}set memory {col 22}`setmem'" - + } - + ********************* *Set default settings set more off , perm @@ -234,54 +234,54 @@ local setDispLocal "`setDispLocal'{break}{col 5}set more {col 22}off {col 28}, perm" local setDispLocal "`setDispLocal'{break}{col 5}pause {col 22}on" local setDispLocal "`setDispLocal'{break}{col 5}set varabbrev {col 22}off" - + /********************************* - + Add custom lines of code - + *********************************/ if `"`custom'"' != "" { - + local setDispLocal `"`setDispLocal'{break} {break}{col 5}{ul:User specified settings:}"' - + *Create a local with the rowlabel input to be tokenized local custom_code_lines `custom' - + while `"`custom_code_lines'"' != "" { - + *Parsing name and label pair gettoken code_line custom_code_lines : custom_code_lines, parse("@") - + *Removing leadning or trailing spaces local code_line = trim(`"`code_line'"') - + *Set custom setting local setDispLocal `"`setDispLocal'{break}{col 5}`code_line'"' `code_line' - + *Parse char is not removed by gettoken local custom_code_lines = subinstr(`"`custom_code_lines'"' ,"@","",1) } } - - + + /********************************* - + Create return value and output message - - *********************************/ - + + *********************************/ + if "`quietly'" == "" & "`veryquietly'" == "" { - + noi di "" noi di "{phang}{err:DISCLAIMER:} Due to how settings works in Stata, this command can only attempt to harmonize settings as much as possible across users, but no guarantee can be given that all commands will always behave identical unless the exact same version and type of Stata is used.{p_end}" noi di "" noi di `"`setDispLocal'"' } - + if "`veryquietly'" == "" { - + noi di "" noi di 
"{phang}{err:IMPORTANT:} The most important setting of this command cannot be set inside the command due to technical reasons. The setting has been prepared by this command, and you only need to write \`r(version)' after this command (include the apostrophes).{p_end}" @@ -289,9 +289,3 @@ } end - - - - - - diff --git a/src/ado_files/iecompdup.ado b/src/ado_files/iecompdup.ado index 2150b3e7..c0d9b667 100644 --- a/src/ado_files/iecompdup.ado +++ b/src/ado_files/iecompdup.ado @@ -1,121 +1,121 @@ -*! version 5.4 15DEC2017 DIME Analytics lcardosodeandrad@worldbank.org - - capture program drop iecompdup +*! version 5.5 26APR2018 DIME Analytics lcardosodeandrad@worldbank.org + + capture program drop iecompdup program iecompdup , rclass - + qui { - + syntax varname , id(string) [DIDIfference KEEPDIFFerence KEEPOTHer(varlist) more2ok] - + version 11.0 - + preserve - + /**************************** - + Turn ID var to string so that the rest of the command works similarly - - ****************************/ - + + ****************************/ + * Test if ID variable is numeric or string - cap confirm numeric variable `varlist' + cap confirm numeric variable `varlist' if !_rc { - + * If numeric, test if all values in ID variable is integer cap assert mod(`varlist',1) == 0 - + * This command does not allow numeric ID varaibels that are not integers if _rc { di as error "{phang}The ID variable is only allowed to be either string or only consist of integers. Integer in this context is not the same as the variable type int. Integer in this context means numeric values without decimals. 
Please consider using integers as your ID or convert your ID variable to a string.{p_end}" - - } + + } else { - - ** Find the longest (for integers that is the same as largest) + + ** Find the longest (for integers that is the same as largest) * number and get its legth.l sum `varlist' local length = strlen("`r(max)'") - - ** Use that length when explicitly setting the format in + + ** Use that length when explicitly setting the format in * order to prevent information lost tostring `varlist', replace format(%`length'.0f) } - } - + } + * Testing that the ID variable is a string before constinueing the command - cap confirm string variable `varlist' + cap confirm string variable `varlist' if _rc { di as error "{phang}This error message is not due to incorrect specification from you. This message follows a failed check that the command is working properly. If you get this error, please send an email to kbjarkefur@worldbank.org including the follwoing message 'ID var was not succesfully turned in to a string without information loss in iecompdup.' and include whatever other data you do not mind sharing.{p_end}" } - + * Only keep duplicates with the ID specified keep if `varlist' == "`id'" - + /**************************** - + Test if input is correct - + ****************************/ - - + + if "`keepdifference'" == "" & "`keepother'" != "" { - + noi di as error "{phang}Not allowed to specify keepother() without specifying keepdifference{p_end}" noi di "" error 197 exit } - + * Test the number of observations left and make sure that there are only two of them count if `r(N)' == 0 { - + noi di as error "{phang}ID incorrectly specified. No observations with (`varlist' == `id'){p_end}" noi di "" error 2000 exit } else if `r(N)' == 1 { - + noi di as error "{phang}ID incorrectly specified. No duplicates with that ID. 
Only one observation where (`varlist' == `id'){p_end}" noi di "" error 2001 exit } else if `r(N)' > 2 & "`more2ok'" == "" { - + noi di as error "{phang}The current version of ie_compdup is not able to compare more than 2 duplicates at the time. (How to output the results for groups larger than 2 is non-obvious and suggestions on how to do that are appreciated.) Either drop one of the duplicates before re-running the command or specify option more2ok and the comparison will be done between the first and the second row.{p_end}" noi di "" error 198 exit - } + } else { - - + + /**************************** - + Compare all variables - - ****************************/ - - + + ****************************/ + + * If more than 2 observations, keep the first and second row only keep if _n <= 2 - + *Initiate the locals local match local difference - + * Go over all variables and see if they are non missing for at least one of the variables foreach var of varlist _all { - + cap assert missing(`var') - - ** If not missing for at lease one of the observations, test - * if they are identical across the duplicates or not, and + + ** If not missing for at lease one of the observations, test + * if they are identical across the duplicates or not, and * store variable name in appropriate local if _rc { - + * Are the variables identical if `var'[1] == `var'[2] { local match `match' `var' @@ -123,62 +123,62 @@ else { local difference `difference' `var' } - } + } * If missing for all duplicates, then drop that variable else { drop `var' } } - + * Remove the ID var from the match list, it is match by definition and therefore add no information local match : list match - varlist - + /**************************** - + Output the result - + ****************************/ - - + + noi di "" - ** Display all variables that differ. This comes first in case + ** Display all variables that differ. 
This comes first in case * the number of variables are a lot, cause then it would push * any other output to far up if "`didifference'" != "" { - + noi di "{phang}The following variables have different values across the duplicates:{p_end}" noi di "{pstd}`difference'{p_end}" noi di "" } - + *Display number output local numNonMissing = `:list sizeof match' + `:list sizeof difference' - + noi di "{phang}The duplicate observations with ID = `id' have non-missing values in `numNonMissing' variables. Out of those variables:{p_end}" noi di "" noi di "{phang2}`:list sizeof match' variable(s) are identical across the duplicates{p_end}" noi di "{phang2}`:list sizeof difference' variable(s) have different values across the duplicates{p_end}" - - - + + + return local matchvars `"`match'"' return local diffvars `"`difference'"' return scalar nummatch = `:list sizeof match' return scalar numdiff = `:list sizeof difference' - return scalar numnomiss = `numNonMissing' - } - + return scalar numnomiss = `numNonMissing' + } + restore - + * If keep difference is applied only keep those variables here. if "`keepdifference'" != "" & "`difference'" != "" { - + order `varlist' `difference' `keepother' keep `varlist' `difference' `keepother' - + * Drop differently depending on numeric or string - cap confirm numeric variable `varlist' + cap confirm numeric variable `varlist' if !_rc { keep if `varlist' == `id' } diff --git a/src/ado_files/iedropone.ado b/src/ado_files/iedropone.ado index a4d3e175..bd64033a 100644 --- a/src/ado_files/iedropone.ado +++ b/src/ado_files/iedropone.ado @@ -1,92 +1,92 @@ -*! version 5.4 15DEC2017 DIME Analytics lcardosodeandrad@worldbank.org - +*! 
version 5.5 26APR2018 DIME Analytics lcardosodeandrad@worldbank.org + capture program drop iedropone - program define iedropone , - + program define iedropone , + qui { syntax [if] , [Numobs(numlist int min=1 max=1 >0) mvar(varname) mval(string) zerook] - - + + version 11.0 /*********************************** - + Set constants - - ***********************************/ - + + ***********************************/ + *Set a constant for the multi option being used local MULTI_USED 0 if ("`mvar'" != "" & "`mval'" != "") local MULTI_USED 1 - + *Set a constant for the zerook option being used local ZEROOK_USED 0 if ("`zerook'" != "") local ZEROOK_USED 1 - + *Set a constant for if an IF-condition used local IF_USED 0 - if (`"`if'"' != "") local IF_USED 1 - + if (`"`if'"' != "") local IF_USED 1 + /*********************************** - + Test input - + ***********************************/ *Test that mvar() and mval() was used in combination if ("`mvar'" != "" & "`mval'" == "") { - + di as error "{pstd}The option mval() is required when using the option mvar()" error 197 } if ("`mvar'" == "" & "`mval'" != "") { - + di as error "{pstd}The option mvar() is required when using the option mval()" error 197 } - - *Test that either an if condition was specified or of the multi values option was used + + *Test that either an if condition was specified or of the multi values option was used if `IF_USED' == 0 & `MULTI_USED' == 0 { - + di as error "{pstd}An {it:if} condition is required when mvar() and mval() is not used." 
error 197 } - + /*********************************** - + Set locals - - ***********************************/ + + ***********************************/ *If number of obs to drop is not set, then use the default which is 1 if "`numobs'" == "" { - + local numobs = 1 } - + *Test if the var in mvar() is string or not if `MULTI_USED' == 1 { - + cap confirm string variable `mvar' - + if _rc == 0 local MULTI_STRING 1 if _rc != 0 local MULTI_STRING 0 - } - + } + /*********************************** - + Test the number of obs matching - - ***********************************/ - + + ***********************************/ + *Subfunction options local subfunc_opts "nobs(`numobs') zero(`ZEROOK_USED')" - - if `MULTI_USED' == 0 { - - ** Use the functon iedropone_test_match to + + if `MULTI_USED' == 0 { + + ** Use the functon iedropone_test_match to * test if the number of observations to drop * is correct iedropone_test_match `if' , `subfunc_opts' @@ -94,22 +94,22 @@ } else { - - **Create a counter that will noitify - * the user how many observations were + + **Create a counter that will noitify + * the user how many observations were * dropped local num_obs_dropped = 0 - + *Loop over all values in mval() foreach mvalue of local mval { - + *Run the sub-function that test number of observation to be dropped is OK - + *Is mvar() is numeric if `MULTI_STRING' == 0 { - - - ** Use the functon iedropone_test_match to + + + ** Use the functon iedropone_test_match to * test if the number of observations to drop * is correct if `IF_USED' == 1 iedropone_test_match `if' & `mvar' == `mvalue' , `subfunc_opts' @@ -117,122 +117,118 @@ } *Is mvar() is numeric else { - - ** Use the functon iedropone_test_match to + + ** Use the functon iedropone_test_match to * test if the number of observations to drop * is correct if `IF_USED' == 1 iedropone_test_match `if' & `mvar' == "`mvalue'" , `subfunc_opts' - if `IF_USED' == 0 iedropone_test_match if `mvar' == "`mvalue'" , `subfunc_opts' + if `IF_USED' == 0 
iedropone_test_match if `mvar' == "`mvalue'" , `subfunc_opts' } - + *Add to teh counter how many observations will be dropped local num_obs_dropped = `num_obs_dropped' + `r(numtodrop)' } } - + /*********************************** - + Drop observation(s) - - ***********************************/ - - + + ***********************************/ + + *Test if multivars are used - if `MULTI_USED' == 0 { - + if `MULTI_USED' == 0 { + *The observations to be dropped drop `if' - + } else { - + *Loop over all values in mval and drop observations foreach mvalue of local mval { - + if `MULTI_STRING' == 0 { if `IF_USED' == 1 drop `if' & `mvar' == `mvalue' - if `IF_USED' == 0 drop if `mvar' == `mvalue' + if `IF_USED' == 0 drop if `mvar' == `mvalue' } else { if `IF_USED' == 1 drop `if' & `mvar' == "`mvalue'" - if `IF_USED' == 0 drop if `mvar' == "`mvalue'" + if `IF_USED' == 0 drop if `mvar' == "`mvalue'" } } } - + *Output to user how many observations were dropped if `num_obs_dropped' == 1 noi di "`num_obs_dropped' observation was dropped" if `num_obs_dropped' != 1 noi di "`num_obs_dropped' observations were dropped" - + } end - - **Sub function that checks that the number of obswervations - * to drop is correct and it returns the number of obsevations + + **Sub function that checks that the number of obswervations + * to drop is correct and it returns the number of obsevations * that will be dropped. - + capture program drop iedropone_test_match program define iedropone_test_match , rclass - - + + syntax [if] , nobs(int) zero(int) - - - **Count how many obs fits the drop condition (this + + + **Count how many obs fits the drop condition (this * function is called once for each value in mval) count `if' - local count_match `r(N)' - + local count_match `r(N)' + *Test if no match and zerook not used if `count_match' == 0 & `zero' == 0 { - + *Return error message noi di as error `"{pstd}No observation matches the drop condition " `if'". 
Consider using option zerook to surpress this error. No observations dropped."' error 2000 } *Test if no match but zerook used else if `count_match' == 0 & `zero' == 1 { - + *No observation dropped but that is allowed byt zero_used. Return 0. return scalar numtodrop = `count_match' - + } *Test if the number of match is less than it is supposed to be else if `count_match' < `nobs' { - + *Return error message noi di as error `"{pstd}There are less than exactly `nobs' observations that match the drop condition " `if'". No observations dropped."' error 910 - + } *Test if the number of match is more than it is supposed to be else if `count_match' > `nobs' { - + *Return error message if `nobs' == 1 noi di as error `"{pstd}There are more than exactly `nobs' observation that match the drop condition " `if'". No observations dropped."' if `nobs' > 1 noi di as error `"{pstd}There are more than exactly `nobs' observations that match the drop condition " `if'". No observations dropped."' error 912 - + } *Test if the number of match exactly what it is suppsed to be else if `count_match' == `nobs' { - + *Return the number of obs that will be dropped return scalar numtodrop = `count_match' - + } *The options above should be mutually exclusive, so this should never happen. else { - + *Return error message noi di as error "{pstd}The command is never supposed to reach this point, please notify the author if the command on kbjarkefur@worldbank.org" error 197 } - - end - - - - + + end diff --git a/src/ado_files/ieduplicates.ado b/src/ado_files/ieduplicates.ado index ad86b1d8..98ef5645 100644 --- a/src/ado_files/ieduplicates.ado +++ b/src/ado_files/ieduplicates.ado @@ -1,105 +1,105 @@ -*! version 5.4 15DEC2017 DIME Analytics lcardosodeandrad@worldbank.org - +*! 
version 5.5 26APR2018 DIME Analytics lcardosodeandrad@worldbank.org + capture program drop ieduplicates program ieduplicates , rclass - - + + qui { syntax varname , FOLder(string) UNIQUEvars(varlist) [ KEEPvars(varlist) MINprecision(numlist >0) tostringok droprest nodaily SUFfix(string)] - + version 11.0 - + *Add version of Stata fix - //Make sure that keepvars are still saved if saved if the duplicates file - * is generated on a subset of the data. For example, duplicates from - * version 1, 2 . If the command is run on only version 1. Then values - * for keeepvars in version 2 and 3 are dropped and not reloaded as those + //Make sure that keepvars are still saved if saved if the duplicates file + * is generated on a subset of the data. For example, duplicates from + * version 1, 2 . If the command is run on only version 1. Then values + * for keeepvars in version 2 and 3 are dropped and not reloaded as those * obs are not in current memory - - **Test that observations have not been deleted from the report before readind - * it. Deleted in a way that the report does not make sense. Provide an error + + **Test that observations have not been deleted from the report before readind + * it. Deleted in a way that the report does not make sense. Provide an error * message to this that is more informative. preserve - + noi di "" - - ** Making one macro with all variables that will be + + ** Making one macro with all variables that will be * imported and exported from the Excel file local agrumentVars `varlist' `uniquevars' `keepvars' - + ** When migrating data from Stata to Excel format, time variables loose precision. 
- * If time varaibles are used for uniquely identifying observations, then this loss - * of precision might make a time variable unfit to merge the observations back + * If time varaibles are used for uniquely identifying observations, then this loss + * of precision might make a time variable unfit to merge the observations back * to Stata after they have been exported to Excel. if "`minprecision'" != "" local milliprecision = `minprecision' * 1000 * 60 - + local excelvars dupListID dateListed dateFixed correct drop newID initials notes - + local date = subinstr(c(current_date)," ","",.) - + /*********************************************************************** ************************************************************************ - + Section 1 - - Storing format and type of the variables that will be imported and - exported from the Excel file. The export and import function may + + Storing format and type of the variables that will be imported and + exported from the Excel file. The export and import function may sometimes alter type and format. Merging with different types results - in an error and format is required for date and time variabels. + in an error and format is required for date and time variabels. By using the information stored in this loop, the code can enusre - that the variables have correct type and format before merging. + that the variables have correct type and format before merging. For more information, see below. 
- + ************************************************************************ ***********************************************************************/ - + local i 0 foreach var in `agrumentVars' { - + local type_`i' : type `var' local format_`i' : format `var' - + if substr("`format_`i''",1,2) == "%t" & substr("`format_`i''",1,3) != "%td" { - + if "`minprecision'" != "" replace `var' = (floor(`var' / `milliprecision')*`milliprecision') } - + local ++i } - + /*********************************************************************** ************************************************************************ - + Section 2 - - Saving a version of the data to be used before mergin and + + Saving a version of the data to be used before mergin and before correcting duplicates - + ************************************************************************ - ***********************************************************************/ + ***********************************************************************/ tempfile restart save `restart' - + /*********************************************************************** ************************************************************************ - + Section 3 - + Import and prepare Corrections (if file exists) - + ************************************************************************ - ***********************************************************************/ - + ***********************************************************************/ + /****************** Section 3.1 Check if earlier report exists. - ******************/ - + ******************/ + cap confirm file "`folder'/iedupreport`suffix'.xlsx" - + if !_rc { local fileExists 1 } @@ -110,217 +110,217 @@ /****************** Section 3.2 If report exist, load file and check input - ******************/ + ******************/ if `fileExists' { - - *Load excel file. Load all vars as string and use meta data from Section 1 + + *Load excel file. 
Load all vars as string and use meta data from Section 1 import excel "`folder'/iedupreport`suffix'.xlsx" , clear firstrow allstring - + *dupListID is always numeric destring dupListID, replace /****************** Section 3.2.1 Make sure that the - ID variable and - the uniquevars are - not changed since + ID variable and + the uniquevars are + not changed since last report. - ******************/ - + ******************/ + ds - local existingexcelvars `r(varlist)' - + local existingexcelvars `r(varlist)' + if `:list varlist in existingexcelvars' == 0 { - + noi display as error "{phang}ID variable [`varlist'] does not exist in the previously exported Excle file. If you renamed or changed the ID variable, you need to start over with a new file. Rename or move the already existing file. Create a new file and carefully copy any corrections from the old file to the new.{p_end}" noi di "" error 111 exit - + } if `:list uniquevars in existingexcelvars' == 0 { - + noi display as error "{phang}One or more unique variables in [`uniquevars'] do not exist in the previously exported Excel file. If you renamed or changed any variable used in uniquevars(), you need to start over with a new file. Rename or move the already existing file. Create a new file and carefully copy any corrections from the old file to the new.{p_end}" noi di "" error 111 exit - } - + } + /****************** Section 3.2.2 - Make sure that all variables are same - type and format as in original Stata - file regardless of how they were + Make sure that all variables are same + type and format as in original Stata + file regardless of how they were imported from Excel. 
- ******************/ - + ******************/ + local i 0 foreach argVar in `agrumentVars' { - - + + cap confirm variable `argVar' //In case the variable was added since last export if !_rc { if substr("`type_`i''",1,3) == "str" { - + *No need for any action since all varaibles are loaded as string - + } else if substr("`format_`i''",1,2) == "%t" { - - ** All variables are loaded as strings. The letters - * in date/time varibles makes -destring- not - * applicable. The code generates a new variable - * that read the date/time string using the date() - * and clock() functions. Then the format from + + ** All variables are loaded as strings. The letters + * in date/time varibles makes -destring- not + * applicable. The code generates a new variable + * that read the date/time string using the date() + * and clock() functions. Then the format from * Section 1 is applied to the new variable. The old - * string variable is dropped and the newly + * string variable is dropped and the newly * generated variable takes its place. if substr("`format_`i''",1,3) == "%td" { - + *Read date var from string gen double `argVar'_tmp = date(`argVar', "MDY") } else { - + *Read time var from string gen double `argVar'_tmp = clock( `argVar', "MDY hm") *Manually applying lower precision. 
Read more in Section 0 for details - + if "`minprecision'" != "" replace `argVar'_tmp = (floor(`argVar'_tmp / `milliprecision')*`milliprecision') - + } - - ** Order the newly generated var after the imported + + ** Order the newly generated var after the imported * string var, and then drop the string var order `argVar'_tmp, after(`argVar') drop `argVar' - - ** Format the new variable to match its format in - * the original Stata file and then take away the + + ** Format the new variable to match its format in + * the original Stata file and then take away the * _tmp suffix format `argVar'_tmp `format_`i'' rename `argVar'_tmp `argVar' - + } else { - + *Destring numeric variables destring `argVar' , replace } } local ++i } - + /****************** Section 3.3 Make sure input is correct - ******************/ - + ******************/ + *Temporary variables needed for checking input tempvar tmpvar_multiInp tmpvar_inputNotYes tmpvar_maxMultiInp tmpvar_notDrop tempvar_yescorrect tempvar_numcorrect - + *Locals indicating in which ways input is incorrect (if any) local local_multiInp 0 local local_multiCorr 0 local local_inputNotYes 0 local local_notDrop 0 - + /****************** Section 3.3.1 Make sure there are not too many corrections - ******************/ - - * Count the number of corrections (correct drop newID) per + ******************/ + + * Count the number of corrections (correct drop newID) per * observation. Only one correction per observation is allowed. 
egen `tmpvar_multiInp' = rownonmiss(correct drop newID), strok - + *Check that all rows have utmost one correction cap assert `tmpvar_multiInp' == 0 | `tmpvar_multiInp' == 1 - + if _rc { - + *Error will be outputted below local local_multiInp 1 } - - + + /****************** Section 3.3.2 Make sure string input is yes or y - ******************/ - + ******************/ + * Make string input lower case and change "y" to "yes" replace correct = lower(correct) replace drop = lower(drop) replace correct = "yes" if correct == "y" - replace drop = "yes" if drop == "y" - + replace drop = "yes" if drop == "y" + *Check that varaibles are wither empty or "yes" gen `tmpvar_inputNotYes' = !((correct == "yes" | correct == "") & (drop == "yes" | drop == "")) - - cap assert `tmpvar_inputNotYes' == 0 + + cap assert `tmpvar_inputNotYes' == 0 if _rc { - + *Error will be outputted below local local_inputNotYes 1 - } - + } + /****************** Section 3.3.3 - Make sure that either option droprest is specified, or that - drop was correctly indicated for all observations. i.e.; if + Make sure that either option droprest is specified, or that + drop was correctly indicated for all observations. 
i.e.; if correct or newID was indicated for at least one duplicate in - a duplicate group, then all other observations should be + a duplicate group, then all other observations should be indicated as drop (unless droprest is specified) - - ******************/ - + + ******************/ + *Check if any other duplicate in duplicate group has at least one correction gen `tempvar_yescorrect' = (correct == "yes") bys `varlist' : egen `tempvar_numcorrect' = total(`tempvar_yescorrect') count if `tempvar_numcorrect' > 1 - if `r(N)' != 0 local local_multiCorr 1 - - + if `r(N)' != 0 local local_multiCorr 1 + + *Check if any other duplicate in duplicate group has at least one correction bys `varlist' : egen `tmpvar_maxMultiInp' = max(`tmpvar_multiInp') - + *Check that drops are explicitly indicated gen `tmpvar_notDrop' = (`tmpvar_multiInp' == 0 & `tmpvar_maxMultiInp' > 0) * Check if option droprest is specified if "`droprest'" != "" { - cap assert `tmpvar_notDrop' == 0 + cap assert `tmpvar_notDrop' == 0 if _rc { *Error will be outputted below local local_notDrop 1 - } + } } else { - ** Option -droprest- specified. Drop will be changed to yes + ** Option -droprest- specified. 
Drop will be changed to yes * for any observations without drop or any other correction * explicitly specified if the observation is in a duplicate * group with at least one observation has a correction replace drop = "yes" if `tmpvar_notDrop' == 1 } - - + + /****************** Section 3.4 Throw errors if any of the tests were not passed - ******************/ - + ******************/ + *Was any error detected if `local_multiInp' == 1 | `local_inputNotYes' == 1 | `local_notDrop' == 1 | `local_multiCorr' == 1 { - + *Error multiple input if `local_multiInp' == 1 { noi { @@ -329,7 +329,7 @@ di "" } } - + *Error multiple correct if `local_multiCorr' == 1 { noi { @@ -338,113 +338,113 @@ di "" } - } - + } + *Error in incorrect string if `local_inputNotYes' == 1 { noi { display as error "{phang}The following observations have an answer in either correct or drop that is neither yes nor y{p_end}" list `varlist' dupListID correct drop if `tmpvar_inputNotYes' == 1 di "" - } + } } - + *Error is not specfied as drop if `local_notDrop' == 1 { noi { display as error "{phang}The following observations are not explicitly indicated as drop while other duplicates in the same duplicate group are corrected. 
Either manually indicate as drop or see option droprest{p_end}" list `varlist' dupListID correct drop newID if `tmpvar_notDrop' == 1 di "" - } + } } - + *Same error for any incorrect input error 119 exit } - + *Keep only the variables needed for matching and variables used for input in the Excel file keep `varlist' `uniquevars' `excelvars' - + *Save imported data set with all corrections tempfile imputfile_merge save `imputfile_merge' } - - + + /*********************************************************************** ************************************************************************ - + Section 4 - + Merge corrections with original data - + ************************************************************************ - ***********************************************************************/ - + ***********************************************************************/ + *Re-load original - use `restart', clear - + use `restart', clear + * Merge original data with imported Excel file (if Excel file exists) if `fileExists' { - + *Create a tempvar for merging results tempvar iedup_merge - + *Merge the corrections with the data set merge 1:1 `varlist' `uniquevars' using `imputfile_merge', generate(`iedup_merge') - + *Make sure that obsrevations listed in the duplicate report is still in the data set cap assert `iedup_merge' != 2 - + *Display error message if assertion is not true and some duplicates in the excle file are no longer in the data set if _rc { - + display as error "{phang}One or several observations in the Excel report are no longer found in the data set. Always run ieduplicates on the raw data set that include all the duplicates, both new duplicates and those you have already identified. After removing duplicates, save the data set using a different name. You might also recieve this error if you are using an old ieduplicates Excel report on a new data set.{p_end}" error 9 exit - - + + } - - *Explicitly drop temporary variable. 
Temporary variables might - *be exported to excel so delete explicitly before that. Only + + *Explicitly drop temporary variable. Temporary variables might + *be exported to excel so delete explicitly before that. Only *using tempvar here to create a name with no conflicts drop `iedup_merge' - + } - + /*********************************************************************** ************************************************************************ - + Section 5 - + Drop all but one observations that are duplicates in all variables - + ************************************************************************ - ***********************************************************************/ - - tempvar id_string allDup - + ***********************************************************************/ + + tempvar id_string allDup + /****************** Section 5.1 - Next section 5.2 needs the ID var in the same type for the - if statement in duplicates drop. And since all numeric variables - can be expressed as string, we generate an temporary variable + Next section 5.2 needs the ID var in the same type for the + if statement in duplicates drop. And since all numeric variables + can be expressed as string, we generate an temporary variable that is always string. 
- ******************/ - + ******************/ + *Test if ID var is already string cap confirm string variable `varlist' - + *if ID var not string: if _rc { *Generate string copy of ID var tostring `varlist' , generate(`id_string') force } - + *if ID var is string: else { *Simply copy the ID var to the temporary variable @@ -454,75 +454,75 @@ /****************** Section 5.2 Throw errors if any of the tests was not passed - ******************/ - - ** Generate variables that are not 0 if any observations are + ******************/ + + ** Generate variables that are not 0 if any observations are * duplicates in all variables duplicates tag , gen(`allDup') - *Test if any observations is duplicates in all variables + *Test if any observations is duplicates in all variables count if `allDup' != 0 if `r(N)' != 0 { - - * Output message indicating that some observations - * were dropped automatically as they were duplicates + + * Output message indicating that some observations + * were dropped automatically as they were duplicates * in all variables. noi di "{phang}The following IDs are duplicates in all variable so only one version is kept. The other observations in the same duplicate group are automatically dropped:{p_end}" - + *Create a local of all IDs that are to be deleted. levelsof `id_string' if `allDup' != 0 foreach alldupID in `r(levels)' { - + *Output the ID noi di "{phang2}ID: `alldupID'{p_end}" - - *Drop all but one duplicates in duplicate + + *Drop all but one duplicates in duplicate *groups that are duplicated in all variables duplicates drop if `id_string' == "`alldupID'", force } - + *Add an empty row after the output noi di "" } - - - ** Save data set including dropping duplicates in all variables. + + + ** Save data set including dropping duplicates in all variables. * The command returns the data set without these observations. 
save `restart', replace /*********************************************************************** ************************************************************************ - + Section 6 - - Test if there are duplicates in ID var. If any duplicates exist, + + Test if there are duplicates in ID var. If any duplicates exist, tehn update the Excel file with new and unaddressed cases - + ************************************************************************ - ***********************************************************************/ + ***********************************************************************/ /****************** Section 6.1 Test if there are any duplicates in ID var - ******************/ - - * Generate variable that is not 0 + ******************/ + + * Generate variable that is not 0 * if observation is a duplicate tempvar dup duplicates tag `varlist', gen(`dup') - + *Test if there are any duplicates cap assert `dup'==0 - if _rc { - + if _rc { + /****************** Section 6.2 - Test if the variables passed as ID var and unique var - uniquely and fully identify the data set. It should be + Test if the variables passed as ID var and unique var + uniquely and fully identify the data set. It should be possible to merge corrections back to the main file. 
- - ******************/ - + + ******************/ + cap isid `varlist' `uniquevars' if _rc { @@ -530,29 +530,29 @@ error 119 exit } - + /****************** Section 6.3 Keep only duplicates for the report - ******************/ - - *Keep if observation is part of duplicate group + ******************/ + + *Keep if observation is part of duplicate group keep if `dup' != 0 - + if `fileExists' { * If Excel file exists keep excel vars and * variables passed as arguments in the - * command - keep `agrumentVars' `excelvars' + * command + keep `agrumentVars' `excelvars' } else { - * Keep only variables passed as arguments in + * Keep only variables passed as arguments in * the command and the string ID var as no Excel file exists keep `agrumentVars' - - *Generate the excel variables used for indicating correction + + *Generate the excel variables used for indicating correction foreach excelvar of local excelvars { - + *Create all variables apart from dupListID as string vars if "`excelvar'" == "dupListID" { gen `excelvar' = . 
@@ -561,144 +561,144 @@ gen `excelvar' = "" } } - + } /****************** Section 6.4 Update the excel vars that are not updated manually - ******************/ - + ******************/ + * Generate a local that is 1 if there are new duplicates local unaddressedNewExcel 0 count if dateFixed == "" if `r(N)' > 0 local unaddressedNewExcel 1 - + /****************** Section 6.4.1 Date variables - ******************/ - + ******************/ + * Add date first time duplicvate was identified replace dateListed = "`date'" if dateListed == "" - - ** Add today's date to variable dateFixed if dateFixed + + ** Add today's date to variable dateFixed if dateFixed * is empty and at least one correction is added replace dateFixed = "`date'" if dateFixed == "" & (correct != "" | drop != "" | newID != "") /****************** Section 6.4.2 Duplicate report list ID - ******************/ - - ** Sort after dupListID and after ID var for - * duplicates currently without dupListID + ******************/ + + ** Sort after dupListID and after ID var for + * duplicates currently without dupListID sort dupListID `varlist' - - ** Assign dupListID 1 to the top row if no duplicate + + ** Assign dupListID 1 to the top row if no duplicate * list IDs have been generated so far. replace dupListID = 1 if _n == 1 & dupListID == . - + ** Generate new IDs based on the row above instead of directly * from the row number. That prevents duplicates in the list in - * case an observation is deleted. The first observation with - * missing value will have an ID that is one digit higher than + * case an observation is deleted. The first observation with + * missing value will have an ID that is one digit higher than * the highest ID already in the list - replace dupListID = dupListID[_n - 1] + 1 if dupListID == . + replace dupListID = dupListID[_n - 1] + 1 if dupListID == . 
+ - /****************** Section 6.5 Keep and order the variables and output the Excel files ******************/ - + * If cases unaddressed then update the Excel file if `unaddressedNewExcel' { keep `agrumentVars' `excelvars' - order `varlist' `excelvars' `uniquevars' `keepvars' + order `varlist' `excelvars' `uniquevars' `keepvars' + + if "`daily'" == "" { + - if "`daily'" == "" { - - *Returns 0 if folder does not exist, 1 if it does mata : st_numscalar("r(dirExist)", direxists("`folder'/Daily")) - + ** If the daily folder is not created, just create it if `r(dirExist)' == 0 { - + *Create the folder since it does not exist mkdir "`folder'/Daily" } - + *Export the daily file - cap export excel using "`folder'/Daily/iedupreport`suffix'_`date'.xlsx" , firstrow(variables) replace nolabel - + cap export excel using "`folder'/Daily/iedupreport`suffix'_`date'.xlsx" , firstrow(variables) replace nolabel + *Print error if daily report cannot be saved if _rc { - - display as error "{phang}There the Daily copy could not be saved to the `folder'/Daily folder. Make sure to close any old daily copy or see the option nodaily{p_end}" + + display as error "{phang}There the Daily copy could not be saved to the `folder'/Daily folder. 
Make sure to close any old daily copy or see the option nodaily{p_end}" error 603 exit - + } - + *Prepare local for output local daily_output "and a daily copy have been saved to the Daily folder" } - - + + *Export main report - export excel using "`folder'/iedupreport`suffix'.xlsx" , firstrow(variables) replace nolabel - + export excel using "`folder'/iedupreport`suffix'.xlsx" , firstrow(variables) replace nolabel + *Produce output noi di `"{phang}Excel file created at: {browse "`folder'/iedupreport`suffix'.xlsx":`folder'/iedupreport`suffix'.xlsx} `daily_output'{p_end}"' noi di "" } } - - - + + + /*********************************************************************** ************************************************************************ - + Section 7 - + Update the data set and with the new corrections. - + ************************************************************************ - ***********************************************************************/ + ***********************************************************************/ - * Load the original data set merged with correction. Duplicates + * Load the original data set merged with correction. Duplicates * in all variables are already dropped in this data set use `restart', clear - + * If excel file exists, apply any corrections indicated (if any) if `fileExists' { - + /****************** Section 7.1 Drop duplicates listed for drop ******************/ - + drop if drop == "yes" /****************** Section 7.2 - Update new ID. ID var can be either numeric or - string. All numbers can be made strings but not - all strings can be numeric. Therefore this + Update new ID. ID var can be either numeric or + string. All numbers can be made strings but not + all strings can be numeric. Therefore this section is complicated. - ******************/ - + ******************/ + /****************** Section 7.2.1 - ID var in original file is string. Either + ID var in original file is string. 
Either newID was imported as string or the variable is made string. Easy. - ******************/ - + ******************/ + * If ID variable is string in original Stata file if substr("`type_0'",1,3) == "str" { - + * Tostring the newID tostring newID , replace * Replace missing value with the empty string @@ -706,30 +706,30 @@ * Update ID replace `varlist' = newID if newID != "" } - - + + /****************** Section 7.2.2 ID var in original file is numeric. Test first if newID is numeric or can be made numeric. - ******************/ - + ******************/ + * ID var is numeric else { - + ** Trying to convert newID. If newID is already numeric, nothing - * happens. If it is not possible to make it numeric (having + * happens. If it is not possible to make it numeric (having * non-numeric characters), then it will remain as string. destring newID, replace * Test if newID now is numeric cap confirm numeric variable newID - + /****************** Section 7.2.2.1 newID is numeric and original ID can easily be updated. - ******************/ - + ******************/ + if !_rc { replace `varlist' = newID if newID != . } @@ -737,29 +737,29 @@ /****************** Section 7.2.2.2 newID cannot be made numeric but origianl ID var is numeric. - To update original ID var, it has to be made a string, but + To update original ID var, it has to be made a string, but that will be allowed only if option tostringok is specified. - ******************/ - + ******************/ + else { * Check if -tostringok- is specificed: if "`tostringok'" != "" { - + * Make original ID var string tostring `varlist' , replace * Replace any missing values with empty string replace `varlist' = "" if `varlist' == "." 
* Update the original ID var with value in newID replace `varlist' = newID if newID != "" - + } - + * Error, IDvar can not be updated else { - + * Create a local with all non-numeric values - levelsof newID if missing(real(newID)), local(NaN_values) + levelsof newID if missing(real(newID)), local(NaN_values) * Output error message di as error "{phang}`varlist' is numeric but newID has thes non-numeric values `NaN_values'. Update newID to only contain numeric values or see option tostringok{p_end}" @@ -768,69 +768,69 @@ } } } - + /****************** Section 7.3 Test that values in newID - were neither used twice + were neither used twice nor already existed ******************/ - + * Loop over all values in newID - levelsof newID + levelsof newID foreach newID in `r(levels)' { - + /****************** Section 7.1 - Different test depending on ID var being string + Different test depending on ID var being string or numeric. Count number of observations with each - of the values used in newID and test that that + of the values used in newID and test that that the number is exactly one for each value. - ******************/ - + ******************/ + if substr("`type_0'",1,3) == "str" { count if `varlist' == "`newID'" } else { - count if `varlist' == `newID' + count if `varlist' == `newID' } - + if `r(N)' == 1 { - + *Do nothing, each value in newID should be used exactly once. } else if `r(N)' == 0 { - + di as error "{phang}New ID value `newID' listed in the Excel file was never used on any observation. Please ensure that `newID' is a valid input. If problem remains, please report this bug to kbjarkefur@worldbank.org{p_end}" error 119 exit } else { - + di as error "{phang}New ID value `newID' listed in the Excel file is expected to be used only once, but it is after corrections used in `r(N)' obsevations. 
New ID value `newID' is already used in original data or is used more than once in the Excel file to correct duplicates{p_end}" error 119 exit } } - + /****************** Section 7.4 - Drop Excel vars - ******************/ - + Drop Excel vars + ******************/ + drop `excelvars' } - + /*********************************************************************** ************************************************************************ - + Section 8 - - Return the data set without duplicates and - output information regarding unresolved duplicates. - + + Return the data set without duplicates and + output information regarding unresolved duplicates. + ************************************************************************ - ***********************************************************************/ + ***********************************************************************/ * Generate a variable that is 1 if the observation is a duplicate in varlist tempvar dropDup @@ -839,48 +839,47 @@ levelsof `varlist' if `dropDup' != 0 , local(dup_ids) clean * Drop the duplicates (they are exported in Excel) drop if `dropDup' != 0 - - * Test if varlist is now uniqely and fully identifying the data set + + * Test if varlist is now uniquely and fully identifying the data set cap isid `varlist' if _rc { - - di as error "{phang}The data set is not returned with `varlist' uniqely and fully identifying the data set. Please report this bug to kbjarkefur@worldbank.org{p_end}" + + di as error "{phang}The data set is not returned with `varlist' uniquely and fully identifying the data set. Please report this bug to kbjarkefur@worldbank.org{p_end}" error 119 exit - + } - + if `:list sizeof dup_ids' == 0 { - - noi di "{phang}There are no unresolved duplicates in this data set. The data set is returned with `varlist' uniqely and fully identifying the data set.{p_end}" + + noi di "{phang}There are no unresolved duplicates in this data set. 
The data set is returned with `varlist' uniquely and fully identifying the data set.{p_end}" } else { noi di "{phang}There are `:list sizeof dup_ids' duplicates unresolved. IDs still contining duplicates: `dup_ids'. The unresolved duplicate observations were exported in the Excel file. The data set is returned without those duplicates and with `varlist' uniquely and fully identifying the data set.{p_end}" } - + return scalar numDup = `:list sizeof dup_ids' - + /*********************************************************************** ************************************************************************ - + Section 9 - - Save data set to be returned outside preserve/restore. - Preserve/restore is used so that original data is returned + + Save data set to be returned outside preserve/restore. + Preserve/restore is used so that original data is returned in case an error is thrown. - + ************************************************************************ - ***********************************************************************/ + ***********************************************************************/ tempfile returndata save `returndata' - + restore - - ** Using restore above to return the data to + + ** Using restore above to return the data to * the orignal data set in case of error. use `returndata', clear - + } end - diff --git a/src/ado_files/iefolder.ado b/src/ado_files/iefolder.ado index c5254a22..bbe9a5f7 100644 --- a/src/ado_files/iefolder.ado +++ b/src/ado_files/iefolder.ado @@ -1,83 +1,83 @@ -*! version 5.4 15DEC2017 DIME Analytics lcardosodeandrad@worldbank.org +*! 
version 5.5 26APR2018 DIME Analytics lcardosodeandrad@worldbank.org cap program drop iefolder program define iefolder -qui { - +qui { + syntax anything, PROJectfolder(string) [ABBreviation(string) SUBfolder(string)] - + version 11 - + ***Todo *give error message if divisor is changed *Create an empty line before error message or output noi di "" - + /*************************************************** - + Parse input - - ***************************************************/ - + + ***************************************************/ + *Parse out the sub-command gettoken subcommand item : anything - + *Parse out the item type and the item name gettoken itemType itemName : item - + *Make sure that the item name is only one word gettoken itemName rest : itemName - + *Clean up the input local subcomand = trim("`subcommand'") local itemType = trim("`itemType'") local itemName = trim("`itemName'") local abbreviation = trim("`abbreviation'") - + *noi di "SubCommand `subcommand'" *noi di "ItemType `itemType'" *noi di "ItemName `itemName'" /*************************************************** - + Test input - + ***************************************************/ - + *Test that the type is correct local sub_commands "new" local itemTypes "project round unitofobs subfolder" - + *Test if subcommand is valid if `:list subcommand in sub_commands' == 0 { noi di as error `"{phang}You have not used a valid subcommand. You entered "`subcommand'". See the {help iefolder:help file} for details.{p_end}"' error 198 - } - + } + *Test if item type is valid if `:list itemType in itemTypes' == 0 { noi di as error `"{phang}You have not used a valid item type. You entered "`itemType'". See the {help iefolder:help file} for details.{p_end}"' error 198 - } - + } + *Test that item name is used when item type is anything but project else if ("`itemType'" != "project" & "`itemName'" == "" ) { - + noi di as error `"{phang}You must specify a name of the `itemType'. 
See the {help iefolder:help file} for details.{p_end}"' error 198 } - + *Test that abbreviation is used only with round of unitofobs else if ("`abbreviation'" != "" & !("`itemType'" == "round" | "`itemType'" == "unitofobs") ) { - + noi di as error `"{phang}You may not use the option abbreviation() together with itemtype `itemType'.{p_end}"' error 198 } - + *test that there is no space in itemname local space_pos = strpos(trim("`itemName'"), " ") if "`rest'" != "" | `space_pos' != 0 { @@ -85,30 +85,30 @@ qui { noi di as error `"{pstd}You have specified to many words in: [{it:iefolder `subcommand' `itemType' `itemName'`rest'}] or used a space in {it:itemname}. Spaces are not allowed in the {it:itemname}. Use underscores or camel case.{p_end}"' error 198 } - - ** Test that item name only includes numbers, letter, underscores and does - * not start with a number. These are simplified requirements for folder + + ** Test that item name only includes numbers, letter, underscores and does + * not start with a number. These are simplified requirements for folder * names on disk. if !regexm("`itemName'", "^[a-zA-Z][a-zA-Z0-9_]*[a-zA-Z0-9_]$") & "`itemType'" != "project" { - + noi di as error `"{pstd}Invalid {it:itemname}. The itemname [`itemName'] can only include letters, numbers or underscore and the first character must be a letter.{p_end}"' error 198 } - + ** Test that also the abbreviation has valid characters if !regexm("`abbreviation'", "^[a-zA-Z][a-zA-Z0-9_]*[a-zA-Z0-9_]$") & "`abbreviation'" != "" { - + noi di as error `"{pstd}Invalid name in the option {it:abbreviation()}. 
The name [`abbreviation'] can only include letters, numbers or underscore and the first character must be a letter.{p_end}"' error 198 - } - - **Only rounds can be put in a sufolder, so if subfolder is used the itemtype must be + } + + **Only rounds can be put in a sufolder, so if subfolder is used the itemtype must be if ("`subfolder'" != "" & "`itemType'" != "round") { - + noi di as error `"{pstd}The option subfolder() can only be used together with item type "round" as only "round" folders can be organized in subfolders.{p_end}"' error 198 } - + *test that there is no space in subfolder option local space_pos = strpos(trim("`subfolder'"), " ") if "`subfolder'" != "" & `space_pos' != 0 { @@ -116,52 +116,52 @@ qui { noi di as error `"{pstd}You have specified to many words in: [{it:subfolder(`subfolder')}]. Spaces are not allowed, use underscores or camel case instead.{p_end}"' error 198 } - - + + /*************************************************** - + Start making updates to the project folder - + ***************************************************/ *Create a temporary textfile - tempname newHandle + tempname newHandle tempfile newTextFile - - cap file close `newHandle' - file open `newHandle' using "`newTextFile'", text write append - - + + cap file close `newHandle' + file open `newHandle' using "`newTextFile'", text write append + + *Create a global pointing to the main data folder global projectFolder "`projectfolder'" global dataWorkFolder "$projectFolder/DataWork" global encryptFolder "$dataWorkFolder/EncryptedData" - + if "`subcommand'" == "new" { - + di "Subcommand: New" - - *Use the full item name if abbrevaition was not specified (this code + + *Use the full item name if abbrevaition was not specified (this code *is irrelevant if not applicable) if "`abbreviation'" == "" local abbreviation "`itemName'" - + *Creating a new project if "`itemType'" == "project" { - + di "ItemType: Project" iefolder_newProject "$projectFolder" `newHandle' - + *Produce 
success output noi di "{pstd}Command ran succesfully, a new DataWork folder was created here:{p_end}" noi di "{phang2}1) [${dataWorkFolder}]{p_end}" - + } *Creating a new round else if "`itemType'" == "round" { - + di "ItemType: round" iefolder_newItem `newHandle' round "`itemName'" "`abbreviation'" "`subfolder'" - + *Produce success output noi di "{pstd}Command ran succesfully, for the round [`itemName'] the following folders and master dofile were created:{p_end}" noi di "{phang2}1) [${`abbreviation'}]{p_end}" @@ -170,13 +170,13 @@ qui { } *Creating a new level of observation for master data set else if "`itemType'" == "unitofobs" { - + global mastData "$dataWorkFolder/MasterData" - + di "ItemType: untiofobs/unitofobs" di `"iefolder_newMaster `newHandle' "`itemName'""' iefolder_newItem `newHandle' untObs "`itemName'" "`abbreviation'" - + *Produce success output noi di "{pstd}Command ran succesfully, for the unit of observation [`itemName'] the following folders were created:{p_end}" noi di "{phang2}1) [${dataWorkFolder}/MasterData/`itemName']{p_end}" @@ -184,27 +184,27 @@ qui { } *Creating a new subfolder in which rounds can be organized else if "`itemType'" == "subfolder" { - + di "ItemType: subfolder" - iefolder_newItem `newHandle' subFld "`itemName'" "`abbreviation'" - + iefolder_newItem `newHandle' subFld "`itemName'" "`abbreviation'" + *Produce success output noi di "{pstd}Command ran succesfully, for the subfolder [`itemName'] the following folders were created:{p_end}" noi di "{phang2}1) [${dataWorkFolder}/`itemName']{p_end}" noi di "{phang2}2) [${encryptFolder}/Subfolder `itemName' Encrypted]{p_end}" - + } } - + *Closing the new main master dofile handle file close `newHandle' - + *Copy the new master dofile from the tempfile to the original position copy "`newTextFile'" "$projectFolder/DataWork/Project_MasterDofile.do" , replace - - -} -end + + +} +end /************************************************************ @@ -214,23 +214,23 @@ end cap program 
drop iefolder_newProject program define iefolder_newProject - + args projectfolder newHandle - + *Test if folder where to create new folder exist checkFolderExists "$projectFolder" "parent" *Test that the new folder does not already exist - checkFolderExists "$dataWorkFolder" "new" - + checkFolderExists "$dataWorkFolder" "new" + *Create the main DataWork folder mkdir "$dataWorkFolder" - + ****************************** *Writing master do file header ****************************** - *Write intro part with description of project, + *Write intro part with description of project, mdofle_p0 `newHandle' project *Write folder globals section header and the root folders @@ -240,89 +240,89 @@ cap program drop iefolder_newProject mdofle_p2 `newHandle' *Write section that runs sub-master dofiles - mdofle_p3 `newHandle' project - + mdofle_p3 `newHandle' project + ****************************** *Create Global Setup Dofile - ****************************** - + ****************************** + *See this sub command below global_setup - -end + +end cap program drop iefolder_newItem program define iefolder_newItem - + args subHandle itemType itemName itemAbb subfolder - - **Test that the folder where the new folder will + + **Test that the folder where the new folder will * be created exist and that a folder with the same * name is created there already - if "`itemType'" == "round" iefolder_testFolderPossible "dataWorkFolder" "`itemName'" "encryptFolder" "Round `itemName' Encrypted" - if "`itemType'" == "untObs" iefolder_testFolderPossible "mastData" "`itemName'" "encryptFolder" "Master `itemName' Encrypted" + if "`itemType'" == "round" iefolder_testFolderPossible "dataWorkFolder" "`itemName'" "encryptFolder" "Round `itemName' Encrypted" + if "`itemType'" == "untObs" iefolder_testFolderPossible "mastData" "`itemName'" "encryptFolder" "Master `itemName' Encrypted" if "`itemType'" == "subFld" iefolder_testFolderPossible "dataWorkFolder" "`itemName'" "encryptFolder" "Subfolder 
`itemName' Encrypted" - + *Old file reference tempname oldHandle local oldTextFile "$projectFolder/DataWork/Project_MasterDofile.do" file open `oldHandle' using `"`oldTextFile'"', read file read `oldHandle' line - + *Locals needed for the section devider local partNum = 0 //Keeps track of the part number - + while r(eof)==0 { - + *Do not interpret macros local line : subinstr local line "\$" "\\$" local line : subinstr local line "\`" "\\`" - + //di `"`line'"' - - **Run funtion to read old project master dofile to check if any + + **Run funtion to read old project master dofile to check if any * infomration should be added before this line parseReadLine `"`line'"' - + if `r(ief_line)' == 0 { - *This is a regular line of code. It should be copied as-is. No action needed. + *This is a regular line of code. It should be copied as-is. No action needed. file write `subHandle' `"`line'"' _n - + } else if `r(ief_line)' == 1 { - **This is a line of code with a devisor written by - * iefodler. New additions to a section are always added right before an end of section line. + **This is a line of code with a devisor written by + * iefodler. New additions to a section are always added right before an end of section line. 
return list - + *Test if the line is a line with a name line if `r(ief_NameLine)' == 1 { // make sure that this is not confused when moved to the front - + ****Test that name is not used already - testNameAvailible "`line'" "`itemName'" "`itemAbb'" - + testNameAvailible "`line'" "`itemName'" "`itemAbb'" + ****Write new line if "`r(nameLineType)'" == "`itemType'" { - - *Test if abb is identical to name (applies to + + *Test if abb is identical to name (applies to *when abb is not used) *add only name as they are identical if "`itemName'" == "`itemAbb'" local name "`itemName'" *add both name and abb if "`itemName'" != "`itemAbb'" local name "`itemName'*`itemAbb'" - + *write the new line writeNameLine `subHandle' "`itemType'" "`name'" "`line'" } else { - + *We do not add to this line so write is as it was - file write `subHandle' `"`line'"' _n + file write `subHandle' `"`line'"' _n } - + *If subfolder option is used for a round, test that subfolder is created if "`r(nameLineType)'" == "subFld" & "`subfolder'" != "" { ****Test that name is not used already @@ -330,102 +330,102 @@ cap program drop iefolder_newItem } } else if "`itemType'" == "round" { // test if round - - **This is NOT an end of section line. Nothing will be written here + + **This is NOT an end of section line. 
Nothing will be written here * but we test that there is no confict with previous names if "`r(sectionName)'" == "endRounds" { - + *Write devisor for this section - writeDevisor `subHandle' 1 RoundGlobals rounds `itemName' `itemAbb' - + writeDevisor `subHandle' 1 RoundGlobals rounds `itemName' `itemAbb' + *Write the globals for this round to the proejct master dofile newRndFolderAndGlobals `itemName' `itemAbb' `subHandle' round "`subfolder'" *Create the round master dofile and create the subfolders for this round createRoundMasterDofile "$dataWorkFolder/`itemName'" "`itemName'" "`itemAbb'" "`subfolder'" - + *Write an empty line before the end devisor - file write `subHandle' "" _n - + file write `subHandle' "" _n + *Copy the line as is - file write `subHandle' `"`line'"' _n + file write `subHandle' `"`line'"' _n } - - **This is an end of section line. We will add the new content here + + **This is an end of section line. We will add the new content here * before writing the end of section line else if "`r(partName)'" == "End_RunDofiles" { //and test round - + *Write devisor for this section - writeDevisor `subHandle' 3 RunDofiles `itemName' `itemAbb' - - *Write the + writeDevisor `subHandle' 3 RunDofiles `itemName' `itemAbb' + + *Write the file write `subHandle' /// _col(4)"if (0) { //Change the 0 to 1 to run the `itemName' master dofile" _n /// _col(8) `"do ""' _char(36) `"`itemAbb'/`itemName'_MasterDofile.do" "' _n /// _col(4)"}" /// _n - + *Write an empty line before the end devisor - file write `subHandle' "" _n - + file write `subHandle' "" _n + *Then write original line file write `subHandle' `"`line'"' _n - + } else { *If none apply, just write the line - file write `subHandle' `"`line'"' _n + file write `subHandle' `"`line'"' _n } } - + *Test if this is the location to write the new master data globals else if "`r(sectionName)'" == "encrypted" & "`itemType'" == "untObs" { //And new unitofobs - + *Create unit of observation data folder and add global to 
folder in master do file - file write `subHandle' _col(4)"*`itemName' folder globals" _n + file write `subHandle' _col(4)"*`itemName' folder globals" _n createFolderWriteGlobal "`itemName'" "mastData" `itemAbb' `subHandle' *Create folders that have no refrence in master do file ************* *Create unit of observation data subfolders - createFolderWriteGlobal "DataSet" "`itemAbb'" masterDataSets + createFolderWriteGlobal "DataSet" "`itemAbb'" masterDataSets createFolderWriteGlobal "Dofiles" "`itemAbb'" mastDataDo ************* - *create folder in encrypred ID key master + *create folder in encrypred ID key master createFolderWriteGlobal "Master `itemName' Encrypted" "encryptFolder" `itemAbb'_encrypt `subHandle' - file write `subHandle' _col(4) _n + file write `subHandle' _col(4) _n createFolderWriteGlobal "DataSet" "`itemAbb'_encrypt" mastData_E_data createFolderWriteGlobal "Sampling" "`itemAbb'_encrypt" mastData_E_Samp - createFolderWriteGlobal "Treatment Assignment" "`itemAbb'_encrypt" mastData_E_Treat + createFolderWriteGlobal "Treatment Assignment" "`itemAbb'_encrypt" mastData_E_Treat *write the line after these lines - file write `subHandle' `"`line'"' _n + file write `subHandle' `"`line'"' _n } - + *Test if this is the location to write the new subfolder globals else if "`r(sectionName)'" == "master" & "`itemType'" == "subFld" { //And new subfolder - + *Create unit of observation data folder and add global to folder in master do file - file write `subHandle' _col(4)"*`itemName' sub-folder globals" _n - createFolderWriteGlobal "`itemName'" "dataWorkFolder" `itemAbb' `subHandle' + file write `subHandle' _col(4)"*`itemName' sub-folder globals" _n + createFolderWriteGlobal "`itemName'" "dataWorkFolder" `itemAbb' `subHandle' createFolderWriteGlobal "Subfolder `itemName' Encrypted" "encryptFolder" `itemAbb'_encrypt `subHandle' - file write `subHandle' _col(4) _n - - file write `subHandle' `"`line'"' _n + file write `subHandle' _col(4) _n + + file write `subHandle' 
`"`line'"' _n } else { *If none apply, just write the line - file write `subHandle' `"`line'"' _n + file write `subHandle' `"`line'"' _n } } - - + + *Read next file and repeat the while loop file read `oldHandle' line } - *Close the old file. + *Close the old file. file close `oldHandle' end @@ -440,146 +440,146 @@ end cap program drop iefolder_testFolderPossible program define iefolder_testFolderPossible - + args mainFolderGlobal mainName encrptFolderGlobal encrptName - + *Test if folder where to create new folder exist checkFolderExists "$`mainFolderGlobal'" "parent" *Test that the new folder does not already exist - checkFolderExists "$`mainFolderGlobal'/`mainName'" "new" + checkFolderExists "$`mainFolderGlobal'/`mainName'" "new" *Encrypted branch - + *Test if folder where to create new fodler exist checkFolderExists "$`encrptFolderGlobal'" "parent" *Test that the new folder does not already exist checkFolderExists "$`encrptFolderGlobal'/`encrptName'" "new" - + end -cap program drop parseReadLine +cap program drop parseReadLine program define parseReadLine, rclass - + args line - - *Tokenize each line + + *Tokenize each line tokenize `"`line'"' , parse("*") - - **Parse the beginning of the line to + + **Parse the beginning of the line to * test if this line is an iefolder line local divisorStar "`1'" local divisorIefoldeer "`2'" - + if `"`divisorIefoldeer'"' == "round" | `"`divisorIefoldeer'"' == "untObs" | `"`divisorIefoldeer'"' == "subFld"{ - - return scalar ief_line = 1 + + return scalar ief_line = 1 return scalar ief_end = 0 return scalar ief_NameLine = 1 return local nameLineType = `"`divisorIefoldeer'"' - + } else if `"`divisorStar'`divisorIefoldeer'"' != "*iefolder" { - + *This is not a iefolder divisor return scalar ief_line = 0 return scalar ief_end = 0 - + } else { - - *This is a iefolder divisor - return scalar ief_line = 1 + + *This is a iefolder divisor + return scalar ief_line = 1 return scalar ief_NameLine = 0 - + *parse the rest of the line (from 
tokenize above) local partNum "`4'" local partName "`6'" local sectionName "`8'" local itemName "`10'" local itemAbb "`12'" - + *Return the part name and number. All iefolder divisor lines has this return local partNum "`partNum'" - return local partName "`partName'" - + return local partName "`partName'" + *Get the prefix of the section name local sectPrefix = substr("`partName'",1,4) - + *Test if it is an end of section divisor if "`sectPrefix'" == "End_" { - + return scalar ief_end = 1 } else { - + return scalar ief_end = 0 - + *These are returned empty if they do not apply - return local sectionNum "`sectionNum'" - return local sectionName "`sectionName'" - return local sectionAbb "`sectionAbb'" - return local itemName "`itemName'" - return local itemAbb "`itemAbb'" + return local sectionNum "`sectionNum'" + return local sectionName "`sectionName'" + return local sectionAbb "`sectionAbb'" + return local itemName "`itemName'" + return local itemAbb "`itemAbb'" } } end - - - -cap program drop newRndFolderAndGlobals - program define newRndFolderAndGlobals - + + + +cap program drop newRndFolderAndGlobals + program define newRndFolderAndGlobals + args rndName rnd subHandle masterType subfolder - + *Add prefix, suffux and backslash to the subfolder name so that it works in a file path if "`subfolder'" != "" local subfolder_encrypt "Subfolder `subfolder' Encrypted/" if "`subfolder'" != "" local subfolder "`subfolder'/" - + *Write title to the folder globals to this round file write `subHandle' _col(4)"*`rndName' folder globals" _n - *Round main folder + *Round main folder createFolderWriteGlobal "`rndName'" "dataWorkFolder" "`rnd'" `subHandle' "`subfolder'" - + *Sub folders (this onle writes globals in master dofile, folders are created when writing round master dofile) writeGlobal "Round `rndName' Encrypted" "encryptFolder" "`rnd'_encrypt" `subHandle' "`subfolder_encrypt'" writeGlobal "DataSets" "`rnd'" "`rnd'_dt" `subHandle' writeGlobal "Dofiles" "`rnd'" "`rnd'_do" 
`subHandle' writeGlobal "Output" "`rnd'" "`rnd'_out" `subHandle' - - *This are never written to the master dofile, only created - createFolderWriteGlobal "Documentation" "`rnd'" "`rnd'_doc" - createFolderWriteGlobal "Questionnaire" "`rnd'" "`rnd'_quest" - + + *This are never written to the master dofile, only created + createFolderWriteGlobal "Documentation" "`rnd'" "`rnd'_doc" + createFolderWriteGlobal "Questionnaire" "`rnd'" "`rnd'_quest" + end - -cap program drop createRoundMasterDofile - program define createRoundMasterDofile - + +cap program drop createRoundMasterDofile + program define createRoundMasterDofile + args roundfolder rndName rnd subfolder - + *Add prefix, suffux and backslash to the subfolder name so that it works in a file path if "`subfolder'" != "" local subfolder_encrypt "Subfolder `subfolder' Encrypted/" if "`subfolder'" != "" local subfolder "`subfolder'/" - + *Create a temporary textfile tempname roundHandle tempfile roundTextFile - file open `roundHandle' using "`roundTextFile'", text write replace + file open `roundHandle' using "`roundTextFile'", text write replace - *Write intro part with description of round, + *Write intro part with description of round, mdofle_p0 `roundHandle' round mdofle_p1 `roundHandle' "$projectFolder" `rndName' `rnd' *Create main round folder and add global to round master dofile - file write `roundHandle' _n _col(4)"*Encrypted round sub-folder globals" _n + file write `roundHandle' _n _col(4)"*Encrypted round sub-folder globals" _n writeGlobal "`rndName'" "dataWorkFolder" "`rnd'" `roundHandle' "`subfolder'" - + *Create encrypted round sub-folder and add global to round master dofile - file write `roundHandle' _n _col(4)"*Encrypted round sub-folder globals" _n + file write `roundHandle' _n _col(4)"*Encrypted round sub-folder globals" _n createFolderWriteGlobal "Round `rndName' Encrypted" "encryptFolder" "`rnd'_encrypt" `roundHandle' "`subfolder_encrypt'" createFolderWriteGlobal "Raw Identified Data" 
"`rnd'_encrypt" "`rnd'_dtRaw" `roundHandle' createFolderWriteGlobal "Dofiles Import" "`rnd'_encrypt" "`rnd'_doImp" `roundHandle' @@ -590,62 +590,62 @@ cap program drop createRoundMasterDofile createFolderWriteGlobal "DataSets" "`rnd'" "`rnd'_dt" `roundHandle' createFolderWriteGlobal "Intermediate" "`rnd'_dt" "`rnd'_dtInt" `roundHandle' createFolderWriteGlobal "Final" "`rnd'_dt" "`rnd'_dtFin" `roundHandle' - + *Creat Dofile sub-folder and add global to round master dofile file write `roundHandle' _n _col(4)"*Dofile sub-folder globals" _n createFolderWriteGlobal "Dofiles" "`rnd'" "`rnd'_do" `roundHandle' createFolderWriteGlobal "Cleaning" "`rnd'_do" "`rnd'_doCln" `roundHandle' createFolderWriteGlobal "Construct" "`rnd'_do" "`rnd'_doCon" `roundHandle' createFolderWriteGlobal "Analysis" "`rnd'_do" "`rnd'_doAnl" `roundHandle' - + *Create Output subfolders and add global to round master dofile file write `roundHandle' _n _col(4)"*Output sub-folder globals" _n createFolderWriteGlobal "Output" "`rnd'" "`rnd'_out" `roundHandle' - createFolderWriteGlobal "Raw" "`rnd'_out" "`rnd'_outRaw" `roundHandle' + createFolderWriteGlobal "Raw" "`rnd'_out" "`rnd'_outRaw" `roundHandle' createFolderWriteGlobal "Final" "`rnd'_out" "`rnd'_outFin" `roundHandle' - + *Creat Questionnaire subfolders and add global to round master dofile file write `roundHandle' _n _col(4)"*Questionnaire sub-folder globals" _n - createFolderWriteGlobal "Questionnaire Develop" "`rnd'_quest" "`rnd'_qstDev" - createFolderWriteGlobal "Questionnaire Final" "`rnd'_quest" "`rnd'_qstFin" + createFolderWriteGlobal "Questionnaire Develop" "`rnd'_quest" "`rnd'_qstDev" + createFolderWriteGlobal "Questionnaire Final" "`rnd'_quest" "`rnd'_qstFin" createFolderWriteGlobal "PreloadData" "`rnd'_quest" "`rnd'_prld" `roundHandle' createFolderWriteGlobal "Questionnaire Documentation" "`rnd'_quest" "`rnd'_doc" `roundHandle' - + *Write sub devisor starting master and monitor data section section - writeDevisor `roundHandle' 1 
End_FolderGlobals - + writeDevisor `roundHandle' 1 End_FolderGlobals + *Write constant global section here mdofle_p2 `roundHandle' mdofle_p3 `roundHandle' round `rndName' `rnd' - + *Closing the new main master dofile handle file close `roundHandle' *Copy the new master dofile from the tempfile to the original position copy "`roundTextFile'" "${`rnd'}/`rndName'_MasterDofile.do" , replace - + end - -cap program drop createFolderWriteGlobal - program define createFolderWriteGlobal + +cap program drop createFolderWriteGlobal + program define createFolderWriteGlobal args folderName parentGlobal globalName subHandle subfolder *Create a global for this folder global `globalName' "$`parentGlobal'/`subfolder'`folderName'" - + *If a subhandle is specified then write the global to the master file if ("`subHandle'" != "") { writeGlobal "`folderName'" `parentGlobal' `globalName' `subHandle' "`subfolder'" } - + *Create the folder mkdir "${`parentGlobal'}/`subfolder'`folderName'" - + end -cap program drop writeGlobal - program define writeGlobal +cap program drop writeGlobal + program define writeGlobal args folderName parentGlobal globalName subHandle subfolder @@ -655,97 +655,97 @@ cap program drop writeGlobal *Write global in round master dofile if subHandle is specified file write `subHandle' /// _col(4) `"global `globalName'"' _col(34) `"""' /// - _char(36)`"`parentGlobal'/`subfolder'`folderName'" "' _n + _char(36)`"`parentGlobal'/`subfolder'`folderName'" "' _n -end +end -cap program drop writeDevisor +cap program drop writeDevisor program define writeDevisor , rclass - - args subHandle partNum partName sectionName itemName itemAbb - + + args subHandle partNum partName sectionName itemName itemAbb + local devisor "*iefolder*`partNum'*`partName'*`sectionName'*`itemName'*`itemAbb'*" - + local devisorLen = strlen("`devisor'") - + *Make all devisors at least 80 characters wide by adding stars (just aesthetic reasons) if (`devisorLen' < 80) { - + local numStars = 80 - 
`devisorLen' local addedStars _dup(`numStars') _char(42) } - + file write `subHandle' _n "`devisor'" `addedStars' file write `subHandle' _n "*iefolder will not work properly if the line above is edited" _n _n - + end - -*Program that checks if folder exist and provide errors if not. -cap program drop checkFolderExists - program define checkFolderExists , rclass - + +*Program that checks if folder exist and provide errors if not. +cap program drop checkFolderExists + program define checkFolderExists , rclass + args folder type - + *Returns 0 if folder does not exist, 1 if it does mata : st_numscalar("r(dirExist)", direxists("`folder'")) - - ** If type is parent folder, i.e. the folder in which we are creating a + + ** If type is parent folder, i.e. the folder in which we are creating a * new folder, then the parent folder should exist. Throw an error if it doesn't if `r(dirExist)' == 0 & "`type'" == "parent" { - - noi di as error `"{phang}A new folder cannot be created in "`folder'" as that folder does not exist. iefolder will not work properly if the names of the folders it depends on are changed.{p_end}"' + + noi di as error `"{phang}A new folder cannot be created in "`folder'" as that folder does not exist. iefolder will not work properly if the names of the folders it depends on are changed.{p_end}"' error 693 exit } - - ** If type is new folder, i.e. the folder we are creating, - * then that folder should not exist. Throw an error if it does + + ** If type is new folder, i.e. the folder we are creating, + * then that folder should not exist. Throw an error if it does if `r(dirExist)' == 1 & "`type'" == "new" { - - noi di as error `"{phang}The new folder cannot be created since the folder "`folder'" already exist. You may not use the a name twice for the same type of folder.{p_end}"' + + noi di as error `"{phang}The new folder cannot be created since the folder "`folder'" already exist. 
You may not use the a name twice for the same type of folder.{p_end}"' error 693 exit } - + end *Write or update the line that list all names used cap program drop writeNameLine program define writeNameLine - + args subHandle type name line - + if "`type'" == "new" { - + *Add a white space before this section - file write `subHandle' "" _n - - writeNameLine `subHandle' "round" + file write `subHandle' "" _n + + writeNameLine `subHandle' "round" writeNameLine `subHandle' "untObs" writeNameLine `subHandle' "subFld" } else { - + *remove stars in the end of the line local line = substr("`line'",1,strlen("`line'") - indexnot(reverse("`line'"),"*") + 1) - + *Start the new line if "`name'" == "" local line "*`type'" - + *add new name (and abbrevation if applicable) to existing line if "`name'" != "" local line "`line'*`name'" - + *Make all devisors at least 80 characters wide by adding stars if (strlen("`line'") < 80) { - + local numStars = 80 - strlen("`line'") local addedStars _dup(`numStars') _char(42) } - - file write `subHandle' "`line'" `addedStars' _n - + + file write `subHandle' "`line'" `addedStars' _n + *If creating the lines the first time then add a warning text at the end if "`type'" == "subFld" & "`name'" == "" { file write `subHandle' "*iefolder will not work properly if the lines above are edited" _n @@ -756,41 +756,41 @@ end * Test if the new name or abb is already used cap program drop testNameAvailible program define testNameAvailible - + args line name abb subfolder - + *If abb was not used or is the same, remove abb - if "`name'" == "`abb'" local abb "" - - *Tokenize each line + if "`name'" == "`abb'" local abb "" + + *Tokenize each line tokenize "`line'" , parse("*") - + *Start at the second item in the list as the first is a star local number 1 local item "``number''" - + *Local that keeps track if subfolder names is used local subfolderFound 0 - *Loop over all + *Loop over all while "`item'" != "" { *Loop over name and abb (if abb is not used) 
foreach nameTest in `name' `abb' { - + *Test if the name to test is equal to something already used if "`item'" == "`nameTest'" & "`subfolder'" == "" { - + *name already used, throw error noi di as error "{phang}The name `nameTest' have already been used as a folder name or abreviation. No new folders are creaetd and the master do-files has not been changed.{p_end}" error 507 } else if "`item'" == "`nameTest'" & "`subfolder'" != "" { - + local subfolderFound 1 } } - + *Increment number one more step and take the nest item in the tokenized list local ++number @@ -803,26 +803,26 @@ cap program drop testNameAvailible local item "``number''" } } - + *Test that subfolder was found if `subfolderFound' == 0 & "`subfolder'" != "" { noi di as error "{phang}The subofolder `name' has not been created by iefolder. Please only create subfolders with iefolders, and do not change the names once they are created. No new folders are creaetd and the master do-files has not been changed.{p_end}" error 507 } - + end - + /************************************************************ Sub-functions : writing master dofiles ************************************************************/ - + cap program drop mdofle_p0 - program define mdofle_p0 - + program define mdofle_p0 + args subHandle itemType rndName task - + file write `subHandle' /// _col(4)"* ******************************************************************** *" _n /// _col(4)"* ******************************************************************** *" _n /// @@ -832,13 +832,13 @@ cap program drop mdofle_p0 _col(4)"*" _col(75) "*" _n /// _col(4)"* ******************************************************************** *" _n /// _col(4)"* ******************************************************************** *" _n /// - _n _col(8)"/*" _n - - if "`itemType'" == "project" { - - file write `subHandle' /// + _n _col(8)"/*" _n + + if "`itemType'" == "project" { + + file write `subHandle' /// _col(8)"** PURPOSE:" _col(25) "Write intro to project 
here" _n /// - _n /// + _n /// _col(8)"** OUTLINE:" _col(25) "PART 0: Standardize settings and install packages" _n /// _col(25) "PART 1: Set globals for dynamic file paths" _n /// _col(25) "PART 2: Set globals for constants and varlist" _n /// @@ -849,32 +849,32 @@ cap program drop mdofle_p0 _col(32) "tas. Do not include Part 0-2 in a task" _n /// _col(32) "specific master do-file" _n /// _n _n - } + } else if "`itemType'" == "round" { - - file write `subHandle' /// + + file write `subHandle' /// _col(8)"** PURPOSE:" _col(25) "Write intro to survey round here" _n /// - _n /// + _n /// _col(8)"** OUTLINE:" _col(25) "PART 0: Standardize settings and install packages" _n /// _col(25) "PART 1: Preparing folder path globals" _n /// - _col(25) "PART 2: Run the master do files for each high level task" _n _n + _col(25) "PART 2: Run the master do files for each high level task" _n _n } - - file write `subHandle' /// + + file write `subHandle' /// _col(8)"** IDS VAR:" _col(25) "list_ID_var_here //Uniquely identifies households (update for your project)" _n /// - _n /// - _col(8)"** NOTES:" _n /// - _n /// + _n /// + _col(8)"** NOTES:" _n /// + _n /// _col(8)"** WRITEN BY:" _col(25) "names_of_contributors" _n /// _n /// - _col(8)"** Last date modified: `c(current_date)'" _n /// + _col(8)"** Last date modified: `c(current_date)'" _n /// _col(8)"*/" _n - + *Write devisor starting setting standardize section - writeDevisor `subHandle' 0 StandardSettings - - - file write `subHandle' /// + writeDevisor `subHandle' 0 StandardSettings + + + file write `subHandle' /// _col(4)"* ******************************************************************** *" _n /// _col(4)"*" _n /// _col(4)"*" _col(12) "PART 0: INSTALL PACKAGES AND STANDARDIZE SETTINGS" _n /// @@ -882,23 +882,29 @@ cap program drop mdofle_p0 _col(4)"*" _col(16) "-Install packages needed to run all dofiles called" _n /// _col(4)"*" _col(17) "by this master dofile." 
_n /// _col(4)"*" _col(16) "-Use ieboilstart to harmonize settings across users" _n /// - _col(4)"*" _n /// - _col(4)"* ******************************************************************** *" _n + _col(4)"*" _n /// + _col(4)"* ******************************************************************** *" _n *Write devisor ending setting standardize section writeDevisor `subHandle' 0 End_StandardSettings - - file write `subHandle' /// - _col(8)"*Install all packages that this project requires:" _n /// - _col(8)"ssc install ietoolkit, replace" _n /// + + file write `subHandle' /// + _col(4)"*Install all packages that this project requires:" _n /// + _col(4)"local user_commands ietoolkit" _col(40) "//Fill this list will all commands this project requires" _n /// + _col(4)"foreach command of local user_commands {" _n /// + _col(8) "cap which " _char(96) "command'" _n /// + _col(8) "if _rc == 111 {" _n /// + _col(12) "cap ssc install " _char(96) "command'" _n /// + _col(8) "}" _n /// + _col(4)"}" _n /// _n /// - _col(8)"*Standardize settings accross users" _n /// - _col(8)"ieboilstart, version(12.1)" _col(40) "//Set the version number to the oldest version used by anyone in the project team" _n /// - _col(8) _char(96)"r(version)'" _col(40) "//This line is needed to actually set the version from the command above" _n - - + _col(4)"*Standardize settings accross users" _n /// + _col(4)"ieboilstart, version(12.1)" _col(40) "//Set the version number to the oldest version used by anyone in the project team" _n /// + _col(4) _char(96)"r(version)'" _col(40) "//This line is needed to actually set the version from the command above" _n + + end - + cap program drop mdofle_p1 program define mdofle_p1 @@ -910,17 +916,17 @@ cap program drop mdofle_p1 file write `subHandle' /// _col(4)"* ******************************************************************** *" _n /// - _col(4)"*" _n /// + _col(4)"*" _n /// _col(4)"*" _col(12) "PART 1: PREPARING FOLDER PATH GLOBALS" _n /// - _col(4)"*" _n /// + 
_col(4)"*" _n /// _col(4)"*" _col(16) "-Set the global box to point to the project folder" _n /// - _col(4)"*" _col(17) "on each collaborators computer." _n /// - _col(4)"*" _col(16) "-Set other locals that point to other folders of interest." _n /// - _col(4)"*" _n /// + _col(4)"*" _col(17) "on each collaborators computer." _n /// + _col(4)"*" _col(16) "-Set other locals that point to other folders of interest." _n /// + _col(4)"*" _n /// _col(4)"* ******************************************************************** *" _n - + file write `subHandle' /// - _n /// + _n /// _col(4)"* Users" _n /// _col(4)"* -----------" _n /// _n /// @@ -933,141 +939,141 @@ cap program drop mdofle_p1 _n /// _col(4)"* Root folder globals" _n /// _col(4)"* ---------------------" _n /// - _n /// + _n /// _col(4)"if "_char(36)"user == 1 {" _n /// _col(8)`"global projectfolder "$projectFolder""' _n /// - _col(4)"}" _n /// - _n /// + _col(4)"}" _n /// + _n /// _col(4)"if "_char(36)"user == 2 {" _n /// _col(8)`"global projectfolder "" //Enter the file path to the projectfolder of next user here"' _n /// _col(4)"}" _n _n /// "*These lines are used to test that name ois not already used (do not edit manually)" - + *For new main master do file if "`rndName'" == "" { - + *Write name line only in main master do file writeNameLine `subHandle' new } - + file write `subHandle' _n _n /// _col(4)"* Project folder globals" _n /// _col(4)"* ---------------------" _n _n /// _col(4)"global dataWorkFolder " _col(34) `"""' _char(36)`"projectfolder/DataWork""' _n *Write sub devisor starting master and monitor data section section - if "`rndName'" == "" writeDevisor `subHandle' 1 FolderGlobals subfolder - + if "`rndName'" == "" writeDevisor `subHandle' 1 FolderGlobals subfolder + *Write sub devisor starting master and monitor data section section - writeDevisor `subHandle' 1 FolderGlobals master - + writeDevisor `subHandle' 1 FolderGlobals master + di `" if "`rndName'" == "" "' *Create master data folder and 
add global to folder in master do file if "`rndName'" == "" createFolderWriteGlobal "MasterData" "dataWorkFolder" mastData `subHandle' //For new project if "`rndName'" != "" writeGlobal "MasterData" "dataWorkFolder" mastData `subHandle' //For new round *Write sub devisor starting master and monitor data section section - writeDevisor `subHandle' 1 FolderGlobals encrypted - + writeDevisor `subHandle' 1 FolderGlobals encrypted + *Create master data folder and add global to folder in master do file if "`rndName'" == "" createFolderWriteGlobal "EncryptedData" "dataWorkFolder" encryptFolder `subHandle' //For new project if "`rndName'" != "" writeGlobal "EncryptedData" "dataWorkFolder" encryptFolder `subHandle' //For new rounds - + *For new main master do file if "`rndName'" == "" { *Write sub devisor starting master and monitor data section section - writeDevisor `subHandle' 1 FolderGlobals endRounds + writeDevisor `subHandle' 1 FolderGlobals endRounds *Write sub devisor starting master and monitor data section section writeDevisor `subHandle' 1 End_FolderGlobals - + } - + *For new round master do file if "`rndName'" != "" { *Write sub devisor starting master and monitor data section section - writeDevisor `subHandle' 1 FolderGlobals `rndName' + writeDevisor `subHandle' 1 FolderGlobals `rndName' } -end +end + - cap program drop mdofle_p2 program define mdofle_p2 - - args subHandle + + args subHandle di "masterDofilePart2 start" - + *Write devisor starting standardization globals section - writeDevisor `subHandle' 2 StandardGlobals - + writeDevisor `subHandle' 2 StandardGlobals + file write `subHandle' /// _col(4)"* Set all non-folder path globals that are constant accross" _n /// - _col(4)"* the project. Examples are conversion rates used in unit" _n /// - _col(4)"* standardization, differnt set of control variables," _n /// - _col(4)"* ado file paths etc." _n _n /// + _col(4)"* the project. 
Examples are conversion rates used in unit" _n /// + _col(4)"* standardization, differnt set of control variables," _n /// + _col(4)"* ado file paths etc." _n _n /// _col(4) `"do ""' _char(36) `"dataWorkFolder/global_setup.do" "' _n _n - + *Write devisor ending standardization globals section - writeDevisor `subHandle' 2 End_StandardGlobals - + writeDevisor `subHandle' 2 End_StandardGlobals + di "masterDofilePart2 end" - -end + +end + - cap program drop mdofle_p3 program define mdofle_p3 - + args subHandle itemType rndName rnd di "masterDofilePart3 start" - + *Part number local partNum = 3 - + *Write devisor starting the section running sub-master dofiles - writeDevisor `subHandle' `partNum' RunDofiles - + writeDevisor `subHandle' `partNum' RunDofiles + file write `subHandle' /// _col(4)"* ******************************************************************** *" _n /// - _col(4)"*" _n /// + _col(4)"*" _n /// _col(4)"*" _col(12) "PART `partNum': - RUN DOFILES CALLED BY THIS MASTER DO FILE" _n /// - _col(4)"*" _n - - if "`itemType'" == "project" { + _col(4)"*" _n + + if "`itemType'" == "project" { file write `subHandle' /// _col(4)"*" _col(16) "-When survey rounds are added, this section will" _n /// - _col(4)"*" _col(17) "link to the master dofile for that round." _n /// + _col(4)"*" _col(17) "link to the master dofile for that round." _n /// _col(4)"*" _col(16) "-The default is that these dofiles are set to not" _n /// _col(4)"*" _col(17) "run. It is rare that all round specfic master dofiles" _n /// _col(4)"*" _col(17) "are called at the same time, the round specific master" _n /// _col(4)"*" _col(17) "dofiles are almost always called individually. The" _n /// - _col(4)"*" _col(17) "exception is when reviewing or replicating a full project." _n - } - else if "`itemType'" == "round" { + _col(4)"*" _col(17) "exception is when reviewing or replicating a full project." 
_n + } + else if "`itemType'" == "round" { file write `subHandle' /// _col(4)"*" _col(16) "-A task master dofile has been created for each high" _n /// _col(4)"*" _col(17) "level task (cleaning, construct, analyze). By " _n /// _col(4)"*" _col(17) "running all of them all data work associated with the " _n /// _col(4)"*" _col(17) "`rndName' should be replicated, including output of " _n /// - _col(4)"*" _col(17) "tablets, graphs, etc." _n /// - _col(4)"*" _col(16) "-Feel free to add to this list if you have other high" _n /// - _col(4)"*" _col(17) "level tasks relevant to your project." _n + _col(4)"*" _col(17) "tablets, graphs, etc." _n /// + _col(4)"*" _col(16) "-Feel free to add to this list if you have other high" _n /// + _col(4)"*" _col(17) "level tasks relevant to your project." _n } - + file write `subHandle' /// - _col(4)"*" _n /// + _col(4)"*" _n /// _col(4)"* ******************************************************************** *" _n - - if "`itemType'" == "round" { - + + if "`itemType'" == "round" { + file write `subHandle' _n /// _col(4)"**Set the locals corresponding to the taks you want" _n /// _col(4)"* run to 1. To not run a task, set the local to 0." 
_n /// @@ -1075,97 +1081,97 @@ cap program drop mdofle_p3 _col(4)"local cleaningDo" _col(25) "0" _n /// _col(4)"local constructDo" _col(25) "0" _n /// _col(4)"local analysisDo" _col(25) "0" _n /// - - *Create the references to the high level task + + *Create the references to the high level task highLevelTask `subHandle' "`rndName'" "`rnd'" "import" highLevelTask `subHandle' "`rndName'" "`rnd'" "cleaning" highLevelTask `subHandle' "`rndName'" "`rnd'" "construct" highLevelTask `subHandle' "`rndName'" "`rnd'" "analysis" - + } - + *Write devisor ending the section running sub-master dofiles writeDevisor `subHandle' `partNum' End_RunDofiles - + di "masterDofilePart3 end" - -end - -cap program drop highLevelTask + +end + +cap program drop highLevelTask program define highLevelTask - - args roundHandle rndName rnd task - + + args roundHandle rndName rnd task + di "highLevelTask start" - + *Import folder is in differnt location, in the encrypted folder if "`task'" != "import" { - + *The location of all the task master dofile apart from import master local taskdo_fldr "`rnd'_do" } else { - - **The import master dofile is in the encryption folder as it is + + **The import master dofile is in the encryption folder as it is * likely to have identifying information local taskdo_fldr "`rnd'_doImp" } - + *Write section where task master files are called file write `roundHandle' _n /// _col(4)"if (" _char(96) "`task'Do' == 1) { //Change the local above to run or not to run this file" _n /// _col(8) `"do ""' _char(36) `"`taskdo_fldr'/`rnd'_`task'_MasterDofile.do" "' _n /// _col(4)"}" _n - + *Create the task dofiles highLevelTaskMasterDofile `rndName' `task' `rnd' - + end - + cap program drop highLevelTaskMasterDofile program define highLevelTaskMasterDofile - + di "highLevelTaskMasterDofile start" - + args rndName task rnd - + *Write the round dofile - + *Create a temporary textfile tempname taskHandle tempfile taskTextFile file open `taskHandle' using "`taskTextFile'", text write 
replace - - mdofle_task `taskHandle' `rndName' `rnd' `task' - + + mdofle_task `taskHandle' `rndName' `rnd' `task' + *Closing the new main master dofile handle file close `taskHandle' *Import folder is in differnt location, in the encrypted folder if "`task'" != "import" { - + *Copy the new task master dofile from the tempfile to the original position copy "`taskTextFile'" "${`rnd'_do}/`rnd'_`task'_MasterDofile.do" , replace } else { - - *Copy the import task master do file in to the Dofile import folder. - copy "`taskTextFile'" "${`rnd'_doImp}/`rnd'_`task'_MasterDofile.do" , replace + + *Copy the import task master do file in to the Dofile import folder. + copy "`taskTextFile'" "${`rnd'_doImp}/`rnd'_`task'_MasterDofile.do" , replace } - -end - + +end + cap program drop mdofle_task program define mdofle_task - + args subHandle rndName rnd task - - **Create local with round name and task in + + **Create local with round name and task in * upper case for the titel of the master dofile local caps = upper("`rndName' `task'") - + *Differnt global suffix for different tasks - local suffix + local suffix if "`task'" == "import" { local suffix "_doImp" } @@ -1178,7 +1184,7 @@ cap program drop mdofle_task if "`task'" == "analysis" { local suffix "_doAnl" } - + file write `subHandle' /// _col(4)"* ******************************************************************** *" _n /// _col(4)"* ******************************************************************** *" _n /// @@ -1189,16 +1195,16 @@ cap program drop mdofle_task _col(4)"*" _col(75) "*" _n /// _col(4)"* ******************************************************************** *" _n /// _col(4)"* ******************************************************************** *" _n /// - _n /// - _col(4)"** IDS VAR:" _col(25) "list_ID_var_here //Uniquely identifies households (update for your project)" _n /// - _col(4)"** NOTES:" _n /// + _n /// + _col(4)"** IDS VAR:" _col(25) "list_ID_var_here //Uniquely identifies households (update for 
your project)" _n /// + _col(4)"** NOTES:" _n /// _col(4)"** WRITEN BY:" _col(25) "names_of_contributors" _n /// - _col(4)"** Last date modified: `c(current_date)'" _n _n /// + _col(4)"** Last date modified: `c(current_date)'" _n _n /// _n /// _col(4)"* ***************************************************** *" _n /// _col(4)"*" _col(60) "*" _n /// - - + + *Create the sections with placeholders for each dofile for this task mdofle_task_dosection `subHandle' "`rnd'" "`task'" "`suffix'" 1 mdofle_task_dosection `subHandle' "`rnd'" "`task'" "`suffix'" 2 @@ -1207,16 +1213,16 @@ cap program drop mdofle_task file write `subHandle' /// _col(4)"* ************************************" _n /// _col(4)"*" _col(8) "Keep adding sections for all additional dofiles needed" _n /// - + end cap program drop mdofle_task_dosection program define mdofle_task_dosection - + *Write the task master do-file section - + args subHandle rnd task suffix number - + file write `subHandle' /// _col(4)"* ***************************************************** *" _n /// _col(4)"*" _n /// @@ -1231,7 +1237,7 @@ cap program drop mdofle_task_dosection _col(4)"* ***************************************************** *" _n /// _n /// _col(8) `"*do ""' _char(36) `"`rnd'`suffix'/dofile`number'.do" //Give your dofile a more informative name, this is just a place holder name"' _n _n - + end /***************************************************************** @@ -1242,34 +1248,34 @@ end cap program drop global_setup program define global_setup - + *Create a temporary textfile - tempname glbStupHandle + tempname glbStupHandle tempfile glbStupTextFile - - cap file close `glbStupHandle' - file open `glbStupHandle' using "`glbStupTextFile'", text write append - - + + cap file close `glbStupHandle' + file open `glbStupHandle' using "`glbStupTextFile'", text write append + + file write `glbStupHandle' /// _col(4)"* ******************************************************************** *" _n /// - _col(4)"*" _n /// + _col(4)"*" 
_n /// _col(4)"*" _col(12) "SET UP STANDARDIZATION GLOBALS AND OTHER CONSTANTS" _n /// - _col(4)"*" _n /// + _col(4)"*" _n /// _col(4)"*" _col(16) "-Set globals used all across the projects" _n /// _col(4)"*" _col(16) "-It is bad practice to define these at mutliple locations" _n /// - _col(4)"*" _n /// + _col(4)"*" _n /// _col(4)"* ******************************************************************** *" _n - + file write `glbStupHandle' /// - _n /// + _n /// _col(4)"* ******************************************************************** *" _n /// _col(4)"* Set all conversion rates used in unit standardization " _n /// _col(4)"* ******************************************************************** *" _n /// _n /// _col(4)"**Define all your conversion rates here instead of typing them each " _n /// _col(4)"* time you are converting amounts, for example - in unit standardization. " _n /// - _col(4)"* We have already listed common conversion rates below, but you" _n /// + _col(4)"* We have already listed common conversion rates below, but you" _n /// _col(4)"* might have to add rates specific to your project, or change the target " _n /// _col(4)"* unit if you are standardizing to other units than meters, hectares," _n /// _col(4)"* and kilograms." 
_n /// @@ -1280,21 +1286,21 @@ program define global_setup _col(8)"global km" _col(24) "= 1000" _n /// _col(8)"global yard" _col(24) "= 0.9144" _n /// _col(8)"global inch" _col(24) "= 0.0254" _n /// - _n /// + _n /// _col(4)"*Standardizing area to hectares" _n /// _col(8)"global sqfoot" _col(24) "= (1 / 107639)" _n /// _col(8)"global sqmile" _col(24) "= (1 / 258.999)" _n /// _col(8)"global sqmtr" _col(24) "= (1 / 10000)" _n /// _col(8)"global sqkmtr" _col(24) "= (1 / 100)" _n /// _col(8)"global acre" _col(24) "= 0.404686" _n /// - _n /// + _n /// _col(4)"*Standardizing weight to kilorgrams" _n /// _col(8)"global pound" _col(24) "= 0.453592" _n /// _col(8)"global gram" _col(24) "= 0.001" _n /// _col(8)"global impTon" _col(24) "= 1016.05" _n /// _col(8)"global usTon" _col(24) "= 907.1874996" _n /// _col(8)"global mtrTon" _col(24) "= 1000" _n /// - _n /// + _n /// _col(4)"* ******************************************************************** *" _n /// _col(4)"* Set global lists of variables" _n /// _col(4)"* ******************************************************************** *" _n /// @@ -1305,11 +1311,11 @@ program define global_setup _col(4)"* By defining these lists here, you can easliy make updates and have " _n /// _col(4)"* those updates being applied to all regressions without a large risk " _n /// _col(4)"* of copy and paste errors." 
_n /// - _n /// + _n /// _col(8)"*Control Variables" _n /// _col(8)"*Example: global household_controls" _col(50) "income female_headed" _n /// _col(8)"*Example: global country_controls" _col(50) "GDP inflation unemployment" _n /// - _n /// + _n /// _col(4)"* ******************************************************************** *" _n /// _col(4)"* Set custom ado file path" _n /// _col(4)"* ******************************************************************** *" _n /// @@ -1323,25 +1329,22 @@ program define global_setup _col(12)"adopath ++" _col(24) `"""' _char(36) `"ado/m" "' _n /// _col(12)"adopath ++" _col(24) `"""' _char(36) `"ado/b" "' _n /// _col(4)"*/"_n /// - _n /// + _n /// _col(4)"* ******************************************************************** *" _n /// _col(4)"* Anything else" _n /// _col(4)"* ******************************************************************** *" _n /// - _n /// + _n /// _col(4)"**Everything that is constant may be included here. One example of" _n /// _col(4)"* something not constant that should be included here is exchange" _n /// _col(4)"* rates. It is best practice to have one global with the exchange rate" _n /// _col(4)"* here, and reference this each time a currency conversion is done. If " _n /// _col(4)"* the currency exchange rate needs to be updated, then it only has to" _n /// - _col(4)"* be done at one place for the whole project." _n /// - + _col(4)"* be done at one place for the whole project." 
_n /// + *Closing the new main master dofile handle file close `glbStupHandle' - - *Copy the new master dofile from the tempfile to the original position - copy "`glbStupTextFile'" "$dataWorkFolder/global_setup.do" , replace - -end - + *Copy the new master dofile from the tempfile to the original position + copy "`glbStupTextFile'" "$dataWorkFolder/global_setup.do" , replace +end diff --git a/src/ado_files/iegitaddmd.ado b/src/ado_files/iegitaddmd.ado index f93d094f..81b3836a 100644 --- a/src/ado_files/iegitaddmd.ado +++ b/src/ado_files/iegitaddmd.ado @@ -1,4 +1,4 @@ -*! version 5.4 15DEC2017 DIME Analytics lcardosodeandrad@worldbank.org +*! version 5.5 26APR2018 DIME Analytics lcardosodeandrad@worldbank.org cap program drop iegitaddmd program define iegitaddmd @@ -173,7 +173,7 @@ cap program drop writeGitKeep *Closing the file file close `newHandle' - + } end diff --git a/src/ado_files/iegraph.ado b/src/ado_files/iegraph.ado index 19e7aba1..645ee487 100644 --- a/src/ado_files/iegraph.ado +++ b/src/ado_files/iegraph.ado @@ -1,73 +1,73 @@ -*! version 5.4 15DEC2017 DIME Analytics lcardosodeandrad@worldbank.org - +*! version 5.5 26APR2018 DIME Analytics lcardosodeandrad@worldbank.org + cap program drop iegraph program define iegraph, rclass - + syntax varlist, [noconfbars BASICTItle(string) save(string) ignoredummytest confbarsnone(varlist) /// confintval(numlist min=1 max=1 >0 <1) VARLabels BAROPTions(string) norestore /// GREYscale yzero *] - + if "`restore'" == "" preserve - + qui { - + version 11 - - *Only keep the observations in the regressions + + *Only keep the observations in the regressions keep if e(sample) == 1 - + *Copy beta matrix to a regular matrix mat BETA = e(b) - + *Unabbriviate and varlists unab varlist : `varlist' - - *Testing to see if the variables used in the regressions are actual dummy variables as treatment vars need to be dummy variables. 
+ + *Testing to see if the variables used in the regressions are actual dummy variables as treatment vars need to be dummy variables. foreach var of local varlist { - + *Get the column number from this var local colnumber = colnumb(BETA,"`var'") - + *Test if this var was omitted from the regression if "`r(label`colnumber')'" == "(omitted)" { - + *Test if that dummy is not found in the estimation matrix noi di as error "{phang}Dummy variable `var' was not included in the regression, or was omitted from it.{p_end}" - error 480 + error 480 } } - + foreach var of local varlist { - *Assigning variable coefficient/standard errors/no of obs. to scalars with the name Coeff_(`variable name') + *Assigning variable coefficient/standard errors/no of obs. to scalars with the name Coeff_(`variable name') *coeff_se_(`variable name'), obs_(`variable name'). - + *Access and store the beta value ( - scalar coeff_`var' = _b[`var'] + scalar coeff_`var' = _b[`var'] *Access and store standard errors for the dummy scalar coeff_se_`var' = _se[`var'] - + *Store the number of observations for this dummy count if `var' == 1 //Count one tmt group at the time scalar obs_`var' = r(N) - } - + } + *Test if the list of dummies are valid if "`ignoredummytest'" == "" testDums `varlist' - + *Checking to see if the noconfbars option has been used and assigning 1 and 0 based *on that to the CONFINT_BAR variable. if "`confbars'" != "" { - local CONFINT_BAR = 0 + local CONFINT_BAR = 0 } else if "`confbars'" == "" { - local CONFINT_BAR = 1 + local CONFINT_BAR = 1 } - - *Testing to see if the variables used in confbarsnone are - *actually in the list of + + *Testing to see if the variables used in confbarsnone are + *actually in the list of *variables used for the regression/graph. local varTest : list confbarsnone in varlist @@ -77,67 +77,67 @@ cap program drop iegraph noi display "" error 111 } - - - *Checking to see if the save option is used what is the extension related to it. 
+ + + *Checking to see if the save option is used what is the extension related to it. if "`save'" != "" { - + **Find the last . in the path name and assume that * the file extension is what follows. However, with names - * that have multiple dots in it, the user has to explicitly - * specify the file name. - + * that have multiple dots in it, the user has to explicitly + * specify the file name. + **First, will extract the file names from the combination of file - * path and files names. We will use both backslash and forward slash + * path and files names. We will use both backslash and forward slash * to account for differences in Windows/Unix file paths local backslash = strpos(reverse("`save'"), "\") local forwardslash = strpos(reverse("`save'"), "/") - + ** Replacing the value of forward/back slash with the other value if one of the - * values is equal to zero. + * values is equal to zero. if `forwardslash' == 0 local forwardslash = `backslash' if `backslash' == 0 local backslash = `forwardslash' - + **Extracting the file name from the full file path by reversing and breaking the path - * at the first occurence of slash. + * at the first occurence of slash. local file_name = substr(reverse("`save'"), 1, (min(`forwardslash', `backslash')-1)) local file_name = reverse("`file_name'") **If no slashes it means that there is no file path and just a file name, so the name of the file will be * the local save. - if (`forwardslash' == 0 & `backslash' == 0) local file_name = "`save''" - + if (`forwardslash' == 0 & `backslash' == 0) local file_name = "`save''" + *Assign the full file path to the local file_suffix local file_suffix = "`file_name'" - + *Find index for where the file type suffix start - local dot_index = strpos("`file_name'",".") - + local dot_index = strpos("`file_name'",".") + local file_suffix = substr("`file_name'", `dot_index' + 1, .) 
- + *If no dot in the name, then no file extension if `dot_index' == 0 { local save `"`save'.gph"' local file_suffix "gph" local save_export = 0 } - - **If there is one or many . in the file path than loop over + + **If there is one or many . in the file path than loop over * the file path until we have found the last one. - + **Find index for where the file type suffix start. We are re-checking - * to see if there are any more dots than the first one. If there are, + * to see if there are any more dots than the first one. If there are, * then there needs to be an error message saying remove the dots. local dot_index = strpos("`file_suffix'",".") - + *Extract the file index - + if (`dot_index' > 0) { di as error "{pstd}File names cannot have more than one dot. Please only use the dot to separate the filename and file format.{p_end}" error 198 } - + *List of formats to which the file can be exported local nonGPH_formats png tiff gph ps eps pdf wmf emf @@ -149,33 +149,33 @@ cap program drop iegraph *If a file format suffix is specified make sure that it is one of the eight formats allowed. else if `:list file_suffix in nonGPH_formats' != 0 { local save_export = 1 - + if ("`file_suffix'" == "wmf" | "`file_suffix'" == "emf") & "`c(os)'" != "Windows" { di as error "{pstd}The file formats .wmf and .emf are only allowed when using Stata on a Windows computer.{p_end}" error 198 - } + } } - *If a different extension was used then displaying an error. + *If a different extension was used then displaying an error. else { - + di as error "{pstd}You are not using a allowed file format in save(`save'). Only the following formats are allowed: gph `nonGPH_formats'. {p_end}" error 198 } } else { - + *Save option is not used, therefore save export will not be used local save_export = 0 - + } - - + + local count: word count `varlist' // Counting the number of total vars used as treatment. 
local graphCount = `count' + 1 // Number of vars needed for the graph is total treatment vars plus one(control). //Make all vars tempvars (maybe do later) //Make sure that missing is properly handled - + tempvar anyTMT control egen `anyTMT' = rowmax(`varlist') gen `control' = (`anyTMT' == 0) if !missing(`anyTMT') @@ -183,40 +183,40 @@ cap program drop iegraph sum `e(depvar)' if `control' == 1 scalar ctl_N = r(N) scalar ctl_mean = r(mean) - scalar ctl_mean_sd = r(sd) + scalar ctl_mean_sd = r(sd) + - /** Calculate t-statistics **/ - + *If not set in options, use default of 95% if "`confintval'" == "" { local confintval = .95 } - + **Since we calculating each tail separetely we need to convert * the two tail % to one tail % local conintval_1tail = ( `confintval' + (1-`confintval' ) / 2) *degreeds of freedom in regression local df = `e(df_r)' - + *Calculate t-stats to be used local tstats = invt(`df' , `conintval_1tail' ) - + foreach var of local varlist { - + *Caculating confidnece interval - scalar conf_int_min_`var' = (coeff_`var'-(`tstats'*coeff_se_`var') + ctl_mean) - scalar conf_int_max_`var' = (coeff_`var'+(`tstats'*coeff_se_`var') + ctl_mean) + scalar conf_int_min_`var' = (coeff_`var'-(`tstats'*coeff_se_`var') + ctl_mean) + scalar conf_int_max_`var' = (coeff_`var'+(`tstats'*coeff_se_`var') + ctl_mean) **Assigning stars to the treatment vars. 
- + *Perform the test to get p-values - test `var' + test `var' local star_`var' " " - + scalar pvalue =r(p) if pvalue < 0.10 { local star_`var' "*" @@ -232,101 +232,101 @@ cap program drop iegraph } /************************************* - + Set up temp file where results are written - - *************************************/ + + *************************************/ tempfile newTextFile - tempname newHandle - cap file close `newHandle' + tempname newHandle + cap file close `newHandle' file open `newHandle' using "`newTextFile'", text write append - + *Write headers and control value file write `newHandle' /// "position" _tab "xLabeled" _tab "mean" _tab "coeff" _tab "conf_int_min" _tab "conf_int_max" _tab "obs" _tab "star" _n /// - %9.3f (1) _tab "Control" _tab %9.3f (ctl_mean) _tab _tab _tab _tab %9.3f (ctl_N) _tab _n + %9.3f (1) _tab "Control" _tab %9.3f (ctl_mean) _tab _tab _tab _tab %9.3f (ctl_N) _tab _n - tempvar newCounter + tempvar newCounter gen `newCounter' = 2 //First tmt group starts at 2 (1 is control) - + foreach var in `varlist' { - + if "`varlabels'" == "" { - + *Default is to use the varname in legend local var_legend "`var'" - + } else { - + *Option to use the variable label in the legen instead local var_legend : variable label `var' - + } - + *Writing the necessary tables for the graph to list to file. 
- if `: list var in confbarsnone' { + if `: list var in confbarsnone' { file write `newHandle' %9.3f (`newCounter') _tab `"`var_legend'"' /// _tab %9.3f (tmt_mean_`var') _tab %9.3f (coeff_`var') _tab _tab /// - _tab %9.3f (obs_`var') _tab "`star_`var''" _n - - replace `newCounter' = `newCounter' + 1 + _tab %9.3f (obs_`var') _tab "`star_`var''" _n + + replace `newCounter' = `newCounter' + 1 } - else { + else { file write `newHandle' %9.3f (`newCounter') _tab `"`var_legend'"' /// _tab %9.3f (tmt_mean_`var') _tab %9.3f (coeff_`var') /// _tab %9.3f (conf_int_min_`var') _tab %9.3f (conf_int_max_`var') /// - _tab %9.3f (obs_`var') _tab "`star_`var''" _n - + _tab %9.3f (obs_`var') _tab "`star_`var''" _n + replace `newCounter' = `newCounter' + 1 } } - - file close `newHandle' - + + file close `newHandle' + /************************************* - + Create the graph - + *************************************/ - + *Read file with results insheet using `newTextFile', clear - - *Defining various options to go on the graph option. - + + *Defining various options to go on the graph option. + local tmtGroupBars "" local xAxisLabels `"xlabel( "' local legendLabels "" local legendNumbers "" - + forval tmtGroupCount = 1/`graphCount' { - + ************ *Create the bar for this group - + if "`greyscale'" == "" { - colorPicker `tmtGroupCount' `graphCount' + colorPicker `tmtGroupCount' `graphCount' } else { - greyPicker `tmtGroupCount' `graphCount' + greyPicker `tmtGroupCount' `graphCount' } - - local tmtGroupBars `"`tmtGroupBars' (bar mean position if position == `tmtGroupCount', `baroptions' color("`r(color)'") lcolor(black) ) "' - + + local tmtGroupBars `"`tmtGroupBars' (bar mean position if position == `tmtGroupCount', `baroptions' color("`r(color)'") lcolor(black) ) "' + ************ - *Create labels etc. for this group + *Create labels etc. 
for this group - local obs = obs[`tmtGroupCount'] + local obs = obs[`tmtGroupCount'] local stars = star[`tmtGroupCount'] local legendLabel = xlabeled[`tmtGroupCount'] - + local xAxisLabels `"`xAxisLabels' `tmtGroupCount' "(N = `obs') `stars'" "' local legendLabels `"`legendLabels' lab(`tmtGroupCount' "`legendLabel'") "' - local legendNumbers `"`legendNumbers' `tmtGroupCount'"' + local legendNumbers `"`legendNumbers' `tmtGroupCount'"' } - + *Close or comple some strings local xAxisLabels `"`xAxisLabels' ,noticks labsize(medsmall)) "' local legendOption `"legend(order(`legendNumbers') `legendLabels')"' @@ -337,90 +337,90 @@ cap program drop iegraph if `CONFINT_BAR' == 0 { local confIntGraph = "" - } + } else if `CONFINT_BAR' == 1 { local confIntGraph = `"(rcap conf_int_max conf_int_min position, lc(gs)) (scatter mean position, msym(none) mlabs(medium) mlabpos(10) mlabcolor(black))"' } - + local titleOption `" , xtitle("") ytitle("`e(depvar)'") "' if "`save'" != "" { local saveOption saving("`save'", replace) } - + ******************************************************************************* *** Generating the graph axis labels for the y-zero option used.. ******************************************************************************* - + *Calculations needed if yzero used - if ("`yzero'" != "" ) { - + if ("`yzero'" != "" ) { + **Testing if yzero is applicable ******************************** - - **Yzero is only applicable if all values used in the graph - * are all negative or all postive. If there is a mix, then + + **Yzero is only applicable if all values used in the graph + * are all negative or all postive. 
If there is a mix, then * the yzero option will be ignored - + *Finding the min value for all values used in the graph gen row_minvalue = min(mean, conf_int_min, conf_int_max) sum row_minvalue local min_value `r(min)' - - *Finding the min value for all values used in the graph + + *Finding the min value for all values used in the graph gen row_maxvalue = max(mean , conf_int_max, conf_int_min) - sum row_maxvalue + sum row_maxvalue local max_value `r(max)' - + *Locals used for logic below noi di "local signcheck = ((`r(max)' * `r(min)') >= 0) " local signcheck = ((`max_value' * `min_value') >= 0) // dummy local for both signs the same (positive or negative) local negative = (`max_value' <= 0) // dummy for max value still negative (including 0) - - **If yzero() is used and min and max does not have + + **If yzero() is used and min and max does not have * the same sign, then the yzero() is not applicable. - + if (`signcheck' == 0 ) { - + **** yzero is NOT applicable and will be ignored - ************************************************* - + ************************************************* + noi di "{pstd}{error:WARNING:} Option yzero will be ignored as the graph has values both on the the positve and negative part of the y-axis. This only affects formatting of the graph. See helpfile for more details.{p_end}" } else { - + **** yzero is applicable and will be used - ***************************************** - + ***************************************** + *Get max value if only postive values - if (`negative' == 0) { - + if (`negative' == 0) { + sum row_maxvalue local absMax = `max_value' } - + *Get absolute min (will convert back below) if only negative values else { - + sum row_minvalue local absMax = abs(`min_value') } - + *Rounded up to the nearest power of ten - local logAbsMax = ceil(log10(`absMax')) + local logAbsMax = ceil(log10(`absMax')) local absMax = 10 ^ (`logAbsMax') - + *Generating quarter value for y-axis markers. 
local quarter = (`absMax') / 4 - - **Constuct the option to be applied to + + **Constuct the option to be applied to * the graph using the values calculated - if (`negative' == 0) { - + if (`negative' == 0) { + local yzero_option ylabel(0(`quarter')`absMax') } else { - + local absMax = `absMax' * (-1) //Convert back to negative local yzero_option ylabel(`absMax'(`quarter')0) } @@ -430,48 +430,48 @@ cap program drop iegraph ******************************************************************************* ***Graph generation based on if the option save has a export or a save feature. ******************************************************************************* - + *Store all the options in one local local commandline `" `tmtGroupBars' `confIntGraph' `titleOption' `legendOption' `xAxisLabels' title("`basictitle'") `yzero_option' `options' "' - + *Error message used in both save-option cases below. local graphErrorMessage `" Something went wrong while trying to generate the graph. Click {stata di r(cmd) :display graph options } to see what graph options iegraph used. This can help in locating the source of the error in the command. 
"' - + if `save_export' == 0 { - + *Generate a return local with the code that will be used to generate the graph return local cmd `"graph twoway `commandline' `saveOption'"' - + *Generate the graph cap graph twoway `commandline' `saveOption' - + *If error, provide error message and then run the code again allowing the program to crash - if _rc { - + if _rc { + di as error "{pstd}`graphErrorMessage'{p_end}" graph twoway `commandline' `saveOption' } } else if `save_export' == 1 { - + *Generate a return local with the code that will be used to generate the graph return local cmd `"graph twoway `commandline'"' - + *Generate the graph cap graph twoway `commandline' - + *If error, provide error message and then run the code again allowing the program to crash - if _rc { - + if _rc { + di as error "{pstd}`graphErrorMessage'{p_end}" graph twoway `commandline' } - + *Export graph to preferred option graph export "`save'", replace - - } - + + } + if "`restore'" == "" restore } @@ -484,11 +484,11 @@ end ******************************************* cap program drop colorPicker program define colorPicker , rclass - - args groupCount totalNumGroups - + + args groupCount totalNumGroups + if `totalNumGroups' == 2 { - + if `groupCount' == 1 return local color "215 25 28" if `groupCount' == 2 return local color "43 123 182" } @@ -499,14 +499,14 @@ end if `groupCount' == 3 return local color "43 123 182" } else if `totalNumGroups' == 4 { - + if `groupCount' == 1 return local color "215 25 28" if `groupCount' == 2 return local color "255 255 191" if `groupCount' == 3 return local color "171 217 233" if `groupCount' == 4 return local color "43 123 182" } else { - + *For five or more colors we repeat the same pattern local colourNum = mod(`groupCount', 5) @@ -515,62 +515,56 @@ end if `colourNum' == 3 return local color "255 255 191" if `colourNum' == 4 return local color "171 217 233" if `colourNum' == 0 return local color "43 123 182" - + } - + end - + 
******************************************* ******************************************* ******* Greyscale Option ******* - ******* Colour Picker ******* + ******* Colour Picker ******* ******************************************* ******************************************* cap program drop greyPicker program define greyPicker , rclass - - args groupCount totalNumGroups - + + args groupCount totalNumGroups + if `groupCount' == 1 { - + return local color "black" } else if `groupCount' == 2 & `totalNumGroups' <= 3 { - + return local color "gs14" } else { - - local grayscale = round( (`groupCount'-1) * (100 / (`totalNumGroups'-1) )) - + + local grayscale = round( (`groupCount'-1) * (100 / (`totalNumGroups'-1) )) + return local color "`grayscale' `grayscale' `grayscale' `grayscale'" } - + end - + ******************************************* ******************************************* ******* Test if valid ******* - ******* dummies ******* + ******* dummies ******* ******************************************* ******************************************* - + cap program drop testDums program define testDums - + unab dumlist : `0' - - *What we know: - * No all same values in variable (would have been dropped in regression and we test that it is in the regression) - - *Test: all values dummies (missing would have been excluded in regression and we keep if e(sample) - + foreach dumvar of varlist `dumlist' { - - *tab `dumvar', m - + + *Test: all values dummies (missing would have been excluded in regression and we keep if e(sample) cap assert inlist(`dumvar',0,1) if _rc { noi display as error "{phang} The variable `dumvar' is not a dummy. Treatment variable needs to be a dummy (0 or 1) variable. {p_end}" @@ -578,74 +572,91 @@ end error 149 } } - + + /*What we are testing for below: + - We count the number of dummies that each obervation has the value 1 for. 
+ - The count numbers must either fit the case of diff-in-diff or the case of regression with one dummy for each treatment arms + + Regular regression with one dummy for each treatment arm + - Some observations don't have 1 for any dummy - omitted control observations + - No observation has the value 1 in more than one observation - can't be in more than one treatment group + - No traetment group can have no observation with value 1 for that dummy + + Diff-in-Diff + - Some observations don't have 1 for any dummy - omitted controls observations in time = 0 + - Some observation must have value 1 for only the treatment dummy - treatment observations in time = 0 + - Some observation must have value 1 for only the time dummy - control observations in time = 1 + - Some observation must have value 1 for in all three of time, treatment and interaction dummy - treatment observations in time = 1 + + */ + *Count how many dummies is 1 for each observation tempvar dum_count egen `dum_count' = rowtotal(`dumlist') - + *Exactly one dummy is 1, meaning this observation is in one of the treatment arms count if `dum_count' == 1 local dum_count_1 `r(N)' - + *No dummies is 1, meaning this observation is control count if `dum_count' == 0 local dum_count_0 `r(N)' *Exactly 3 dummies are three. Only allowed in the exact case of diff-and-diff regressions count if `dum_count' == 3 - local dum_count_3 `r(N)' - + local dum_count_3 `r(N)' + *Exactly 2 or more than three is never correct. count if `dum_count' == 2 | `dum_count' > 3 local dum_count_2orgt3 `r(N)' - + *Test that there is at least some treatment observations if `dum_count_0' == 0 noi di as error "{phang} There are no control observations. One category must be omitted and it should be the omitted category in the regression. The omitted category will be considerd the control group. See helpfile for more info. 
Disable this test by using option ignoredummytest.{p_end}" - if `dum_count_0' == 0 error 480 - + if `dum_count_0' == 0 error 480 + *Test that there is at least some control observations (this error should be caught by dummies omitted in the regression) if `dum_count_1' == 0 noi di as error "{phang} There are no treatment observations. None of the dummies have observations for which the dummy has the value 1. See helpfile for more info. Disable this test by using option ignoredummytest.{p_end}" if `dum_count_1' == 0 error 480 - + *Test if there are any observations that have two or more than three dummies that is 1 if `dum_count_2orgt3' > 0 noi di as error "{phang} There is overlap in the treatment dummies. The dummies must be mutually exclusive meaning that no observation has the value 1 in more than one treatment dummy. The exception is when you use a diff-and-diff, but this dummies is not a valid diff and diff. See helpfile for more info. Disable this test by using option ignoredummytest.{p_end}" if `dum_count_2orgt3' > 0 error 480 - - *After passing the previous two steps, test if there are cases that are only allowed in diff - if `dum_count_3' > 0 { - + + *After passing the previous two steps, test if there are cases that are only allowed in diff + if `dum_count_3' > 0 { + *Diff-and-diff must have exactly 3 dummies if `:list sizeof dumlist' != 3 noi di as error "{phang} There is overlap in the treatment dummies. The dummies must be mutually exclusive meaning that no observation has the value 1 in more than one treatment dummy. The exception is when you use a diff-and-diff, but this dummies is not a valid diff and diff. See helpfile for more info. 
Disable this test by using option ignoredummytest.{p_end}" if `:list sizeof dumlist' != 3 error 480 - - * Test if valid diff-diff + + * Test if valid diff-diff testDumsDD `dum_count' `dumlist' } - + end - + cap program drop testDumsDD program define testDumsDD - + local dum_count `1' - + **Test that for only two of three dummies there are observations - * that has only that dummy. I.e. the two that is not the + * that has only that dummy. I.e. the two that is not the * interaction. If the interaction is 1, all three shluld be 1. - + *Count how many dummies the condition is above applies to local counter 0 - + *Loop over all dummies forvalues i = 2/4 { - + *Test the number count if ``i'' == 1 & `dum_count' == 1 if `r(N)' > 0 local ++counter - + } *Count that exactly two dummies fullfilledthe condition - if `counter' != 2 noi di as error "{phang} There is overlap in the treatment dummies. The dummies must be mutually exclusive meaning that no observation has the value 1 in more than one treatment dummy. The exception is when you use a diff-and-diff, but this dummies is not a valid diff and diff. See helpfile for more info. Disable this test by using option ignoredummytest.{p_end}"" + if `counter' != 2 noi di as error "{phang} There is overlap in the treatment dummies. The dummies must be mutually exclusive meaning that no observation has the value 1 in more than one treatment dummy. The exception is when you use a diff-and-diff, but this dummies is not a valid diff and diff. See helpfile for more info. Disable this test by using option ignoredummytest.{p_end}"" if `counter' != 2 error 480 - + end diff --git a/src/ado_files/iematch.ado b/src/ado_files/iematch.ado index c6752ffa..2b55f06d 100644 --- a/src/ado_files/iematch.ado +++ b/src/ado_files/iematch.ado @@ -1,15 +1,16 @@ -*! version 5.4 15DEC2017 DIME Analytics lcardosodeandrad@worldbank.org +*! 
version 5.5 26APR2018 DIME Analytics lcardosodeandrad@worldbank.org cap program drop iematch program define iematch - + syntax [if] [in] , /// GRPdummy(varname) /// MATCHvar(varname) /// [ /// IDvar(varname) /// m1 /// - maxdiff(numlist max = 1 min = 1) /// + maxdiff(numlist >0 max = 1 min = 1) /// + maxmatch(numlist >0 integer max = 1 min = 1) /// seedok /// MATCHIDname(string) /// MATCHDIffname(string) /// @@ -21,51 +22,51 @@ ***** qui { - + * Set version version 11.0 - + ******************************** * * Gen temp sort and merge var * ******************************** - + tempvar originalSort ifinvar - - **Generate a variable that is used + + **Generate a variable that is used * to restore original sort. gen `originalSort' = _n preserve - - * String used to store why and how + + * String used to store why and how * many observations are excluded. local obsexcludstring "" - + *Dummy local that is 1 if replace is used if "`replace'" == "" local REPLACE_USED 0 if "`replace'" != "" local REPLACE_USED 1 - - *Deal with if and in and display info. + + *Deal with if and in and display info. if "`if'`in'" != "" { - + * Gen dummy that is 1 when if/in is true. Then - * count when this dummy is not 1. Ther is no way + * count when this dummy is not 1. Ther is no way * to negate `if'`in' gen `ifinvar' = 1 `if'`in' count if `ifinvar' != 1 if `r(N)' > 0 { - + *This is not an error just outputting the number local obsexcludstring "`obsexcludstring'`r(N)' observation(s) were excluded in {inp:if}/{inp:in}: (`if'`in').{break}" - + } - + *Drop variables excluded by `if'`in' keep `if'`in' } - + ******************************** * * Checking ID var (and create if needed) @@ -81,14 +82,14 @@ *Make sure that the default name _ID is not already used cap confirm variable `idvar' if _rc == 0 { - + *Test if option replace is used if `REPLACE_USED' == 1 { *Replace option is used, drop the variable with the name specified drop `idvar' } else { - *Replace is not used, throw an error. 
+ *Replace is not used, throw an error. di as error "{pstd}A variable with name `idvar' is already defined. Either drop this variable manually, use the replace option or specify a variable using idvar() that fully and uniquely identifies the data set.{p_end}" error 110 } @@ -97,8 +98,8 @@ *Generate the ID var from row number gen `idvar' = _n } - - *Make sure that idvar is uniquelly and fully identifying + + *Make sure that idvar is uniquelly and fully identifying cap isid `idvar' if _rc != 0 { @@ -106,20 +107,20 @@ error 450 } - *local indicating assume ID is string - local IDtypeNumeric 0 - + *local indicating assume ID is string + local IDtypeNumeric 0 + *Change local to 1 if ID actally string cap confirm numeric variable `idvar' if _rc == 0 local IDtypeNumeric 1 - + ******************************** * * Checking duplicates in variable names in options * ******************************** - + *Create a local of all varnames used in any option local allnewvars `idvar' `matchidname' `matchdiffname' `matchresultname' `matchcountname' @@ -132,28 +133,31 @@ noi di as error "{pstd}The variable name(s) [`dupnewvars'] was used twice or more in the options that manually name the outcome varaibles. 
Go back and make sure that no name is used more than once.{p_end}" error 198 - } + } ******************************** * * Test the names used for output vars * ******************************** - - iematchMatchVarCheck _matchResult `REPLACE_USED' "`matchresultname'" + + iematchMatchVarCheck _matchResult `REPLACE_USED' "`matchresultname'" local matchResultName "`r(validVarName)'" - + iematchMatchVarCheck _matchID `REPLACE_USED' "`matchidname'" local matchIDname "`r(validVarName)'" - + iematchMatchVarCheck _matchDiff `REPLACE_USED' "`matchdiffname'" local matchDiffName "`r(validVarName)'" - + if "`m1'" != "" { iematchMatchVarCheck _matchCount `REPLACE_USED' "`matchcountname'" local matchCountName "`r(validVarName)'" } - + + *List of output vars used to delete vars that should be replaced by new output vars, used after restore + local outputNames `matchResultName' `matchIDname' `matchDiffName' `matchCountName' + *Option matchcountname() is not allowed if it is not a many-one match if "`matchcountname'" != "" & "`m1'" == "" { di as error "{pstd}Option {inp:matchcountname()} is only allowed in combination with option {inp:m1}.{p_end}" @@ -165,43 +169,43 @@ * Create and label output vars * ******************************** - + * MATCH RESULT VAR label define matchLabel 0 "Not Matched" 1 "Matched" .i "Obs excluded due to if/in" /// .g "Missing value in `grpdummy'" .m "Missing value in `matchvar'" /// - .d "No match within maxdiff", replace - + .d "No match within maxdiff" .t "No more eligible target obs", replace + gen `matchResultName' = . label variable `matchResultName' "Matched obs = 1, not mathched = 0, all other different missing values" label value `matchResultName' matchLabel - + * MATCH ID VAR if `IDtypeNumeric' == 1 gen `matchIDname' = . 
//Main ID var is numeric if `IDtypeNumeric' == 0 gen `matchIDname' = "" //Main ID var is string if "`m1'" != "" label variable `matchIDname' "The ID of the target var in each matched group" //If many to one - if "`m1'" == "" label variable `matchIDname' "The ID of the target var in each matched pair" //If one to one - + if "`m1'" == "" label variable `matchIDname' "The ID of the target var in each matched pair" //If one to one + * MATCH DIFF VAR gen `matchDiffName' = . if "`m1'" != "" label variable `matchDiffName' "The difference in matchvar() between base obs and the target obs it matched with" //If many to one if "`m1'" == "" label variable `matchDiffName' "The difference in matchvar() between base and target obs in each pair" //If one to one - + *Match count, only many-one if "`m1'" != "" { gen `matchCountName' = . label variable `matchCountName' "The number of base obs this target obs matched with" } - + ******************************** * * Keep only relevant vars * ******************************** - + *Keep only input vars and output vars keep `grpdummy' `idvar' `matchvar' `originalSort' `matchResultName' `matchIDname' `matchDiffName' `matchCountName' - + ******************************** * * Checking group dummy @@ -215,18 +219,18 @@ di as error "{pstd}The variable in grpdummy(`grpdummy') is not a dummy variable. The variable is only allowed to have the values 1, 0 or missing. 
Observations with missing varaibles in the grpdummy are ignored by this command.{p_end}" error _rc } - + ********** **Exclude obs with missing value in groupdummy *Count number of obs to be dropped and and output that number if more than zero count if missing(`grpdummy') if `r(N)' > 0 { - + *Prepare the local to be outputted with info on observations excluded local obsexcludstring "`obsexcludstring'`r(N)' observation(s) were excluded due to missing value in grpdummy(`grpdummy').{break}" } - + *Drop obs with missing value in groupvar drop if missing(`grpdummy') @@ -245,33 +249,33 @@ error 109 } - **All variables with value 1 or 0 in the group dummy and - * not excluded by if/in must have a value in matchvar + **All variables with value 1 or 0 in the group dummy and + * not excluded by if/in must have a value in matchvar count if missing(`matchvar') if `r(N)' > 0 { - + *Prepare the local to be outputted with info on observations excluded - local obsexcludstring "`obsexcludstring'`r(N)' observation(s) were excluded due to missing value in matchvar(`matchvar').{break}" + local obsexcludstring "`obsexcludstring'`r(N)' observation(s) were excluded due to missing value in matchvar(`matchvar').{break}" } - + *Drop obs with missing values in match var drop if missing(`matchvar') - - + + ******************************** * * Checking match var and group dummy are unique * - ******************************** - + ******************************** + if "`seedok'" == "" { - - **test that there are no duplicates in match + + **test that there are no duplicates in match * var within each type of observation cap isid `matchvar' `grpdummy' - + if _rc != 0 { - + *Output the error message noi di as error "{pstd}There are base observations or target observations {...}" noi di as error "with duplicate values in matchvar(`matchvar'). 
To guarantee {...}" @@ -279,49 +283,62 @@ noi di as error "this error message after you have set a the seed, or if a {...}" noi di as error "replicable match is not important to you, use option {inp:seedok}{p_end}" error 198 - + } } - + ******************************** * - * Checking that there are more target + * Checking that there are more target * obs than base obs in a 1-to-1 match * - ******************************** - + ******************************** + if "`m1'" == "" { - + *Count number of base vars count if `grpdummy' == 1 local numBaseObs `r(N)' - + *Count number of target vars count if `grpdummy' == 0 local numTrgtObs `r(N)' - + *Test that there are more target - cap assert `numTrgtObs' >= `numBaseObs' - + cap assert `numTrgtObs' >= `numBaseObs' + if _rc != 0 { - + noi di as error "{pstd}There are more base observations than target observations. This is not allowed in a one-to-one match. See option {inp:m1} for an alternative matching where it is allowed to have more base observations than target ovbservations.{p_end}" error _rc } - } + } + + ******************************** + * + * Checking that matchCount is only used with + * many-to-one matching + * + ******************************** + + if "`m1'" == "" & "`maxmatch'" != "" { + + noi di as error "{pstd}The option {inp:maxmatch()} can only be used when option {inp:m1} is used, as restricting to a maximum number of matches is only applicable in a many-to-one match.{p_end}" + error 198 + } ******************************** * * Output exclude string * - ******************************** - - *The exclude string is created above but only displayed - *after all testing of input is done. - + ******************************** + + *The exclude string is created above but only displayed + *after all testing of input is done. 
+ if "`obsexcludstring'" != "" { - + noi di "" noi di "{hline}" noi di "" @@ -333,8 +350,8 @@ ******************************** * Start matching ******************************** - - + + ******************************** * * Creating tempvar used in matching @@ -342,19 +359,24 @@ ******************************** *Initiate the temporary variables used by this command - tempvar prefID prefDiff matched + tempvar prefID prefDiff matched matchcount maxmatchprefid gen `prefDiff' = . gen byte `matched' = 0 *Allow the ID var used to be string if `IDtypeNumeric' == 1 { - gen `prefID' = . + gen `prefID' = . + gen `maxmatchprefid' = . } else { - gen `prefID' = "" - } - + gen `prefID' = "" + gen `maxmatchprefid' = "" + } + + *Gen a variable that indicates for target vars if the max match is reached + gen `matchcount' = . + ** Generate the inverse of the matchvar to sort descending (gsort is too slow), * a random var to seperate two values with the same match var, and the inverse @@ -371,28 +393,28 @@ *Tempvars for matching *Diffvars, they are always numeric - tempvar diffup diffdo valUp_0 valDo_0 valUp_1 valDo_1 + tempvar diffup diffdo valUp_0 valDo_0 valUp_1 valDo_1 local updownTempVars `diffup' `diffdo' `valUp_0' `valDo_0' `valUp_1' `valDo_1' foreach tempVar of local updownTempVars { gen `tempVar' = . } - + *ID vars, allowed to be both numeric and string tempvar IDup IDdo IDup_0 IDdo_0 IDup_1 IDdo_1 local updownIDTempVars `IDup' `IDdo' `IDup_0' `IDdo_0' `IDup_1' `IDdo_1' foreach tempVar of local updownIDTempVars { - + *Allow the ID var used to be string if `IDtypeNumeric' == 1 { gen `tempVar' = . } else { gen `tempVar' = "" - } - } + } + } *************************** * @@ -406,28 +428,28 @@ noi di "" noi di "{hline}{break}" noi di "{pstd}{ul:Matching one-to-one. 
Base observations left to match:}{p_end}" - count if `grpdummy' == 1 & `matched' == 0 - + *Create local to display "obs left to match" and to use in while loop + count if `grpdummy' == 1 & `matched' == 0 local left2Match = `r(N)' noi di "{pstd}`left2Match' " _c - + *Match until no more observations to match. while (`left2Match' > 0) { - + **For all observations still to be matched, assign the preferred * match among the other unmatched observations qui updatePrefDiffPreffID `prefID' `prefDiff' `matchvar' `invsort' `idvar' `grpdummy' `matched' `rand' `invrand' `updownTempVars' `updownIDTempVars' - + *Restrict matches to within maxdiff() if that option is used. if "`maxdiff'" != "" { - + *Omit base observation from matching if diff is to big replace `matched' = 1 if `prefDiff' > `maxdiff' & `grpdummy' == 1 - + *Indicate in result var that this obs did not have valid match within maxdiff() replace `matchResultName' = .d if `prefDiff' > `maxdiff' & `grpdummy' == 1 - + *Removed preferred match if `IDtypeNumeric' == 1 { *IDvar is numeric @@ -436,9 +458,9 @@ else { *IDvar is string replace `prefID' = "" if `prefDiff' > `maxdiff' - } + } } - + *If two observations mutually prefer each other, then indicate both of them as matched. 
replace `matched' = 1 if `matched' == 0 & `prefID' == `idvar'[_n-1] & `prefID'[_n-1] == `idvar' replace `matched' = 1 if `matched' == 0 & `prefID' == `idvar'[_n+1] & `prefID'[_n+1] == `idvar' @@ -449,17 +471,21 @@ noi di "`left2Match' " _c } - + *End formatting for the "base obs left to match" noi di "{p_end}" _c } - *************************** * - * Many to one match + * Many to one match with no restriction on # matches * *************************** - else { + else if "`maxmatch'" == "" { + + *Start outputting the countdown + noi di "" + noi di "{hline}{break}" + noi di "{pstd}{ul:Matching many-to-one.}{p_end}" **For all observations to be matched, assign the preferred * match among the other unmatched observations @@ -467,10 +493,10 @@ *Restrict matches to within maxdiff() if that option is used. if "`maxdiff'" != "" { - + *Indicate in result var that this obs did not have valid match within maxdiff() replace `matchResultName' = .d if `prefDiff' > `maxdiff' & `grpdummy' == 1 - + *Removed preferred match if `IDtypeNumeric' == 1 { *IDvar is numeric @@ -479,36 +505,136 @@ else { *IDvar is string replace `prefID' = "" if `prefDiff' > `maxdiff' - } + } } - + *Assign it's own ID as pref ID for all target vars replace `prefID' = `idvar' if `grpdummy' == 0 - - * Replace the _matchCount var with number of base observations in each - * match group. Each group is all base observation plus the target + + * Replace the _matchCount var with number of base observations in each + * match group. Each group is all base observation plus the target * observation, therefore (_N - 1) bys `prefID' : replace `matchCountName' = _N - 1 if !missing(`prefID') - - **Replace prefID to missing for target obs that had no base + **Replace prefID to missing for target obs that had no base * obs matched to it. T - if `IDtypeNumeric' == 1 { + if `IDtypeNumeric' == 1 { *IDvar is numeric replace `prefID' = . 
if `matchCountName' == 0 } else { *IDvar is string replace `prefID' = "" if `matchCountName' == 0 - } - + } + *Only target obs with base obs prefering it are matched replace `matched' = 1 if `matchCountName' != 0 - + *Remove values for target obs that were not matched replace `matchCountName' = . if `matchCountName' == 0 + } + *************************** + * + * Many to one match with restriction on # matches + * + *************************** + else { + + noi di "max count" + *pause + + *Start outputting the countdown + noi di "" + noi di "{hline}{break}" + noi di "{pstd}{ul:Matching many-to-one. Base observations left to match:}{p_end}" + + *Create local to display "obs left to match" and to use in while loop + count if `grpdummy' == 1 & `matched' == 0 + local left2Match = `r(N)' + noi di "{pstd}`left2Match' " _c + + *Match until no more observations to match. + while (`left2Match' > 0) { + + **For all observations to be matched, assign the preferred + * match among the other unmatched observations + updatePrefDiffPreffID `prefID' `prefDiff' `matchvar' `invsort' `idvar' `grpdummy' `matched' `rand' `invrand' `updownTempVars' `updownIDTempVars' + + *Restrict matches to within maxdiff() if that option is used. + if "`maxdiff'" != "" { + + *Omit base observation from matching if diff is to big + replace `matched' = 1 if `prefDiff' > `maxdiff' & `grpdummy' == 1 + *Indicate in result var that this obs did not have valid match within maxdiff() + replace `matchResultName' = .d if `prefDiff' > `maxdiff' & `grpdummy' == 1 + + *Removed preferred match + if `IDtypeNumeric' == 1 { + *IDvar is numeric + replace `prefID' = . 
if `prefDiff' > `maxdiff' + } + else { + *IDvar is string + replace `prefID' = "" if `prefDiff' > `maxdiff' + } + } + + *If a base observation is mutually preferred by a target observation + replace `matched' = 1 if `grpdummy' == 1 & `matched' == 0 & `prefID' == `idvar'[_n-1] & `prefID'[_n-1] == `idvar' + replace `matched' = 1 if `grpdummy' == 1 & `matched' == 0 & `prefID' == `idvar'[_n+1] & `prefID'[_n+1] == `idvar' + + *Set maxmatchprefid to the matched id for matched base obs, and its own id for target obs + replace `maxmatchprefid' = `prefID' if `grpdummy' == 1 & `matched' == 1 + replace `maxmatchprefid' = `idvar' if `grpdummy' == 0 + + *By the maxmatchprefid, count how many base observations (all obs + * minus target) that are matched to this target obs + bys `maxmatchprefid' : replace `matchcount' = _N - 1 if `matchResultName' != .d + + *Set the target obs as matched if the max match count is reached (matching one at the time so no risk of overstepping) + replace `matched' = 1 if `grpdummy' == 0 & `matchcount' == (`maxmatch') & `matchResultName' != .d + + *Update local to display "obs left to match" and to use in while loop + count if `grpdummy' == 1 & `matched' == 0 + local left2Match = `r(N)' + noi di "`left2Match' " _c + + *Test that there are still target obs left to match against + count if `grpdummy' == 0 & `matched' == 0 + if `r(N)' == 0 { + + *set left to match to -1 to exit while loop and to output message after loop + local left2Match = -1 + + *Set all unmatched base obs to .t (no more eligible target obs) + replace `matchResultName' = .t if `grpdummy' == 1 & `matched' == 0 + } + + } + + *End the outputted count down on how many observations left to match + noi di "{p_end}" _c + + *If ran out of target out put that + if `left2Match' == -1 { + + noi di "" + noi di "" + noi di "{pstd}No more target obs to match{p_end}" + } + + *Set all target obs with at least 1 matched base obs as matched + replace `matched' = 1 if `grpdummy' == 0 & `matchcount' >= 1 
& `matchResultName' != .d + + *Remove match diff for target obs (does not make sense with mutliple obs) + replace `prefDiff' = . if `grpdummy' == 0 + + *Set the match count output var to the number of matched base obs + replace `matchCountName' = `matchcount' if `matched' == 1 & `matchResultName' != .d + + } *Update all return vars replace `matchDiffName' = `prefDiff' if `matched' == 1 @@ -517,10 +643,10 @@ *Remove the best match value in obs that did not have a match within maxdiff() replace `matchDiffName' = . if `matchResultName' == .d - + *Matched observations are give value 1 in result var replace `matchResultName' = 1 if `matched' == 1 & `matchResultName' != .d - + *Target obs not used replace `matchResultName' = 0 if `matched' == 0 & `grpdummy' == 0 @@ -535,35 +661,44 @@ *************************** * - * Merge match results to + * Merge match results to * original data and assign * remaining missing values * to the result var * - *************************** + *************************** + + **Drop any vars with same name as the output vars. The command has already + * tested that replace was used if variable already exists. If variable does + * not exist nothing is done + foreach outputVar of local outputNames { + + *Drop vars with same name as output vars. + cap drop `outputVar' + } *Merge the results with the original data - tempvar mergevar - merge 1:1 `originalSort' using `mergefile', gen(`mergevar') - - *remaining missing values are listed in ascending order of importance. + tempvar mergevar + merge 1:1 `originalSort' using `mergefile', gen(`mergevar') + + *remaining missing values are listed in ascending order of importance. *Meaning that if a variable is both .m and .i then it will be .i as it *is assigned afterwards below. 
*Missing matching var replace `matchResultName' = .m if missing(`matchvar') - + *Msising dummy var replace `matchResultName' = .g if missing(`grpdummy') - + *Excluded in if/in if "`if'`in'" != "" { - + tempvar ifinvar gen `ifinvar' = 1 `if'`in' replace `matchResultName' = .i if `ifinvar' != 1 } - + *comress the variables generated compress `matchIDname' `matchDiffName' `matchResultName' `matchCountName' @@ -571,7 +706,7 @@ noi outputTable `matchResultName' `grpdummy' *Restore the oridinal sort - sort `originalSort' + sort `originalSort' } @@ -582,7 +717,7 @@ end program define iematchMatchVarCheck , rclass args defaultName replace_used userName - + if "`defaultName'" == "_matchID" local optionName matchidname if "`defaultName'" == "_matchDiff" local optionName matchdiffname if "`defaultName'" == "_matchResult" local optionName matchresult @@ -605,7 +740,7 @@ end noi di as error "{pstd}The new name specified in `optionName'(`userName') is not allowed to be _ID, `Oth1', `Oth2', or `Oth3'{p_end}" error 198 } - + *Test if name is set manually if "`userName'" != "" { *Use the manually set name @@ -615,19 +750,19 @@ end *No manually set name, use deafult name local validMatchVarname `defaultName' } - + *Make sure that the manually entered name is not already used cap confirm variable `validMatchVarname' if _rc == 0 { - + if `replace_used' == 1 { //Replace is used, drop the old var drop `validMatchVarname' } else { - + *Replace is not used. Prepare an error message and throw error - + if "`userName'" != "" { *Error message for user specified name s local nameErrorString "The variable name specified in `optionName'(`userName')" @@ -636,8 +771,8 @@ end *Error message for user specified name s local nameErrorString "A variable with name `validMatchVarname'" } - - *Trow error + + *Throw error noi di as error "{pstd}`nameErrorString' is already defined in the data set. Either drop this variable, use the replace option or specify a another variable name using `optionName'()." 
error 110 @@ -715,13 +850,13 @@ end program define updateBestValID args grpvl matchval idvar grpvar matched bestVal bestID - - *Test if ID var is string or numeric + + *Test if ID var is string or numeric local IDNumeric 0 - cap confirm numeric variable `idvar' + cap confirm numeric variable `idvar' if _rc == 0 local IDNumeric 1 - - + + *Reset all values replace `bestVal' = . if `IDNumeric' == 1 replace `bestID' = . @@ -740,54 +875,55 @@ end - ** This function creates the output table. + ** This function creates the output table. cap program drop outputTable - program define outputTable - + program define outputTable + qui { - args resultVar grpdum - - *List the texts used in the first column in the + args resultVar grpdum + + *List the texts used in the first column in the local text_1 " 1 Matched" local text_0 " 0 Not matched" local text_d ".d No match within maxdiff()" local text_i ".i Excluded using if/in" local text_g ".g Missing grpdummy()" - local text_m ".m Missing matchvar()" - - *The minimum width of the - local firstColWidth = max(strlen(" `resultVar'") , strlen(" value and result")) + local text_m ".m Missing matchvar()" + local text_t ".t No more target obs" + + *The minimum width of the + local firstColWidth = max(strlen(" `resultVar'") , strlen(" value and result")) + + levelsof `resultVar' , missing local(resultsUsed) - levelsof `resultVar' , missing local(resultsUsed) - foreach result of local resultsUsed { - + *Remove the . 
from the local to match the string local result = subinstr("`result'",".","",1) - - * Test if the text in the first column is longer + + * Test if the text in the first column is longer * for this row than previous row local firstColWidth = max(`firstColWidth' , strlen("`text_`result''")) } - - * Create a local that indicates if there is column + + * Create a local that indicates if there is column * for obs with missing value in tmt dummy local missingGrpDum 0 count if missing(`grpdum') if `r(N)' > 0 local missingGrpDum 1 - - + + *Set all locals that determins column width - + local C1 "{col 4}{c |}" - + di "`firstColWidth'" - - local col1width = `firstColWidth' + 1 - local hli1 "{hline `col1width'}" + + local col1width = `firstColWidth' + 1 + local hli1 "{hline `col1width'}" local cen1 "{center `col1width':" - - local col2border = `firstColWidth' + 6 + + local col2border = `firstColWidth' + 6 local C2 "{col `col2border'}{c |}" local C2a "{col `col2border'}" @@ -795,112 +931,109 @@ end local C3 "{col `col3border'}{c |}" local col4border = `firstColWidth' + 30 - local C4 "{col `col4border'}{c |}" - + local C4 "{col `col4border'}{c |}" + local grpdumCentre 23 //Overspills automatically - + local lastT "{c RT}" local lastTdown "{c RT}" local lastC "{c BRC}" - + if `missingGrpDum' { - + local lastT "{c +}{hline 9}{c RT}" local lastTdown "{c BT}{hline 9}{c RT}" local lastC "{c BT}{hline 9}{c BRC}" local missTitle " missing " local grpdumCentre 33 - + } - + noi di "" noi di "" noi di "{hline}" noi di "" noi di "{pstd}{ul:Output of Matching Result:}{p_end}" noi di "" - noi di "{col 5}`cen1'`resultVar'}`C2a' {centre `grpdumCentre':`grpdum' }" + noi di "{col 5}`cen1'`resultVar'}`C2a' {centre `grpdumCentre':`grpdum' }" noi di "{col 4}{c LT}`hli1'{c +}{hline `grpdumCentre'}{c RT}" noi di "`C1'`cen1'value and result}`C2' 1 (base) 0 (target) `missTitle'{c |}" noi di "{col 4}{c LT}`hli1'{c +}{hline 10}{c +}{hline 12}`lastT'" - + *This is the roworder for the output table - 
local resultTableOrder 1 0 .d .i .g .m - + local resultTableOrder 1 0 .d .t .i .g .m + foreach result of local resultTableOrder { - + *Test if this result was used if `:list result in resultsUsed' { - + *Count base observations in this result type qui count if `resultVar' == `result' & `grpdum' == 1 local numBase = trim("`: display %16.0gc `r(N)''") - + *Count target observations in this result type qui count if `resultVar' == `result' & `grpdum' == 0 - local numTarg = trim("`: display %16.0gc `r(N)''") - + local numTarg = trim("`: display %16.0gc `r(N)''") + *Count observations that are neither base or target in this result type if `missingGrpDum' { - + qui count if `resultVar' == `result' & missing(`grpdum') local numMiss = trim("`: display %16.0gc `r(N)''") - + local missCol "{ralign 9 :`numMiss' }{c |}" - + } - + *Prepare the local that displays the columns with numbers local numCols "`C2'{ralign 10 :`numBase' }`C3'{ralign 12 :`numTarg' }`C4'`missCol'" - + *Remove the . from the local to match the string local result = subinstr("`result'",".","",1) - + *Output the table row noi di "`C1'`text_`result''`numCols'" - - } + + } } - + *Output line before N per group row noi di "{col 4}{c LT}`hli1'{c +}{hline 10}{c +}{hline 12}`lastT'" - + *Caculate N per group row for base observations qui count if `grpdum' == 1 local numBase = trim("`: display %16.0gc `r(N)''") - + *Caculate N per group row for target observations qui count if `grpdum' == 0 local numTarg = trim("`: display %16.0gc `r(N)''") - - + + *Caculate N per group row for observations neither base or target if `missingGrpDum' { - + qui count if missing(`grpdum') local numMiss = trim("`: display %16.0gc `r(N)''") - + local missCol "{ralign 9 :`numMiss' }{c |}" - - } - + + } + *Output N per group row noi di "`C1'`cen1'N per group}`C2'{ralign 10 :`numBase' }`C3'{ralign 12 :`numTarg' }`C4'`missCol'" - + *Output line before total N row noi di "{col 4}{c LT}`hli1'{c +}{hline 10}{c BT}{hline 12}`lastTdown'" - + 
*Count total N qui count local numTot = trim("`: display %16.0gc `r(N)''") - + *Output total N row noi di "`C1'`cen1'Total N}`C2'{centre `grpdumCentre':`numTot'}`C4'" - + *Output bottom line noi di "{col 4}{c BLC}`hli1'{c BT}{hline `grpdumCentre'}{c BRC}" } end - - - diff --git a/src/ado_files/ietoolkit.ado b/src/ado_files/ietoolkit.ado new file mode 100644 index 00000000..86f92cb0 --- /dev/null +++ b/src/ado_files/ietoolkit.ado @@ -0,0 +1,36 @@ +*! version 5.5 26APR2018 DIME Analytics lcardosodeandrad@worldbank.org + +capture program drop ietoolkit +program ietoolkit, rclass + + * UPDATE THESE LOCALS FOR EACH NEW VERSION PUBLISHED + local version "5.5" + local versionDate "26APR2018" + + + syntax [anything] + + /********************** + Error messages + **********************/ + + * Make sure that no arguments were passed + if "`anything'" != "" { + noi di as error "This command does not take any arguments, write only {it:ietoolkit}" + error 198 + } + + /********************** + Output + **********************/ + + * Prepare returned locals + return local versiondate "`versionDate'" + return scalar version = `version' + + * Display output + noi di "" + noi di _col(4) "This version of ietoolkit installed is version " _col(54)"`version'" + noi di _col(4) "This version of ietoolkit was released on " _col(54)"`versionDate'" + +end diff --git a/src/help_files/iebaltab.sthlp b/src/help_files/iebaltab.sthlp index 1713ae62..35568294 100644 --- a/src/help_files/iebaltab.sthlp +++ b/src/help_files/iebaltab.sthlp @@ -1,5 +1,5 @@ {smcl} -{* 15 Dec 2017}{...} +{* 26 Apr 2018}{...} {hline} help for {hi:iebaltab} {hline} @@ -8,7 +8,7 @@ help for {hi:iebaltab} {phang2}{cmdab:iebaltab} {hline 2} produces balance tables with multiple groups or treatment arms -{phang2}For a more descriptive discussion on the intended usage and work flow of this +{phang2}For a more descriptive discussion on the intended usage and work flow of this command please see the {browse 
"https://dimewiki.worldbank.org/wiki/Iebaltab":DIME Wiki}. {title:Syntax} @@ -17,9 +17,9 @@ command please see the {browse "https://dimewiki.worldbank.org/wiki/Iebaltab":DI {cmdab:iebaltab} {it:balancevarlist} [{help if:if}] [{help in:in}] , {cmdab:grpv:ar(}{it:varname}{cmd:)} {c -(} {cmdab:save(}{it:{help filename}}{cmd:)} | {cmdab:savet:ex(}{it:{help filename}}{cmd:)} | {cmdab:browse} {c )-} [ -{it:{help iebaltab##columnoptions:column_options} {help iebaltab##labeloptions:label_options}} -{it:{help iebaltab##statsoptions:stats_options} {help iebaltab##ftestoptions:ftest_options}} -{it: {help iebaltab##display:display_options} {help iebaltab##exportoptions:export_options}} +{it:{help iebaltab##columnoptions:column_options} {help iebaltab##labeloptions:label_options}} +{it:{help iebaltab##statsoptions:stats_options} {help iebaltab##ftestoptions:ftest_options}} +{it: {help iebaltab##display:display_options} {help iebaltab##exportoptions:export_options}} ] {phang2}where {it:balancevarlist} is one or several variables (from here on called balance variables) for which the command @@ -70,15 +70,19 @@ will test for differences across the catagories in grpvar({it:varname}). 
{marker ftestoptions}{...}
{pstd}{it: F-test:}{p_end}
-{synopt :{cmdab:ft:est}}Include an F-test for joint significance{p_end}
+{synopt :{cmdab:ft:est}}Include a row with the F-test for joint significance of all balance variables{p_end}
{synopt :{cmdab:fm:issok}}Suppress the error caused by missing values in F-test{p_end}
{synopt :{cmd:fnoobs}}Do not display number of observations from the F-test regressions{p_end}

{marker displayoptions}{...}
{pstd}{it: Table display options:}{p_end}
{synopt :{cmdab:pt:test}}Show p-values instead of difference-in-mean between the groups in the column for t-tests{p_end}
-{synopt :{cmdab:pf:test}}Show p-values instead of F-statistics in the row for F-tests{p_end}
+{synopt :{cmdab:not:test}}Suppresses the column for pairwise t-tests{p_end}
+{synopt :{cmdab:normd:iff}}Adds a column with pairwise normalized difference{p_end}
+{synopt :{cmdab:feqt:est}}Adds a column with F-test for joint orthogonality of each balance variable across all treatment arms{p_end}
+{synopt :{cmdab:pf:test}}Show p-values instead of F-statistics for all F-tests{p_end}
{synopt :{cmdab:pb:oth}}Identical to specifying both {cmd:pttest} and {cmd:pftest}{p_end}
+{synopt :{cmdab:std:ev}}Displays standard deviations instead of standard errors{p_end}
{synopt :{cmdab:star:levels(}{it:{help numlist:numlist}}{cmd:)}}Manually set the three significance levels used for significance stars{p_end}
{synopt :{cmdab:starsno:add}}Do not add any stars to the table{p_end}
{synopt :{cmdab:form:at(}{it:{help format:%fmt}}{cmd:)}}Apply Stata formats to the values outputted in the table{p_end}
@@ -259,7 +263,7 @@ in the {it:nametitlestring}. The title can consist of several words. Everything
of a string or a "@" will be included in the title.{p_end}

{phang}{cmdab:onerow} displays the number of observations in additional row at the bottom of the table if each group has the same
-number of observations for all variables in {it:balancevarlist}. This also applies to number of clusters. 
If not specified, the +number of observations for all variables in {it:balancevarlist}. This also applies to number of clusters. If not specified, the number of observations (and clusters) per variable per group is displayed on the same row in additional column besides the mean value.{p_end} {pstd}{it: Statistics and data modification:}{p_end} @@ -311,8 +315,8 @@ missing, but regular missing values will be replaced.{p_end} {pstd}{it: F-test:}{p_end} -{phang}{cmdab:ft:est} includes an F-test for joint significance across all balance variables. See the description section above for details on how the F-test estimation -regressions are specified. All options specified in the {it:Statistics and data modification} section above, also applies to the F-tests.{p_end} +{phang}{cmdab:ft:est} includes a row with an F-test for joint significance of all balance variables. See the description section above for details on how the F-test estimation +regressions are specified. All options specified in the {it:Statistics and data modification} section above also apply to the F-tests.{p_end} {phang}{cmdab:fm:issok} suppress the error caused by missing values in any of the balance variables in the F-test. Stata always drops observations with missing values in at least one the variables used in a regression. This command throws an error if any observation has missing @@ -325,10 +329,19 @@ are excluded from F-tests. 
Also see {cmd:balmiss()} and {cmd:balmissreg()} for o

{phang}{cmdab:pt:test} makes this command show p-values instead of difference-in-mean between the groups in the column for t-tests.{p_end}

-{phang}{cmdab:pf:test} makes this command show p-values instead of F-statistics in the row for F-tests.{p_end}
+{phang}{cmdab:not:test} suppresses the column for pairwise t-tests across treatment arms.{p_end}
+
+{phang}{cmdab:normd:iff} adds a column with pairwise normalized difference across treatment arms.{p_end}
+
+{phang}{cmdab:feqt:est} adds a column with an F-test for joint orthogonality of each variable across all treatment arms. Please note that this F-test is different from the one performed by option {cmd:ftest},
+which adds a row indicating if all variables are jointly significant when each pair of treatment arms is compared.{p_end}
+
+{phang}{cmdab:pf:test} makes this command show p-values instead of F-statistics in the row for F-tests created using option {cmd:ftest} and/or the columns for F-test created using option {cmd:feqtest}.{p_end}

{phang}{cmdab:pb:oth} is identical to specifying both {cmd:pttest} and {cmd:pftest}.{p_end}

+{phang}{cmdab:std:ev} displays standard deviations in parenthesis instead of standard errors.{p_end}
+
{phang}{cmdab:star:levels(}{it:{help numlist:numlist}}{cmd:)} manually sets the three significance levels used for significance stars.
Use decimals in descending order. The default is (.1 .05 .01) where .1 corresponds to one star, .05 to two stars and .01 to three stars.{p_end}

@@ -365,9 +378,9 @@ The note width is a multiple of text width. If not specified, default width is t

{phang}{cmdab:texdoc:ument} creates a stand-alone TeX document that can be readily compiled, without the need
to import it to a different file. As default, {cmd:savetex()} creates a fragmented TeX file consisting only of a tabular environment.{p_end}

-{phang}{cmd:texvspace(}{it:string}{cmd:)} sets the size of the line space between two variable rows. 
{it:string} must consist of a numeric value -and one of the following units: "cm", "mm", "pt", "in", "ex" or "em". Note that the resulting line space displayed will be equal to the -specified value minus the height of one line of text. Default is "3ex". For more information on units, +{phang}{cmd:texvspace(}{it:string}{cmd:)} sets the size of the line space between two variable rows. {it:string} must consist of a numeric value +and one of the following units: "cm", "mm", "pt", "in", "ex" or "em". Note that the resulting line space displayed will be equal to the +specified value minus the height of one line of text. Default is "3ex". For more information on units, {browse "https://en.wikibooks.org/wiki/LaTeX/Lengths":check LaTeX lengths manual}. {p_end} {phang}{cmd:texcolwidth(}{it:string}{cmd:)} limits the width of table's first column so that a line break is added when a variable's name diff --git a/src/help_files/ieboilsave.sthlp b/src/help_files/ieboilsave.sthlp index f452ce3c..a96605a2 100644 --- a/src/help_files/ieboilsave.sthlp +++ b/src/help_files/ieboilsave.sthlp @@ -1,14 +1,17 @@ {smcl} -{* 15 Dec 2017}{...} +{* 26 Apr 2018}{...} {hline} help for {hi:ieboilsave} {hline} {title:Title} -{phang2}{cmdab:ieboilsave} {hline 2} Checks that a data sets follows DECIE +{phang2}{cmdab:ieboilsave} {hline 2} Checks that a data sets follows DECIE standards for a data set, and tag the data set with meta data. +{phang2}For a more descriptive discussion on the intended usage and work flow of this +command please see the {browse "https://dimewiki.worldbank.org/wiki/Ieboilsave":DIME Wiki}. 
+ {title:Syntax} {phang2} @@ -21,7 +24,7 @@ help for {hi:ieboilsave} {synoptset 18}{...} {synopthdr:options} {synoptline} -{synopt :{cmdab:idvar:name(}{it:varname}{cmd:)}}specifies the ID-variable +{synopt :{cmdab:idvar:name(}{it:varname}{cmd:)}}specifies the ID-variable uniquely and fully identifying the data set{p_end} {synopt :{cmdab:missingok}}regular missing values are allowed{p_end} {synopt :{cmdab:diout:put}}display output summarizing results of tests made and meta data stored{p_end} @@ -31,99 +34,99 @@ help for {hi:ieboilsave} {title:Description} -{pstd}{cmdab:ieboilsave} standardizes the boilerplate (section of standardized - code) used at DECIE before saving a data set. This includes checking that - the ID variable is uniquely and fully identifying the data set. The test use the +{pstd}{cmdab:ieboilsave} standardizes the boilerplate (section of standardized + code) used at DECIE before saving a data set. This includes checking that + the ID variable is uniquely and fully identifying the data set. The test use the command {help isid}, but provides a more useful output. Only one variable is allowed to be the ID variable, see more in {help ieboilsave##IDnotes:Notes on ID variables} below. - -{pstd}The command also checks that no regular missing values are used. Missing values should - be replaced with the extended missing values .a, .b, ... , .z where each + +{pstd}The command also checks that no regular missing values are used. Missing values should + be replaced with the extended missing values .a, .b, ... , .z where each extended missing value represents a reason for why the value is missing. - + {pstd}The command also tags meta data to the data set with information useful to future users. The meta data is tagged to the data set using {cmdab:char}. 
- Char stores meta data to the data set using an associative array, - see {help char} for an explanation on how to access data stored with + Char stores meta data to the data set using an associative array, + see {help char} for an explanation on how to access data stored with char. The charnames (which is the equivalence to key or index in associative - arrays) is listed below. When applicable these values are taken from the + arrays) is listed below. When applicable these values are taken from the system parameters stored in {cmdab:c()} (see {help creturn}), and the - {cmdab:c()} parameters used by this command are listed below. When a data - set already have these charnames, the old values are overwritten with the + {cmdab:c()} parameters used by this command are listed below. When a data + set already have these charnames, the old values are overwritten with the new ones. - + {p2colset 5 24 26 2} {p2col : Charname}Meta data associated with the charname{p_end} {p2line} -{p2col :{cmdab:_dta[ie_idvar]}}stores the name of the ID variable that uniquely +{p2col :{cmdab:_dta[ie_idvar]}}stores the name of the ID variable that uniquely and fully identifies the data set.{p_end} -{p2col :{cmdab:_dta[ie_version]}}stores the Stata version used (not installed, - see {help ieboilstart: ieboilstart} for more details) to create the data +{p2col :{cmdab:_dta[ie_version]}}stores the Stata version used (not installed, + see {help ieboilstart: ieboilstart} for more details) to create the data set. Retrieved from {cmdab:c(version)}.{p_end} -{p2col :{cmdab:_dta[ie_date]}}stores the date the file was saved. Copying files, - sharing files over sync services or emails may change the time stamp shown +{p2col :{cmdab:_dta[ie_date]}}stores the date the file was saved. Copying files, + sharing files over sync services or emails may change the time stamp shown in folder. 
Retrieved from {cmdab:c(current_date)}.{p_end} -{p2col :{cmdab:_dta[ie_name]}}stores user name chosen when installing the - instance of Stata that was used when generating the file. Retrieved from +{p2col :{cmdab:_dta[ie_name]}}stores user name chosen when installing the + instance of Stata that was used when generating the file. Retrieved from {cmdab:c(username)}. Storing this meta data is optional.{p_end} -{p2col :{cmdab:_dta[ie_host]}}stores computer name chosen when installing the - instance of the operative system that was used when generating the file. +{p2col :{cmdab:_dta[ie_host]}}stores computer name chosen when installing the + instance of the operative system that was used when generating the file. Retrieved from {cmdab:c(hostname)}. Storing this meta data is optional.{p_end} -{p2col :{cmdab:_dta[ie_boilsave]}}stores a short summary of the result of +{p2col :{cmdab:_dta[ie_boilsave]}}stores a short summary of the result of running {cmdab:ieboilsave}. See option {cmdab:dioutput} below for more details.{p_end} {p2line} {title:Options} -{phang}{cmdab:idvar:name(}{it:varname}{cmd:)} specifies the ID variable that is - supposed to be fully and uniquely identifying the data set. This command +{phang}{cmdab:idvar:name(}{it:varname}{cmd:)} specifies the ID variable that is + supposed to be fully and uniquely identifying the data set. This command uses the command {help isid:isid} but provides a more helpful output in case - the ID variable has duplicates or missing values. Using multiple ID variables + the ID variable has duplicates or missing values. Using multiple ID variables to uniquely identify a data set is not best practice, and only one variable - is therefore allowed in {it:varname}. See {help ieboilsave##IDnotes:Notes on ID variables} below + is therefore allowed in {it:varname}. 
See {help ieboilsave##IDnotes:Notes on ID variables} below read a justification for why it is bad practice.{p_end} -{phang}{cmdab:diout:put} displays the same information stored in {cmdab:_dta[ie_boilsave]} in - the output window in Stata. This information includes the results of the ID - variable test, the missing values test and all the meta data stored +{phang}{cmdab:diout:put} displays the same information stored in {cmdab:_dta[ie_boilsave]} in + the output window in Stata. This information includes the results of the ID + variable test, the missing values test and all the meta data stored with {cmdab:char}. Unless this option is specified, {cmdab:ieboilsave} runs silently as long as it does not cause any errors.{p_end} - -{phang}{cmdab:missingok} allows the data set to have the standard missing values, + +{phang}{cmdab:missingok} allows the data set to have the standard missing values, see {help missing values}. Since changing regular missing values to extended - missing values is time consuming it might not always be a good use of a + missing values is time consuming it might not always be a good use of a Stata coder's time to do this for intermediary data sets. But since it should be done for all final data sets, the default is to not allow regular missing values.{p_end} -{phang}{cmdab:tagnoname} prevents the command to tag the data set with meta data - containing user name and computer (host) name. User name and computer name - can be very useful when facing issues related to replicability. For privacy +{phang}{cmdab:tagnoname} prevents the command to tag the data set with meta data + containing user name and computer (host) name. User name and computer name + can be very useful when facing issues related to replicability. 
For privacy reasons this can be disabled, but best practice is to keep it enabled at least for all data sets that are not meant for public dissemination.{p_end} -{phang}{cmdab:tagnohost} is similar to {cmdab:tagnoname} but it only prevents the +{phang}{cmdab:tagnohost} is similar to {cmdab:tagnoname} but it only prevents the command to tag the data set with meta data containing the computer name. - Specifying {cmdab:tagnohost} is redundant if {cmdab:tagnoname} is already + Specifying {cmdab:tagnohost} is redundant if {cmdab:tagnoname} is already specified.{p_end} - + {title:Examples} {pstd} {hi:Example 1.} {pmore}{inp:ieboilsave, idvarname(respondent_ID)} -{pmore}In the example above, the command checks that the variable {it:respondent_ID} -uniquely and fully identifies that data set, checks that there is no missing -values that are not among the extended missing values and saves meta data to the +{pmore}In the example above, the command checks that the variable {it:respondent_ID} +uniquely and fully identifies that data set, checks that there is no missing +values that are not among the extended missing values and saves meta data to the data set using char. {pstd} {hi:Example 2.} {pmore}{inp:ieboilsave, idvarname(respondent_ID) dioutput} -{pmore}The only difference between example 1 and this example is that in this - example the command outputs the information stored in _dta[ie_boilsave]. The +{pmore}The only difference between example 1 and this example is that in this + example the command outputs the information stored in _dta[ie_boilsave]. The output will look similar to this: {pmore}. {inp:ieboilsave, idvarname(respondent_ID) dioutput}{p_end} @@ -136,69 +139,67 @@ data set using char. 
{pmore}{inp:local localname : char _dta[ie_boilsave]}{p_end} {pmore}{inp:di "`localname'"}{p_end} -{pmore}Example 3 would generate exactly the same output as example 2 (formatted +{pmore}Example 3 would generate exactly the same output as example 2 (formatted slightly different) but this example shows how to display the information - in char _dta[ie_boilsave] at any point after running the command. For - example, if you receive new data set where _dta[ie_boilsave] is already - specified, then the two last lines of code is how you easiest access that + in char _dta[ie_boilsave] at any point after running the command. For + example, if you receive new data set where _dta[ie_boilsave] is already + specified, then the two last lines of code is how you easiest access that information in a readable way. {marker IDnotes}{...} {title:Notes on ID variables} {pstd}The concept of {it:Unique and Fully Identifying IDs} (in short unique IDs) and - {it:Unit of Observation} are two concepts that cannot be emphasized enough in - data management best practices. The unit of observation is what each row - represents in a data set, and the unique ID should be unique for each - instance of the unit of observation. This is mostly the same unit as the - respondent during data collection. - -{pstd}For example let's say the respondents during a data collection was farmers, + {it:Unit of Observation} are two concepts that cannot be emphasized enough in + data management best practices. The unit of observation is what each row + represents in a data set, and the unique ID should be unique for each + instance of the unit of observation. This is mostly the same unit as the + respondent during data collection. + +{pstd}For example let's say the respondents during a data collection was farmers, then the data set is downloaded from the servers with farmers as unit of observation. However, let's say that the analysis was carried out on plot level. 
The data - set prepared for the plot level regressions no longer has farmer as - unit of observation, it is plots and the data set should be identified using - plot IDs not farmer IDs. If farmer IDs are unique for each farmer, and plot - IDs are unique among the plots for each farmer, then technically, those two - IDs uniquely identifies the data set. While it is technically true, it is - not good practice. Impact Evaluations run over many years and there is - likely going to be several different people working with the data set, and - the slightest confusion in ID variables can lead to large analysis - mistakes. It can lead to data sets merged incorrectly, that can lead to - duplicates and it can lead to several observations included multiple times + set prepared for the plot level regressions no longer has farmer as + unit of observation, it is plots and the data set should be identified using + plot IDs not farmer IDs. If farmer IDs are unique for each farmer, and plot + IDs are unique among the plots for each farmer, then technically, those two + IDs uniquely identifies the data set. While it is technically true, it is + not good practice. Impact Evaluations run over many years and there is + likely going to be several different people working with the data set, and + the slightest confusion in ID variables can lead to large analysis + mistakes. It can lead to data sets merged incorrectly, that can lead to + duplicates and it can lead to several observations included multiple times in a regression therefore inflating N and underestimating the p-value, causing false positives. -{pstd}Best practice is to always create a single variable that uniquely and fully - identifies every unit in the unit of observation before saving a data - set. Common practice is to make this the first (leftmost) variable in a data set - using {help order:order}. 
It is also best practice to always start by making - sure you fully understand the unit of observation in data sets you get from - someone else. After you think you know the unit of observation, make sure - that you have a single variable that uniquely and fully identifies the +{pstd}Best practice is to always create a single variable that uniquely and fully + identifies every unit in the unit of observation before saving a data + set. Common practice is to make this the first (leftmost) variable in a data set + using {help order:order}. It is also best practice to always start by making + sure you fully understand the unit of observation in data sets you get from + someone else. After you think you know the unit of observation, make sure + that you have a single variable that uniquely and fully identifies the unit of observation in the data set. - -{pstd}These concepts are also central to modern database design. It is approached - somewhat differently as databases mostly consists of more than one data set, - but the principles are the same. There are a lot of reading material online + +{pstd}These concepts are also central to modern database design. It is approached + somewhat differently as databases mostly consists of more than one data set, + but the principles are the same. There are a lot of reading material online search for {it:primary keys} and {it:normalization} in database design resources. 
{title:Acknowledgements}

{phang}I would like to acknowledge the help in testing and proofreading I received in
relation to this command and help file from (in alphabetic order):{p_end}
-{pmore}Michell Dong{break}Paula Gonzales
-
+{pmore}Michell Dong{break}Paula Gonzales
+
{title:Author}

-{phang}Kristoffer Bjärkefur, The World Bank, DECIE
+{phang}Kristoffer Bjärkefur, The World Bank, DECIE

-{phang}Please send bug-reports, suggestions and requests for clarifications
+{phang}Please send bug-reports, suggestions and requests for clarifications
writing "ietools ieboilsave" in the subject line to:{break} kbjarkefur@worldbank.org
-
-{phang}You can also see the code, make comments to the code, see the version
+
+{phang}You can also see the code, make comments to the code, see the version
history of the code, and submit additions or edits to the code through
the github repository of ietoolkit:{break} {browse "https://github.com/worldbank/ietoolkit"}
-
-
diff --git a/src/help_files/ieboilstart.sthlp b/src/help_files/ieboilstart.sthlp
index f207d661..fc93a421 100644
--- a/src/help_files/ieboilstart.sthlp
+++ b/src/help_files/ieboilstart.sthlp
@@ -1,5 +1,5 @@
 {smcl}
-{* 15 Dec 2017}{...}
+{* 26 Apr 2018}{...}
 {hline}
 help for {hi:ieboilstart}
 {hline}
@@ -8,6 +8,8 @@ help for {hi:ieboilstart}
 {phang}{cmdab:ieboilstart} {hline 2} Harmonizes settings across team members in
 the same project to the longest extent technically possible.

+{phang2}For a more descriptive discussion on the intended usage and work flow of this
+command please see the {browse "https://dimewiki.worldbank.org/wiki/Ieboilstart":DIME Wiki}.

 {phang}{hi:DISCLAIMER} {hline 1} Due to technical reasons, it is impossible to
 guarantee that different types of Stata (version number, Small/IC/SE/MP
@@ -173,8 +175,8 @@ details {help ieboilstart##comp:below}.
 unless there is something specific to a newer version that is required for
 any dofile. 
Only major and recent versions are allowed in order to reduce errors and complexity. The valid versions are 11.0, 11.1, 11.2, 12.0, 12.1, 13.0, 13.1, - 14.0, 14.1, 14.2, 15.0 and all versions without decimals. However, it is recommended - to use a .1 over a .0 version. .1 is free of charge if you already have the + 14.0, 14.1, 14.2, 15.0 and all versions without decimals. However, it is recommended + to use a .1 over a .0 version. .1 is free of charge if you already have the corresponding .0 and .1 includes bug fixes to the functions introduced in .0. All versions of Stata can be set to run any older version of Stata but not a newer. {p_end} diff --git a/src/help_files/iecompdup.sthlp b/src/help_files/iecompdup.sthlp index 499c6bca..6da7977a 100644 --- a/src/help_files/iecompdup.sthlp +++ b/src/help_files/iecompdup.sthlp @@ -1,5 +1,5 @@ {smcl} -{* 15 Dec 2017}{...} +{* 26 Apr 2018}{...} {hline} help for {hi:iecompdup} {hline} @@ -7,20 +7,19 @@ help for {hi:iecompdup} {title:Title} {phang2}{cmdab:iecompdup} {hline 2} Compares two duplicates and generate a list -of the variables where the duplicates are identical and a list of the variables where the +of the variables where the duplicates are identical and a list of the variables where the duplicates differ -{phang2}For a more descriptive discussion on the intended usage and work flow of this +{phang2}For a more descriptive discussion on the intended usage and work flow of this command please see the {browse "https://dimewiki.worldbank.org/wiki/Ieduplicates":DIME Wiki}. Note that this command share wiki article with {help ieduplicates}. - {title:Syntax} {phang2} {cmdab:iecompdup} {it:idvariable} -, {cmdab:id(}{it:string}{cmd:)} [{cmdab:didi:fference} {cmdab:keepdiff:erence} +, {cmdab:id(}{it:string}{cmd:)} [{cmdab:didi:fference} {cmdab:keepdiff:erence} {cmdab:keepoth:er(}{it:varlist}{cmd:)} {cmdab:more2ok}] {marker opts}{...} @@ -36,48 +35,48 @@ Note that this command share wiki article with {help ieduplicates}. 
{title:Description} -{pstd}{cmdab:iecompdup} compare all variables for observations that are duplicates in -ID variable {it:varname} and the duplicated value is {cmdab:id(}{it:string}{cmd:)}. Duplicates can -be identified and corrected with its sister command {help ieduplicates}. {cmdab:iecompdup} -is intended to assist in the process of investigating why two observations are duplicates in -the ID variable, and what correction is appropriate. +{pstd}{cmdab:iecompdup} compare all variables for observations that are duplicates in +ID variable {it:varname} and the duplicated value is {cmdab:id(}{it:string}{cmd:)}. Duplicates can +be identified and corrected with its sister command {help ieduplicates}. {cmdab:iecompdup} +is intended to assist in the process of investigating why two observations are duplicates in +the ID variable, and what correction is appropriate. -{pstd}{cmdab:iecompdup} returns two locals {cmd:r(matchvars)} and {cmd:r(diffvars)}. {cmd:r(matchvars)} returns -a list of the names of all variables for which the two observations +{pstd}{cmdab:iecompdup} returns two locals {cmd:r(matchvars)} and {cmd:r(diffvars)}. {cmd:r(matchvars)} returns +a list of the names of all variables for which the two observations have identical values, unless both values are missing values or the empty -string. {cmd:r(diffvars)} returns a list of the names -of all variables where the two observations are not identical. - -{pstd}For example, if a duplicate is found in a dataset downloaded from a data -collection server (ODK or similar) and the duplicates were due to redundant submissions -of the same data, then {cmd:r(diffvars)} would only include the submission time -variable and any unique key used by the server. In such case, one observation can be -dropped without risking losing information, since it is an identical submission of +string. {cmd:r(diffvars)} returns a list of the names +of all variables where the two observations are not identical. 
+ +{pstd}For example, if a duplicate is found in a dataset downloaded from a data +collection server (ODK or similar) and the duplicates were due to redundant submissions +of the same data, then {cmd:r(diffvars)} would only include the submission time +variable and any unique key used by the server. In such case, one observation can be +dropped without risking losing information, since it is an identical submission of the exact same observation. (See Examples section below for a more detailed suggestion on how to use the command. ) {title:Options} -{phang}{cmdab:id(}{it:string}{cmd:)} is used to specify the ID value that the +{phang}{cmdab:id(}{it:string}{cmd:)} is used to specify the ID value that the duplicates share. Both text strings and numeric values are allowed. -{phang}{cmdab:didi:fference} is used to display the list of all variables for which -the ID variable duplicates differ. The default is to provide this list in a local, and only +{phang}{cmdab:didi:fference} is used to display the list of all variables for which +the ID variable duplicates differ. The default is to provide this list in a local, and only display the number of variables that differ. -{phang}{cmdab:keepdiff:erence} is used to return the data set with only the ID -variable and variables that differs between the duplicates. This means that the -command would drop all variables where the duplicates are identical or both +{phang}{cmdab:keepdiff:erence} is used to return the data set with only the ID +variable and variables that differs between the duplicates. This means that the +command would drop all variables where the duplicates are identical or both missing. It also drops all observations but the two duplicates compared. 
-{phang}{cmdab:keepoth:er(}{it:varlist}{cmd:)} is used to keep more variables than the variables +{phang}{cmdab:keepoth:er(}{it:varlist}{cmd:)} is used to keep more variables than the variables that differs between the duplicates when {cmdab:keepdifference} is specified. The command can keep, -for example, a variable with information about who collected these data. This +for example, a variable with information about who collected these data. This option returns an error if it is specified not in conjunction with {cmdab:keepdifference}. -{phang}{cmdab:more2ok} allows running the command on groups of more than two observations, -although only the first two duplicates (in the order the data is sorted) are compared. In a group of three duplicates, -run the command three times on each combination of the three duplicates. A future update that +{phang}{cmdab:more2ok} allows running the command on groups of more than two observations, +although only the first two duplicates (in the order the data is sorted) are compared. In a group of three duplicates, +run the command three times on each combination of the three duplicates. A future update that includes the possibility to compare more than one case is under consideration{p_end} {title:Stored results} @@ -93,7 +92,7 @@ includes the possibility to compare more than one case is under consideration{p_ {p2col 5 15 19 2: Scalars}{p_end} {synopt:{cmd:r(nummatch)}}The number of variables in {cmd:r(matchvars)}{p_end} {synopt:{cmd:r(numdiff)}}The number of variables in {cmd:r(matchvars)}{p_end} -{synopt:{cmd:r(numnomiss)}}The number of variables for which at least one of +{synopt:{cmd:r(numnomiss)}}The number of variables for which at least one of the duplicates has a non-missing value. 
By definition, {cmd:r(numnomiss)} equals the sum of {cmd:r(nummatch)} and {cmd:r(numdiff)}{p_end} {p2colreset}{...} @@ -107,57 +106,57 @@ A series of examples on how to specify command, and how to evaluate output: {phang2}{inp:iecompdup HH_ID , id(55424) didifference}{p_end} -{pmore}In the example above, let's say that there are two observations in the data set with the value 55424 -for variable HH_ID. HH_ID holds an ID that was uniquely assigned to each household. Before continuing the analysis, one must +{pmore}In the example above, let's say that there are two observations in the data set with the value 55424 +for variable HH_ID. HH_ID holds an ID that was uniquely assigned to each household. Before continuing the analysis, one must investigate why two observations were assigned the same ID. iecompdup is a great place to start. -{pmore}Specifying the command as above compares the two observations that both -have value 55424 in the ID variable. The output displayed will -only be number of non-missing variables for which the two observations have identical -values, and the number of non-missing variables for which the two observations -have different values. The list of those two sets of variables are stored as locals. +{pmore}Specifying the command as above compares the two observations that both +have value 55424 in the ID variable. The output displayed will +only be number of non-missing variables for which the two observations have identical +values, and the number of non-missing variables for which the two observations +have different values. The list of those two sets of variables are stored as locals. The data set is returned exactly as it was. -{pmore}The locals stored in {cmd:r(diffvars)} and {cmd:r(nummatch)} can be used -to provide information on why the two observations are duplicates. A suggested +{pmore}The locals stored in {cmd:r(diffvars)} and {cmd:r(nummatch)} can be used +to provide information on why the two observations are duplicates. 
A suggested method to evaluate these two lists are presented in Example 2 below. {pstd}{hi:Example 2.} {phang2}{inp:iecompdup HH_ID , id(55424) didifference}{p_end} -{pmore}This example makes the same assumptions as example 1 that there are two -observations in the data set with the value 55424 for variable HH_ID. The only -difference is that the option didifference is specified. The output is the same -as example 1 but with the addition that the list stored in {cmd:r(diffvars)} is +{pmore}This example makes the same assumptions as example 1 that there are two +observations in the data set with the value 55424 for variable HH_ID. The only +difference is that the option didifference is specified. The output is the same +as example 1 but with the addition that the list stored in {cmd:r(diffvars)} is displayed in the output window. The data set is returned exactly as it was. -{pmore}The method to evaluate the output presented in this example focus on the -variables for which the duplicates are different. Therefore, start by looking at +{pmore}The method to evaluate the output presented in this example focus on the +variables for which the duplicates are different. Therefore, start by looking at the list of variables displayed by {inp:didifference}. Do the variables with different values across the duplicates contain observation data like "number of household members" or "annual income", or are they -submission information such as "submission ID", "server key" or "submission time"? -The answer to this question could suggest one of the three solutions below. Note -that this method should only be used as a guiding rule of thumb, all suggested +submission information such as "submission ID", "server key" or "submission time"? +The answer to this question could suggest one of the three solutions below. Note +that this method should only be used as a guiding rule of thumb, all suggested solutions should be evaluated qualitatively as well. -{pmore}{ul:Solution 1. 
All variables contain submission information data.} The -far most common mistake leading to duplicates in household surveys is that the same -observation data is submitted to the server twice. If that is the case, then only submission -information variables would be outputted by the command, not any observation data. +{pmore}{ul:Solution 1. All variables contain submission information data.} The +far most common mistake leading to duplicates in household surveys is that the same +observation data is submitted to the server twice. If that is the case, then only submission +information variables would be outputted by the command, not any observation data. If this is the case, then you can safely delete either of the observations. - -{pmore}{ul:Solution 2. Most variables contain submission information data, but a few contain observation data.} If a -few observation data variables are displayed together with submission information -variables then it is likely that it is the same observation but some variables -were edited after the first submission. Follow up with your field team to see + +{pmore}{ul:Solution 2. Most variables contain submission information data, but a few contain observation data.} If a +few observation data variables are displayed together with submission information +variables then it is likely that it is the same observation but some variables +were edited after the first submission. Follow up with your field team to see why some variables were changed. See the tips in example 3 below before following up. -{pmore}{ul:Solution 3. Many variables contain observation data.} If many -observation data variables are displayed together with submission data variables, +{pmore}{ul:Solution 3. Many variables contain observation data.} If many +observation data variables are displayed together with submission data variables, then it is likely that two different observations have accidently been given the -same ID. 
That is especially likely if location variables or name variables are -different, or if the values for enumerator and/or supervisor are different. See the +same ID. That is especially likely if location variables or name variables are +different, or if the values for enumerator and/or supervisor are different. See the tips in example 3 below before following up. {pmore}The cases listed above will solve the vast majority of duplicates encountered in @@ -167,17 +166,17 @@ household surveys. The appropriate correction can afterwards be applied using th {phang2}{inp:iecompdup HH_ID , id(55424) didifference keepdifference keepother(village enumerator supervisor)}{p_end} -{pmore}This example again makes the same assumptions as example 1 and example 2 that there are two -observations in the data set with the value 55424 for variable HH_ID. This -time {inp:keepdifference} and {inp:keepother()} are specified. Those two options -can be used to provide additional information to the field team when following up -based on solution 2 and solution 3 in example 2. {inp:keepdifference} drops all -variables apart from the ID variable and the variables in {cmd:r(diffvars)}. Any -variables in {inp:keepother()} are also kept. All observations apart from the -duplicates with the ID specified in {inp:id()} are also dropped. This data can be -exported to excel and sent to a field team that can see how the observations differ. -In this example the field team can also see in which village the data was collected, -as well as the name of the enumerator and the supervisor. Any other information +{pmore}This example again makes the same assumptions as example 1 and example 2 that there are two +observations in the data set with the value 55424 for variable HH_ID. This +time {inp:keepdifference} and {inp:keepother()} are specified. Those two options +can be used to provide additional information to the field team when following up +based on solution 2 and solution 3 in example 2. 
{inp:keepdifference} drops all
+variables apart from the ID variable and the variables in {cmd:r(diffvars)}. Any
+variables in {inp:keepother()} are also kept. All observations apart from the
+duplicates with the ID specified in {inp:id()} are also dropped. This data can be
+exported to excel and sent to a field team that can see how the observations differ.
+In this example the field team can also see in which village the data was collected,
+as well as the name of the enumerator and the supervisor. Any other information
helpful to the field team can be entered in {inp:keepother()}.

{title:Acknowledgements}

@@ -187,15 +186,13 @@ helpful to the field team can be entered in {inp:keepother()}.

{title:Author}

-{phang}Kristoffer Bjärkefur, The World Bank, DECIE
+{phang}Kristoffer Bjärkefur, The World Bank, DECIE

-{phang}Please send bug-reports, suggestions and requests for clarifications
+{phang}Please send bug-reports, suggestions and requests for clarifications
writing "ietools iecompdup" in the subject line to:{break} kbjarkefur@worldbank.org
-
-{phang}You can also see the code, make comments to the code, see the version
+
+{phang}You can also see the code, make comments to the code, see the version
history of the code, and submit additions or edits to the code through
the github repository of ietoolkit:{break} {browse "https://github.com/worldbank/ietoolkit"}
-
-
diff --git a/src/help_files/iedropone.sthlp b/src/help_files/iedropone.sthlp
index d0e04b42..02806086 100644
--- a/src/help_files/iedropone.sthlp
+++ b/src/help_files/iedropone.sthlp
@@ -1,22 +1,22 @@
 {smcl}
-{* 15 Dec 2017}{...}
+{* 26 Apr 2018}{...}
 {hline}
 help for {hi:iedropone}
 {hline}

 {title:Title}

-{phang2}{cmdab:iedropone} {hline 2} Same function as {help drop} but prevents
- that additional observations are unintentionally dropped. 
- -{phang2}For a more descriptive discussion on the intended usage and work flow of this -command please see the {browse "https://dimewiki.worldbank.org/wiki/Iedropone":DIME Wiki}. +{phang2}{cmdab:iedropone} {hline 2} Same function as {help drop} but prevents + that additional observations are unintentionally dropped. + +{phang2}For a more descriptive discussion on the intended usage and work flow of this +command please see the {browse "https://dimewiki.worldbank.org/wiki/Iedropone":DIME Wiki}. {title:Syntax} {phang2} -{cmdab:iedropone} [if] , {cmdab:n:umobs(}{it:integer}{cmd:)} - {cmdab:mvar(}{it:varname}{cmd:)} {cmdab:mval(}{it:list of values}{cmd:)} +{cmdab:iedropone} [if] , {cmdab:n:umobs(}{it:integer}{cmd:)} + {cmdab:mvar(}{it:varname}{cmd:)} {cmdab:mval(}{it:list of values}{cmd:)} {cmd:zerook}] {marker opts}{...} @@ -33,102 +33,102 @@ command please see the {browse "https://dimewiki.worldbank.org/wiki/Iedropone":D {title:Description} {pstd}This commands might be easier to understand by following - the {help iedropone##examples:examples below} before reading the + the {help iedropone##examples:examples below} before reading the description or the explanations of the options. -{pstd}{cmdab:iedropone} has the identical purpose as {help drop} when dropping - observations. However, {cmdab:iedropone} safeguards that no additional - observations are unintentionally dropped, or that changes are made to the - data so that the observations that are supposed to be dropped are no longer dropped. - -{pstd}{cmdab:iedropone} checks that no more or fewer observations than intended are +{pstd}{cmdab:iedropone} has the identical purpose as {help drop} when dropping + observations. However, {cmdab:iedropone} safeguards that no additional + observations are unintentionally dropped, or that changes are made to the + data so that the observations that are supposed to be dropped are no longer dropped. 
+ +{pstd}{cmdab:iedropone} checks that no more or fewer observations than intended are dropped. For example, in the case that one observation has been identified to - be dropped, then we want to make sure that when re-running the do-file - no other observations are dropped even if more observations are added to that data - set or changed in any other way. + be dropped, then we want to make sure that when re-running the do-file + no other observations are dropped even if more observations are added to that data + set or changed in any other way. {pstd}While the default is 1, {cmdab:iedropone} allows the user to set any another number - of observation that should be dropped. If the number of observations that fit the - drop condition is different, then the command will throw an error. + of observation that should be dropped. If the number of observations that fit the + drop condition is different, then the command will throw an error. {marker optslong} {title:Options} -{phang}{cmdab:n:umobs(}{it:integer}{cmd:)} this allows the user to set the - number of observation that should be dropped. The default is 1 but any +{phang}{cmdab:n:umobs(}{it:integer}{cmd:)} this allows the user to set the + number of observation that should be dropped. The default is 1 but any positive integer can be used. The command throws an error if any other number of observations match the drop condition. -{phang}{cmd:zerook} allows that no observations are dropped. The default is that +{phang}{cmd:zerook} allows that no observations are dropped. The default is that an error is thrown if no observations are dropped. - -{phang}{cmdab:mvar(}{it:varname}{cmd:)} and {cmdab:mval(}{it:list of values}{cmd:)} allows - that multiple values in one variable are dropped. These two options must be used together. - If the variable in {cmd:mvar()} is a string variable and some of the values in {cmd:mval()} - includes spaces, then the list of values in {cmd:mval()} must be listed exactly as in example 4 below. 
The - command loops over the values in {cmd:mval()} and drops the observations that + +{phang}{cmdab:mvar(}{it:varname}{cmd:)} and {cmdab:mval(}{it:list of values}{cmd:)} allows + that multiple values in one variable are dropped. These two options must be used together. + If the variable in {cmd:mvar()} is a string variable and some of the values in {cmd:mval()} + includes spaces, then the list of values in {cmd:mval()} must be listed exactly as in example 4 below. The + command loops over the values in {cmd:mval()} and drops the observations that satisfy the {it:if} condition and each of the value in {cmd:mval()}. For example:{p_end} - + {pmore}{inp:iedropone if village == 100 , mvar(household_id) mval(21 22 23)} - + {pmore}is identical to: - + {pmore}{inp:iedropone if village == 100 & household_id == 21}{break}{inp:iedropone if village == 100 & household_id == 22}{break}{inp:iedropone if village == 100 & household_id == 23} -{pmore}The default is that exactly one observation should be dropped for each - value in {cmd:mval()} unless {cmd:numobs()} or {cmd:zerook} is used. If those +{pmore}The default is that exactly one observation should be dropped for each + value in {cmd:mval()} unless {cmd:numobs()} or {cmd:zerook} is used. If those options are used then, then they apply to all values in {cmd:mval()} separately. {phang}{cmdab:mval(}{it:list of values}{cmd:)}, see {cmdab:mvar(}{it:varname}{cmd:)} above. -{marker examples} +{marker examples} {title:Examples} {pstd} {hi:Example 1.} {pmore}{inp:iedropone if household_id == 712047} -{pmore}Let's say that we have identified the household with the ID 712047 to be - incorrect and it should be dropped. Identical to {inp:drop if household_id == 712047} - but it will test that exactly one observation is dropped each time the do-file runs. - This guarantees that we will get an error message that no observation is dropped - if someone makes a change to the ID. 
Otherwise we would unknowingly keep this +{pmore}Let's say that we have identified the household with the ID 712047 to be + incorrect and it should be dropped. Identical to {inp:drop if household_id == 712047} + but it will test that exactly one observation is dropped each time the do-file runs. + This guarantees that we will get an error message that no observation is dropped + if someone makes a change to the ID. Otherwise we would unknowingly keep this incorrect observation in our data set. - -{pmore}Similarly, if a new observation is added that is the correct household with ID 712047, - then both observation would be dropped without warning if we would have - used {inp:drop if household_id == 712047}. {cmd:iedropone} will make sure that + +{pmore}Similarly, if a new observation is added that is the correct household with ID 712047, + then both observation would be dropped without warning if we would have + used {inp:drop if household_id == 712047}. {cmd:iedropone} will make sure that our drop condition are applied as intended even if the data set is changed. - + {pstd} {hi:Example 2.} {pmore}{inp:iedropone if household_id == 712047 & household_head == "Bob Smith"} -{pmore}Let's say we have added a new household with the ID 712047. In order to - drop only one of those observations we must expand the if condition to +{pmore}Let's say we have added a new household with the ID 712047. In order to + drop only one of those observations we must expand the if condition to indicate which one of them we want to drop. - + {pstd} {hi:Example 3.} {pmore}{inp:iedropone if household_id == 712047, numobs(2)} -{pmore}Let's say we added a new household with the ID 712047 but we want to drop - exactly both of them, then we can use the option {cmd:numobs()} like above. 
- The command will now throw an error if not exactly two observations have the
 household ID 712047.
-
{pstd} {hi:Example 4.}

{pmore}{inp:iedropone if village == 100, mvar(household_head) mvar(`" "Bob Smith" "Ann Davitt" "Blessing Johnson" "')}

-{pmore}If the values in {cmd:mvar()} are strings with empty spaces then then each
- value have to be enclosed in double qoutes and the full list needs to start
+{pmore}If the values in {cmd:mvar()} are strings with empty spaces then each
+ value has to be enclosed in double quotes and the full list needs to start
with {inp:`"} and end with {inp:"'}.

{title:Acknowledgements}

-{phang}I would like to acknowledge the help in testing and proofreading I
+{phang}I would like to acknowledge the help in testing and proofreading I
received in relation to this command and help file from (in alphabetic order):{p_end}

{pmore}Paula Gonzalez-Martinez{break}Seungmin Lee{break}Mrijan Rimal{break}
diff --git a/src/help_files/ieduplicates.sthlp b/src/help_files/ieduplicates.sthlp
index 16ddfe8a..7863e012 100644
--- a/src/help_files/ieduplicates.sthlp
+++ b/src/help_files/ieduplicates.sthlp
@@ -1,15 +1,15 @@
 {smcl}
-{* 15 Dec 2017}{...}
+{* 26 Apr 2018}{...}
 {hline}
 help for {hi:ieduplicates}
 {hline}

 {title:Title}

-{phang2}{cmdab:ieduplicates} {hline 2} Identify duplicates in ID variable and export them in
+{phang2}{cmdab:ieduplicates} {hline 2} Identify duplicates in ID variable and export them in
 an Excel file that also can be used to correct the duplicates

-{phang2}For a more descriptive discussion on the intended usage and work flow of this
+{phang2}For a more descriptive discussion on the intended usage and work flow of this
command please see the {browse 
"https://dimewiki.worldbank.org/wiki/Ieduplicates":DIME Wiki}. {title:Syntax} @@ -17,8 +17,8 @@ command please see the {browse "https://dimewiki.worldbank.org/wiki/Ieduplicates {phang2} {cmdab:ieduplicates} {it:ID_varname} -, {cmdab:fol:der(}{it:string}{cmd:)} {cmdab:unique:vars(}{it:varlist}{cmd:)} -[{cmdab:keep:vars(}{it:varlist}{cmd:)} {cmdab:tostringok} {cmdab:droprest} +, {cmdab:fol:der(}{it:string}{cmd:)} {cmdab:unique:vars(}{it:varlist}{cmd:)} +[{cmdab:keep:vars(}{it:varlist}{cmd:)} {cmdab:tostringok} {cmdab:droprest} {cmdab:nodaily} {cmdab:suf:fix(}{it:string}{cmd:)} {cmdab:min:precision(}{it:numlist}{cmd:)]} {phang2}where {it:ID_varname} is the variable that will be controlled for duplicates @@ -40,174 +40,174 @@ command please see the {browse "https://dimewiki.worldbank.org/wiki/Ieduplicates {title:Description} {dlgtab:In brief:} -{pstd}{cmd:ieduplicates} outputs a report with any duplicates in {it:ID_varname} to an Excel file -and return the data set without those duplicates. Each time {cmd:ieduplicates} executes, it first +{pstd}{cmd:ieduplicates} outputs a report with any duplicates in {it:ID_varname} to an Excel file +and return the data set without those duplicates. Each time {cmd:ieduplicates} executes, it first looks for an already created version of the Excel report, and applies any corrections already listed in it -before generating a new report. Note that there is no need import the corrections manually. This command -reads the corrections directly from the Excel file as long as the is saved at the same folder location +before generating a new report. Note that there is no need import the corrections manually. This command +reads the corrections directly from the Excel file as long as the is saved at the same folder location with the same file name. {dlgtab:In more detail:} -{pstd}{cmd:ieduplicates} takes duplicates observations in {it:ID_varname} and export -them to an Excel report in directory {cmdab:fol:der(}{it:string}{cmd:)}. 
{it:ID_varname} -is per definition not unique in this Excel Report and {cmdab:unique:vars(}{it:varlist}{cmd:)} +{pstd}{cmd:ieduplicates} takes duplicates observations in {it:ID_varname} and export +them to an Excel report in directory {cmdab:fol:der(}{it:string}{cmd:)}. {it:ID_varname} +is per definition not unique in this Excel Report and {cmdab:unique:vars(}{it:varlist}{cmd:)} needs to be specified in order to have a unique reference for each row in the Excel report. The -{it:varlist} in {cmdab:unique:vars(}{it:varlist}{cmd:)} must uniquely and fully identify all +{it:varlist} in {cmdab:unique:vars(}{it:varlist}{cmd:)} must uniquely and fully identify all observations in the Excel report, either on its own or together with {it:ID_varname}. {cmd:ieduplicates} then returns the data set without these duplicates. -{pstd}The Excel report includes three columns called {it:correct}, {it:drop} and {it:newID}. -Each of them represents one way to correct the duplicates. If {it:correct} is indicated with -a "Yes" then that observation is kept unchanged, if {it:drop} is indicated with a "yes" then -that observation is deleted and if {it:newID} is indicated then that observation is assigned -a new ID using the value in column {it:newID}. After corrections are entered, the report should +{pstd}The Excel report includes three columns called {it:correct}, {it:drop} and {it:newID}. +Each of them represents one way to correct the duplicates. If {it:correct} is indicated with +a "Yes" then that observation is kept unchanged, if {it:drop} is indicated with a "yes" then +that observation is deleted and if {it:newID} is indicated then that observation is assigned +a new ID using the value in column {it:newID}. After corrections are entered, the report should be saved in the same location {cmdab:fol:der(}{it:string}{cmd:)} without any changes to its name. 
-{pstd}Before outputting a new report {cmd:ieduplicates} always checks if there already are an -Excel report with corrections and applies those corrections before generating a new report. It is -at this stage that {cmdab:unique:vars(}{it:varlist}{cmd:)} is required as it otherwise is impossible +{pstd}Before outputting a new report {cmd:ieduplicates} always checks if there already are an +Excel report with corrections and applies those corrections before generating a new report. It is +at this stage that {cmdab:unique:vars(}{it:varlist}{cmd:)} is required as it otherwise is impossible to know which duplicate within a group of duplicates that should be corrected in which way. -{pstd}{cmd:ieduplicates} keeps only one observation if a group of duplicates are duplicates in -all variables across the data set without any action is needed in the Excel report. These cases +{pstd}{cmd:ieduplicates} keeps only one observation if a group of duplicates are duplicates in +all variables across the data set without any action is needed in the Excel report. These cases are not even exported to the Excel report. -{pstd}{cmdab:keep:vars(}{it:varlist}{cmd:)} allows the user to include more variables in the Excel report -that can help identifying each duplicate is supposed to be corrected. The report also includes two -columns {it:initials} and {it:notes}. Using these columns is not required but it is recommended to use {it:initials} -to keep track of who decided how to correct that duplicate and to use {it:notes} to document why +{pstd}{cmdab:keep:vars(}{it:varlist}{cmd:)} allows the user to include more variables in the Excel report +that can help identifying each duplicate is supposed to be corrected. The report also includes two +columns {it:initials} and {it:notes}. Using these columns is not required but it is recommended to use {it:initials} +to keep track of who decided how to correct that duplicate and to use {it:notes} to document why the correction was chosen. 
If {it:initials} and {it:notes} are used, then the Excel report also functions -as an excellent documentation of the correction made. +as an excellent documentation of the correction made. {space 4}{hline} {title:Options} -{phang}{cmdab:fol:der(}{it:string}{cmd:)} specifies the folder where previous Excel -files will be looked for, and where the updated Excel Report will be exported. Note that -this folder needs to have a subfolder called {it:Daily} where the duplicate report +{phang}{cmdab:fol:der(}{it:string}{cmd:)} specifies the folder where previous Excel +files will be looked for, and where the updated Excel Report will be exported. Note that +this folder needs to have a subfolder called {it:Daily} where the duplicate report file is backed up daily. -{phang}{cmdab:unique:vars(}{it:varlist}{cmd:)} list variables that by themselves or together -with {it:ID_varname} uniquely identifies all observations. This varlist is required when the corrections are -imported back into Stata and merged with the original data set. Time variables +{phang}{cmdab:unique:vars(}{it:varlist}{cmd:)} list variables that by themselves or together +with {it:ID_varname} uniquely identifies all observations. This varlist is required when the corrections are +imported back into Stata and merged with the original data set. Time variables should always be avoided if possible in {cmdab:uniquevars()}. See option {cmdab:min:precision()} for -an explanation of why time variables should be avoided. Data that has been downloaded from -a server usually has a variable called "KEY" or similar. Such a variable would be optimal -for {cmdab:unique:vars(}{it:varlist}{cmd:)}. +an explanation of why time variables should be avoided. Data that has been downloaded from +a server usually has a variable called "KEY" or similar. Such a variable would be optimal +for {cmdab:unique:vars(}{it:varlist}{cmd:)}. -{phang}{cmdab:keep:vars(}{it:varlist}{cmd:)} list variables to be included in the exported -Excel report. 
These variables can help team members identifying which observation to keep, -drop and assign a new ID to. For data integrity reasons, be careful not to export and share +{phang}{cmdab:keep:vars(}{it:varlist}{cmd:)} list variables to be included in the exported +Excel report. These variables can help team members identifying which observation to keep, +drop and assign a new ID to. For data integrity reasons, be careful not to export and share Excel files including both identifying variables and names together with {it:ID_varname}. -{phang}{cmdab:tostringok} allows {it:ID_varname} to be turned into a string variable in case +{phang}{cmdab:tostringok} allows {it:ID_varname} to be turned into a string variable in case {it:ID_varname} is numeric but a value listed in {it:newID} is non-numeric. Otherwise an error is generated. {phang}{cmdab:droprest} disables the requirement that duplicates must be explicitly deleted. -The default is that if one of the duplicates in a group of duplicates has a -correction, then that correction is only valid if all other duplicates in that +The default is that if one of the duplicates in a group of duplicates has a +correction, then that correction is only valid if all other duplicates in that group have a correction as well. For example, if there are four observation with -the same value for {it:ID_varname} and one is correct, one needs a new ID and -two are incorrect and should be deleted. Then the first one is indicated to be -kept in the {it:correct} column, the second one is given a new ID in {it:newID} -and the other two observations must be indicated for deletion in {it:drop} -unless {cmdab:droprest}. The first two corrections are not considered valid and -will cause an error in case if {cmdab:droprest} is not specified and the other -two observations are not explicitly indicated to be dropped. 
It is recommended -to not use {cmdab:droprest} and to manually indicate all deletions to avoid +the same value for {it:ID_varname} and one is correct, one needs a new ID and +two are incorrect and should be deleted. Then the first one is indicated to be +kept in the {it:correct} column, the second one is given a new ID in {it:newID} +and the other two observations must be indicated for deletion in {it:drop} +unless {cmdab:droprest}. The first two corrections are not considered valid and +will cause an error in case if {cmdab:droprest} is not specified and the other +two observations are not explicitly indicated to be dropped. It is recommended +to not use {cmdab:droprest} and to manually indicate all deletions to avoid mistakes, but this option exist for cases when that might be very inconvenient. -{phang}{cmdab:suf:fix(}{it:string}{cmd:)} allows the user to set a unique file name suffix to -the Excel report. This is meant to be used when a project has multiple data sets that are -checked for duplicates seperately. The command will not work as intended (most liekly even -crash) if the duplicate report for one data set is used when checking for duplicates in -another data set. To prevent this, the Excel report must either be exported to seperate folders or -be assigned different file names using this option. If the string in suffix() is, for example, "AAA", -then the report exported will be "iedupreport_AAA.xlsx". Any characters allowed in file names in -Excel and in Stata are allowed in suffix(). Note, that if suffix() is added after the first report is outputted, -then the name of the outputted report must be updated manually. The command will otherwise not -apply any changes already entered in the original report. - -{phang}{cmdab:nodaily} disables the generation of daily back-up copies of the -Excel report. The default is that the command saves dated copies of the Excel -report in a sub-folder called Daily in the folder specified in {cmdab:folder()}. 
If -the folder Daily does not exist, then it is creaetd unless the +{phang}{cmdab:suf:fix(}{it:string}{cmd:)} allows the user to set a unique file name suffix to +the Excel report. This is meant to be used when a project has multiple data sets that are +checked for duplicates seperately. The command will not work as intended (most liekly even +crash) if the duplicate report for one data set is used when checking for duplicates in +another data set. To prevent this, the Excel report must either be exported to seperate folders or +be assigned different file names using this option. If the string in suffix() is, for example, "AAA", +then the report exported will be "iedupreport_AAA.xlsx". Any characters allowed in file names in +Excel and in Stata are allowed in suffix(). Note, that if suffix() is added after the first report is outputted, +then the name of the outputted report must be updated manually. The command will otherwise not +apply any changes already entered in the original report. + +{phang}{cmdab:nodaily} disables the generation of daily back-up copies of the +Excel report. The default is that the command saves dated copies of the Excel +report in a sub-folder called Daily in the folder specified in {cmdab:folder()}. If +the folder Daily does not exist, then it is creaetd unless the option {cmdab:nodaily} is used. -{phang}{cmdab:min:precision(}{it:numlist}{cmd:)} is rarely used but can be used -to manually set the precision (in minutes) when exporting and importing a time -variable to and from the Excel report. Time variables should always be avoided +{phang}{cmdab:min:precision(}{it:numlist}{cmd:)} is rarely used but can be used +to manually set the precision (in minutes) when exporting and importing a time +variable to and from the Excel report. Time variables should always be avoided if possible in {cmdab:uniquevars()}, but sometimes they are the only option. 
While -Stata and Excel both keep a very high precision in time variables, they do so -slightly differently, and this can generate a difference of a few seconds after -a time variable was exported to Excel and then imported back to Stata. If the -time variable is used in {cmdab:uniquevars()}, then the time variable may no -longer be identical to its original value after it is imported back to Stata, and it -may therefore no longer be possible to use it to merge the Excel data to the correct Stata +Stata and Excel both keep a very high precision in time variables, they do so +slightly differently, and this can generate a difference of a few seconds after +a time variable was exported to Excel and then imported back to Stata. If the +time variable is used in {cmdab:uniquevars()}, then the time variable may no +longer be identical to its original value after it is imported back to Stata, and it +may therefore no longer be possible to use it to merge the Excel data to the correct Stata observation. If this happens, then {cmdab:min:precision()} can be used to set the -precision manually. This should only be considered a solution of last resort, +precision manually. This should only be considered a solution of last resort, as lowering the precision increases the risk the time variable no longer uniquely identifies each observation. The typical user will never use this option. {title:The Excel Report} -{pstd}A report of duplicates will be created in {cmdab:fol:der(}{it:string}{cmd:)} -if any duplicates in {it:ID_varname} were found. The folder listed in -{cmdab:fol:der(}{it:string}{cmd:)} must have a subfolder called {it:Daily} -where daily back-ups of the report are saved. If a report is back-uped already -that day, then that report will be overwritten. +{pstd}A report of duplicates will be created in {cmdab:fol:der(}{it:string}{cmd:)} +if any duplicates in {it:ID_varname} were found. 
The folder listed in +{cmdab:fol:der(}{it:string}{cmd:)} must have a subfolder called {it:Daily} +where daily back-ups of the report are saved. If a report is back-uped already +that day, then that report will be overwritten. -{pstd}All duplicates in a group of duplicates must have a correction indicated. If -one or more duplicates are indicated as correct in {it:correct} or assigned a new +{pstd}All duplicates in a group of duplicates must have a correction indicated. If +one or more duplicates are indicated as correct in {it:correct} or assigned a new ID in {it:newID}, then all other duplicates with the same value in {it:ID_varname} must -be explicitly indicated for deletion. This requirement may (but probably +be explicitly indicated for deletion. This requirement may (but probably shouldn't) be disabled by option {cmdab:droprest}. {dlgtab:Columns in Excel Report filled in automatically:} -{phang}{it:dupListID} stores an auto incremented duplicate list ID that is used +{phang}{it:dupListID} stores an auto incremented duplicate list ID that is used to maintain the sort order in the Excel Report regardless of how the data in memory is sorted at the time {cmd:ieduplicates} is executed. {phang}{it:dateListed} stores the date the duplicate was first identified. -{phang}{it:dateFixed} stores the date a valid correction was imported the first +{phang}{it:dateFixed} stores the date a valid correction was imported the first time for that duplicate. {dlgtab:Columns in Excel Report to be filled in manually by a user:} -{phang}{it:correct} is used to indicate that the duplicate should be kept. Valid values are -restricted to "yes" and "y" to reduce the risk of unintended entries. The values -are not sensitive to case. All valid values are changed to "yes" lower case when +{phang}{it:correct} is used to indicate that the duplicate should be kept. Valid values are +restricted to "yes" and "y" to reduce the risk of unintended entries. The values +are not sensitive to case. 
All valid values are changed to "yes" lower case when imported. If {it:correct} is indicated then both {it:drop} and {it:newID} must be left empty. -{phang}{it:drop} is used to indicate that the duplicate should be deleted. Valid values are -restricted to "yes" and "y" to reduce the risk of unintended entries. The values -are not sensitive to case. All valid values are changed to "yes" lower case when +{phang}{it:drop} is used to indicate that the duplicate should be deleted. Valid values are +restricted to "yes" and "y" to reduce the risk of unintended entries. The values +are not sensitive to case. All valid values are changed to "yes" lower case when imported. If {it:drop} is indicated then both {it:correct} and {it:newID} must be left empty. -{phang}{it:newID} is used to assign a new ID values to a duplicate. If {it:ID_varname} +{phang}{it:newID} is used to assign a new ID values to a duplicate. If {it:ID_varname} is a string then all values are valid for {it:newID}. If {it:ID_varname} is numeric then -only digits are valid, unless the option {cmdab:tostringok} is specified. -If {cmdab:tostringok} is specified and {it:newID} is non-numeric, then {it:ID_varname} +only digits are valid, unless the option {cmdab:tostringok} is specified. +If {cmdab:tostringok} is specified and {it:newID} is non-numeric, then {it:ID_varname} is recasted to a string variable. If {it:newID} is indicated then both {it:correct} and {it:drop} must be left empty. -{phang}{it:initials} allows the team working with this data to keep track on who +{phang}{it:initials} allows the team working with this data to keep track on who decided on corrections. -{phang}{it:notes} allows the team working with this data to document the reason +{phang}{it:notes} allows the team working with this data to document the reason for the duplicates and the why one type of correction was chosen over the others. 
{dlgtab:Columns in Excel Report with data from the data set:} -{pstd}The columns above are followed by the values in {cmdab:unique:vars(}{it:varlist}{cmd:)} -and in {cmdab:keep:vars(}{it:varlist}{cmd:)}. These column keeps the name the -variables have in the data set. These variables can help the team to identify +{pstd}The columns above are followed by the values in {cmdab:unique:vars(}{it:varlist}{cmd:)} +and in {cmdab:keep:vars(}{it:varlist}{cmd:)}. These column keeps the name the +variables have in the data set. These variables can help the team to identify which correction should be applied to which duplicate. {space 4}{hline} @@ -224,8 +224,8 @@ which correction should be applied to which duplicate. {p2colreset}{...} {pstd} -{cmd:r(numDup)} is intended to allow for the option to pause Stata in case unresolved duplicates -are found. See example 4 below for a code example on how to use {cmd:r(numDup)}. See {help pause} for instructions +{cmd:r(numDup)} is intended to allow for the option to pause Stata in case unresolved duplicates +are found. See example 4 below for a code example on how to use {cmd:r(numDup)}. See {help pause} for instructions on how to resume the execution of the code if Stata is paused. @@ -236,10 +236,10 @@ on how to resume the execution of the code if Stata is paused. {phang2}{inp:ieduplicates HHID, folder(C:\myImpactEvaluation\baseline\data) uniquevars(KEY)}{p_end} -{pmore}Specified like this {cmdab:ieduplicates} start by looking for any corrections in any -duplicates report in the folder C:\myImpactEvaluation\baseline\data. If there is a report -with corrections those corrections are applied to the data set. Then the command looks for -unresolved duplicates in HHID and exports a new report if any duplicates were found. The data +{pmore}Specified like this {cmdab:ieduplicates} start by looking for any corrections in any +duplicates report in the folder C:\myImpactEvaluation\baseline\data. 
If there is a report +with corrections those corrections are applied to the data set. Then the command looks for +unresolved duplicates in HHID and exports a new report if any duplicates were found. The data set is returned without any of the unresolved duplicates. The variable KEY is used to separate observations that are duplication in the ID var. @@ -248,11 +248,11 @@ observations that are duplication in the ID var. {phang2}{inp:ieduplicates HHID, folder(C:\myImpactEvaluation\baseline\data) keepvars(enumerator) uniquevars(KEY)}{p_end} -{pmore}Similar to the example above, but it also includes the variable enumerator in the Excel +{pmore}Similar to the example above, but it also includes the variable enumerator in the Excel report which is most likely helpful if the data set is collected through a household survey. {phang} -{hi:Example 3.} Using {cmd:r(numDup)} to pause the execution of the code if +{hi:Example 3.} Using {cmd:r(numDup)} to pause the execution of the code if unresolved duplicates were found {phang2}{inp:ieduplicates HHID, folder(C:\myImpactEvaluation\baseline\data) uniquevars(KEY)}{p_end} @@ -276,18 +276,18 @@ unresolved duplicates were found {col 3}{c |}{col 4}9834{col 10}8{col 21}11Jan2016{col 33} {col 44} {col 53} {col 59} {col 65} {col 75} {col 94}{it:uniquevalue}{col 107}{it:keepvarvalue}{col 120}{c |} {col 3}{c BLC}{hline 116}{c BRC} -{pmore}The table above shows an example of an Excel report with 4 duplicates groups with -two duplicates in each groups. The duplicates in 4321 and in 1145 have both been corrected -but 7365 and 9834 are still unresolved. Before any observation was corrected, all observations had -{it:dateFixed}, {it:correct}, {it:drop}, {it:newID}, {it:initials} and {it:note} empty just like the observations for ID 7365 and 9834. {it:dateFixed} +{pmore}The table above shows an example of an Excel report with 4 duplicates groups with +two duplicates in each groups. 
The duplicates in 4321 and in 1145 have both been corrected +but 7365 and 9834 are still unresolved. Before any observation was corrected, all observations had +{it:dateFixed}, {it:correct}, {it:drop}, {it:newID}, {it:initials} and {it:note} empty just like the observations for ID 7365 and 9834. {it:dateFixed} is not updated by the user, the command adds this date the first time the correction is made. -{pmore}Observation with dupListID == 5 was found to have been +{pmore}Observation with dupListID == 5 was found to have been assigned the incorrect ID while the data was collected. This observation is assigned the correct ID in {it:newID} -and observation dupListID == 6 is indicated to be correct. Someone with initials IB made this +and observation dupListID == 6 is indicated to be correct. Someone with initials IB made this correction and made a note. This note can and should be more descriptive but is kept short in this example. -{pmore}Observations with dupListID == 1 and dupListID == 2 were identified as a duplicate submissions of the same +{pmore}Observations with dupListID == 1 and dupListID == 2 were identified as a duplicate submissions of the same observation. One is kept and one is dropped, usually it does not matter which you keep and which you drop, but that should be confirmed. {pmore}Both corrections described in the example would have been easily identified using this command's sister command {help iecompdup}. @@ -295,19 +295,17 @@ observation. 
One is kept and one is dropped, usually it does not matter which yo {title:Acknowledgements} {phang}I would like to acknowledge the help in testing and proofreading I received in relation to this command and help file from (in alphabetic order):{p_end} -{pmore}Mehrab Ali{break}Michell Dong{break}Paula Gonzales{break}Seungmin Lee +{pmore}Mehrab Ali{break}Michell Dong{break}Paula Gonzalez{break}Seungmin Lee {title:Author} -{phang}Kristoffer Bjärkefur, The World Bank, DECIE +{phang}Kristoffer Bjärkefur, The World Bank, DECIE -{phang}Please send bug-reports, suggestions and requests for clarifications +{phang}Please send bug-reports, suggestions and requests for clarifications writing "ietools ieduplicates" in the subject line to:{break} kbjarkefur@worldbank.org - -{phang}You can also see the code, make comments to the code, see the version + +{phang}You can also see the code, make comments to the code, see the version history of the code, and submit additions or edits to the code through the github repository of ietoolkit:{break} {browse "https://github.com/worldbank/ietoolkit"} - - diff --git a/src/help_files/iefolder.sthlp b/src/help_files/iefolder.sthlp index b5df86f2..ebf86e30 100644 --- a/src/help_files/iefolder.sthlp +++ b/src/help_files/iefolder.sthlp @@ -1,5 +1,5 @@ {smcl} -{* 15 Dec 2017}{...} +{* 26 Apr 2018}{...} {hline} help for {hi:iefolder} {hline} @@ -8,6 +8,9 @@ help for {hi:iefolder} {phang2}{cmdab:iefolder} {hline 2} sets up project folders and master do-files according to World Bank DIME's standards.{p_end} +{phang2}For a more descriptive discussion on the intended usage and work flow of this +command please see the {browse "https://dimewiki.worldbank.org/wiki/Iefolder":DIME Wiki}. 
+ {title:Syntax} {pstd} {ul:When initially setting up the {hi:DataWork} folder in a new project:}{p_end} @@ -51,7 +54,7 @@ help for {hi:iefolder} {pstd}{ul:{hi:itemtypes}}{break} This command can create either a new DataWork folder or add folders to an existing DataWork folder. The existing DataWork folder must have been created - with {cmd:iefolder} for the additions to work. A new DataWork folder is created using specifying {ul:project}. There are three types of folders that can be added to an existing folder by specifying {ul:round}, {ul:untiofobs} or {ul:subfolder}. See + with {cmd:iefolder} for the additions to work. A new DataWork folder is created using specifying {ul:project}. There are three types of folders that can be added to an existing folder by specifying {ul:round}, {ul:unitofobs} or {ul:subfolder}. See next paragraphs for descriptions. {pstd}{ul:{it:project}} sets up a new DataWork folder and its initial folder structure. You must always do this before you can do anything else. It also sets up the main master do-file for this DataWork folder. {cmd:iefolder} is implemented so that you can keep working for years with your project in between adding folders. The command reads and preserves changes made manually to the DataWork folder and master do-file before adding more folders using {cmd:iefolder}. diff --git a/src/help_files/iegitaddmd.sthlp b/src/help_files/iegitaddmd.sthlp index 3b04d204..db7c0022 100644 --- a/src/help_files/iegitaddmd.sthlp +++ b/src/help_files/iegitaddmd.sthlp @@ -1,5 +1,5 @@ {smcl} -{* 15 Dec 2017}{...} +{* 26 Apr 2018}{...} {hline} help for {hi:iegitaddmd} {hline} @@ -8,6 +8,9 @@ help for {hi:iegitaddmd} {phang}{cmdab:iegitaddmd} {hline 2} Creates a placeholder file in subfolders of a GitHub repository folder, which allows committing folder structures with empty folders. 
+{phang2}For a more descriptive discussion on the intended usage and work flow of this +command please see the {browse "https://dimewiki.worldbank.org/wiki/Iegitaddmd":DIME Wiki}. + {title:Syntax} {phang} {cmdab:iegitaddmd} , {cmd:folder(}{it:file_path}{cmd:)} [file({help filename}) all skip replace] @@ -27,9 +30,9 @@ help for {hi:iegitaddmd} {title:Description} {pstd}GitHub does not sync empty folders, or folder that only contain ignored files. - However, it is common in research projects - that a folder structure is added to the GitHub repository at the beginning of a project. At - the time the folder structure is added to the repository, several folders might still + However, it is common in research projects + that a folder structure is added to the GitHub repository at the beginning of a project. At + the time the folder structure is added to the repository, several folders might still be empty and GitHub will not sync them, meaning that they will not be available to the full team. {cmd:iegitaddmd} is a Stata adaptation of {it:Solution B} in {browse "http://bytefreaks.net/gnulinux/bash/how-to-add-automatically-all-empty-folders-in-git-repository" :this post}. @@ -75,9 +78,9 @@ help for {hi:iegitaddmd} {pstd}{inp:global github_folder "C:\Users\JohnSmith\Documents\GitHub\ProjectA"}{break}{inp:iegitaddmd , folder({it:"$github_folder"})} -{pstd}In the example above, there is a GitHub repository in the folder ProjectA. This - repository has a folder structure where some folders are still empty but will later - be populated with files. In order to have all folders, even the empty ones, synced on all +{pstd}In the example above, there is a GitHub repository in the folder ProjectA. This + repository has a folder structure where some folders are still empty but will later + be populated with files. 
In order to have all folders, even the empty ones, synced on all collaborators' cloned local copies of the repository, the folders need to contain at least one file, which is being created by the command. @@ -97,4 +100,3 @@ help for {hi:iegitaddmd} {pstd}You can also see the code, make comments to the code, see the version history of the code, and submit additions or edits to the code through the {browse "https://github.com/worldbank/ietoolkit" :ietoolkit github repository}. - diff --git a/src/help_files/iegraph.sthlp b/src/help_files/iegraph.sthlp index e9c0653a..eb07fa31 100644 --- a/src/help_files/iegraph.sthlp +++ b/src/help_files/iegraph.sthlp @@ -1,21 +1,24 @@ {smcl} -{* 15 Dec 2017}{...} +{* 26 Apr 2018}{...} {hline} help for {hi:iegraph} {hline} {title:Title} -{phang2}{cmdab:iegraph} {hline 2} Generates graphs based on regressions with treatment dummies common in impact evaluations. +{phang2}{cmdab:iegraph} {hline 2} Generates graphs based on regressions with treatment dummies common in impact evaluations. + +{phang2}For a more descriptive discussion on the intended usage and work flow of this +command please see the {browse "https://dimewiki.worldbank.org/wiki/Iegraph":DIME Wiki}. 
{title:Syntax} {phang2} -{cmdab:iegraph} {varlist} +{cmdab:iegraph} {varlist} , [ {cmdab:basicti:tle(}{it:string}{cmd:)} {cmdab:varl:abels} {cmdab:save(}{it:string}{cmd:)} {cmdab:grey:scale} {cmdab:yzero} -{cmd:noconfbars} {cmdab:confbarsnone(}{it:varlist}{cmd:)} {cmdab:confintval(}{it:numlist}{cmd:)} -{cmd:norestore} {cmdab:baropt:ions(}{it:string}{cmd:)} {cmdab:ignoredummytest} +{cmd:noconfbars} {cmdab:confbarsnone(}{it:varlist}{cmd:)} {cmdab:confintval(}{it:numlist}{cmd:)} +{cmd:norestore} {cmdab:baropt:ions(}{it:string}{cmd:)} {cmdab:ignoredummytest} {it:{help scatter##twoway_options:twoway_scatter_options}} ] @@ -32,107 +35,112 @@ help for {hi:iegraph} {synopt :{cmdab:confbarsnone(}{it:varlist}{cmd:)}} Removes confidence interval bars from only the {it:varlist} listed.{p_end} {synopt :{cmdab:confintval(}{it:numlist}{cmd:)}} Sets the confidence interval for the confidence interval bars. Default is .95.{p_end} {synopt :{cmd:norestore}} Allows you to debug your two way graph settings on the data set prepared by iegraph. To be used with {it:r(cmd)}.{p_end} +{synopt :{cmdab:baropt:ions(}{it:string}{cmd:)}} Allows you to add formatting to the bars.{p_end} {synopt :{cmdab:ignoredummytest}} Ignores the tests that tests if the dummies fits one of the two models below.{p_end} {synoptline} -{pstd}Any twoway graph scatter options that can be used with normal twoway graph scatter commands can - also be used. If any of these commands conflict with any of the built in options, +{pstd}Any twoway graph scatter options that can be used with normal twoway graph scatter commands can + also be used. If any of these commands conflict with any of the built in options, then the user specified settings have precedence. See example 2 for details.{p_end} {marker desc} {title:Description} -{pstd}{cmdab:iegraph} This command creates bar graphs on the basis of the coefficients - of treatment dummies in regression results. 
This command is developed for reading - stored results from two types of impact evaluation regression models, but there - are countless of other examples where the command also can be used. {cmd:iegraph} must - be used immediately after running the regression or as long as the regression result is - still stored in or restored to Stata's {help ereturn} results. - -{pstd}{bf:Model 1: OLS with Treatment Dummies}{break}The most typical impact evaluation regression is - to have the outcome variable as the dependent variable and one dummy for each - treatment arm where control is the omitted category. These regressions can also include - covariates, fixed effects etc., but as long as the treatment status is defined by - mutually exclusive dummy variables. See especially examples 1 and 2 below. This command - works with any number of treatment arms but works best from two arms (treatment - and control) to five treatment arms (4 different treatments and control). More +{pstd}{cmdab:iegraph} This command creates bar graphs on the basis of the coefficients + of treatment dummies in regression results. This command is developed for reading + stored results from two types of impact evaluation regression models, but there + are countless of other examples where the command also can be used. {cmd:iegraph} must + be used immediately after running the regression or as long as the regression result is + still stored in or restored to Stata's {help ereturn} results. + +{pstd}{bf:Model 1: OLS with Treatment Dummies}{break}The most typical impact evaluation regression is + to have the outcome variable as the dependent variable and one dummy for each + treatment arm where control is the omitted category. These regressions can also include + covariates, fixed effects etc., but as long as the treatment status is defined by + mutually exclusive dummy variables. See especially examples 1 and 2 below. 
This command + works with any number of treatment arms but works best from two arms (treatment + and control) to five treatment arms (4 different treatments and control). More arms than that may result in a still correct but perhaps cluttered graph. - -{pstd}{bf:Model 2: Difference-in-Differences}{break}Another typical regression model in impact - evaluations are difference-in-difference (Diff-in-Diff) models with two treatment arms (treatment - and control) and two time periods. If the Diff-in-Diff regression is specified as having the - outcome variable as the dependent variable and three dummy variables (time, treatment - and timeXtreatment) as the independent variables, then this command will produce a nice + +{pstd}{bf:Model 2: Difference-in-Differences}{break}Another typical regression model in impact + evaluations are difference-in-difference (Diff-in-Diff) models with two treatment arms (treatment + and control) and two time periods. If the Diff-in-Diff regression is specified as having the + outcome variable as the dependent variable and three dummy variables (time, treatment + and timeXtreatment) as the independent variables, then this command will produce a nice graph. Controls, treatment effects etc. may be added to the regression model. See especially example 3. -{pstd}{bf:Graph Output}{break}The graph generated by this command is created using the following values. The - control bar is the mean of the outcome variable for the control group. It is not - the constant from the regression as those are not identical if, for example, fixed effects - and covariates were used. For each treatment group the bar is the sum of the value - of the control bar and the beta coefficient in the regression of the corresponding - treatment dummy. The confidence intervals are calculated from the variance in the +{pstd}{bf:Graph Output}{break}The graph generated by this command is created using the following values. 
The + control bar is the mean of the outcome variable for the control group. It is not + the constant from the regression as those are not identical if, for example, fixed effects + and covariates were used. For each treatment group the bar is the sum of the value + of the control bar and the beta coefficient in the regression of the corresponding + treatment dummy. The confidence intervals are calculated from the variance in the beta coefficients in the regression.{p_end} -{pstd}The graph also includes the N for each treatment arm in the regression and uses - that value as labels on the x-axis. Stars are added to this value if the corresponding - coefficient is statistically different from zero in the regression{p_end} - +{pstd}The graph also includes the N for each treatment arm in the regression and uses + that value as labels on the x-axis. Stars are added to this value if the corresponding + coefficient is statistically different from zero in the regression{p_end} + {marker optslong} {title:Options} -{phang}{cmdab:basicti:tle(}{it:string}{cmd:)} Manually sets the title of the graph. To - apply formatting like title size, position, etc., use Stata's built +{phang}{cmdab:basicti:tle(}{it:string}{cmd:)} Manually sets the title of the graph. To + apply formatting like title size, position, etc., use Stata's built in {help title_options:title() option} instead.{p_end} -{phang}{cmdab:varl:abels} Sets the legends to the variable labels for the +{phang}{cmdab:varl:abels} Sets the legends to the variable labels for the variables instead of the variable names.{p_end} {phang}{cmdab:save(}{it:string}{cmd:)} Sets the filename and the directory to which - the graph will be set. If the filename ends with no extension or '.gph', it will be - saved in the graph format. Extensions(png,pdf,tif,wmf,emf,ps,and eps) will be exported. - Wmf and Emf are only available for the Windows version of Stata. Png and Tif for all + the graph will be set. 
If the filename ends with no extension or '.gph', it will be + saved in the graph format. Extensions(png,pdf,tif,wmf,emf,ps,and eps) will be exported. + Wmf and Emf are only available for the Windows version of Stata. Png and Tif for all versions of Stata except Console.{p_end} -{phang}{cmdab:grey:scale} Uses greyscales for the bars instead of colors. The color - of the control bar will be black and the treatment bar will run in equal shade +{phang}{cmdab:grey:scale} Uses greyscales for the bars instead of colors. The color + of the control bar will be black and the treatment bar will run in equal shade differences from light grey to dark grey.{p_end} - -{phang}{cmdab:yzero} Manually sets the y-axis of the graph to start at zero - instead of the Stata default. In many cases, we expect that neither the default - settings nor this option will make the axes look perfect, but you may use Stata's built - in {help axis_option:axis options} that allow you to set the axes to perfectly fit + +{phang}{cmdab:yzero} Manually sets the y-axis of the graph to start at zero + instead of the Stata default. In many cases, we expect that neither the default + settings nor this option will make the axes look perfect, but you may use Stata's built + in {help axis_option:axis options} that allow you to set the axes to perfectly fit your data. The command will ignore the {cmdab:yzero} option in cases where the graph cannot be forced to zero i.e. where the values in the graph extend beyond zero, both positively - or negatively. A warning will be displayed telling the user that the option has + or negatively. A warning will be displayed telling the user that the option has been ignored. Despite the warning, the graph will be produced correctly.{p_end} - -{phang}{cmd:noconfbars} Removes the confidence interval bars from graphs for all + +{phang}{cmd:noconfbars} Removes the confidence interval bars from graphs for all treatments. The default value for the confidence interval bars is 95%. 
{p_end} - -{phang}{cmdab:confbarsnone(}{it:varlist}{cmd:)} Removes confidence interval bars - from only the {it:varlist} listed. The remaining variables in the graphs which + +{phang}{cmdab:confbarsnone(}{it:varlist}{cmd:)} Removes confidence interval bars + from only the {it:varlist} listed. The remaining variables in the graphs which have not been specified in {cmdab:confbarsnone} will still have the confidence interval bars. {p_end} -{phang}{cmdab:confintval(}{it:numlist}{cmd:)} Sets the confidence interval for - the confidence interval bars. Default is .95. Values between 0 and 1 are allowed.{p_end} - +{phang}{cmdab:confintval(}{it:numlist}{cmd:)} Sets the confidence interval for + the confidence interval bars. Default is .95. Values between 0 and 1 are allowed.{p_end} + {phang}{cmdab:norestore} Returns the data set that iegraph prepares to create - the graph. This is helpful when de-bugging how one of Stata's many graph - options can be applied to an iegraph graph. This option is meant to be - used in combination with the {help return:returned result} in {it:r(cmd)}. {it:r(cmd)} - gives you the line of code iegraph prepares to create the graph and {cmdab:norestore} gives you + the graph. This is helpful when de-bugging how one of Stata's many graph + options can be applied to an iegraph graph. This option is meant to be + used in combination with the {help return:returned result} in {it:r(cmd)}. {it:r(cmd)} + gives you the line of code iegraph prepares to create the graph and {cmdab:norestore} gives you access to the data that code is meant to be used on. This approach will help you de-bug how to apply Stata's built in graph options to an iegraph graph. Note that this option deletes any unsaved changes made to your data.{p_end} -{phang}{cmd:ignoredummytest} Ignores the tests that test if the dummies fits one - of the two models this command is intended for. The two models are described - in detail above above. 
There might be models we have not thought of for which - this command is helpful as well. Use this option to lift the restrictions of - those two models. But be careful, this command has not been tested for other - models than the two described.{p_end} - +{phang}{cmdab:baropt:ions(}{it:string}{cmd:)}} Allows you to add formatting + option that are applied to each bar and not the graph itself. Example of + such option are {help twoway_bar} options and {help axis_options} options. + +{phang}{cmd:ignoredummytest} Ignores the tests that test if the dummies fits one + of the two models this command is intended for. The two models are described + in detail above above. There might be models we have not thought of for which + this command is helpful as well. Use this option to lift the restrictions of + those two models. But be careful, this command has not been tested for other + models than the two described.{p_end} + {marker optslong} {title:Examples} @@ -141,12 +149,12 @@ help for {hi:iegraph} {pmore} {inp:regress} {it:outcomevar treatment_dummy}{break} {inp:iegraph} {it:treatment_dummy} , {inp:basictitle({it:"Treatment Effect on Outcome"})} -{pmore}In the example above, there are only two treatment arms (treatment and - control). {it:treatment_dummy} has a 1 for all treatment observations and - a 0 for all control observations. The graph will have one bar for control and - it shows the mean for {it:outcomevar} for all observations in control. The - second bar in the graph will be the sum of that mean and the coefficient - for {it:treatment_dummy} in the regression. The graph will also have the +{pmore}In the example above, there are only two treatment arms (treatment and + control). {it:treatment_dummy} has a 1 for all treatment observations and + a 0 for all control observations. The graph will have one bar for control and + it shows the mean for {it:outcomevar} for all observations in control. 
The + second bar in the graph will be the sum of that mean and the coefficient + for {it:treatment_dummy} in the regression. The graph will also have the title: Treatment Effect on Outcome. {pstd} {hi:Example 2.} @@ -154,13 +162,13 @@ help for {hi:iegraph} {pmore} {inp:regress} {it:income tmt_1 tmt_2 age education}{inp:, cluster(}{it:district}{inp:)}{break} {inp:iegraph} {it:tmt_1 tmt_2}{inp:, noconfbars yzero basictitle({it:"Treatment effect on income"}) } -{pmore}In the example above, the treatment effect on income in researched. There +{pmore}In the example above, the treatment effect on income in researched. There are three treatment arms; control, treatment 1 ({it:tmt_1}) and treatment - 2 ({it:tmt_2}). It is important that no observation has the value 1 in + 2 ({it:tmt_2}). It is important that no observation has the value 1 in both {it:tmt_1} and {it:tmt_2} (i.e. no observation is in more than one - treatment) and some observations must have the value 0 in both {it:tmt_1} - and {it:tmt_2} (i.e. control observations). The variables {it:age} and - {it:education} are covariates (control variables) and are not included + treatment) and some observations must have the value 0 in both {it:tmt_1} + and {it:tmt_2} (i.e. control observations). The variables {it:age} and + {it:education} are covariates (control variables) and are not included in {cmd:iegraph}. {inp:noconfbars} omits the confidence interval bars , and {inp:yzero} sets the y-axis to start at 0. @@ -168,29 +176,29 @@ help for {hi:iegraph} {pmore} {inp:regress} {it:chld_wght time treat timeXtreat}{break} {inp:iegraph} {it:time treat timeXtreat} {inp:, basictitle({it:"Treatment effect on Child Weight (Diff-in-Diff)"})} - -{pmore}In the example above, the data set is a panel data set with two time + +{pmore}In the example above, the data set is a panel data set with two time periods and the regression estimates the treatment effect on child weight using a Difference-in-Differences model. 
The dummy variable {it:time} indicates if it is time period 0 or 1. - The dummy variable {it:treat} indicates if the observation is treatment - or control. {it:timeXtreat} is the interaction term of {it:time} + The dummy variable {it:treat} indicates if the observation is treatment + or control. {it:timeXtreat} is the interaction term of {it:time} and {it:treat}. This the standard way to set up a Difference-in-Differences regression model. {pstd} {hi:Example 4.} {pmore} {inp:regress} {it:harvest T1 T2 T3 } {break} - {inp:iegraph} {it:T1 T2 T3} {inp:, basictitle({it:"Treatment effect on harvest"}) + {inp:iegraph} {it:T1 T2 T3} {inp:, basictitle({it:"Treatment effect on harvest"}) xlabel(,angle(45)) yzero ylabel(minmax) save({it:"Graph1.gph"})} - -{pmore}The example above shows how to save a graph to disk. It also shows that - most two-way graph options can be used. In this example the {cmd:iegraph} - option {cdm:yzero} conflicts with the two-way option {cmd:ylabel(minmax)}. - In such a case the user specified option takes precedence over {cmd:iegraph} + +{pmore}The example above shows how to save a graph to disk. It also shows that + most two-way graph options can be used. In this example the {cmd:iegraph} + option {cdm:yzero} conflicts with the two-way option {cmd:ylabel(minmax)}. + In such a case the user specified option takes precedence over {cmd:iegraph} options like {cdm:yzero}. 
- - + + {title:Acknowledgements} {phang}We would like to acknowledge the help in testing and proofreading we received in relation to this command and help file from (in alphabetic order):{p_end} diff --git a/src/help_files/iematch.sthlp b/src/help_files/iematch.sthlp index 7ff2eb5f..1550f26c 100644 --- a/src/help_files/iematch.sthlp +++ b/src/help_files/iematch.sthlp @@ -1,5 +1,5 @@ {smcl} -{* 15 Dec 2017}{...} +{* 26 Apr 2018}{...} {hline} help for {hi:iematch} {hline} @@ -8,10 +8,13 @@ help for {hi:iematch} {phang2}{cmdab:iematch} {hline 2} Matching base observations towards target observations using on a single continous variable. +{phang2}For a more descriptive discussion on the intended usage and work flow of this +command please see the {browse "https://dimewiki.worldbank.org/wiki/Iematch":DIME Wiki}. + {title:Syntax} {phang2} -{cmdab:iematch} {ifin} +{cmdab:iematch} {ifin} , {cmdab:grp:dummy(}{it:varname}{cmd:)} {cmdab:match:var(}{it:varname}{cmd:)} [{cmdab:id:var(}{it:varname}{cmd:)} {cmdab:m1} {cmdab:maxdiff(}{it:numlist}{cmd:)} {cmd:seedok} {cmdab:matchid:name(}{it:string}{cmd:)} {cmdab:matchdi:ffname(}{it:string}{cmd:)} @@ -29,11 +32,14 @@ help for {hi:iematch} {synopt :{cmdab:id:var(}{it:varname}{cmd:)}}The uniquely and fully identifying ID varaible. Used to indicate which target observation a base observation is match with. If omitted an ID variable will be created. See below if you have multiple ID vars.{p_end} -{synopt :{cmdab:m1}}Allows many-to-one matches. The default is to allow only - one-to-one matches. See the {help iematch##desc:description} section.{p_end} {synopt :{cmdab:maxdiff(}{it:numlist}{cmd:)}}Set a maximum difference allowed in {cmdab:matchvar()}. If a base observation has no match within this difference then it will remain unmatched{p_end} +{synopt :{cmdab:m1}}Allows many-to-one matches. The default is to allow only + one-to-one matches. 
See the {help iematch##desc:description} section.{p_end} +{synopt :{cmdab:maxmatch(}{it:integer}{cmd:)}}Sets the maximum number of base + observations that each target observation is allowed to match with in a {cmd:m1} + (many-to-one) match.{p_end} {synopt :{cmd:seedok}}Supresses the error maessage thrown when there are duplicates in {cmd:matchvar()}. When there are duplicates, the seed needs to be set in order to have a replicable match. The {help seed} should be set before this command.{p_end} @@ -49,7 +55,7 @@ help for {hi:iematch} {synopt :{cmdab:matchco:untname(}{it:string}{cmd:)}}Manually sets the name of the variable that indicates how many observations a target obsersvation is matched with in a many-to-one matches. The default is _matchCount{p_end} -{synopt :{cmd:replace}}Replaces variables in memory if there are name conflicts +{synopt :{cmd:replace}}Replaces variables in memory if there are name conflicts when generating the output variables.{p_end} {synoptline} @@ -158,35 +164,41 @@ help for {hi:iematch} that will be used in the variable that indicates which target observation each base observations matched against. If this option is omitted, a variable called _ID will be generated. The observation in the first row is given the value 1, - the second row value 2 and so fourth. - -{pmore}This command assumes only one ID variable as that is the best practice this command - follows (see next paragraph for the exception of panel data sets). Here follows two - suggested solutions if a data set this command will be used on has more than one ID + the second row value 2 and so fourth. + +{pmore}This command assumes only one ID variable as that is the best practice this command + follows (see next paragraph for the exception of panel data sets). Here follows two + suggested solutions if a data set this command will be used on has more than one ID variable. {bf:1.} Do not use the {cmd:idvar()} option and after the mathcing copy the mutliple - ID variables yourself. 
{bf:2.} Combine your ID variables into one ID variable. Here are two - examples on how that can be done (the examples below work just as well when combining more + ID variables yourself. {bf:2.} Combine your ID variables into one ID variable. Here are two + examples on how that can be done (the examples below work just as well when combining more than two ID variables to one.): - + {pmore2}{inp:egen }{it:new_ID_var }{inp:= group(}{it:old_ID_var1 old_ID_var2}{inp:)} {pmore2}{inp:gen}{space 2}{it:new_ID_var }{inp:= }{it:old_ID_var1 }{inp:+ "_" + }{it:old_ID_var2}{space 4}//Works only with string vars -{pmore}Panel data sets are one of the few cases where multiple ID variables is good practice. However, - in the case of matching it is unlikely that it is correct to include multiple time rounds for - the same observation. That would lead to some base observations being matched to - one target observation in the first round, and one another in the second. In impact +{pmore}Panel data sets are one of the few cases where multiple ID variables is good practice. However, + in the case of matching it is unlikely that it is correct to include multiple time rounds for + the same observation. That would lead to some base observations being matched to + one target observation in the first round, and one another in the second. In impact evaluations, matchings are almost exclusively done only on the baseline data. +{phang}{cmdab:maxdiff(}{it:numlist}{cmd:)} sets a maximum allowed difference between + a base observation and a target observation for a match to be valid. Any base + observation without a valid match within this difference will end up unmatched. + {phang}{cmdab:m1} sets the match to a many-to-one match (see {help iematch##desc:description}). This allows multiple base observations to be matched towards a single target observation. The default is the one-to-one match where a maximum one base observation is matched towards each target observation. 
This option allows the number of base observations to be larger then the number of target observations. -{phang}{cmdab:maxdiff(}{it:numlist}{cmd:)} sets a maximum allowed difference between - a base observation and a target observation for a match to be valid. Any base - observation without a valid match within this difference will end up unmatched. +{phang}{cmdab:maxmatch(}{it:integer}{cmd:)} sets the maximum number of base observations a + target observation is allowed to match with in a {cmd:m1} (many-to-one) match. The integer + in {cmd:maxmatch()} is the maximum number of base observations in group but there is also a + always a target observation in the group, so in a maxed out match group it will be {cmd:maxmatch()} + plus one observations. {phang}{cmd:seedok} supresses the error message throwned when there are duplicates among the base observations or the target observations in {cmd:matchvar()}. When there @@ -220,9 +232,9 @@ help for {hi:iematch} name is {inp:_matchCount}. The names {inp:_ID}, {inp:_matchID}, {inp:_matchDiff} and {inp:_matchResult} are not allowed. -{phang}{cmdab:replace} allows {cmd:iematch} to replace variables in memory when +{phang}{cmdab:replace} allows {cmd:iematch} to replace variables in memory when encountering name conflicts while creating the variables with the results of the matching. - + {title:Examples} {pstd} {hi:Example 1.} @@ -241,6 +253,19 @@ help for {hi:iematch} long as the difference in {it:p_hat} is less than .001. Only observations that has the value 1 in variable {it:baseline} will be included in the match. +{pstd} {hi:Example 3.} + +{pmore}{inp:iematch , grpdummy({it:tmt}) m1 maxmatch(5) matchvar({it:p_hat}) maxdiff(.001)} + +{pmore}In the example above, the observations with value 1 in {it:tmt} will be matched + towards the nearest, in terms of {it:p_hat}, observations with value 0 in {it:tmt} as + long as the difference in {it:p_hat} is less than .001. So far this example is identical + to example 2. 
However, in this example each target observation is allowed to match with up + to 5 base observations. Hence, instead of a result with only pairs of exactly one target + observation and one base observation in each pair, the result is instead match groups + with one target observation and up to 5 base observations. If {cmd:maxmatch()} is omitted + any number of base observations may match with each target observation. + {title:Acknowledgements} {phang}I would like to acknowledge the help in testing and proofreading I received in relation to this command and help file from (in alphabetic order):{p_end} diff --git a/src/help_files/ietoolkit.sthlp b/src/help_files/ietoolkit.sthlp new file mode 100644 index 00000000..8706b214 --- /dev/null +++ b/src/help_files/ietoolkit.sthlp @@ -0,0 +1,73 @@ +{smcl} +{* 26 Apr 2018}{...} +{hline} +help for {hi:ietoolkit} +{hline} + +{title:Title} + +{phang}{cmdab:ietoolkit} {hline 2} Returns information on the version of ietoolkit installed + +{phang}For a more descriptive discussion on the intended usage and work flow of this +command please see the {browse "https://dimewiki.worldbank.org/wiki/Ietoolkit":DIME Wiki}. + +{title:Syntax} + +{phang} +{cmdab:ietoolkit} + +{pstd}Note that this command takes no arguments at all.{p_end} + +{marker desc} +{title:Description} + +{pstd}{cmdab:iegraph} This command returns the version of ietoolkit installed. It + can be used in the beginning of a Master Do-file that is intended to be used + by multiple users to programmatically test if ietoolkit is not installed for + the user and therefore need to be installed, or if the version the user has + installed is too old and needs to be upgraded. + +{marker optslong} +{title:Options} + +{phang}This command does not take any options. + +{marker example} +{title:Examples} + +{pstd}The code below is an example code that can be added to the top of any do-file. + the example code first test if the command is installed, and install it if not. 
If it is
+	installed, it tests if the version is less than version 5.0. If it is, it
+	replaces the ietoolkit file with the latest version. In your code you can skip
+	the second part if you are not sure which version is required. But you should
+	always have the first part testing that {inp:r(version)} has a value before using
+	it in less than or greater than expressions.
+
+{inp}		cap ietoolkit
+{inp}		if "`r(version)'" == "" {
+{inp}			*ietoolkit not installed, install it
+{inp}			ssc install ietoolkit
+{inp}		}
+{inp}		else if `r(version)' < 5.0 {
+{inp}			*ietoolkit version too old, install the latest version
+{inp}			ssc install ietoolkit , replace
+{inp}		}{text}
+
+{title:Acknowledgements}
+
+{phang}We would like to acknowledge the help in testing and proofreading we received
+	in relation to this command and help file from (in alphabetic order):{p_end}
+{pmore}Luiza Cardoso De Andrade{break}Seungmin Lee{break}
+
+{title:Authors}
+
+{phang}Kristoffer Bjarkefur, The World Bank, DECIE
+
+{phang}Please send bug-reports, suggestions and requests for clarifications,
+	writing "ietoolkit ietoolkit" in the subject line to the email address
+	found {browse "https://github.com/worldbank/ietoolkit":here}
+
+{phang}You can also see the code, make comments to the code, see the version
+	history of the code, and submit additions or edits to the code through
+	the github repository of ietoolkit:{break}
+	{browse "https://github.com/worldbank/ietoolkit"}