From 3dd86e8971c459c2b1fba1fc4bbdad370aa2dcae Mon Sep 17 00:00:00 2001 From: Joseph Luchman Date: Tue, 7 Feb 2023 19:10:50 -0600 Subject: [PATCH] Version 1.1.0 Update --- domme.ado | 82 ++++----- domme.sthlp | 470 ++++++++++++++++++++++++++++++++-------------------- 2 files changed, 330 insertions(+), 222 deletions(-) diff --git a/domme.ado b/domme.ado index 77c94e0..4138da7 100644 --- a/domme.ado +++ b/domme.ado @@ -1,10 +1,10 @@ -*! domme version 1.1.0 xx/xx/202x Joseph N. Luchman +*! domme version 1.1.0 2/7/2023 Joseph N. Luchman -program define domme, eclass // ~ history and version information at end of file ~ +program define domme, eclass // ~ history and version information at end of file ~ version 15.1 - if replay() & !strlen("`0'") { //replay results - error if "by"; domme allows nothing in "anything" - allow it to replay only when there is nothing in options + if replay() & !strlen("`0'") { //replay results - error if "by"; domme allows nothing in "anything" - allow it to replay only when there is nothing in options if ("`e(cmd)'" != "domme") error 301 @@ -18,16 +18,16 @@ program define domme, eclass // ~ history and version information at end of file /*define syntax*/ syntax [anything(id="equation names" equalok)] [in] [if] /// - [aw pw iw fw], reg(string) Fitstat(string) [Sets(string) /// + [aw pw iw fw], Reg(string) Fitstat(string) [Sets(string) /// noCOMplete noCONditional /// REVerse all(string) EXTraconstr(numlist) /// - ROPts(string) ADDConstr(numlist)] //addconstr() and extraconstr() undocumented - for use in possible extensions + ROPts(string) ADDConstr(numlist)] //addconstr() and extraconstr() undocumented - for use in possible extensions /*exit conditions*/ - capture which lb_dominance.mlib //is -domin- present? + capture which lb_dominance.mlib //is -domin- present? - if _rc { //if -domin- cannot be found, tell user to install it. + if _rc { //if -domin- cannot be found, tell user to install it. display "{err}Module {cmd:domin} not found. 
Install " /// "{cmd:domin} here {stata ssc install domin}." @@ -37,56 +37,56 @@ program define domme, eclass // ~ history and version information at end of file } /*general set up*/ - mata: model_specs = domme_specs() //initiate instance of domme_specs() structure + mata: model_specs = domme_specs() //initiate instance of domme_specs() structure - tempname ranks gendom stzd_gendom cdldom cptdom //temporary matrices for results + tempname ranks gendom stzd_gendom cdldom cptdom //temporary matrices for results - local two "`anything'" //rename the input equation-to-independent variable mapping in "anything": done to generalize and simplify loop below + local two "`anything'" //rename the input equation-to-independent variable mapping in "anything": done to generalize and simplify loop below - local drop_exit = 0 //local macro indicating that -domme- should drop constraints made so they do not persist due to error/when something goes wrong before the end of command's successful execution + local drop_exit = 0 //local macro indicating that -domme- should drop constraints made so they do not persist due to error/when something goes wrong before the end of command's successful execution //**process "anything" and obtain individual constraints**// - while strlen("`two'") { //process the equation-to-independent variable mapping in "anything" if something is present... + while strlen("`two'") { //process the equation-to-independent variable mapping in "anything" if something is present... 
- gettoken one two: two, bind //parse the equation-to-independent variable mapping to bind all parenthetical statements together and pull out first parenthetical statement/equation + gettoken one two: two, bind //parse the equation-to-independent variable mapping to bind all parenthetical statements together and pull out first parenthetical statement/equation - if !regexm("`one'", "=") { //exit if there is no equal sign to make an equation + if !regexm("`one'", "=") { //exit if there is no equal sign to make an equation display "{err}Equation {cmd:`one'} is missing a {cmd:=} to " /// "distinguish equation and independent variable names." - local drop_exit = 1 //indicate that constraints will be dropped + local drop_exit = 1 //indicate that constraints will be dropped - continue, break //stop the -while- loop... + continue, break //stop the -while- loop... } - local one = regexr("`one'", "[/(]", "") //remove left paren from equation statement + local one = regexr("`one'", "[/(]", "") //remove left paren from equation statement - local one = regexr("`one'", "[/)]", "") //remove right paren from equation statement + local one = regexr("`one'", "[/)]", "") //remove right paren from equation statement - gettoken dv ivlist: one, parse("=") //further parse the focal equation to separate out dependent from independent variables + gettoken dv ivlist: one, parse("=") //further parse the focal equation to separate out dependent from independent variables - if ( `: list sizeof dv' != 1 ) | regexm("`dv'", "=") { //multiple dependent variables/equations or no dependent variable where one should be... exit + if ( `: list sizeof dv' != 1 ) | regexm("`dv'", "=") { //multiple dependent variables/equations or no dependent variable where one should be... exit display "{err}Invalid equation name specified for {cmd:(`dv'`ivlist')}." 
- local drop_exit = 1 //indicate that constraints will be dropped + local drop_exit = 1 //indicate that constraints will be dropped - continue, break //stop the -while- loop... + continue, break //stop the -while- loop... } - local ivlist = regexr("`ivlist'", "=", "") //remove the equal sign from the independent variable list + local ivlist = regexr("`ivlist'", "=", "") //remove the equal sign from the independent variable list - if ( `: list sizeof ivlist' == 0 ) { //empty independent variable list... exit as an empty list is an error + if ( `: list sizeof ivlist' == 0 ) { //empty independent variable list... exit as an empty list is an error display "{err}Empty set of independent variables specified for " /// "equation {cmd:`dv'}." - local drop_exit = 1 //indicate that constraints will be dropped + local drop_exit = 1 //indicate that constraints will be dropped - continue, break //stop the -while- loop... + continue, break //stop the -while- loop... } @@ -425,7 +425,7 @@ program define domme, eclass // ~ history and version information at end of file scalar `allfs' = 0 //defining fitstat of "all subsets" parameters as 0 - needed for dominance() function if `:list sizeof all' { //if there is something in the "all" option - + local 2 "`all'" //rename the content of "all" to generalize and simplify loop below while strlen("`2'") & !`drop_exit' { //process the equation-to-independent variable mapping for "all"... 
@@ -545,16 +545,16 @@ program define domme, eclass // ~ history and version information at end of file if `built_in' { - compute_fitstat `fit_opts' iscons //if a built-in fistat desired, estimate it; note constant model + compute_fitstat `fit_opts' iscons //if a built-in fistat desired, estimate it; note constant model scalar `consfs' = `fitstat' } if !`built_in' & !missing(`fitstat') /// - scalar `consfs' = `fitstat' //return constant model's fitstat if user supplied and not missing + scalar `consfs' = `fitstat' //return constant model's fitstat if user supplied and not missing - if !`built_in' & missing(`fitstat') scalar `consfs' = 0 //otherwise assume the constant-only model is 0 + if !`built_in' & missing(`fitstat') scalar `consfs' = 0 //otherwise assume the constant-only model is 0 if strlen("`all'") { //distinguishes "all subsets" from "constant" fitstats/models @@ -584,12 +584,12 @@ program define domme, eclass // ~ history and version information at end of file st_local("conditional'"), st_local("complete"), /// st_local("ivs"), /// st_numscalar(st_local("allfs")), /// - st_numscalar(st_local("consfs"))*strtoreal(st_local("cons_no_add"))) //invoke "dominance()" function in Mata + st_numscalar(st_local("consfs"))*strtoreal(st_local("cons_no_add"))) //invoke "dominance()" function in Mata /*translate r-class results from me_dominance() into temp results*/ - matrix `gendom' = r(domwgts) //general dominance statistics + matrix `gendom' = r(domwgts) //general dominance statistics - matrix `stzd_gendom' = r(sdomwgts) //standardized general dominance statistics + matrix `stzd_gendom' = r(sdomwgts) //standardized general dominance statistics matrix `ranks' = r(ranks) //ranks based on general dominance statistics @@ -625,12 +625,14 @@ program define domme, eclass // ~ history and version information at end of file st_matrix("`cptdom'", st_matrix("`cptdom'"):*-1 ) //reverse the sign of the complete dominance designations matrix colnames `cptdom' = `cptivs' //name the 
columns of the complete dominance designations - - matrix coleq `cptdom' = dominated? //name the equation for the columns "dominated?" - + + mata: st_matrixcolstripe("`cptdom'", /// + ("?":+st_matrixrowstripe("`cptdom'")[,1], st_matrixrowstripe("`cptdom'")[,2])) //add name the equation for the rows ">?" replacement for "dominates?" } @@ -733,7 +735,7 @@ program define domme, eclass // ~ history and version information at end of file foreach constr of numlist `nobrkt_constrs' { //go through each constraint that was made... - constraint drop `constr' //drop the constraint + constraint drop `constr' //drop the constraint } @@ -1054,8 +1056,10 @@ end // 1.0.1 - April 17, 2021 (initiating new versioning: #major.#minor.#patch) -update to documentation for SJ article -bug fix on constraint dropping with all() option and use with xi: - // 1.1.0 - xx/xx/xxxx + // 1.1.0 - February 7, 2023 -leverage dominance() function in -domin-; create own function to function passing; Mata struct to handle input specs - -domin- now a dependency. - use an AssociativeArray to map parameters/parameter sets to constraints in a way that is conformable with -domin- + - extensive documentation update + - fixed complete dominance table display; retains equation name and adds informative prefix */ diff --git a/domme.sthlp b/domme.sthlp index b9a6b63..449d95c 100644 --- a/domme.sthlp +++ b/domme.sthlp @@ -1,207 +1,275 @@ {smcl} -{* *! version 1.0.1 April 17, 2021 J. N. Luchman}{...} +{* *! version 1.1.0 February 7, 2023 J. N. Luchman}{...} {cmd:help domme} -{hline}{...} {title:Title} {pstd} -{ul on}Dom{ul off}inance analysis for {ul on}m{ul off}ulitple {ul on}e{ul off}quation models{p_end} +{bf:domme} {hline 2} {ul on}Dom{ul off}inance analysis for {ul on}m{ul off}ultiple {ul on}e{ul off}quation models{p_end} + {title:Syntax} -{phang} +{p 8 16 2} {cmd:domme} [{cmd:(}{it:eqname1 = parmnamelist1}{cmd:)} {cmd:(}{it:eqname2 = parmnamelist2}{cmd:)} ... 
{cmd:(}{it:eqnameN = parmnamelistN}{cmd:)}] -{ifin} {weight}{cmd:,} {opt r:eg(full_estcmd)} -{opt f:itstat(returned_scalar | built_in_options)} -[{it:options}] +{ifin} {weight}{cmd:,} [{it:options}] +{synoptset 35 tabbed}{...} +{synopthdr} {synoptline} -{phang}{cmd:pweight}s, {cmd:aweight}s, {cmd:iweight}s, and {cmd:fweight}s are allowed but must -be able to be used by the command in {opt reg()}, see help {help weights:weights}. -{help Time series operators} are also allowed for commands in {opt reg()} that accept them. -Finally, {help Factor variables} are also allowed, but like weights and time series operators, -must be accepted by the command in {opt reg()}. - -{phang}{cmd:domme} requires installation of Ben Jann's {cmd:moremata} package -(install here: {stata ssc install moremata}). Users are strongly encouraged to install -{stata ssc install domin:domin} as well and read over its help file for basic information on -dominance analysis. - -{title:Development Webpage} +{syntab:Model} +{synopt :{opt r:eg(full_estimation_command)}}predictive model command to call{p_end} +{synopt :{opt rop:ts(command_options)}}options to command in {opt reg()}{p_end} +{synopt :{opt f:itstat(fitstat_opts)}}fit statistic returned by {opt reg()} or computed using built-in method{p_end} +{synopt :{opt s:ets([PEset_1] ... [PEset_x])}}sets of independent variables{p_end} +{synopt :{opt a:ll(PEall)}}independent variables included in all subsets{p_end} + +{syntab:Reporting} +{synopt :{opt nocon:ditional}}suppresses computation of conditional dominance statistics{p_end} +{synopt :{opt nocom:plete}}suppresses computation of complete dominance designations{p_end} +{synopt :{opt rev:erse}}reverses interpretation for statistics that decrease with better fit{p_end} +{synoptline} +{p 4 6 2} +Command in {opt reg()} must accept {help constraint}s as a command option. 
{p_end} -{phang} Additional discussion of results, options, and conceptual issues on: +{p 4 6 2} +{cmd:aweight}s, {cmd:fweight}s, {cmd:iweight}s, and {cmd:pweight}s are +allowed; see {help weight}. Weight use is restricted to commands in {opt reg()} +that accept them.{p_end} -{phang}{browse "http://github.com/jluchman/domme/blob/master/README.md"} +{p 4 6 2} +Note that {cmd:domme} requires at least two parameter estimates or +sets of parameter estimates (see option {opt sets()} below). +Because it is possible to submit only sets of parameter estimates, the +initial parameter estimates specification statement is optional. {p_end} -{phang} Please report bugs, requests for features, and contribute to as well as follow on-going development of {cmd:domin} on: +{p 4 6 2} +{cmd:domme} requires installation of the {cmd:domin} package +(install {stata ssc install domin:here}). {p_end} -{phang}{browse "http://github.com/jluchman/domme"} {title:Table of Contents} -{help domme##desc: 1. Description} -{help domme##setup: 2. Set-up} -{help domme##opts: 3. Options} -{help domme##remark: 4. Final Remarks} -{help domme##examp: 5. Examples} -{help domme##sav: 6. Saved Results} -{help domme##refs: 7. References} +{space 4}{help domme##desc: 1. Description} +{space 4}{help domme##setup: 2. Set-up} +{space 4}{help domme##opts: 3. Options} +{space 4}{help domme##sav: 4. Saved Results} +{space 4}{help domme##examp: 5. Examples} +{space 4}{help domme##remark: 6. Final Remarks} +{space 4}{help domme##refs: 7. References} + {marker desc}{...} {title:1. Description} {pstd} -Dominance analysis for mulitple equation models is an extention of standard dominance analysis -(see {help domin}) which focuses on finding the relative importance -of parameter estimates in an estimation model based on contribution of each parameter -estimate to an overall model fit statistic (see Luchman, 2019 for a discussion). 
-As an extension of standard dominance analysis, it is recommended that the user -familiarize themselves with standard dominance analysis before attempting to use the -multiple equation version of the methodology. +Dominance analysis (DA) is a methodology for determining the relative +importance of independent variables or parameter estimates in a predictive model. +The {cmd: domme} approach extends on {cmd: domin} by being able to accommodate more +predictive models such as those with multiple equations/dependent variables +(see Luchman, Lei, and Kaplan, 2020 for a discussion). As an extension of single +equation dominance analysis, it is recommended that the user familiarize +themselves with single equation dominance analysis (i.e., see {help domin:domin}) +before attempting to use the multiple equation version of the methodology. {pstd} -Dominance analysis for mulitple equation models differs from standard dominance analysis -primarily in how the ensemble of fit metrics are collected. Standard dominance analysis -obtains the ensemble of fit metrics to compute dominance statistics by including or -excluding independent variables from a statistical model. Dominance analysis for mulitple -equation models obtains the ensemble of fit metrics to compute dominance statistics by -using {help constraint}s which permit each parameter estimate to be estimated from the -data or constrained to zero in a given statistical model. Constraining a parameter estimate to -zero effectively omits the parameter from estimation and it cannot contribute to model fit. +This multiple equation DA implementation differs from the implementation of +the single equation version of DA in how it includes or excludes parameter +estimates from the model. Multiple equation DA uses {help constraint}s to exclude +parameter estimates from the model by constraining their values to be 0. When constrained +to be 0, the parameter estimate cannot affect prediction and, thus, is effectively +excluded. 
This is why {cmd: domme} can only be used with commands that accept +constraints. + {marker setup}{...} {title:2. Set-up} {pstd} -{cmd:domme} requires that all parameters to be dominance analyzed are written out in the -initial {res:(eqname = parmnamelist)} statements. {cmd:domme} will use the {res:(eqname = parmnamelist)} -statements (similar to those of commands like {help sureg}) to create parameter statements that -it will produce {help constraint:constraints} from. Each entry in {res:parmnamelist} is given a -separate constraint with the associated {res:eqname}. For example, the statement: +This implementation of multiple equation DA must be provided with the components +of the parameter estimates from which it will create "parameter equals 0" +constraints. How these constraints are constructed follows from the way in which +Stata names each parameter estimate. Consider, for example, the following logistic +regression model (estimated on the {cmd: sysuse auto} data). {pstd} -{res:(price = mpg turn trunk foreign)} +{cmd:logit foreign price mpg turn trunk} {pstd} -will create the series of four parameters: +Following the estimation of this model, the user can ask for the names of all +the parameters in the {cmd: e(b)} or coefficient matrix by using: + +{pstd} +{cmd: display "`: colfullnames e(b)'"} + +{pstd} +which produces + +{pstd} +{res:foreign:price foreign:mpg foreign:turn foreign:trunk foreign:_cons} + +{pstd} +This series of names are the parameter names for all the coefficients in the +logit model. In order to use DA on this model, the user needs +to supply {cmd: domme} with the names of all the parameters that will be used. +One way to supply these parameter names is to use the initial +{res:(eqname = parmnamelist)} statements. 
For instance: + +{pstd} +{cmd: domme (foreign = price mpg turn trunk), ...} + +{pstd} +implies four constraints: + +{phang} +{res: _b[foreign:price] = 0} + +{phang} +{res: _b[foreign:mpg] = 0} + +{phang} +{res: _b[foreign:turn] = 0} + +{phang} +{res: _b[foreign:trunk] = 0} {pstd} -{res:_b[price:mpg] _b[price:turn] _b[price:trunk] _b[price:foreign]} +{cmd: domme} uses these constraints to "remove" parameters by constraining their +value to 0. This mimics {cmd: domin}'s method where the parameter's name is +removed from the {cmd: indepvars} list directly. {pstd} -such parameters would be produced by a model like {cmd:glm price mpg turn trunk foreign} +The way parameter constraints are produced with the {opt all()} and {opt sets()} +options is identical to that of the initial statements to {cmd: domme}. -{pstd}Note that the current version of {cmd:domme} does not check to ensure that the -parameters supplied it are in the model and it is the user's responsibility to ensure that the lists -supplied are valid parameters in the estimated model. +... brief discussion of why _cons not included? ... {marker opts}{...} {title:3. Options} -{phang}{opt reg(full_estimation_command)} refers {cmd:domme} to a command that accepts {help constraint}s, -uses {help ml} to estimate parameters, and that can produce the scalar in the {opt fitstat()} option. -{cmd:domme} is quite flexible and can be applied to any built-in or user-written -{help program}. - -{pmore}The {it:full_estcmd} is the full estimation command, not including options following the -comma, as would be submitted to Stata. The {opt reg()} option has no default and the user is -required to provide a valid statistical model. - -{phang}{opt f:itstat(returned_scalar | built_in_options)} refers {cmd:domme} to ascalar valued -model fit summary statistic used to compute all dominance statistics. 
The scalar in -{opt fitstat()} can be any {help return:returned}, {help ereturn:ereturned}, or other -{help scalar:scalar} produced by the estimation command in {opt reg()}. - -{pmore}In addition to fit statistics produced by the estimation command in {opt reg()}, {cmd:domme} -also allows several built-in model fit statistics to be computed using the model log-likelihood and -degrees of freedom. Four fit statistics are available using the built-in options for {cmd:domme}. -These options are the McFadden pseudo-R squared ({res:mcf}), the Estrella pseudo-R squared -({res:est}), the Akaike information criterion ({res:aic}), and the Bayesian information criterion -({res:bic}). - -{pmore}To instruct {cmd:domme} to compute a built-in fit statistic, supply the {opt fitstat()} option -with an empty ereturned statistic indicator (i.e., {res:e()}) and provide the three character code -for the desired fit statistic. For example, to ask {cmd:domme} to compute McFadden's pseuod-R -square as a fit statistic, type {res:fitstat(e(), mcf)}. Note that {cmd:domme} has no default -and the user is required to provide a valid fit statistic. - -{phang}{opt sets([(eqname1_set1 = parmnamelist1_set1) ... (eqnameR_set1 = parmnamelistR_set1)] ... [ ... (eqnameR_setN = parmnamelistR_setN)])} -binds together parameter estimates as a set in the all possible combinations -ensemble. Hence, all parameter estimates in a set will always appear together and are considered a -single parameter estimate in the all possible combinations ensemble. - -{pmore}{opt sets()} are generated in a way similar to that of the initial statements to {cmd:domme} -in that a series of {res:(eqname = parmnamelist)} statements must be provided and then bound together -to produce a set. Any set of {res:(eqname = paramlist)} in a single set must be bound by brackets -"{res:[]}". For example, consider again the model {cmd:glm price mpg turn trunk foreign}. 
To -produce two sets of parameters, one that includes {it:mpg} and {it:turn} as well as a second that -includes {it:trunk} and {it:foreign}, the {opt sets()} type {res:sets( [(price = mpg turn)]} -{res:[(price = trunk foreign)] )}. - -{pmore}This above {opt sets()} statement is rather simple and refers to single equations within -a model. A single set can include parameters from multiple equations - in fact, doing so is -how independent variable dominance statistics can be computed in {cmd:domme} ... note re: independent variables ... - -{phang}{opt all((eqname1_all = parmnamelist1_all) ... (eqnameR_all = parmnamelistR_all))} -defines a set of parameter estimates to be included in all the combinations in the -ensemble. Thus, all parameter estimates included in the {opt all()} option are effectively used -as of covariates which are to be included in the model fit metric, but for which dominance -statistics will not be computed. Thus, the magnitude of the overall fit statistic associated -with the set of parameter in the {opt all()} option are subtracted from overall fit metric prior -to the computation of dominance statistics for all the parameter estimiates to be dominance -analyzed. ...note this is how to take them out of the constant... - -{pmore}The {opt all()} statements are set up in a way identical to that of the initial statments -in a {res:(eqname = parmnamelist)} format. - -{phang}{opt ropts(command_options)} supplies the command in {opt reg()} with any relevant estimation options. -Any options formally following the comma in standard Stata syntax, besides {opt constraints()}, -can be supplied to the statisical model this way. +{dlgtab:Model} + +{phang}{opt reg(full_estimation_command)} refers {cmd:domme} to a command that accepts +{help constraint}s, uses {help ml} to estimate parameters, and that can produce the +scalar-valued statistic referenced in the {opt fitstat()} option. 
{cmd:domme} can be +applied to any built-in or user-written {help program} that meets these criteria. + +{pmore}{it:full_estimation_command} is the full estimation command, not including +a comma or options following the comma, as would be submitted to Stata. The +{opt reg()} option has no default and the user is required to provide a +{cmd:domme}-compatible statistical model. + +{phang}{opt ropts(command_options)} supplies the command in {opt reg()} with any +relevant estimation options. Any options normally following the comma in standard +Stata syntax can be supplied to the statistical model this way. The only exception +is the use of {opt constraints()}; {cmd:domme} cannot, at present, accept +constraints other than those it creates. + +{phang}{opt f:itstat(fitstat_opts)} specifies the scalar-valued model fit summary statistic +used in the dominance analysis. There are two ways {cmd:domme} points to fit +statistics. + +{pmore}The first method is identical to {cmd:domin}'s approach. {cmd:domme} +accepts any {help return:returned}, {help ereturn:ereturned}, or other +{help scalar:scalar} produced by the estimation command in {opt reg()}. Note +that some Stata commands change their list of ereturn-ed results when +constraints are applied (e.g., {cmd:logit}, {cmd:poisson}). Ensure that the +command used produces the desired scalar with constraints. + +{pmore}The second method accommodates Stata commands' tendency to not return +pseudo-R-square values with constraints and expands which commands can get a +fit statistic using a built-in fit statistic computation. When {opt fitstat()} +is asked for an empty ereturned statistic indicator (i.e., {res:e()}) you must provide +the three character code as an option to the {opt fitstat()}. Four fit statistic +options are available. 
These options are the McFadden pseudo-R squared ({res:mcf}), +the Estrella pseudo-R squared ({res:est}), the Akaike information criterion ({res:aic}), +and the Bayesian information criterion ({res:bic}). For example, to ask {cmd:domme} to +compute McFadden's pseudo-R square as a fit statistic, type {res:fitstat(e(), mcf)} +(See Example #1). + +{pmore}Note that {cmd:domme} has no default fit statistic and the user is +required to provide a fit statistic option. In addition, the built-in options +assume the command in {opt reg()} ereturn specific scalars. {res:mcf} only +requires {res: e(ll)}. {res:est} requires both {res: e(ll)} and {res: e(N)}. +{res:aic} requires {res: e(ll)} and {res: e(parm)}. +Finally, {res: bic} requires {res: e(ll)}, {res: e(parm)}, and {res: e(N)}. + +{phang}{opt sets([PEset_1] ... [PEset_x])} binds together parameter estimate +constraints as a set that are always constrained jointly and act as a +single parameter estimate. + +{pmore}Each {it:PEset} is put together in the same way as the initial statements in +that they are constructed from a series of {res:(eqname = parmnamelist)} statements. +All {it:PEset}s must be bound by brackets "{res:[]}". For example, consider again +the model {cmd:logit foreign price mpg turn trunk}. To produce two sets of parameters, +one that includes {it:price} and {it:mpg} as well as a second that includes {it:turn} +and {it:trunk}, the {opt sets()} type {res:sets( [(foreign = price mpg)]} +{res:[(foreign = turn trunk)] )}. + +{pmore}Note that a single set can include parameters from multiple equations +(see Example #6). + +{phang}{opt all(PEall)} defines a set of parameter estimate constraints that +are allowed to explain the fit metric with a higher priority than the parameter +estimates in the initial statements or the {opt sets()} option (see Example #3). +In effect, the parameter estimates defined in the {opt all()} option are used +like covariates. 
+ +{pmore}The {it:PEall} statement is set up in a way similar to the {it:PEset}s +in a {res:(eqname = parmnamelist)} format and can accept parameters from multiple +equations. + +{dlgtab:Reporting} {phang}{opt noconditional} suppresses the computation and display of of the conditional dominance -statistics. Suppressing the computation of the conditional dominance statistics can save -computation time when conditional dominance statistics are not desired. Suppressing the -computation of conditional dominance statistics also suppresses the -"strongest dominance designations" list. +statistics which can save computation time when conditional dominance statistics +are not desired. Suppressing the computation of conditional dominance statistics +also suppresses the "strongest dominance designations" list. -{phang}{opt nocomplete} suppresses the computation of the complete dominance designations. -Suppressing the computation of the complete dominancedesignations can save computation time -when complete dominance designations are not desired. Suppressing the computation of -complete dominance designations also suppresses the "strongest dominance designations" list. +{phang}{opt nocomplete} suppresses the computation of the complete dominance designations +which can save computation time when complete dominance designations are not desired. +Suppressing the computation of complete dominance designations also suppresses +the "strongest dominance designations" list. {phang}{opt reverse} reverses the interpretation of all dominance statistics in the -{cmd:e(ranking)} vector, {cmd:e(cptdom)} matrix, fixes the computation of the -{cmd:e(std)} vector, and the "strongest dominance designations" list. -{cmd:domin} assumes by default that higher values on overall fit statistics constitute -better fit, as dominance analysis has historically been based on the explained-variance R2 metric. 
-However, dominance analysis can be applied to any model fit statistic -(see Azen, Budescu, & Reiser, 2001 for other examples). {opt reverse} is then useful -for the interpetation of dominance statistics based on overall model fit statistics -that decrease with better fit (e.g., the built in AIC, BIC statistics). +{cmd:e(ranking)} vector, {cmd:e(cptdom)} matrix, and corrects the computation of the +{cmd:e(std)} vector as well as the "strongest dominance designations" list. +{cmd:domme} assumes by default that higher values on overall fit statistics constitute +better fit. {opt reverse} is useful for the interpretation of dominance statistics +based on overall model fit statistics that decrease with better fit (e.g., AIC, BIC). + +{marker sav}{...} +{title:4. Saved Results} + +{phang}{cmd:domme} saves the following results to {cmd: e()}: + +{synoptset 16 tabbed}{...} +{p2col 5 15 19 2: scalars}{p_end} +{synopt:{cmd:e(N)}}number of observations{p_end} +{synopt:{cmd:e(fitstat_o)}}overall fit statistic value{p_end} +{synopt:{cmd:e(fitstat_a)}}fit statistic value associated with variables in {opt all()}{p_end} +{synopt:{cmd:e(fitstat_c)}}fit statistic value computed by default when the constant model is non-zero{p_end} +{p2col 5 15 19 2: macros}{p_end} +{synopt:{cmd:e(cmdline)}}command as typed{p_end} +{synopt:{cmd:e(title)}}{cmd:Dominance analysis for multiple equations}{p_end} +{synopt:{cmd:e(cmd)}}{cmd:domme}{p_end} +{synopt:{cmd:e(fitstat)}}contents of the {opt fitstat()} option{p_end} +{synopt:{cmd:e(reg)}}contents of the {opt reg()} option{p_end} +{synopt:{cmd:e(ropts)}}contents of the {opt ropts()} option{p_end} +{synopt:{cmd:e(properties)}}{cmd:b}{p_end} +{synopt:{cmd:e(set{it:#})}}parameters included in {opt set(#)}{p_end} +{synopt:{cmd:e(all)}}parameters included in {opt all()}{p_end} +{p2col 5 15 19 2: matrices}{p_end} +{synopt:{cmd:e(b)}}general dominance statistics vector{p_end} +{synopt:{cmd:e(std)}}general dominance standardized statistics vector{p_end} 
+{synopt:{cmd:e(ranking)}}rank ordering based on general dominance statistics vector{p_end} +{synopt:{cmd:e(cdldom)}}conditional dominance statistics matrix{p_end} +{synopt:{cmd:e(cptdom)}}complete dominance designation matrix{p_end} +{p2col 5 15 19 2: functions}{p_end} +{synopt:{cmd:e(sample)}}marks estimation sample{p_end} -{marker remark}{...} -{title:4. Final Remarks} - -{pstd}Any parameter estimates in the model but not in either the initial syntax for -to dominance analyze, the {opt sets()} option, or the {opt all()} option are assumed to act as -a part of the model constant and are in some cases ignored in computing the model fit statistic. -Thus, any parameter estimate not included in some modeling statement will be treated like a -regression constant in most regression models; that is, as a baseline against which the -full model is compared in terms of the log likelihood. "Constant" parameters are omitted entirely -from fit statistic computations for the built in {res:mcf} and {res:est} options but are -reported as a part of the constant model fit statistic for the {res:aic} and {res:bic} -options in {opt fitstat()}. - -{pstd}When not using the built-in options, it is the responsibility of the user to supply -{cmd:domme} with an overall fit statistic that can be validly dominance analyzed. Non-R2 -overall fit statistics can be used however {cmd:domme} assumes that the fit statistic supplied -{it:acts} like an R2 statistic. Thus, {cmd:domin} assumes that better model fit is associated -with increases to the fit statistic and all marginal contributions can be obtained by subtraction. -For model fit statistics that decrease with better fit (i.e., AIC, BIC, deviance), the -interpretation of the dominance relationships need to be reversed (see Example #2). {marker examp}{...} {title:5. 
Examples} @@ -229,42 +297,54 @@ interpretation of the dominance relationships need to be reversed (see Example # {phang}Example 5: Generalized structural equation model with factor variables{p_end} {phang} {stata sysuse nlsw88, clear} {p_end} {phang} {stata gsem (wage <- union hours, regress) (south <- age ib1.race union, logit)} {p_end} -{phang} {stata domme (wage = union hours) (south = age union 2.race 3.race), reg(gsem (wage <- union hours, regress) (south <- age ib1.race union, logit)) fitstat(e(), mcf)}{p_end} +{phang} {stata domme (wage = union hours) (south = age union), reg(gsem (wage <- union hours, regress) (south <- age ib1.race union, logit)) fitstat(e(), mcf) sets([(south = 2.race 3.race)])}{p_end} {phang}Example 6: Generalized structural equation model with sets to evaluate independent variables{p_end} {phang} {stata gsem (south union <- wage tenure ttl_exp, logit)} {p_end} {phang} {stata domme, reg(gsem ( south smsa union <- wage tenure ttl_exp, logit)) fitstat(e(), mcf) sets( [(south = wage) (union = wage)] [(south = tenure) (union = tenure)] [(south = ttl_exp) (union = ttl_exp)]) } {p_end} -{marker sav}{...} -{title:6. 
Saved Results} +{phang}Examples 7: Replicating results from {cmd:domin}{p_end} +{pmore}7a: Logit model with factor variable{p_end} +{pmore} {stata sysuse auto, clear} {p_end} +{pmore} {stata domin foreign price mpg turn trunk, reg(logit) fitstat(e(r2_p)) sets((i.rep78))} {p_end} +{pmore} {stata domme (foreign = price mpg turn trunk), reg(logit foreign price mpg turn trunk ib1.rep78) fitstat(e(), mcf) sets([(foreign = 3.rep78 4.rep78)])} {p_end} -{phang}{cmd:domme} saves the following results to {cmd: e()}: +{pmore}7b: Ordered logit model with covariate{p_end} +{pmore} {stata domin rep78 trunk weight length, reg(ologit) fitstat(e(r2_p)) all(turn)} {p_end} +{pmore} {stata domme (rep78 = trunk weight length), reg(ologit rep78 trunk weight length turn) fitstat(e(), mcf) all((rep78 = turn))} {p_end} + +{pmore}7c: Poisson regression with BIC fitstat and constant-only comparison using reverse{p_end} +{pmore} {stata domin price mpg rep78 headroom, reg(fitdom, fitstat_fd(r(S)[1,6]) reg_fd(poisson) postestimation(estat ic)) fitstat(e(fitstat)) consmodel reverse} {p_end} +{pmore} {stata domme (price = mpg rep78 headroom), reg(poisson price mpg rep78 headroom) fitstat(e(), bic) reverse} {p_end} + +{marker remark}{...} +{title:6. Final Remarks} + +{pstd}See {stata help domin:domin's help file} for an extensive discussion of the role of dominance +analysis as a postestimation method and caveats about its use. All these notes +and considerations apply to {cmd:domme} as well. + +{pstd}Any parameter estimates in the model's {opt reg()} specification but not in +the initial statements, the {opt sets()}, or {opt all()} are considered to be a +part of the constant-only model (see Examples #4 and #7c). When using {cmd:domme}'s +built-in fit statistics, parameter estimates in the constant-only model +will be used to compute the baseline model for the {res:mcf} and {res:est} +pseudo-R-squares but will be reported as a part of the constant model for the +{res:aic} and {res:bic}. 
Other fit statistics supplied to {cmd:domme} will, like +{res:aic} and {res:bic}, be reported as a part of the constant model. + +{pstd}Note that {cmd:domme} does not check to ensure that the parameters supplied to +it are in the model and it is the user's responsibility to ensure that the +parameter estimates created in the initial statements, as well as those created by +{opt sets()} and {opt all()}, are valid parameters in the estimated model. +{cmd:domme} also attempts to clean up parameter constraints it creates but +under certain circumstances, when {cmd:domme} fails to execute in a way that it does +not capture, parameter constraints will remain in memory. {cmd:domme} also will never +overwrite existing parameter estimate constraints and, if there are insufficient parameter +constraints in memory, {cmd:domme} will fail with an error noting insufficient free +constraints. Use {help constraint dir} to list all defined constraints in memory. -{synoptset 16 tabbed}{...} -{p2col 5 15 19 2: scalars}{p_end} -{synopt:{cmd:e(N)}}number of observations{p_end} -{synopt:{cmd:e(fitstat_o)}}overall fit statistic value{p_end} -{synopt:{cmd:e(fitstat_a)}}fit statistic value associated with variables in {opt all()}{p_end} -{synopt:{cmd:e(fitstat_c)}}fit statistic value computed by default when the constant model is non-zero{p_end} -{p2col 5 15 19 2: macros}{p_end} -{synopt:{cmd:e(cmdline)}}command as typed{p_end} -{synopt:{cmd:e(title)}}{cmd:Dominance analysis for multiple equations}{p_end} -{synopt:{cmd:e(cmd)}}{cmd:domme}{p_end} -{synopt:{cmd:e(fitstat)}}contents of the {opt fitstat()} option{p_end} -{synopt:{cmd:e(reg)}}contents of the {opt reg()} option{p_end} -{synopt:{cmd:e(ropts)}}contents of the {opt ropts()} option{p_end} -{synopt:{cmd:e(properties)}}{cmd:b}{p_end} -{synopt:{cmd:e(set{it:#})}}parameters included in {opt set(#)}{p_end} -{synopt:{cmd:e(all)}}parameters included in {opt all()}{p_end} -{p2col 5 15 19 2: matrices}{p_end} -{synopt:{cmd:e(b)}}general 
dominance statistics vector{p_end} -{synopt:{cmd:e(std)}}general dominance standardized statistics vector{p_end} -{synopt:{cmd:e(ranking)}}rank ordering based on general dominance statistics vector{p_end} -{synopt:{cmd:e(cdldom)}}conditional dominance statistics matrix{p_end} -{synopt:{cmd:e(cptdom)}}complete dominance designation matrix{p_end} -{p2col 5 15 19 2: functions}{p_end} -{synopt:{cmd:e(sample)}}marks estimation sample{p_end} {marker refs}{...} {title:7. References} @@ -272,10 +352,34 @@ interpretation of the dominance relationships need to be reversed (see Example # {p 4 8 2}Luchman, J. N., Lei, X., and Kaplan, S. A. (2020). Relative importance analysis with multivariate models: Shifting the focus from independent variables to parameter estimates. {it:Journal of Applied Structural Equation Modeling, 4(2)}, 40–59.{p_end} +{title:Development Webpage} + +{phang} Additional discussion of results, options, and conceptual issues is available on: + +{phang}{browse "http://github.com/jluchman/domme/blob/master/README.md"} + +{phang} Please report bugs, request features, and contribute to or follow the ongoing development of {cmd:domme} on: + +{phang}{browse "http://github.com/jluchman/domme"} + +{title:Article} + +Please cite as: + +{p 4 8 2}Luchman, J. N. (2021). Determining relative importance in Stata using dominance analysis: domin and domme. {it:The Stata Journal, 21(2)}, 510–538. https://doi.org/10.1177/1536867X211025837{p_end} + + {title:Author} {p 4}Joseph N. Luchman{p_end} -{p 4}Senior Scientist{p_end} -{p 4}Fors Marsh Group LLC{p_end} +{p 4}Principal Scientist{p_end} +{p 4}Fors Marsh{p_end} {p 4}Arlington, VA{p_end} -{p 4}jluchman@forsmarshgroup.com{p_end} +{p 4}jluchman@forsmarsh.com{p_end} + + +{title:See Also} + +{browse "https://CRAN.R-project.org/package=domir":R package domir}, {browse "https://cran.r-project.org/web/packages/domir/vignettes/domir_basics.html":Detailed description of Dominance Analysis} + +