diff --git a/NEWS.md b/NEWS.md index 54f6249e..480f0573 100644 --- a/NEWS.md +++ b/NEWS.md @@ -9,6 +9,7 @@ NEW FEATURES - make `gofstat()` work with `fitdistcens` objects (giving only AIC and BIC values). - add calculation of the hessian using `optimHess` within `fitdist` when it is not given by `optim`. - compute the asymptotic covariance matrix with MME : Now the theoretical moments `m` should be defined up to an order which equals to twice the maximal order given `order`. +- add a new argument `calcvcov` in order to (dis)able the computation of covariance matrix for any method. - graphics function `*comp()` now return a list of drawn points and/or lines when `plotstyle == "graphics"`. - add a `density` function for `bootdist(cens)` objects. - add DOIs in man pages. diff --git a/R/fitdist.R b/R/fitdist.R index a213ce4b..f4d1efee 100644 --- a/R/fitdist.R +++ b/R/fitdist.R @@ -22,8 +22,8 @@ ### R functions ### -fitdist <- function (data, distr, method = c("mle", "mme", "qme", "mge", "mse"), start=NULL, - fix.arg=NULL, discrete, keepdata = TRUE, keepdata.nb=100, ...) +fitdist <- function (data, distr, method = c("mle", "mme", "qme", "mge", "mse"), start = NULL, + fix.arg = NULL, discrete, keepdata = TRUE, keepdata.nb = 100, calcvcov = TRUE, ...) { #check argument distr if (!is.character(distr)) @@ -49,6 +49,8 @@ fitdist <- function (data, distr, method = c("mle", "mme", "qme", "mge", "mse"), stop("wrong argument 'discrete'.") if(!is.logical(keepdata) || !is.numeric(keepdata.nb) || keepdata.nb < 2) stop("wrong arguments 'keepdata' and 'keepdata.nb'") + if(!is.logical(calcvcov) || length(calcvcov) != 1) + stop("wrong argument 'calcvcov'.") #check argument method if(any(method == "mom")) warning("the name \"mom\" for matching moments is NO MORE used and is replaced by \"mme\"") @@ -104,11 +106,16 @@ fitdist <- function (data, distr, method = c("mle", "mme", "qme", "mge", "mse"), stop("moment matching estimation needs an 'order' argument") mme <- mmedist(data, distname, start=arg_startfix$start.arg, - fix.arg=arg_startfix$fix.arg, checkstartfix=TRUE, ...) + fix.arg=arg_startfix$fix.arg, checkstartfix=TRUE, + calcvcov=calcvcov, ...) varcovar <- mme$vcov - correl <- cov2cor(varcovar) - sd <- sqrt(diag(varcovar)) + if(!is.null(varcovar)) + { + correl <- cov2cor(varcovar) + sd <- sqrt(diag(varcovar)) + }else + correl <- sd <- NULL estimate <- mme$estimate loglik <- mme$loglik @@ -121,29 +128,20 @@ fitdist <- function (data, distr, method = c("mle", "mme", "qme", "mge", "mse"), }else if (method == "mle") { mle <- mledist(data, distname, start=arg_startfix$start.arg, - fix.arg=arg_startfix$fix.arg, checkstartfix=TRUE, ...) + fix.arg=arg_startfix$fix.arg, checkstartfix=TRUE, + calcvcov=calcvcov, ...) if (mle$convergence>0) stop("the function mle failed to estimate the parameters, with the error code ", mle$convergence, "\n") estimate <- mle$estimate - if(!is.null(mle$hessian)){ - #check for NA values and invertible Hessian - if(all(!is.na(mle$hessian)) && qr(mle$hessian)$rank == NCOL(mle$hessian)) - { - varcovar <- solve(mle$hessian) - sd <- sqrt(diag(varcovar)) - correl <- cov2cor(varcovar) - }else - { - varcovar <- NA - sd <- NA - correl <- NA - } - }else{ - varcovar <- NA - sd <- NA - correl <- NA - } + varcovar <- mle$vcov + if(!is.null(varcovar)) + { + correl <- cov2cor(varcovar) + sd <- sqrt(diag(varcovar)) + }else + correl <- sd <- NULL + loglik <- mle$loglik npar <- length(estimate) aic <- -2*loglik+2*npar @@ -157,15 +155,15 @@ fitdist <- function (data, distr, method = c("mle", "mme", "qme", "mge", "mse"), stop("quantile matching estimation needs an 'probs' argument") qme <- qmedist(data, distname, start=arg_startfix$start.arg, - fix.arg=arg_startfix$fix.arg, checkstartfix=TRUE, ...) + fix.arg=arg_startfix$fix.arg, checkstartfix=TRUE, + calcvcov=calcvcov, ...) estimate <- qme$estimate - sd <- NULL loglik <- qme$loglik npar <- length(estimate) aic <- -2*loglik+2*npar bic <- -2*loglik+log(n)*npar - correl <- varcovar <- NULL + sd <- correl <- varcovar <- NULL convergence <- qme$convergence fix.arg <- qme$fix.arg @@ -176,15 +174,15 @@ fitdist <- function (data, distr, method = c("mle", "mme", "qme", "mge", "mse"), warning("maximum GOF estimation has a default 'gof' argument set to 'CvM'") mge <- mgedist(data, distname, start=arg_startfix$start.arg, - fix.arg=arg_startfix$fix.arg, checkstartfix=TRUE, ...) + fix.arg=arg_startfix$fix.arg, checkstartfix=TRUE, + calcvcov=calcvcov, ...) estimate <- mge$estimate - sd <- NULL loglik <- mge$loglik npar <- length(estimate) aic <- -2*loglik+2*npar bic <- -2*loglik+log(n)*npar - correl <- varcovar <- NULL + sd <- correl <- varcovar <- NULL convergence <- mge$convergence fix.arg <- mge$fix.arg @@ -192,22 +190,22 @@ fitdist <- function (data, distr, method = c("mle", "mme", "qme", "mge", "mse"), }else if (method == "mse") { mse <- msedist(data, distname, start=arg_startfix$start.arg, - fix.arg=arg_startfix$fix.arg, checkstartfix=TRUE, ...) + fix.arg=arg_startfix$fix.arg, checkstartfix=TRUE, + calcvcov=calcvcov, ...) estimate <- mse$estimate - sd <- NULL loglik <- mse$loglik npar <- length(estimate) aic <- -2*loglik+2*npar bic <- -2*loglik+log(n)*npar - correl <- varcovar <- NULL + sd <- correl <- varcovar <- NULL convergence <- mse$convergence fix.arg <- mse$fix.arg weights <- mse$weights }else { - stop("match.arg() did not work correctly") + stop("match.arg() for 'method' did not work correctly") } #needed for bootstrap diff --git a/R/mgedist.R b/R/mgedist.R index 00ca0c69..207365c1 100644 --- a/R/mgedist.R +++ b/R/mgedist.R @@ -26,7 +26,7 @@ mgedist <- function (data, distr, gof = "CvM", start=NULL, fix.arg=NULL, optim.method="default", lower=-Inf, upper=Inf, custom.optim=NULL, silent=TRUE, gradient=NULL, - checkstartfix=FALSE, ...) + checkstartfix=FALSE, calcvcov=FALSE, ...) # data may correspond to a vector for non censored data or to # a dataframe of two columns named left and right for censored data { @@ -368,6 +368,5 @@ mgedist <- function (data, distr, gof = "CvM", start=NULL, fix.arg=NULL, optim.m counts=opt$counts, optim.message=opt$message, loglik=loglik(opt$par, fix.arg, data, ddistname), gof=gof) } - return(res) } diff --git a/R/mledist.R b/R/mledist.R index 64463e12..ef8f0f41 100644 --- a/R/mledist.R +++ b/R/mledist.R @@ -26,7 +26,7 @@ mledist <- function (data, distr, start=NULL, fix.arg=NULL, optim.method="default", lower=-Inf, upper=Inf, custom.optim=NULL, weights=NULL, silent=TRUE, gradient=NULL, - checkstartfix=FALSE, ...) + checkstartfix=FALSE, calcvcov=FALSE, ...) # data may correspond to a vector for non censored data or to # a dataframe of two columns named left and right for censored data { @@ -298,7 +298,7 @@ mledist <- function (data, distr, start=NULL, fix.arg=NULL, optim.method="defaul #recompute Hessian if Hessian matrix has NA values if(!inherits(opttryerror, "try-error")) { - if(any(is.na(opt$hessian)) || is.null(opt$hessian)) + if(calcvcov && any(is.na(opt$hessian)) || is.null(opt$hessian)) { opthessian <- try(optimHess(par=vstart, fn=fnobj, fix.arg=fix.arg, obs=data, gr=gradient, ddistnam=ddistname), silent=TRUE) if(!inherits(opthessian, "try-error")) @@ -320,7 +320,7 @@ mledist <- function (data, distr, start=NULL, fix.arg=NULL, optim.method="defaul #recompute Hessian if Hessian matrix has NA values if(!inherits(opttryerror, "try-error")) { - if(any(is.na(opt$hessian)) || is.null(opt$hessian)) + if(calcvcov && any(is.na(opt$hessian)) || is.null(opt$hessian)) { opthessian <- try(optimHess(par=vstart, fn=fnobj, fix.arg=fix.arg, obs=data, gr=gradient, ddistnam=ddistname), silent=TRUE) @@ -346,7 +346,7 @@ mledist <- function (data, distr, start=NULL, fix.arg=NULL, optim.method="defaul #recompute Hessian if Hessian matrix has NA values if(!inherits(opttryerror, "try-error")) { - if(any(is.na(opt$hessian)) || is.null(opt$hessian)) + if(calcvcov && any(is.na(opt$hessian)) || is.null(opt$hessian)) { opthessian <- try(optimHess(par=vstart, fn=fnobj, fix.arg=fix.arg, obs=data, gr=gradient, ddistnam=ddistname), silent=TRUE) @@ -362,8 +362,9 @@ mledist <- function (data, distr, start=NULL, fix.arg=NULL, optim.method="defaul warnings("The function optim encountered an error and stopped.") if(getOption("show.error.messages")) print(attr(opttryerror, "condition")) return(list(estimate = rep(NA, length(vstart)), convergence = 100, loglik = NA, - hessian = NA, optim.function=opt.fun, fix.arg = fix.arg, - optim.method=meth, fix.arg.fun = fix.arg.fun, counts=c(NA, NA))) + hessian = NA, optim.function = opt.fun, fix.arg = fix.arg, + optim.method=meth, fix.arg.fun = fix.arg.fun, counts=c(NA, NA), + vcov = NULL)) } if (opt$convergence>0) { @@ -372,10 +373,20 @@ mledist <- function (data, distr, start=NULL, fix.arg=NULL, optim.method="defaul } if(is.null(names(opt$par))) names(opt$par) <- names(vstart) + if(calcvcov && !is.null(opt$hessian)) + { + #see R/util-mledist-vcov.R + varcovar <- mle.vcov(opt$hessian) + #add names + if(!is.null(varcovar)) + colnames(varcovar) <- rownames(varcovar) <- names(opt$par) + }else + varcovar <- NULL res <- list(estimate = opt$par, convergence = opt$convergence, value=opt$value, hessian = opt$hessian, optim.function=opt.fun, optim.method=meth, fix.arg = fix.arg, fix.arg.fun = fix.arg.fun, weights = weights, - counts=opt$counts, optim.message=opt$message, loglik = -opt$value) + counts=opt$counts, optim.message=opt$message, loglik = -opt$value, + vcov=varcovar) } else # Try to minimize the minus (log-)likelihood using a user-supplied optim function { @@ -384,7 +395,7 @@ mledist <- function (data, distr, start=NULL, fix.arg=NULL, optim.method="defaul opttryerror <- try(opt <- custom.optim(fn=fnobj, fix.arg=fix.arg, obs=data, ddistnam=ddistname, par=vstart, ...), silent=TRUE) else - opttryerror <-try(opt<-custom.optim(fn=fnobjcens, fix.arg=fix.arg, rcens=rcens, + opttryerror <-try(opt <- custom.optim(fn=fnobjcens, fix.arg=fix.arg, rcens=rcens, lcens=lcens, icens=icens, ncens=ncens, ddistnam=ddistname, pdistnam=pdistname, par=vstart, ...), silent=TRUE) options(warn=owarn) @@ -404,12 +415,21 @@ mledist <- function (data, distr, start=NULL, fix.arg=NULL, optim.method="defaul } if(is.null(names(opt$par))) names(opt$par) <- names(vstart) + if(calcvcov && !is.null(opt$hessian)) + { + varcovar <- mle.vcov(opt$hessian) + #add names + if(!is.null(varcovar)) + colnames(varcovar) <- rownames(varcovar) <- names(opt$par) + }else + varcovar <- NULL argdot <- list(...) method.cust <- argdot$method res <- list(estimate = opt$par, convergence = opt$convergence, value=opt$value, hessian = opt$hessian, optim.function = custom.optim, optim.method = method.cust, fix.arg = fix.arg, fix.arg.fun = fix.arg.fun, weights = weights, - counts=opt$counts, optim.message=opt$message, loglik = -opt$value) + counts=opt$counts, optim.message=opt$message, loglik = -opt$value, + vcov=varcovar) } return(res) @@ -418,5 +438,5 @@ mledist <- function (data, distr, start=NULL, fix.arg=NULL, optim.method="defaul ## old function with previous name for censored data mledistcens <- function(censdata, distr, start=NULL, optim.method="default", lower=-Inf, upper=Inf) { - stop("The function \"mledistcens\" is no more used. Now the same function \"mledist\" must be used both for censored and non censored data.") + stop("The function \"mledistcens\" is deprecated. Now the same function \"mledist\" must be used both for censored and non censored data.") } diff --git a/R/mmedist.R b/R/mmedist.R index 8bb9108e..164a8389 100644 --- a/R/mmedist.R +++ b/R/mmedist.R @@ -23,542 +23,566 @@ ### mmedist <- function (data, distr, order, memp, start=NULL, fix.arg=NULL, - optim.method="default", lower=-Inf, upper=Inf, custom.optim=NULL, - weights=NULL, silent=TRUE, gradient=NULL, checkstartfix=FALSE, ...) + optim.method="default", lower=-Inf, upper=Inf, custom.optim=NULL, + weights=NULL, silent=TRUE, gradient=NULL, checkstartfix=FALSE, + calcvcov=FALSE, ...) { - if (!is.character(distr)) - stop("distr must be a character string naming a distribution") + if (!is.character(distr)) + stop("distr must be a character string naming a distribution") + else + distname <- distr + + if (is.element(distname, c("norm", "lnorm", "pois", "exp", "gamma", "nbinom", "geom", + "beta", "unif", "logis"))) + meth <- "closed formula" + else + meth <- optim.method + + mdistname <- paste("m", distname, sep="") + ddistname <- paste("d", distname, sep="") + argddistname <- names(formals(ddistname)) + + if(is.null(custom.optim)) + optim.method <- match.arg(optim.method, c("default", "Nelder-Mead", "BFGS", "CG", "L-BFGS-B", "SANN", "Brent")) + + if(!is.null(weights)) + { + if(any(weights < 0)) + stop("weights should be a vector of integers greater than 0") + if(!isallintw(weights)) + stop("weights should be a vector of (strictly) positive integers") + if(length(weights) != NROW(data)) + stop("weights should be a vector with a length equal to the observation number") + warning("weights are not taken into account in the default initial values") + } + + if(meth != "closed formula") + { + if (!exists(mdistname, mode="function")) + stop(paste0("The moment ", mdistname, " function must be defined.")) + # mdistname contains the good name of the theoretical moment function + } + if (!(is.numeric(data) & length(data)>1)) + stop("data must be a numeric vector of length greater than 1") + checkUncensoredNAInfNan(data) + + if(is.null(weights)) + { + loglik <- function(par, fix.arg, obs, ddistnam) + sum(log(do.call(ddistnam, c(list(obs), as.list(par), as.list(fix.arg)) ) ) ) + }else + { + loglik <- function(par, fix.arg, obs, ddistnam) + sum(weights * log(do.call(ddistnam, c(list(obs), as.list(par), as.list(fix.arg)) ) ) ) + } + + if(meth == "closed formula") + { + n <- length(data) + if(is.null(weights)) + { + m <- mean(data) + v <- (n - 1)/n*var(data) + }else #weighted version from util-wtdstat.R + { + m <- wtdmean(data, weights=weights) + v <- wtdvar(data, weights=weights) + } + + if (!is.null(fix.arg)) + stop("argument fix.arg cannot be used when a closed formula is used.") + # Fitting by matching moments + if (!(is.vector(data) & is.numeric(data) & length(data)>1)) + stop("data must be a numeric vector of length greater than 1") + if (distname == "norm") + { + estimate <- c(mean=m, sd=sqrt(v)) + mnorm <- function(order, mean, sd) + { + if(order == 1) + return(mean) + if(order == 2) + return(sd^2+mean^2) + if(order == 3) + return(3*mean*sd^2+mean^3) + if(order == 4) + return(3*sd^4+6*mean^2*sd^2+mean^4) + } + order <- 1:2 + } + if (distname == "lnorm") { + if (any(data <= 0)) + stop("values must be positive to fit a lognormal distribution") + sd2 <- log(1+v/m^2) + estimate <- c(meanlog=log(m) - sd2/2, sdlog=sqrt(sd2)) + mlnorm <- function(order, meanlog, sdlog) + { + exp(order*meanlog + order^2*sdlog^2/2) + } + order <- 1:2 + } + if (distname == "pois") { + estimate <- c(lambda=m) + mpois <- function(order, lambda) + { + ifelse(order == 1, lambda, lambda+lambda^2) + } + order <- 1 + } + if (distname == "exp") { + estimate <- c(rate=1/m) + mexp <- function(order, rate) + { + ifelse(order == 1, 1/rate, 2/rate^2) + } + order <- 1 + } + if (distname == "gamma" ) { + shape <- m^2/v + rate <- m/v + estimate <- c(shape=shape, rate=rate) + mgamma <- function(order, shape, rate) + { + res <- shape/rate + if(order == 1) + return(res) + res <- res * (shape+1)/rate + if(order == 2) + return(res) + res <- res * (shape+2)/rate + if(order == 3) + return(res) + res <- res * (shape+3)/rate + res + } + order <- 1:2 + } + if (distname == "nbinom" ) { + size <- if (v > m) m^2/(v - m) + else NaN + estimate <- c(size=size, mu=m) + mnbinom <- function(order, size, mu) + { + if(order == 1) + return(mu) + if(order == 2) + return(mu+mu^2/size+mu^2) + prob <- size/(size+mu) + if(order == 3) + { + res <- size*prob^2 + 3*prob*(1-prob)*(size) + (1-prob)^2*(size+size^2) + res <- res * (1-prob)/prob^3 + }else if(order == 4) + { + res <- size*prob^3 + 7*prob^2*(1-prob)*(size) + 6*prob*(1-prob)^2*(size+size^2) + (1-prob)^3*(2*size+3*size^2+size^3) + res <- res * (1-prob)/prob^4 + }else + stop("not implemented") + res + } + order <- 1:2 + } + if (distname == "geom" ) { + prob <- if (m>0) 1/(1+m) + else NaN + estimate <- c(prob=prob) + mcumulgeom <- function(order, prob) #E[N(N-1)..(N-j+1)] = (1-p)^j j! / p^j + { + (1-prob)^order * factorial(order) / prob^order + } + mgeom <- function(order, prob) + { + if(order == 0) + return(1) + m1 <- mcumulgeom(1, prob) + if(order == 1) + return(m1) + m2 <- mcumulgeom(2, prob) + m1 + if(order == 2) + return(m2) + m3 <- mcumulgeom(3, prob) + 3*m2 + 2*m1 + if(order == 3) + return(m3) + m4 <- mcumulgeom(4, prob) + 6*m3 + 11*m2 - 6*m1 + if(order == 4) + return(m4) + else + stop("not yet implemented") + } + order <- 1 + } + if (distname == "beta" ) { + if (any(data < 0) | any(data > 1)) + stop("values must be in [0-1] to fit a beta distribution") + aux <- m*(1-m)/v - 1 + shape1 <- m*aux + shape2 <- (1-m)*aux + estimate <- c(shape1=shape1, shape2=shape2) + mbeta <- function(order, shape1, shape2) + { + stot <- shape1+shape2 + ifelse(order == 1, shape1/stot, + (shape1^3+2*shape1*shape2+shape1^2)/stot^2/(stot+1)) + } + order <- 1:2 + } + if (distname == "unif" ) { + min1 <- m-sqrt(3*v) + max1 <- m+sqrt(3*v) + estimate <- c(min1,max1) + munif <- function(order, min, max) + { + ifelse(order == 1, (min+max)/2, (max^2 + min^2 + max*min)/3) + } + order <- 1:2 + } + if (distname == "logis" ) { + scale <- sqrt(3*v)/pi + estimate <- c(location=m, scale=scale) + mlogis <- function(order, location, scale) + { + ifelse(order == 1, location, scale^2*pi^2/3 + location^2) + } + order <- 1:2 + } + if (exists(ddistname)) + loglikval <- loglik(estimate, fix.arg, data, ddistname) else - distname <- distr + loglikval <- NULL + + #create memp() for closed formula solution + if(meth == "closed formula") + { + if(is.null(weights)) + memp <- function(x, order, weights) mean(x^order) + else + memp <- function(x, order, weights) sum(x^order * weights)/sum(weights) + } + #compute asymptotic covariance matrix proposed by + #I. Ibragimov and R. Has'minskii. Statistical Estimation - Asymptotic Theory. Springer-Verlag, 1981, p368 + #see R/util-mmedist-vcov.R + if(calcvcov) + { + #manually look for the appropriate theoretical raw moment function + if (distname == "norm") + { + varcovar <- mme.vcov(estimate, NULL, order, data, mnorm, memp, weights) + }else if (distname == "lnorm") + { + varcovar <- mme.vcov(estimate, NULL, order, data, mlnorm, memp, weights) + }else if (distname == "pois") + { + varcovar <- mme.vcov(estimate, NULL, order, data, mpois, memp, weights) + }else if (distname == "exp") + { + varcovar <- mme.vcov(estimate, NULL, order, data, mexp, memp, weights) + }else if (distname == "gamma" ) + { + varcovar <- mme.vcov(estimate, NULL, order, data, mgamma, memp, weights) + }else if (distname == "nbinom" ) + { + varcovar <- mme.vcov(estimate, NULL, order, data, mnbinom, memp, weights) + }else if (distname == "geom" ) + { + varcovar <- mme.vcov(estimate, NULL, order, data, mgeom, memp, weights) + }else if (distname == "beta" ) + { + varcovar <- mme.vcov(estimate, NULL, order, data, mbeta, memp, weights) + }else if (distname == "unif" ) + { + varcovar <- mme.vcov(estimate, NULL, order, data, munif, memp, weights) + }else if (distname == "logis" ) + { + varcovar <- mme.vcov(estimate, NULL, order, data, mlogis, memp, weights) + }else + varcovar <- NULL + }else + varcovar <- NULL + #add names + if(!is.null(varcovar)) + colnames(varcovar) <- rownames(varcovar) <- names(estimate) + res <- list(estimate=estimate, convergence=0, value=NULL, hessian=NULL, + optim.function=NULL, opt.meth=NULL, fix.arg=NULL, fix.arg.fun=NULL, + weights=weights, counts=NULL, optim.message=NULL, + loglik= loglikval, method=meth, order=order, memp=NULL, + vcov=varcovar) + }else #an optimimisation has to be done, where fix.arg and start can be a function + { + if(is.vector(start)) #backward compatibility + start <- as.list(start) + + if(!checkstartfix) #pre-check has not been done by fitdist() or bootdist() + { + #cat("checkstartfix is carried out\n") + # manage starting/fixed values: may raise errors or return two named list + arg_startfix <- manageparam(start.arg=start, fix.arg=fix.arg, obs=data, + distname=distname) + + #check inconsistent parameters + hasnodefaultval <- sapply(formals(ddistname), is.name) + arg_startfix <- checkparamlist(arg_startfix$start.arg, arg_startfix$fix.arg, + argddistname, hasnodefaultval) + #arg_startfix contains two names list (no longer NULL nor function) + + #set fix.arg.fun + if(is.function(fix.arg)) + fix.arg.fun <- fix.arg + else + fix.arg.fun <- NULL + }else #pre-check has been done by fitdist() or bootdist() + { + arg_startfix <- list(start.arg=start, fix.arg=fix.arg) + fix.arg.fun <- NULL + } - if (is.element(distname, c("norm", "lnorm", "pois", "exp", "gamma", "nbinom", "geom", - "beta", "unif", "logis"))) - meth <- "closed formula" - else - meth <- optim.method + #unlist starting values as needed in optim() + vstart <- unlist(arg_startfix$start.arg) + #sanity check + if(is.null(vstart)) + stop("Starting values could not be NULL with checkstartfix=TRUE") - mdistname <- paste("m", distname, sep="") - ddistname <- paste("d", distname, sep="") - argddistname <- names(formals(ddistname)) + #erase user value + #(cannot coerce to vector as there might be different modes: numeric, character...) + fix.arg <- arg_startfix$fix.arg + + if(length(vstart) != length(order)) + stop("wrong dimension for the moment order to match") + if(missing(memp)) + stop("the empirical moment function must be defined") + #backward compatibility when memp is the name of the function and not the function itself + if(is.character(memp)) + memp <- get0(memp, envir=pos.to.env(1)) - if(is.null(custom.optim)) - optim.method <- match.arg(optim.method, c("default", "Nelder-Mead", "BFGS", "CG", "L-BFGS-B", "SANN", "Brent")) - if(!is.null(weights)) + #check the memp function + if(!is.function(memp)) + stop("the empirical moment must be defined as a function") + if(is.null(weights)) { - if(any(weights < 0)) - stop("weights should be a vector of integers greater than 0") - if(!isallintw(weights)) - stop("weights should be a vector of (strictly) positive integers") - if(length(weights) != NROW(data)) - stop("weights should be a vector with a length equal to the observation number") - warning("weights are not taken into account in the default initial values") - } - - if(meth != "closed formula") + txt <- "the empirical moment function must be a two-argument function of 'x', 'order'" + if(length(formals(memp)) != 2) + stop(txt) + if(any(names(formals(memp)) != c("x", "order"))) + stop(txt) + }else { - if (!exists(mdistname, mode="function")) - stop(paste0("The moment ", mdistname, " function must be defined.")) - # mdistname contains the good name of the theoretical moment function + txt <- "the empirical moment function must be a three-argument function of 'x', 'order', 'weights'" + if(length(formals(memp)) != 3) + stop(txt) + if(any(names(formals(memp)) != c("x", "order", "weights"))) + stop(txt) } - if (!(is.numeric(data) & length(data)>1)) - stop("data must be a numeric vector of length greater than 1") - checkUncensoredNAInfNan(data) + + ############# MME fit using optim or custom.optim ########## + + # definition of the function to minimize : least square (Cramer - von Mises type) if(is.null(weights)) - { - loglik <- function(par, fix.arg, obs, ddistnam) - sum(log(do.call(ddistnam, c(list(obs), as.list(par), as.list(fix.arg)) ) ) ) + { + DIFF2 <- function(par, fix.arg, order, obs, mdistnam, memp, weights) + { + momtheo <- do.call(mdistnam, c(as.list(order), as.list(par), as.list(fix.arg)) ) + momemp <- as.numeric(memp(obs, order)) + (momemp - momtheo)^2 + } + fnobj <- function(par, fix.arg, obs, mdistnam, memp, weights) + sum( sapply(order, function(o) DIFF2(par, fix.arg, o, obs, mdistnam, memp)) ) }else { - loglik <- function(par, fix.arg, obs, ddistnam) - sum(weights * log(do.call(ddistnam, c(list(obs), as.list(par), as.list(fix.arg)) ) ) ) + DIFF2 <- function(par, fix.arg, order, obs, mdistnam, memp, weights) + { + momtheo <- do.call(mdistnam, c(as.list(order), as.list(par), as.list(fix.arg)) ) + momemp <- as.numeric(memp(obs, order, weights)) + (momemp - momtheo)^2 + } + fnobj <- function(par, fix.arg, obs, mdistnam, memp, weights) + sum( sapply(order, function(o) DIFF2(par, fix.arg, o, obs, mdistnam, memp, weights)) ) } - if(meth == "closed formula") + cens <- FALSE + if(cens) + stop("Moment matching estimation for censored data is not yet available.") + + + owarn <- getOption("warn") + # Try to minimize the stat distance using the base R optim function + if(is.null(custom.optim)) { - n <- length(data) - if(is.null(weights)) + hasbound <- any(is.finite(lower) | is.finite(upper)) + + # Choice of the optimization method + if (optim.method == "default") { - m <- mean(data) - v <- (n - 1)/n*var(data) - }else #weighted version from util-wtdstat.R + opt.meth <- ifelse(length(vstart) > 1, "Nelder-Mead", "BFGS") + }else + opt.meth <- optim.method + + if(opt.meth == "BFGS" && hasbound && is.null(gradient)) { - m <- wtdmean(data, weights=weights) - v <- wtdvar(data, weights=weights) + opt.meth <- "L-BFGS-B" + txt1 <- "The BFGS method cannot be used with bounds without provided the gradient." + txt2 <- "The method is changed to L-BFGS-B." + warning(paste(txt1, txt2)) } - - if (!is.null(fix.arg)) - stop("argument fix.arg cannot be used when a closed formula is used.") - # Fitting by matching moments - if (!(is.vector(data) & is.numeric(data) & length(data)>1)) - stop("data must be a numeric vector of length greater than 1") - if (distname == "norm") - { - estimate <- c(mean=m, sd=sqrt(v)) - mnorm <- function(order, mean, sd) - { - if(order == 1) - return(mean) - if(order == 2) - return(sd^2+mean^2) - if(order == 3) - return(3*mean*sd^2+mean^3) - if(order == 4) - return(3*sd^4+6*mean^2*sd^2+mean^4) - } - order <- 1:2 - } - if (distname == "lnorm") { - if (any(data <= 0)) - stop("values must be positive to fit a lognormal distribution") - sd2 <- log(1+v/m^2) - estimate <- c(meanlog=log(m) - sd2/2, sdlog=sqrt(sd2)) - mlnorm <- function(order, meanlog, sdlog) - { - exp(order*meanlog + order^2*sdlog^2/2) - } - order <- 1:2 - } - if (distname == "pois") { - estimate <- c(lambda=m) - mpois <- function(order, lambda) - { - ifelse(order == 1, lambda, lambda+lambda^2) - } - order <- 1 - } - if (distname == "exp") { - estimate <- c(rate=1/m) - mexp <- function(order, rate) - { - ifelse(order == 1, 1/rate, 2/rate^2) - } - order <- 1 - } - if (distname == "gamma" ) { - shape <- m^2/v - rate <- m/v - estimate <- c(shape=shape, rate=rate) - mgamma <- function(order, shape, rate) - { - res <- shape/rate - if(order == 1) - return(res) - res <- res * (shape+1)/rate - if(order == 2) - return(res) - res <- res * (shape+2)/rate - if(order == 3) - return(res) - res <- res * (shape+3)/rate - res - } - order <- 1:2 - } - if (distname == "nbinom" ) { - size <- if (v > m) m^2/(v - m) - else NaN - estimate <- c(size=size, mu=m) - mnbinom <- function(order, size, mu) - { - if(order == 1) - return(mu) - if(order == 2) - return(mu+mu^2/size+mu^2) - prob <- size/(size+mu) - if(order == 3) - { - res <- size*prob^2 + 3*prob*(1-prob)*(size) + (1-prob)^2*(size+size^2) - res <- res * (1-prob)/prob^3 - }else if(order == 4) - { - res <- size*prob^3 + 7*prob^2*(1-prob)*(size) + 6*prob*(1-prob)^2*(size+size^2) + (1-prob)^3*(2*size+3*size^2+size^3) - res <- res * (1-prob)/prob^4 - }else - stop("not implemented") - res - } - order <- 1:2 - } - if (distname == "geom" ) { - prob <- if (m>0) 1/(1+m) - else NaN - estimate <- c(prob=prob) - mcumulgeom <- function(order, prob) #E[N(N-1)..(N-j+1)] = (1-p)^j j! / p^j - { - (1-prob)^order * factorial(order) / prob^order - } - mgeom <- function(order, prob) - { - if(order == 0) - return(1) - m1 <- mcumulgeom(1, prob) - if(order == 1) - return(m1) - m2 <- mcumulgeom(2, prob) + m1 - if(order == 2) - return(m2) - m3 <- mcumulgeom(3, prob) + 3*m2 + 2*m1 - if(order == 3) - return(m3) - m4 <- mcumulgeom(4, prob) + 6*m3 + 11*m2 - 6*m1 - if(order == 4) - return(m4) - else - stop("not yet implemented") - } - order <- 1 - } - if (distname == "beta" ) { - if (any(data < 0) | any(data > 1)) - stop("values must be in [0-1] to fit a beta distribution") - aux <- m*(1-m)/v - 1 - shape1 <- m*aux - shape2 <- (1-m)*aux - estimate <- c(shape1=shape1, shape2=shape2) - mbeta <- function(order, shape1, shape2) - { - stot <- shape1+shape2 - ifelse(order == 1, shape1/stot, - (shape1^3+2*shape1*shape2+shape1^2)/stot^2/(stot+1)) - } - order <- 1:2 - } - if (distname == "unif" ) { - min1 <- m-sqrt(3*v) - max1 <- m+sqrt(3*v) - estimate <- c(min1,max1) - munif <- function(order, min, max) - { - ifelse(order == 1, (min+max)/2, (max^2 + min^2 + max*min)/3) - } - order <- 1:2 - } - if (distname == "logis" ) { - scale <- sqrt(3*v)/pi - estimate <- c(location=m, scale=scale) - mlogis <- function(order, location, scale) - { - ifelse(order == 1, location, scale^2*pi^2/3 + location^2) - } - order <- 1:2 - } - if (exists(ddistname)) - loglikval <- loglik(estimate, fix.arg, data, ddistname) - else - loglikval <- NULL - res <- list(estimate=estimate, convergence=0, value=NULL, hessian=NULL, - optim.function=NULL, opt.meth=NULL, fix.arg=NULL, fix.arg.fun=NULL, - weights=weights, counts=NULL, optim.message=NULL, - loglik= loglikval, method=meth, order=order, memp=NULL) - - #create memp() for closed formula solution - if(res$method == "closed formula") - { - if(is.null(weights)) - memp <- function(x, order, weights) mean(x^order) - else - memp <- function(x, order, weights) sum(x^order * weights)/sum(weights) - } - #compute asymptotic covariance matrix proposed by - #I. Ibragimov and R. Has'minskii. Statistical Estimation - Asymptotic Theory. Springer-Verlag, 1981, p368 - #see R/util-mmedist-vcov.R - - #manually look for the appropriate theoretical raw moment function - if (distname == "norm") - { - res$vcov <- mme.vcov(res$estimate, res$fix.arg, res$order, data, mnorm, memp, weights) - }else if (distname == "lnorm") - { - res$vcov <- mme.vcov(res$estimate, res$fix.arg, res$order, data, mlnorm, memp, weights) - }else if (distname == "pois") - { - res$vcov <- mme.vcov(res$estimate, res$fix.arg, res$order, data, mpois, memp, weights) - }else if (distname == "exp") - { - res$vcov <- mme.vcov(res$estimate, res$fix.arg, res$order, data, mexp, memp, weights) - }else if (distname == "gamma" ) - { - res$vcov <- mme.vcov(res$estimate, res$fix.arg, res$order, data, mgamma, memp, weights) - }else if (distname == "nbinom" ) - { - res$vcov <- mme.vcov(res$estimate, res$fix.arg, res$order, data, mnbinom, memp, weights) - }else if (distname == "geom" ) - { - res$vcov <- mme.vcov(res$estimate, res$fix.arg, res$order, data, mgeom, memp, weights) - }else if (distname == "beta" ) - { - res$vcov <- mme.vcov(res$estimate, res$fix.arg, res$order, data, mbeta, memp, weights) - }else if (distname == "unif" ) - { - res$vcov <- mme.vcov(res$estimate, res$fix.arg, res$order, data, munif, memp, weights) - }else if (distname == "logis" ) + + options(warn=ifelse(silent, -1, 0)) + #select optim or constrOptim + if(hasbound) #finite bounds are provided + { + if(!is.null(gradient)) { - res$vcov <- mme.vcov(res$estimate, res$fix.arg, res$order, data, mlogis, memp, weights) - }else - res$vcov <- NULL - - }else #an optimimisation has to be done, where fix.arg and start can be a function - { - if(is.vector(start)) #backward compatibility - start <- as.list(start) - - if(!checkstartfix) #pre-check has not been done by fitdist() or bootdist() + opt.fun <- "constrOptim" + }else #gradient == NULL { - #cat("checkstartfix is carried out\n") - # manage starting/fixed values: may raise errors or return two named list - arg_startfix <- manageparam(start.arg=start, fix.arg=fix.arg, obs=data, - distname=distname) - - #check inconsistent parameters - hasnodefaultval <- sapply(formals(ddistname), is.name) - arg_startfix <- checkparamlist(arg_startfix$start.arg, arg_startfix$fix.arg, - argddistname, hasnodefaultval) - #arg_startfix contains two names list (no longer NULL nor function) - - #set fix.arg.fun - if(is.function(fix.arg)) - fix.arg.fun <- fix.arg + if(opt.meth == "Nelder-Mead") + opt.fun <- "constrOptim" + else if(opt.meth %in% c("L-BFGS-B", "Brent")) + opt.fun <- "optim" else - fix.arg.fun <- NULL - }else #pre-check has been done by fitdist() or bootdist() - { - arg_startfix <- list(start.arg=start, fix.arg=fix.arg) - fix.arg.fun <- NULL - } - - #unlist starting values as needed in optim() - vstart <- unlist(arg_startfix$start.arg) - #sanity check - if(is.null(vstart)) - stop("Starting values could not be NULL with checkstartfix=TRUE") - - #erase user value - #(cannot coerce to vector as there might be different modes: numeric, character...) - fix.arg <- arg_startfix$fix.arg - - if(length(vstart) != length(order)) - stop("wrong dimension for the moment order to match") - if(missing(memp)) - stop("the empirical moment function must be defined") - #backward compatibility when memp is the name of the function and not the function itself - if(is.character(memp)) - memp <- get0(memp, envir=pos.to.env(1)) - - - #check the memp function - if(!is.function(memp)) - stop("the empirical moment must be defined as a function") - if(is.null(weights)) - { - txt <- "the empirical moment function must be a two-argument function of 'x', 'order'" - if(length(formals(memp)) != 2) - stop(txt) - if(any(names(formals(memp)) != c("x", "order"))) - stop(txt) - }else - { - txt <- "the empirical moment function must be a three-argument function of 'x', 'order', 'weights'" - if(length(formals(memp)) != 3) - stop(txt) - if(any(names(formals(memp)) != c("x", "order", "weights"))) - stop(txt) - } - - - ############# MME fit using optim or custom.optim ########## - - # definition of the function to minimize : least square (Cramer - von Mises type) - if(is.null(weights)) - { - DIFF2 <- function(par, fix.arg, order, obs, mdistnam, memp, weights) - { - momtheo <- do.call(mdistnam, c(as.list(order), as.list(par), as.list(fix.arg)) ) - momemp <- as.numeric(memp(obs, order)) - (momemp - momtheo)^2 - } - fnobj <- function(par, fix.arg, obs, mdistnam, memp, weights) - sum( sapply(order, function(o) DIFF2(par, fix.arg, o, obs, mdistnam, memp)) ) - }else - { - DIFF2 <- function(par, fix.arg, order, obs, mdistnam, memp, weights) { - momtheo <- do.call(mdistnam, c(as.list(order), as.list(par), as.list(fix.arg)) ) - momemp <- as.numeric(memp(obs, order, weights)) - (momemp - momtheo)^2 + txt1 <- paste("The method", opt.meth, "cannot be used by constrOptim() nor optim() without gradient and bounds.") + txt2 <- "Only optimization methods L-BFGS-B, Brent and Nelder-Mead can be used in such case." + stop(paste(txt1, txt2)) } - fnobj <- function(par, fix.arg, obs, mdistnam, memp, weights) - sum( sapply(order, function(o) DIFF2(par, fix.arg, o, obs, mdistnam, memp, weights)) ) } - - cens <- FALSE - if(cens) - stop("Moment matching estimation for censored data is not yet available.") - - - owarn <- getOption("warn") - # Try to minimize the stat distance using the base R optim function - if(is.null(custom.optim)) + if(opt.fun == "constrOptim") { - hasbound <- any(is.finite(lower) | is.finite(upper)) + #recycle parameters + npar <- length(vstart) #as in optim() line 34 + lower <- as.double(rep_len(lower, npar)) #as in optim() line 64 + upper <- as.double(rep_len(upper, npar)) - # Choice of the optimization method - if (optim.method == "default") - { - opt.meth <- ifelse(length(vstart) > 1, "Nelder-Mead", "BFGS") - }else - opt.meth <- optim.method + # constraints are : Mat %*% theta >= Bnd, i.e. + # +1 * theta[i] >= lower[i]; + # -1 * theta[i] >= -upper[i] - if(opt.meth == "BFGS" && hasbound && is.null(gradient)) - { - opt.meth <- "L-BFGS-B" - txt1 <- "The BFGS method cannot be used with bounds without provided the gradient." - txt2 <- "The method is changed to L-BFGS-B." - warning(paste(txt1, txt2)) - } + #select rows from the identity matrix + haslow <- is.finite(lower) + Mat <- diag(npar)[haslow, ] + #select rows from the opposite of the identity matrix + hasupp <- is.finite(upper) + Mat <- rbind(Mat, -diag(npar)[hasupp, ]) + colnames(Mat) <- names(vstart) + rownames(Mat) <- paste0("constr", 1:NROW(Mat)) - options(warn=ifelse(silent, -1, 0)) - #select optim or constrOptim - if(hasbound) #finite bounds are provided - { - if(!is.null(gradient)) - { - opt.fun <- "constrOptim" - }else #gradient == NULL - { - if(opt.meth == "Nelder-Mead") - opt.fun <- "constrOptim" - else if(opt.meth %in% c("L-BFGS-B", "Brent")) - opt.fun <- "optim" - else - { - txt1 <- paste("The method", opt.meth, "cannot be used by constrOptim() nor optim() without gradient and bounds.") - txt2 <- "Only optimization methods L-BFGS-B, Brent and Nelder-Mead can be used in such case." - stop(paste(txt1, txt2)) - } - } - if(opt.fun == "constrOptim") - { - #recycle parameters - npar <- length(vstart) #as in optim() line 34 - lower <- as.double(rep_len(lower, npar)) #as in optim() line 64 - upper <- as.double(rep_len(upper, npar)) - - # constraints are : Mat %*% theta >= Bnd, i.e. - # +1 * theta[i] >= lower[i]; - # -1 * theta[i] >= -upper[i] - - #select rows from the identity matrix - haslow <- is.finite(lower) - Mat <- diag(npar)[haslow, ] - #select rows from the opposite of the identity matrix - hasupp <- is.finite(upper) - Mat <- rbind(Mat, -diag(npar)[hasupp, ]) - colnames(Mat) <- names(vstart) - rownames(Mat) <- paste0("constr", 1:NROW(Mat)) - - #select the bounds - Bnd <- c(lower[is.finite(lower)], -upper[is.finite(upper)]) - names(Bnd) <- paste0("constr", 1:length(Bnd)) - - initconstr <- Mat %*% vstart - Bnd - if(any(initconstr < 0)) - stop("Starting values must be in the feasible region.") - - opttryerror <- try(opt <- constrOptim(theta=vstart, f=fnobj, ui=Mat, ci=Bnd, grad=gradient, - fix.arg=fix.arg, obs=data, mdistnam=mdistname, memp=memp, - hessian=!is.null(gradient), method=opt.meth, weights=weights, - ...), silent=TRUE) - if(!inherits(opttryerror, "try-error")) - if(length(opt$counts) == 1) #appears when the initial point is a solution - opt$counts <- c(opt$counts, NA) - - }else #opt.fun == "optim" - { - opttryerror <- try(opt <- optim(par=vstart, fn=fnobj, fix.arg=fix.arg, obs=data, gr=gradient, - mdistnam=mdistname, memp=memp, hessian=TRUE, method=opt.meth, - lower=lower, upper=upper, weights=weights, ...), silent=TRUE) - } - - }else #hasbound == FALSE - { - opt.fun <- "optim" - opttryerror <- try(opt <- optim(par=vstart, fn=fnobj, fix.arg=fix.arg, obs=data, gr=gradient, - mdistnam=mdistname, memp=memp, hessian=TRUE, method=opt.meth, lower=lower, - upper=upper, ...), silent=TRUE) - } - options(warn=owarn) - - if (inherits(opttryerror,"try-error")) - { - warnings("The function optim encountered an error and stopped.") - if(getOption("show.error.messages")) print(attr(opttryerror, "condition")) - return(list(estimate = rep(NA,length(vstart)), convergence = 100, value = NA, - hessian = NA)) - } - - if (opt$convergence>0) { - warnings("The function optim failed to converge, with the error code ", - opt$convergence) - } - if(is.null(names(opt$par))) - names(opt$par) <- names(vstart) - res <- list(estimate = opt$par, convergence = opt$convergence, value = opt$value, - hessian = opt$hessian, optim.function=opt.fun, optim.method=opt.meth, - fix.arg=fix.arg, fix.arg.fun=fix.arg.fun, weights=weights, - counts=opt$counts, optim.message=opt$message, - loglik=ifelse(exists(ddistname), loglik(opt$par, fix.arg, data, ddistname), NULL), - method=meth, order=order, memp=memp) - - }else # Try to minimize the stat distance using a user-supplied optim function + #select the bounds + Bnd <- c(lower[is.finite(lower)], -upper[is.finite(upper)]) + names(Bnd) <- paste0("constr", 1:length(Bnd)) + + initconstr <- Mat %*% vstart - Bnd + if(any(initconstr < 0)) + stop("Starting values must be in the feasible region.") + + opttryerror <- try(opt <- constrOptim(theta=vstart, f=fnobj, ui=Mat, ci=Bnd, grad=gradient, + fix.arg=fix.arg, obs=data, mdistnam=mdistname, memp=memp, + hessian=!is.null(gradient), method=opt.meth, weights=weights, + ...), silent=TRUE) + if(!inherits(opttryerror, "try-error")) + if(length(opt$counts) == 1) #appears when the initial point is a solution + opt$counts <- c(opt$counts, NA) + + }else #opt.fun == "optim" { - opt.meth <- NULL - if (!cens) - { - options(warn=ifelse(silent, -1, 0)) - opttryerror <- try(opt <- custom.optim(fn=fnobj, fix.arg=fix.arg, obs=data, mdistnam=mdistname, - memp=memp, par=vstart, weights=weights, ...), silent=TRUE) - options(warn=owarn) - }else - stop("Moment matching estimation for censored data is not yet available.") - - if (inherits(opttryerror,"try-error")) - { - warnings("The customized optimization function encountered an error and stopped.") - if(getOption("show.error.messages")) print(attr(opttryerror, "condition")) - return(list(estimate = rep(NA,length(vstart)), convergence = 100, value = NA, - hessian = NA)) - } - - if (opt$convergence>0) { - warnings("The customized optimization function failed to converge, with the error code ", - opt$convergence) - } - if(is.null(names(opt$par))) - names(opt$par) <- names(vstart) - argdot <- list(...) - method.cust <- argdot$method - res <- list(estimate = opt$par, convergence = opt$convergence, value = opt$value, - hessian = opt$hessian, optim.function=custom.optim, optim.method=method.cust, - fix.arg=fix.arg, fix.arg.fun=fix.arg.fun, weights=weights, - counts=opt$counts, optim.message=opt$message, - loglik=ifelse(exists(ddistname), loglik(opt$par, fix.arg, data, ddistname), NULL), - method=meth, order=order, memp=memp) - } - + opttryerror <- try(opt <- optim(par=vstart, fn=fnobj, fix.arg=fix.arg, obs=data, gr=gradient, + mdistnam=mdistname, memp=memp, hessian=TRUE, method=opt.meth, + lower=lower, upper=upper, weights=weights, ...), silent=TRUE) + } - #compute asymptotic covariance matrix proposed by - #I. Ibragimov and R. Has'minskii. Statistical Estimation - Asymptotic Theory. Springer-Verlag, 1981, p368 - #see R/util-mmedist-vcov.R - res$vcov <- mme.vcov(res$estimate, res$fix.arg, res$order, data, mdistname, memp, weights) - } + }else #hasbound == FALSE + { + opt.fun <- "optim" + opttryerror <- try(opt <- optim(par=vstart, fn=fnobj, fix.arg=fix.arg, obs=data, gr=gradient, + mdistnam=mdistname, memp=memp, hessian=TRUE, method=opt.meth, lower=lower, + upper=upper, ...), silent=TRUE) + } + options(warn=owarn) + + if (inherits(opttryerror,"try-error")) + { + warnings("The function optim encountered an error and stopped.") + if(getOption("show.error.messages")) print(attr(opttryerror, "condition")) + return(list(estimate = rep(NA,length(vstart)), convergence = 100, value = NA, + hessian = NA)) + } + + if (opt$convergence>0) { + warnings("The function optim failed to converge, with the error code ", + opt$convergence) + } + if(is.null(names(opt$par))) + names(opt$par) <- names(vstart) + #compute asymptotic covariance matrix proposed by + #I. Ibragimov and R. Has'minskii. Statistical Estimation - Asymptotic Theory. Springer-Verlag, 1981, p368 + #see R/util-mmedist-vcov.R + if(calcvcov) + { + varcovar <- mme.vcov(opt$par, fix.arg, order, data, mdistname, memp, weights) + #add names + if(!is.null(varcovar)) + colnames(varcovar) <- rownames(varcovar) <- names(opt$par) + }else + varcovar <- NULL + + res <- list(estimate = opt$par, convergence = opt$convergence, value = opt$value, + hessian = opt$hessian, optim.function=opt.fun, optim.method=opt.meth, + fix.arg=fix.arg, fix.arg.fun=fix.arg.fun, weights=weights, + counts=opt$counts, optim.message=opt$message, + loglik=ifelse(exists(ddistname), loglik(opt$par, fix.arg, data, ddistname), NULL), + method=meth, order=order, memp=memp, vcov=varcovar) + + }else # Try to minimize the stat distance using a user-supplied optim function + { + opt.meth <- NULL + if (!cens) + { + options(warn=ifelse(silent, -1, 0)) + opttryerror <- try(opt <- custom.optim(fn=fnobj, fix.arg=fix.arg, obs=data, mdistnam=mdistname, + memp=memp, par=vstart, weights=weights, ...), silent=TRUE) + options(warn=owarn) + }else + stop("Moment matching estimation for censored data is not yet available.") + + if (inherits(opttryerror,"try-error")) + { + warnings("The customized optimization function encountered an error and stopped.") + if(getOption("show.error.messages")) print(attr(opttryerror, "condition")) + return(list(estimate = rep(NA,length(vstart)), convergence = 100, value = NA, + hessian = NA)) + } + + if (opt$convergence>0) { + warnings("The customized optimization function failed to converge, with the error code ", + opt$convergence) + } + if(is.null(names(opt$par))) + names(opt$par) <- names(vstart) + argdot <- list(...) + method.cust <- argdot$method + #compute asymptotic covariance matrix proposed by + #I. Ibragimov and R. Has'minskii. Statistical Estimation - Asymptotic Theory. Springer-Verlag, 1981, p368 + #see R/util-mmedist-vcov.R + if(calcvcov) + { + varcovar <- mme.vcov(opt$par, fix.arg, order, data, mdistname, memp, weights) + #add names + if(!is.null(varcovar)) + colnames(varcovar) <- rownames(varcovar) <- names(opt$par) + }else + varcovar <- NULL - return(res) + res <- list(estimate = opt$par, convergence = opt$convergence, value = opt$value, + hessian = opt$hessian, optim.function=custom.optim, optim.method=method.cust, + fix.arg=fix.arg, fix.arg.fun=fix.arg.fun, weights=weights, + counts=opt$counts, optim.message=opt$message, + loglik=ifelse(exists(ddistname), loglik(opt$par, fix.arg, data, ddistname), NULL), + method=meth, order=order, memp=memp, vcov=varcovar) + } + } + return(res) } ## old function with previous name -momdist<-function (data, distr) +momdist <- function (data, distr) { - stop("the name \"momdist\" for matching moments function is NO MORE used and is replaced by \"mmedist\".") + stop("the name \"momdist\" for matching moments function is deprecated and is replaced by \"mmedist\".") } diff --git a/R/msedist.R b/R/msedist.R index 1bc54c55..e046a948 100644 --- a/R/msedist.R +++ b/R/msedist.R @@ -24,9 +24,9 @@ ### R functions ### -msedist <- function (data, distr, phidiv="KL", power.phidiv=NULL, start=NULL, fix.arg=NULL, optim.method="default", - lower=-Inf, upper=Inf, custom.optim=NULL, weights=NULL, silent=TRUE, gradient=NULL, - checkstartfix=FALSE, ...) +msedist <- function (data, distr, phidiv="KL", power.phidiv=NULL, start=NULL, + fix.arg=NULL, optim.method="default", lower=-Inf, upper=Inf, custom.optim=NULL, + weights=NULL, silent=TRUE, gradient=NULL, checkstartfix=FALSE, calcvcov=FALSE, ...) # data may correspond to a vector for non censored data or to # a dataframe of two columns named left and right for censored data { @@ -385,6 +385,5 @@ msedist <- function (data, distr, phidiv="KL", power.phidiv=NULL, start=NULL, fi loglik=loglik(opt$par, fix.arg, data, ddistname), phidiv=phidiv, power.phidiv=power.phidiv) } - return(res) } diff --git a/R/qmedist.R b/R/qmedist.R index 1cd452ca..274812af 100644 --- a/R/qmedist.R +++ b/R/qmedist.R @@ -24,7 +24,8 @@ qmedist <- function (data, distr, probs, start=NULL, fix.arg=NULL, qtype=7, optim.method="default", lower=-Inf, upper=Inf, custom.optim=NULL, - weights=NULL, silent=TRUE, gradient=NULL, checkstartfix=FALSE, ...) + weights=NULL, silent=TRUE, gradient=NULL, checkstartfix=FALSE, + calcvcov=FALSE, ...) # data may correspond to a vector for non censored data or to # a dataframe of two columns named left and right for censored data { @@ -302,6 +303,5 @@ qmedist <- function (data, distr, probs, start=NULL, fix.arg=NULL, loglik=loglik(opt$par, fix.arg, data, ddistname), probs=probs) } return(res) - } diff --git a/R/util-mledist-vcov.R b/R/util-mledist-vcov.R new file mode 100644 index 00000000..ab51e359 --- /dev/null +++ b/R/util-mledist-vcov.R @@ -0,0 +1,38 @@ +############################################################################# +# Copyright (c) 2024 Christophe Dutang, Marie Laure Delignette-Muller +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the +# Free Software Foundation, Inc., +# 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA +# +############################################################################# +### Theory of point estimation, Casella & Berger +### +### R functions +### + + +#compute H^{-1} +mle.vcov <- function(myhessian) +{ + + if(all(!is.na(myhessian)) && qr(myhessian)$rank == NCOL(myhessian)) + { + res <- solve(myhessian) + }else + { + res <- NULL + } + res +} diff --git a/R/util-mmedist-vcov.R b/R/util-mmedist-vcov.R index 49710b7c..5f6b492f 100644 --- a/R/util-mmedist-vcov.R +++ b/R/util-mmedist-vcov.R @@ -34,9 +34,14 @@ mme.vcov <- function(par, fix.arg, order, obs, mdistnam, memp, weights, stopifnot(length(epsilon) == 1) stopifnot(epsilon > 0) - Amat <- mme.Ahat(par, fix.arg, order, obs, mdistnam, memp, weights) Jmat <- mme.Jhat(par, fix.arg, order, mdistnam, epsilon) - Jinv <- solve(Jmat) + if(all(!is.na(Jmat)) && qr(Jmat)$rank == NCOL(Jmat)) + { + Jinv <- solve(Jmat) + Amat <- mme.Ahat(par, fix.arg, order, obs, mdistnam, memp, weights) + res <- Jinv %*% Amat %*% t(Jinv) + }else + res <- NULL if(echo) { cat("Amat\n") @@ -46,8 +51,6 @@ mme.vcov <- function(par, fix.arg, order, obs, mdistnam, memp, weights, cat("Jinv\n") print(Jinv) } - res <- Jinv %*% Amat %*% t(Jinv) - res } diff --git a/R/util-startarg.R b/R/util-startarg.R index beeced0e..7ec256a5 100644 --- a/R/util-startarg.R +++ b/R/util-startarg.R @@ -459,10 +459,16 @@ startarg_transgamma_family <- function(x, distr) #same as gamma with shape2=tau=1 m <- mean(x, na.rm = TRUE) v <- var(x, na.rm = TRUE) - alphahat <- m^2/v - thetahat <- v/m - - start <- list(shape=alphahat, scale=thetahat) + if(v > 0) + { + alphahat <- m^2/v + thetahat <- m/v + }else #exponential case + { + alphahat <- 1 + thetahat <- m + } + start <- list(shape=alphahat, rate=1/thetahat) }else if (distr == "weibull") { if (any(x < 0)) diff --git a/R/vcov.R b/R/vcov.R index 51660625..020b2b8f 100644 --- a/R/vcov.R +++ b/R/vcov.R @@ -33,14 +33,11 @@ vcov.fitdist <- function(object, ...) { stopifnot(inherits(object, "fitdist")) - if (object$method != "mle") - warning("The variance-covariance matrix can only be calculated for fits using the mle method") return(object$vcov) } vcov.fitdistcens <- function(object, ...) { stopifnot(inherits(object, "fitdistcens")) - return(object$vcov) } diff --git a/man/fitdist.Rd b/man/fitdist.Rd index 439673d4..87e5dd19 100644 --- a/man/fitdist.Rd +++ b/man/fitdist.Rd @@ -22,7 +22,8 @@ \usage{ fitdist(data, distr, method = c("mle", "mme", "qme", "mge", "mse"), - start=NULL, fix.arg=NULL, discrete, keepdata = TRUE, keepdata.nb=100, \dots) + start=NULL, fix.arg=NULL, discrete, keepdata = TRUE, keepdata.nb=100, + calcvcov=TRUE, \dots) \method{print}{fitdist}(x, \dots) @@ -64,6 +65,8 @@ fitdist(data, distr, method = c("mle", "mme", "qme", "mge", "mse"), \item{keepdata}{a logical. If \code{TRUE}, dataset is returned, otherwise only a sample subset is returned.} \item{keepdata.nb}{When \code{keepdata=FALSE}, the length (>1) of the subset returned.} +\item{calcvcov}{A logical indicating if (asymptotic) covariance matrix is required.} + \item{discrete}{ If TRUE, the distribution is considered as discrete. If \code{discrete} is missing, \code{discrete} is automaticaly set to \code{TRUE} when \code{distr} belongs to diff --git a/man/mgedist.Rd b/man/mgedist.Rd index 9b010ed8..1faab88c 100644 --- a/man/mgedist.Rd +++ b/man/mgedist.Rd @@ -10,7 +10,7 @@ \usage{ mgedist(data, distr, gof = "CvM", start = NULL, fix.arg = NULL, optim.method = "default", lower = -Inf, upper = Inf, custom.optim = NULL, silent = TRUE, gradient = NULL, - checkstartfix=FALSE, \dots) + checkstartfix=FALSE, calcvcov=FALSE, \dots) } @@ -39,6 +39,8 @@ mgedist(data, distr, gof = "CvM", start = NULL, fix.arg = NULL, optim.method = " \item{gradient}{A function to return the gradient of the gof distance for the \code{"BFGS"}, \code{"CG"} and \code{"L-BFGS-B"} methods. If it is \code{NULL}, a finite-difference approximation will be used.} \item{checkstartfix}{A logical to test starting and fixed values. Do not change it.} +\item{calcvcov}{A logical indicating if (asymptotic) covariance matrix is required. + (currently ignored)} \item{\dots}{further arguments passed to the \code{\link{optim}}, \code{\link{constrOptim}} or \code{custom.optim} function.} } diff --git a/man/mledist.Rd b/man/mledist.Rd index 253a4024..1940a530 100644 --- a/man/mledist.Rd +++ b/man/mledist.Rd @@ -10,7 +10,7 @@ \usage{ mledist(data, distr, start = NULL, fix.arg = NULL, optim.method = "default", lower = -Inf, upper = Inf, custom.optim = NULL, weights = NULL, silent = TRUE, - gradient = NULL, checkstartfix=FALSE, \dots) + gradient = NULL, checkstartfix=FALSE, calcvcov=FALSE, \dots) } \arguments{ @@ -49,6 +49,7 @@ mledist(data, distr, start = NULL, fix.arg = NULL, optim.method = "default", and \code{"L-BFGS-B"} methods. If it is \code{NULL}, a finite-difference approximation will be used, see details.} \item{checkstartfix}{A logical to test starting and fixed values. Do not change it.} +\item{calcvcov}{A logical indicating if (asymptotic) covariance matrix is required.} \item{\dots}{further arguments passed to the \code{\link{optim}}, \code{\link{constrOptim}} or \code{custom.optim} function.} } diff --git a/man/mmedist.Rd b/man/mmedist.Rd index 70270c58..b73bc118 100644 --- a/man/mmedist.Rd +++ b/man/mmedist.Rd @@ -10,7 +10,7 @@ \usage{ mmedist(data, distr, order, memp, start = NULL, fix.arg = NULL, optim.method = "default", lower = -Inf, upper = Inf, custom.optim = NULL, weights = NULL, silent = TRUE, - gradient = NULL, checkstartfix=FALSE, \dots) + gradient = NULL, checkstartfix=FALSE, calcvcov=FALSE, \dots) } \arguments{ @@ -41,7 +41,8 @@ to the number of parameters to estimate.} \item{gradient}{A function to return the gradient of the squared difference for the \code{"BFGS"}, \code{"CG"} and \code{"L-BFGS-B"} methods. If it is \code{NULL}, a finite-difference approximation will be used, see details.} -\item{checkstartfix}{A logical to test starting and fixed values. Do not change it.} +\item{checkstartfix}{A logical to test starting and fixed values. Do not change it.} +\item{calcvcov}{A logical indicating if (asymptotic) covariance matrix is required.} \item{\dots}{further arguments passed to the \code{\link{optim}}, \code{\link{constrOptim}} or \code{custom.optim} function.} } diff --git a/man/msedist.Rd b/man/msedist.Rd index 3b725656..74490433 100644 --- a/man/msedist.Rd +++ b/man/msedist.Rd @@ -10,7 +10,7 @@ \usage{ msedist(data, distr, phidiv="KL", power.phidiv=NULL, start = NULL, fix.arg = NULL, optim.method = "default", lower = -Inf, upper = Inf, custom.optim = NULL, - weights=NULL, silent = TRUE, gradient = NULL, checkstartfix=FALSE, \dots) + weights=NULL, silent = TRUE, gradient = NULL, checkstartfix=FALSE, calcvcov=FALSE, \dots) } @@ -45,7 +45,9 @@ msedist(data, distr, phidiv="KL", power.phidiv=NULL, start = NULL, fix.arg = NUL \item{silent}{A logical to remove or show warnings when bootstraping.} \item{gradient}{A function to return the gradient of the gof distance for the \code{"BFGS"}, \code{"CG"} and \code{"L-BFGS-B"} methods. If it is \code{NULL}, a finite-difference approximation will be used.} -\item{checkstartfix}{A logical to test starting and fixed values. Do not change it.} +\item{checkstartfix}{A logical to test starting and fixed values. Do not change it.} +\item{calcvcov}{A logical indicating if (asymptotic) covariance matrix is required. + (currently ignored)} \item{\dots}{further arguments passed to the \code{\link{optim}}, \code{\link{constrOptim}} or \code{custom.optim} function.} } diff --git a/man/qmedist.Rd b/man/qmedist.Rd index ab7edbc7..9f763cb0 100644 --- a/man/qmedist.Rd +++ b/man/qmedist.Rd @@ -11,7 +11,7 @@ qmedist(data, distr, probs, start = NULL, fix.arg = NULL, qtype = 7, optim.method = "default", lower = -Inf, upper = Inf, custom.optim = NULL, weights = NULL, silent = TRUE, gradient = NULL, - checkstartfix=FALSE, \dots) + checkstartfix=FALSE, calcvcov=FALSE, \dots) } @@ -44,7 +44,9 @@ qmedist(data, distr, probs, start = NULL, fix.arg = NULL, qtype = 7, \item{gradient}{A function to return the gradient of the squared difference for the \code{"BFGS"}, \code{"CG"} and \code{"L-BFGS-B"} methods. If it is \code{NULL}, a finite-difference approximation will be used, see details.} -\item{checkstartfix}{A logical to test starting and fixed values. Do not change it.} +\item{checkstartfix}{A logical to test starting and fixed values. Do not change it.} +\item{calcvcov}{A logical indicating if (asymptotic) covariance matrix is required. + (currently ignored)} \item{\dots}{further arguments passed to the \code{\link{optim}}, \code{\link{constrOptim}} or \code{custom.optim} function.} diff --git a/tests/t-fitdist-test-arguments.R b/tests/t-fitdist-test-arguments.R new file mode 100644 index 00000000..7544dd20 --- /dev/null +++ b/tests/t-fitdist-test-arguments.R @@ -0,0 +1,75 @@ +library(fitdistrplus) +nbboot <- 100 +nbboot <- 10 + +nsample <- 100 +nsample <- 10 + +visualize <- FALSE # TRUE for manual tests with visualization of results + + +#### sanity check -- data #### + +try(fitdist(c(serving, "a"), "gamma")) +try(fitdist(c(serving, NA), "gamma")) +try(fitdist(c(serving, Inf), "gamma")) +try(fitdist(c(serving, -Inf), "gamma")) +try(fitdist(c(serving, NaN), "gamma")) + +#### sanity check -- distr #### + +try(fitdist(serving, "toto")) + +#### sanity check -- method #### + +try(fitdist(serving, "gamma", method="toto")) +try(fitdist(serving, "gamma", method=1)) + +#### sanity check -- start #### + +try(fitdist(serving, "gamma", start=list("a"=1, b=2))) + +#### sanity check -- fix.arg #### + +try(fitdist(serving, "gamma", fix.arg=list("a"=1, b=2))) +try(fitdist(serving, "gamma", fix.arg=list("shape"=1, rate=2))) + + +#### sanity check -- discrete #### + +try(fitdist(serving, "gamma", discrete=3)) + +#### sanity check -- keepdata #### + +try(fitdist(serving, "gamma", keepdata=3)) +try(fitdist(serving, "gamma", keepdata=TRUE, keepdata.nb = 1)) + + +#### sanity check -- calcvcov #### + +try(fitdist(serving, "gamma", calcvcov=3)) + + +#### check the warning messages when using weights in the fit followed by functions #### +# that do not yet take weights into account +# with an example to be used later to see if weights are well taken into account +# +if(visualize) +{ + x3 <- rnorm(100) # this sample size must be fixed here (see next lines, 50+50) + x3 <- sort(x3) + (f <- fitdist(x3, "norm", method="mle", weights= c(rep(1, 50), rep(2, 50)))) + try(plot(f)) + try(cdfcomp(f)) + (f2 <- fitdist(x3, "logis", method="mle", weights= c(rep(1, 50), rep(2, 50)))) + try(cdfcomp(list(f,f2))) + try(denscomp(f)) + try(denscomp(list(f,f2))) + try(ppcomp(f)) + try(ppcomp(list(f,f2))) + try(qqcomp(f)) + try(qqcomp(list(f,f2))) + try(gofstat(f)) + try(gofstat(list(f,f2))) + try(bootdist(f)) +} \ No newline at end of file diff --git a/tests/t-fitdist.R b/tests/t-fitdist.R index cfdb91c4..8b345e1e 100644 --- a/tests/t-fitdist.R +++ b/tests/t-fitdist.R @@ -26,16 +26,6 @@ if(visualize) { # check ERROR on aarch64-apple-darwin20.4.0 (64-bit) (2021/05/1 names(fitdist(serving, "gamma", optim.method="L-BFGS-B", lower=0)$estimate) } -# (2) sanity check -# - - -try(fitdist(c(serving, "a"), "gamma")) -try(fitdist(c(serving, NA), "gamma")) -try(fitdist(c(serving, Inf), "gamma")) -try(fitdist(c(serving, -Inf), "gamma")) -try(fitdist(c(serving, NaN), "gamma")) - # (7) custom optimization function # @@ -361,27 +351,3 @@ fitdist(xval, "pois", method = "qme", weights = xtab, probs=c(1/2), optim.method fitdist(x, "pois", method = "qme", probs=c(1/2), optim.method="SANN", control=list(maxit=1000)) # should be similar # should give similar results for big samples - -# (25) check the warning messages when using weights in the fit followed by functions -# that do not yet take weights into account -# with an example to be used later to see if weights are well taken into account -# -if(visualize) -{ - x3 <- rnorm(100) # this sample size must be fixed here (see next lines, 50+50) - x3 <- sort(x3) - (f <- fitdist(x3, "norm", method="mle", weights= c(rep(1, 50), rep(2, 50)))) - try(plot(f)) - try(cdfcomp(f)) - (f2 <- fitdist(x3, "logis", method="mle", weights= c(rep(1, 50), rep(2, 50)))) - try(cdfcomp(list(f,f2))) - try(denscomp(f)) - try(denscomp(list(f,f2))) - try(ppcomp(f)) - try(ppcomp(list(f,f2))) - try(qqcomp(f)) - try(qqcomp(list(f,f2))) - try(gofstat(f)) - try(gofstat(list(f,f2))) - try(bootdist(f)) -} \ No newline at end of file diff --git a/tests/t-mledist-asymptotic-vcov.R b/tests/t-mledist-asymptotic-vcov.R new file mode 100644 index 00000000..9f058948 --- /dev/null +++ b/tests/t-mledist-asymptotic-vcov.R @@ -0,0 +1,43 @@ +require(fitdistrplus) +nsample <- 1e6 +nsample <- 10 + +#### (1) Gamma example #### + +truetheta <- c("alpha"=3, "beta"=1/2) +x <- rgamma(nsample, truetheta["alpha"], truetheta["beta"]) +f1 <- mledist(x, "gamma", calcvcov = TRUE) +f1$vcov + +# (2) fit a Pareto distribution +# + +if(any(installed.packages()[, "Package"] == "actuar")) +{ + require(actuar) + #simulate a sample + x4 <- rpareto(nsample, 6, 2) + + #empirical raw moment + memp <- function(x, order) + mean(x^order) + + #fit + res <- mledist(x4, "pareto", start=list(shape=10, scale=10), + lower=1, upper=Inf, calcvcov = TRUE) +} + +# (3) truncated distribution +# + +dtiexp <- function(x, rate, low, upp) +{ + PU <- pexp(upp, rate=rate, lower.tail = FALSE) + PL <- pexp(low, rate=rate) + dexp(x, rate) * (x >= low) * (x <= upp) + PL * (x == low) + PU * (x == upp) +} +ptiexp <- function(q, rate, low, upp) + pexp(q, rate) * (q >= low) * (q <= upp) + 1 * (q > upp) +n <- 100; x <- pmax(pmin(rexp(n), 3), .5) + +mledist(x, "tiexp", start=list(rate=3, low=0, upp=20), calcvcov = TRUE) diff --git a/tests/t-mmedist-asymptotic-vcov.R b/tests/t-mmedist-asymptotic-vcov.R index e96691aa..d9b5b639 100644 --- a/tests/t-mmedist-asymptotic-vcov.R +++ b/tests/t-mmedist-asymptotic-vcov.R @@ -4,16 +4,19 @@ nsample <- 10 #### (1) Gamma example #### -#require(actuar) - truetheta <- c("alpha"=3, "beta"=1/2) x <- rgamma(nsample, truetheta["alpha"], truetheta["beta"]) -f1 <- mmedist(x, "gamma", order=1:2) +f1 <- mmedist(x, "gamma", order=1:2, calcvcov = TRUE) f1$vcov -#fitdistrplus:::mme.vcov(as.numeric(truetheta), fix.arg=NULL, order=1:2, obs=x, mdistnam=mgamma, memp, weights=NULL) +if(FALSE) +{ + memp <- function(x, order) mean(x^order) + require(actuar) + fitdistrplus:::mme.vcov(as.numeric(truetheta), fix.arg=NULL, order=1:2, obs=x, mdistnam=mgamma, memp, weights=NULL) +} -# (4) fit a Pareto distribution +# (2) fit a Pareto distribution # if(any(installed.packages()[, "Package"] == "actuar")) @@ -27,6 +30,6 @@ if(any(installed.packages()[, "Package"] == "actuar")) mean(x^order) #fit - mmedist(x4, "pareto", order=c(1, 2), memp=memp, start=c(shape=10, scale=10), - lower=1, upper=Inf) + res <- mmedist(x4, "pareto", order=c(1, 2), memp=memp, start=c(shape=10, scale=10), + lower=1, upper=Inf, calcvcov = TRUE) } \ No newline at end of file diff --git a/tests/t-startingvalues-othercont-family.R b/tests/t-startingvalues-othercont-family.R index d99fdbf6..dd3f5fce 100644 --- a/tests/t-startingvalues-othercont-family.R +++ b/tests/t-startingvalues-othercont-family.R @@ -23,4 +23,3 @@ fitdist(x, "invgauss") x <- rgenbeta(n, 2, 2, 2, scale=2) fitdistrplus:::startarg_othercontinuous_actuar_family(x, "genbeta") fitdist(x, "genbeta", lower=0) - diff --git a/tests/t-startingvalues-trgamma-family.R b/tests/t-startingvalues-trgamma-family.R index 5efb4d6a..6794e6fa 100644 --- a/tests/t-startingvalues-trgamma-family.R +++ b/tests/t-startingvalues-trgamma-family.R @@ -20,3 +20,24 @@ fitdist(x, "weibull") fitdist(x, "exp") +#weird examples +x <- rep(1, n) + +fitdistrplus:::startarg_transgamma_family(x, "gamma") + +#previous code +n <- length(x) +m <- mean(x) +v <- (n - 1)/n*var(x) +list(shape=m^2/v, rate=m/v) + + +#normal -> weibull +x <- abs(rnorm(n = 20, mean = 150, sd = 10)) +m <- mean(log(x)) +v <- var(log(x)) +shape <- 1.2/sqrt(v) +scale <- exp(m + 0.572/shape) +s <- list(shape=shape, scale=scale) +quantile(x) +fitdistrplus:::startarg_transgamma_family(x, "weibull") diff --git a/vignettes/FAQ.Rmd b/vignettes/FAQ.Rmd index d051994a..981a7833 100644 --- a/vignettes/FAQ.Rmd +++ b/vignettes/FAQ.Rmd @@ -272,7 +272,7 @@ par(mfrow=c(1,1), mar=c(4,4,2,1)) cdfcomp(list(f1, f2), do.points = FALSE, addlegend=FALSE, xlim=c(0, 3.5)) curve(ptiexp(x, 1, .5, 3), add=TRUE, col="blue", lty=3) legend("bottomright", lty=1:3, col=c("red", "green", "blue", "black"), - leg=c("full MLE", "MLE fixed arg", "true CDF", "emp. CDF")) + legend=c("full MLE", "MLE fixed arg", "true CDF", "emp. CDF")) ``` diff --git a/vignettes/fitdistrplus_vignette.Rmd b/vignettes/fitdistrplus_vignette.Rmd index 50492ec9..f214ca42 100644 --- a/vignettes/fitdistrplus_vignette.Rmd +++ b/vignettes/fitdistrplus_vignette.Rmd @@ -423,7 +423,7 @@ fdanish.ln.MME <- fitdist(danishuni$Loss, "lnorm", method = "mme", order = 1:2) library(actuar) fdanish.P.MLE <- fitdist(danishuni$Loss, "pareto", start = list(shape = 10, scale = 10), lower = 2+1e-6, upper = Inf) -memp <- function(x, order) sum(x^order) / length(x) +memp <- function(x, order) mean(x^order) fdanish.P.MME <- fitdist(danishuni$Loss, "pareto", method = "mme", order = 1:2, memp = "memp", start = list(shape = 10, scale = 10), lower = c(2+1e-6, 2+1e-6), upper = c(Inf, Inf))