From 9b0b646c4fbd256ca3764acd3787d70465c70063 Mon Sep 17 00:00:00 2001 From: Christophe Dutang Date: Wed, 12 Jun 2024 22:47:21 +0200 Subject: [PATCH] add a density function for bootdist* objects --- NAMESPACE | 14 +- NEWS.md | 1 + R/bootdist.R | 436 +++++++++++++++++++++++++++----------------- R/bootdistcens.R | 104 +++++++++++ man/bootdist.Rd | 39 +++- man/bootdistcens.Rd | 26 ++- share/todolist.md | 2 +- tests/t-bootdist.R | 18 +- vignettes/FAQ.Rmd | 21 +++ 9 files changed, 480 insertions(+), 181 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 414b5a7d..621d01fc 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -3,8 +3,11 @@ import(stats) importFrom("survival", Surv, survfit) importFrom("MASS", kde2d) importFrom("grDevices", colorRampPalette, terrain.colors) -importFrom("graphics", abline, axis, hist, hist.default, legend, lines, par, plot, points, polygon, contour, layout, matlines, segments, stripchart, image, pairs, rect, text) -importFrom("utils", head, stack, modifyList, tail) +importFrom("graphics", abline, axis, hist, hist.default, + legend, lines, par, plot, points, polygon, + contour, layout, matlines, segments, stripchart, + image, pairs, rect, text) +importFrom("utils", head, stack, modifyList, tail, str) importFrom("methods", formalArgs) importFrom("rlang", .data) @@ -44,18 +47,25 @@ export(Surv2fitdistcens) #bootdist class export(bootdist) export(CIcdfplot) +S3method(density, bootdist) S3method(summary, bootdist) S3method(plot, bootdist) +S3method(plot, density.bootdist) S3method(print, bootdist) +S3method(print, density.bootdist) S3method(print, summary.bootdist) S3method(print, quantile.bootdist) S3method(quantile, bootdist) + #bootdistcens class export(bootdistcens) +S3method(density, bootdistcens) S3method(summary, bootdistcens) S3method(plot, bootdistcens) +S3method(plot, density.bootdistcens) S3method(print, bootdistcens) +S3method(print, density.bootdistcens) S3method(print, summary.bootdistcens) S3method(print, quantile.bootdistcens) S3method(quantile, bootdistcens) diff --git a/NEWS.md b/NEWS.md index 5ea47a29..0eba8cba 100644 --- a/NEWS.md +++ b/NEWS.md @@ -10,6 +10,7 @@ NEW FEATURES - add calculation of the hessian using `optimHess` within `fitdist` when it is not given by `optim`. - compute the asymptotic covariance matrix with MME. - graphics function `*comp()` now return a list of drawn points and/or lines when `plotstyle == "graphics"`. +- add a `density` function for `bootdist(cens)` objects. BUG FIXES diff --git a/R/bootdist.R b/R/bootdist.R index 827677e3..dc609913 100644 --- a/R/bootdist.R +++ b/R/bootdist.R @@ -26,170 +26,170 @@ bootdist <- function (f, bootmethod="param", niter=1001, silent=TRUE, parallel = c("no", "snow", "multicore"), ncpus) { - if (niter<10) - stop("niter must be an integer above 10") - bootmethod <- match.arg(bootmethod, c("param", "nonparam")) - - parallel <- match.arg(parallel, c("no", "snow", "multicore")) - if (parallel == "multicore" & .Platform$OS.type == "windows") - { - parallel <- "snow" - warning("As the multicore option is not supported on Windows it was replaced by snow") - } - if ((parallel == "snow" | parallel == "multicore") & missing(ncpus)) - stop("You have to specify the number of available processors to parallelize + if (niter<10) + stop("niter must be an integer above 10") + bootmethod <- match.arg(bootmethod, c("param", "nonparam")) + + parallel <- match.arg(parallel, c("no", "snow", "multicore")) + if (parallel == "multicore" & .Platform$OS.type == "windows") + { + parallel <- "snow" + warning("As the multicore option is not supported on Windows it was replaced by snow") + } + if ((parallel == "snow" | parallel == "multicore") & missing(ncpus)) + stop("You have to specify the number of available processors to parallelize the bootstrap") - - if (!inherits(f, "fitdist")) - stop("Use only with 'fitdist' objects") - - if(!is.null(f$weights)) + + if (!inherits(f, "fitdist")) + stop("Use only with 'fitdist' objects") + + if(!is.null(f$weights)) stop("Bootstrap is not yet available when using weights") - - #simulate bootstrap data - if (bootmethod == "param") - { # parametric bootstrap - rdistname <- paste("r", f$distname, sep="") - if (!exists(rdistname, mode="function")) - stop(paste("The ", rdistname, " function must be defined")) - rdata <- do.call(rdistname, c(list(niter*f$n), as.list(f$estimate), f$fix.arg)) - dim(rdata) <- c(f$n, niter) - }else - { # non parametric bootstrap - rdata <- sample(f$data, size=niter*f$n, replace=TRUE) - dim(rdata) <- c(f$n, niter) - } - - #compute bootstrap estimates - foncestim <- switch(f$method, "mle"=mledist, "qme"=qmedist, "mme"=mmedist, - "mge"=mgedist, "mse"=msedist) - start <- as.list(f$estimate) #a named vector is no longer is accepted as starting values. - if(is.function(f$fix.arg.fun)) - fix.arg <- f$fix.arg.fun - else - fix.arg <- f$fix.arg - if (is.null(f$dots) && !is.function(fix.arg)) - { - func <- function(iter) - { - res <- try(do.call(foncestim, list(data=rdata[, iter], distr=f$distname, start=start, - fix.arg=fix.arg, checkstartfix=TRUE)), silent=silent) - if(inherits(res, "try-error")) - return(c(rep(NA, length(start)), 100)) - else - return(c(res$estimate, res$convergence)) - } - }else if (is.null(f$dots) && is.function(fix.arg)) - { - func <- function(iter) - { - fix.arg.iter <- fix.arg(rdata[, iter]) - res <- try(do.call(foncestim, list(data=rdata[, iter], distr=f$distname, start=start, - fix.arg=fix.arg.iter, checkstartfix=TRUE)), silent=silent) - if(inherits(res, "try-error")) - return(c(rep(NA, length(start)), 100)) - else - return(c(res$estimate, res$convergence)) - } - }else if(!is.null(f$dots) && !is.function(fix.arg)) - { - func <- function(iter) - { - res <- try(do.call(foncestim, c(list(data=rdata[, iter], distr=f$distname, start=start, - fix.arg=fix.arg, checkstartfix=TRUE), f$dots)), silent=silent) - if(inherits(res, "try-error")) - return(c(rep(NA, length(start)), 100)) - else - return(c(res$estimate, res$convergence)) - - } - }else if(!is.null(f$dots) && is.function(fix.arg)) - { - func <- function(iter) - { - fix.arg.iter <- fix.arg(rdata[, iter]) - res <- try(do.call(foncestim, c(list(data=rdata[, iter], distr=f$distname, start=start, - fix.arg=fix.arg.iter, checkstartfix=TRUE), f$dots)), silent=silent) - if(inherits(res, "try-error")) - return(c(rep(NA, length(start)), 100)) - else - return(c(res$estimate, res$convergence)) - - } - }else - stop("wrong implementation in bootdist") - - owarn <- getOption("warn") - oerr <- getOption("show.error.messages") - options(warn=ifelse(silent, -1, 0), show.error.messages=!silent) - - # parallel or sequential computation - if (parallel != "no") + + #simulate bootstrap data + if (bootmethod == "param") + { # parametric bootstrap + rdistname <- paste("r", f$distname, sep="") + if (!exists(rdistname, mode="function")) + stop(paste("The ", rdistname, " function must be defined")) + rdata <- do.call(rdistname, c(list(niter*f$n), as.list(f$estimate), f$fix.arg)) + dim(rdata) <- c(f$n, niter) + }else + { # non parametric bootstrap + rdata <- sample(f$data, size=niter*f$n, replace=TRUE) + dim(rdata) <- c(f$n, niter) + } + + #compute bootstrap estimates + foncestim <- switch(f$method, "mle"=mledist, "qme"=qmedist, "mme"=mmedist, + "mge"=mgedist, "mse"=msedist) + start <- as.list(f$estimate) #a named vector is no longer is accepted as starting values. + if(is.function(f$fix.arg.fun)) + fix.arg <- f$fix.arg.fun + else + fix.arg <- f$fix.arg + if (is.null(f$dots) && !is.function(fix.arg)) + { + func <- function(iter) { - if (parallel == "snow") type <- "PSOCK" - else if (parallel == "multicore") type <- "FORK" - clus <- parallel::makeCluster(ncpus, type = type) - resboot <- parallel::parSapply(clus, 1:niter, func) - parallel::stopCluster(clus) + res <- try(do.call(foncestim, list(data=rdata[, iter], distr=f$distname, start=start, + fix.arg=fix.arg, checkstartfix=TRUE)), silent=silent) + if(inherits(res, "try-error")) + return(c(rep(NA, length(start)), 100)) + else + return(c(res$estimate, res$convergence)) } - else + }else if (is.null(f$dots) && is.function(fix.arg)) + { + func <- function(iter) { - resboot <- sapply(1:niter, func) + fix.arg.iter <- fix.arg(rdata[, iter]) + res <- try(do.call(foncestim, list(data=rdata[, iter], distr=f$distname, start=start, + fix.arg=fix.arg.iter, checkstartfix=TRUE)), silent=silent) + if(inherits(res, "try-error")) + return(c(rep(NA, length(start)), 100)) + else + return(c(res$estimate, res$convergence)) } - - options(warn=owarn, show.error.messages=oerr) - - rownames(resboot) <- c(names(start), "convergence") - if (length(resboot[, 1]) > 2) + }else if(!is.null(f$dots) && !is.function(fix.arg)) + { + func <- function(iter) { - estim <- data.frame(t(resboot)[, -length(resboot[, 1])]) - bootCI <- cbind(apply(resboot[-length(resboot[, 1]), ], 1, median, na.rm=TRUE), - apply(resboot[-length(resboot[, 1]), ], 1, quantile, 0.025, na.rm=TRUE), - apply(resboot[-length(resboot[, 1]), ], 1, quantile, 0.975, na.rm=TRUE)) - colnames(bootCI) <- c("Median", "2.5%", "97.5%") - }else + res <- try(do.call(foncestim, c(list(data=rdata[, iter], distr=f$distname, start=start, + fix.arg=fix.arg, checkstartfix=TRUE), f$dots)), silent=silent) + if(inherits(res, "try-error")) + return(c(rep(NA, length(start)), 100)) + else + return(c(res$estimate, res$convergence)) + + } + }else if(!is.null(f$dots) && is.function(fix.arg)) + { + func <- function(iter) { - estim <- as.data.frame(t(resboot)[, -length(resboot[, 1])]) - names(estim) <- names(f$estimate) - bootCI <- c(median(resboot[-length(resboot[, 1]), ], na.rm=TRUE), - quantile(resboot[-length(resboot[, 1]), ], 0.025, na.rm=TRUE), - quantile(resboot[-length(resboot[, 1]), ], 0.975, na.rm=TRUE)) - names(bootCI) <- c("Median", "2.5%", "97.5%") - } - - # code of convergence of the optimization function for each iteration - converg <- t(resboot)[, length(resboot[, 1])] - - res <- structure(list(estim=estim, converg=converg, - method=bootmethod, nbboot=niter, CI=bootCI, fitpart=f), - class="bootdist") - res + fix.arg.iter <- fix.arg(rdata[, iter]) + res <- try(do.call(foncestim, c(list(data=rdata[, iter], distr=f$distname, start=start, + fix.arg=fix.arg.iter, checkstartfix=TRUE), f$dots)), silent=silent) + if(inherits(res, "try-error")) + return(c(rep(NA, length(start)), 100)) + else + return(c(res$estimate, res$convergence)) + + } + }else + stop("wrong implementation in bootdist") + + owarn <- getOption("warn") + oerr <- getOption("show.error.messages") + options(warn=ifelse(silent, -1, 0), show.error.messages=!silent) + + # parallel or sequential computation + if (parallel != "no") + { + if (parallel == "snow") type <- "PSOCK" + else if (parallel == "multicore") type <- "FORK" + clus <- parallel::makeCluster(ncpus, type = type) + resboot <- parallel::parSapply(clus, 1:niter, func) + parallel::stopCluster(clus) + } + else + { + resboot <- sapply(1:niter, func) + } + + options(warn=owarn, show.error.messages=oerr) + + rownames(resboot) <- c(names(start), "convergence") + if (length(resboot[, 1]) > 2) + { + estim <- data.frame(t(resboot)[, -length(resboot[, 1])]) + bootCI <- cbind(apply(resboot[-length(resboot[, 1]), ], 1, median, na.rm=TRUE), + apply(resboot[-length(resboot[, 1]), ], 1, quantile, 0.025, na.rm=TRUE), + apply(resboot[-length(resboot[, 1]), ], 1, quantile, 0.975, na.rm=TRUE)) + colnames(bootCI) <- c("Median", "2.5%", "97.5%") + }else + { + estim <- as.data.frame(t(resboot)[, -length(resboot[, 1])]) + names(estim) <- names(f$estimate) + bootCI <- c(median(resboot[-length(resboot[, 1]), ], na.rm=TRUE), + quantile(resboot[-length(resboot[, 1]), ], 0.025, na.rm=TRUE), + quantile(resboot[-length(resboot[, 1]), ], 0.975, na.rm=TRUE)) + names(bootCI) <- c("Median", "2.5%", "97.5%") + } + + # code of convergence of the optimization function for each iteration + converg <- t(resboot)[, length(resboot[, 1])] + + res <- structure(list(estim=estim, converg=converg, + method=bootmethod, nbboot=niter, CI=bootCI, fitpart=f), + class="bootdist") + res } print.bootdist <- function(x, ...) { - if (!inherits(x, "bootdist")) - stop("Use only with 'bootdist' objects") - if (x$method=="param") - cat("Parameter values obtained with parametric bootstrap \n") - else - cat("Parameter values obtained with nonparametric bootstrap \n") - print(head(x$estim), ...) - nconverg <- length(x$converg[x$converg==0]) - if (nconverg < length(x$converg)) - { - cat("\n") - cat("The estimation method converged only for", nconverg, "among", - length(x$converg), "iterations \n") - } - + if (!inherits(x, "bootdist")) + stop("Use only with 'bootdist' objects") + if (x$method=="param") + cat("Parameter values obtained with parametric bootstrap \n") + else + cat("Parameter values obtained with nonparametric bootstrap \n") + print(head(x$estim), ...) + nconverg <- length(x$converg[x$converg==0]) + if (nconverg < length(x$converg)) + { + cat("\n") + cat("The estimation method converged only for", nconverg, "among", + length(x$converg), "iterations \n") + } + } plot.bootdist <- function(x, main="Bootstrapped values of parameters", enhance=FALSE, - trueval=NULL, rampcol=NULL, nbgrid = 100, nbcol = 100, ...) + trueval=NULL, rampcol=NULL, nbgrid = 100, nbcol = 100, ...) { - if (!inherits(x, "bootdist")) - stop("Use only with 'bootdist' objects") + if (!inherits(x, "bootdist")) + stop("Use only with 'bootdist' objects") if (dim(x$estim)[2]==1) { stripchart(x$estim, method="jitter", main=main, @@ -218,30 +218,134 @@ plot.bootdist <- function(x, main="Bootstrapped values of parameters", enhance=F summary.bootdist <- function(object, ...) { - if (!inherits(object, "bootdist")) - stop("Use only with 'bootdist' objects") - - class(object) <- c("summary.bootdist", class(object)) - object + if (!inherits(object, "bootdist")) + stop("Use only with 'bootdist' objects") + + class(object) <- c("summary.bootdist", class(object)) + object } print.summary.bootdist <- function(x, ...) { - - if (!inherits(x, "summary.bootdist")) - stop("Use only with 'summary.bootdist' objects") + + if (!inherits(x, "summary.bootdist")) + stop("Use only with 'summary.bootdist' objects") + + if (x$method=="param") + cat("Parametric bootstrap medians and 95% percentile CI \n") + else + cat("Nonparametric bootstrap medians and 95% percentile CI \n") + print(x$CI) + + nconverg <- length(x$converg[x$converg==0]) + if (nconverg < length(x$converg)) + { + cat("\n") + cat("The estimation method converged only for", nconverg, "among", + length(x$converg), "iterations \n") + } +} + +density.bootdist <- function(..., bw = "nrd0", adjust = 1, kernel = "gaussian") +{ + x <- list(...) + if(inherits(x, "bootdist")) + { + x <- list(x) + }else if(!is.list(x)) + { + stop("argument x must be a list of 'bootdist' objects") + }else + { + if(any(sapply(x, function(y) !inherits(y, "bootdist")))) + stop("argument x must be a list of 'bootdist' objects") + } + print(str(x)) + nx <- length(x) + nbpar <- NCOL(x[[1]]$estim) + denslist <- lapply(1:nx, function(j) + { + dres <- lapply(1:nbpar, function(i) + { + #compute empirical density, mean, sd + res <- density(x[[j]]$estim[,i], bw=bw, adjust=adjust, kernel=kernel) + res$mean <- mean(x[[j]]$estim[,i], na.rm=TRUE) + res$sd <- sd(x[[j]]$estim[,i], na.rm=TRUE) + res$distname <- x[[j]]$fitpart$distname + res + } + ) + #name list of densities + names(dres) <- colnames(x[[j]]$estim) + dres + } + ) + nbboot <- sapply(1:nx, function(j) x[[j]]$nbboot) + + structure(denslist, + distname=x[[1]]$fitpart$distname, + nbobject=nx, + nbboot=nbboot, + n=x[[1]]$fitpart$n, + class="density.bootdist") +} - if (x$method=="param") - cat("Parametric bootstrap medians and 95% percentile CI \n") - else - cat("Nonparametric bootstrap medians and 95% percentile CI \n") - print(x$CI) +plot.density.bootdist <- function(x, mar=c(4,4,2,1), lty=NULL, col=NULL, lwd=NULL, ...) +{ + if (!inherits(x, "density.bootdist")) + stop("Use only with 'density.bootdist' objects") + nbpar <- length(x[[1]]) + nft <- length(x) + if(is.null(lty)) + lty <- 1:nft + else + lty <- rep(lty, length.out=nft) + if(is.null(col)) + col <- 1:nft + else + col <- rep(col, length.out=nft) + if(is.null(lwd)) + lwd <- 1.5 + else + lwd <- rep(lwd, length.out=nft) + xname <- names(x[[1]]) + m <- ceiling(nbpar/2) + par(mfrow=c(m, 2), mar=mar) + for(i in 1:nbpar) + { + xlim <- range(sapply(x, function(d) d[[i]]$x)) + ylim <- range(sapply(x, function(d) d[[i]]$y)) + nbboot <- sapply(x, function(d) d[[i]]$n) + mymean <- signif(sapply(x, function(d) d[[i]]$mean), 3) + mysd <- signif(sapply(x, function(d) d[[i]]$sd), 3) + ylim[2] <- ylim[2]*1.2 - nconverg <- length(x$converg[x$converg==0]) - if (nconverg < length(x$converg)) + main <- paste0(x[[1]][[1]]$distname, " distribution - ", xname[i]) + plot(x[[1]][[i]], xlim=xlim, ylim=ylim, lwd=lwd[1], main=main, xlab=xname[i], + col=col[1], lty=lty[1], ylab="Density of bootstrapped values") + if(nft > 1) { - cat("\n") - cat("The estimation method converged only for", nconverg, "among", - length(x$converg), "iterations \n") + for(j in 2:nft) + { + lines(x[[j]][[i]], lty=lty[j], lwd=lwd*(1+(j-1)/10), col=col[j]) + } + myleg <- paste("n=", nbboot, ", mean=", mymean, ", sd=", mysd) + legend("topleft", lty=lty, col=col, lwd=lwd, legend=myleg, bty="n") } + } + invisible(NULL) } + +print.density.bootdist <- function(x, ...) +{ + if (!inherits(x, "density.bootdist")) + stop("Use only with 'density.bootdist' objects") + + nbboot <- paste(attr(x, "nbboot"), collapse=", ") + cat("\nBootstrap values for: ", attr(x, "distname"), " for ", + attr(x, "nbobject"), " object(s) with ", + nbboot, " bootstrap values (original sample size ", + attr(x, "n"), ").", sep = "") + invisible(x) +} + diff --git a/R/bootdistcens.R b/R/bootdistcens.R index 01136202..7f8bf282 100644 --- a/R/bootdistcens.R +++ b/R/bootdistcens.R @@ -209,3 +209,107 @@ print.summary.bootdistcens <- function(x, ...){ length(x$converg), "iterations \n") } } + +density.bootdistcens <- function(..., bw = "nrd0", adjust = 1, kernel = "gaussian") +{ + x <- list(...) + if(inherits(x, "bootdistcens")) + { + x <- list(x) + }else if(!is.list(x)) + { + stop("argument x must be a list of 'bootdistcens' objects") + }else + { + if(any(sapply(x, function(y) !inherits(y, "bootdistcens")))) + stop("argument x must be a list of 'bootdistcens' objects") + } + print(str(x)) + nx <- length(x) + nbpar <- NCOL(x[[1]]$estim) + denslist <- lapply(1:nx, function(j) + { + dres <- lapply(1:nbpar, function(i) + { + #compute empirical density, mean, sd + res <- density(x[[j]]$estim[,i], bw=bw, adjust=adjust, kernel=kernel) + res$mean <- mean(x[[j]]$estim[,i], na.rm=TRUE) + res$sd <- sd(x[[j]]$estim[,i], na.rm=TRUE) + res$distname <- x[[j]]$fitpart$distname + res + } + ) + #name list of densities + names(dres) <- colnames(x[[j]]$estim) + dres + } + ) + nbboot <- sapply(1:nx, function(j) x[[j]]$nbboot) + + structure(denslist, + distname=x[[1]]$fitpart$distname, + nbobject=nx, + nbboot=nbboot, + n=x[[1]]$fitpart$n, + class="density.bootdistcens") +} + + +plot.density.bootdistcens <- function(x, mar=c(4,4,2,1), lty=NULL, col=NULL, lwd=NULL, ...) +{ + if (!inherits(x, "density.bootdistcens")) + stop("Use only with 'density.bootdistcens' objects") + nbpar <- length(x[[1]]) + nft <- length(x) + if(is.null(lty)) + lty <- 1:nft + else + lty <- rep(lty, length.out=nft) + if(is.null(col)) + col <- 1:nft + else + col <- rep(col, length.out=nft) + if(is.null(lwd)) + lwd <- 1.5 + else + lwd <- rep(lwd, length.out=nft) + xname <- names(x[[1]]) + m <- ceiling(nbpar/2) + par(mfrow=c(m, 2), mar=mar) + for(i in 1:nbpar) + { + xlim <- range(sapply(x, function(d) d[[i]]$x)) + ylim <- range(sapply(x, function(d) d[[i]]$y)) + nbboot <- sapply(x, function(d) d[[i]]$n) + mymean <- signif(sapply(x, function(d) d[[i]]$mean), 3) + mysd <- signif(sapply(x, function(d) d[[i]]$sd), 3) + ylim[2] <- ylim[2]*1.2 + + main <- paste0(x[[1]][[1]]$distname, " distribution - ", xname[i]) + plot(x[[1]][[i]], xlim=xlim, ylim=ylim, lwd=lwd[1], main=main, xlab=xname[i], + col=col[1], lty=lty[1], ylab="Density of bootstrapped values") + if(nft > 1) + { + for(j in 2:nft) + { + lines(x[[j]][[i]], lty=lty[j], lwd=lwd*(1+(j-1)/10), col=col[j]) + } + myleg <- paste("n=", nbboot, ", mean=", mymean, ", sd=", mysd) + legend("topleft", lty=lty, col=col, lwd=lwd, legend=myleg, bty="n") + } + } + invisible(NULL) +} + +print.density.bootdistcens <- function(x, ...) +{ + if (!inherits(x, "density.bootdistcens")) + stop("Use only with 'density.bootdistcens' objects") + + nbboot <- paste(attr(x, "nbboot"), collapse=", ") + cat("\nBootstrap values for: ", attr(x, "distname"), " for ", + attr(x, "nbobject"), " object(s) with ", + nbboot, " bootstrap values (original sample size ", + attr(x, "n"), ").", sep = "") + invisible(x) +} diff --git a/man/bootdist.Rd b/man/bootdist.Rd index 5d1b1257..e6e160c6 100644 --- a/man/bootdist.Rd +++ b/man/bootdist.Rd @@ -3,6 +3,9 @@ \alias{plot.bootdist} \alias{print.bootdist} \alias{summary.bootdist} +\alias{density.bootdist} +\alias{plot.density.bootdist} +\alias{print.density.bootdist} \title{ Bootstrap simulation of uncertainty for non-censored data} @@ -18,6 +21,9 @@ bootdist(f, bootmethod = "param", niter = 1001, silent = TRUE, \method{plot}{bootdist}(x, main = "Bootstrapped values of parameters", enhance = FALSE, trueval = NULL, rampcol = NULL, nbgrid = 100, nbcol = 100, \dots) \method{summary}{bootdist}(object, \dots) +\method{density}{bootdist}(\dots, bw = nrd0, adjust = 1, kernel = "gaussian") +\method{plot}{density.bootdist}(x, mar=c(4,4,2,1), lty=NULL, col=NULL, lwd=NULL, \dots) +\method{print}{density.bootdist}(x, \dots) } @@ -33,18 +39,26 @@ bootdist(f, bootmethod = "param", niter = 1001, silent = TRUE, \item{ncpus}{Number of processes to be used in parallel operation : typically one would fix it to the number of available CPUs.} -\item{x}{ An object of class \code{"bootdist"}. } +\item{x}{ An object of class \code{"bootdist"} or \code{"density.bootdist"}. } \item{object}{ An object of class \code{"bootdist"}. } \item{main}{an overall title for the plot: see \code{\link{title}}, -default to \code{"Bootstrapped values of parameters"}.} + default to \code{"Bootstrapped values of parameters"}.} \item{enhance}{a logical to get an enhanced plot.} \item{trueval}{when relevant, a numeric vector with the true value of -parameters (for backfitting purposes).} + parameters (for backfitting purposes).} \item{rampcol}{colors to interpolate; must be a valid argument to \code{\link[grDevices]{colorRampPalette}()}.} -\item{nbgrid}{Number of grid points in each direction. Can be scalar or a length-2 integer vector.} -\item{nbcol}{an integer argument, the required number of colors} -\item{\dots}{ Further arguments to be passed to generic methods } +\item{nbgrid}{Number of grid points in each direction. Can be scalar or a + length-2 integer vector.} +\item{nbcol}{An integer argument, the required number of colors} +\item{\dots}{Further arguments to be passed to generic methods or \code{"bootdist"} + objects for \code{density}.} +\item{bw, adjust, kernel}{resp. the smoothing bandwidth, the scaling factor, + the kernel used, see \code{\link{density}}.} +\item{mar}{A numerical vector of the form \code{c(bottom, left, top, right)}, + see \code{\link{par}}.} +\item{lty, col, lwd}{resp. the line type, the color, the line width, + see \code{\link{par}}.} } @@ -84,6 +98,11 @@ parameters (for backfitting purposes).} It is possible to accelerate the bootstrap using parallelization. We recommend you to use \code{parallel = "multicore"}, or \code{parallel = "snow"} if you work on Windows, and to fix \code{ncpus} to the number of available processors. + + \code{density} computes the empirical density of \code{bootdist} objects using the + \code{\link{density}} function (with Gaussian kernel by default). + It returns an object of class \code{density.bootdist} for which \code{print} + and \code{plot} methods are provided. } \value{ @@ -115,8 +134,10 @@ Generic functions: The plot shows the bootstrap estimates with \code{\link{stripchart}} function for univariate parameters and \code{\link{plot}} function for multivariate parameters. } - -} + \item{\code{density}}{ + The density computes empirical densities and return an object of class \code{density.bootdist}. + } + } } @@ -166,6 +187,8 @@ plot(b1, enhance=TRUE) summary(b1) quantile(b1) CIcdfplot(b1, CI.output = "quantile") +density(b1) +plot(density(b1)) # (2) non parametric bootstrap on the same fit # diff --git a/man/bootdistcens.Rd b/man/bootdistcens.Rd index f6e3be86..0c863d93 100644 --- a/man/bootdistcens.Rd +++ b/man/bootdistcens.Rd @@ -3,6 +3,9 @@ \alias{plot.bootdistcens} \alias{print.bootdistcens} \alias{summary.bootdistcens} +\alias{density.bootdistcens} +\alias{plot.density.bootdistcens} +\alias{print.density.bootdistcens} \title{ Bootstrap simulation of uncertainty for censored data} @@ -17,6 +20,9 @@ bootdistcens(f, niter = 1001, silent = TRUE, \method{print}{bootdistcens}(x, \dots) \method{plot}{bootdistcens}(x, \dots) \method{summary}{bootdistcens}(object, \dots) +\method{density}{bootdistcens}(\dots, bw = nrd0, adjust = 1, kernel = "gaussian") +\method{plot}{density.bootdistcens}(x, mar=c(4,4,2,1), lty=NULL, col=NULL, lwd=NULL, \dots) +\method{print}{density.bootdistcens}(x, \dots) } @@ -32,7 +38,14 @@ bootdistcens(f, niter = 1001, silent = TRUE, \item{x}{ An object of class \code{"bootdistcens"}.} \item{object}{ An object of class \code{"bootdistcens"}.} -\item{\dots}{ Further arguments to be passed to generic methods.} +\item{\dots}{Further arguments to be passed to generic methods or \code{"bootdistcens"} + objects for \code{density}.} +\item{bw, adjust, kernel}{resp. the smoothing bandwidth, the scaling factor, + the kernel used, see \code{\link{density}}.} +\item{mar}{A numerical vector of the form \code{c(bottom, left, top, right)}, + see \code{\link{par}}.} +\item{lty, col, lwd}{resp. the line type, the color, the line width, + see \code{\link{par}}.} } \details{ @@ -52,10 +65,14 @@ bootdistcens(f, niter = 1001, silent = TRUE, In these last cases, it provides a representation of the joint uncertainty distribution of the fitted parameters. - It is possible to accelerate the bootstrap using parallelization. We recommend you to + It is possible to accelerate the bootstrap using parallelization. We recommend you to use \code{parallel = "multicore"}, or \code{parallel = "snow"} if you work on Windows, and to fix \code{ncpus} to the number of available processors. + \code{density} computes the empirical density of \code{bootdistcens} objects using the + \code{\link{density}} function (with Gaussian kernel by default). + It returns an object of class \code{density.bootdistcens} for which \code{print} + and \code{plot} methods are provided. } \value{ @@ -87,7 +104,9 @@ Generic functions: The plot shows the bootstrap estimates with the \code{\link{stripchart}} function for univariate parameters and \code{\link{plot}} function for multivariate parameters. } - + \item{\code{density}}{ + The density computes empirical densities and return an object of class \code{density.bootdistcens}. + } } } @@ -133,6 +152,7 @@ summary(b1) plot(b1) quantile(b1) CIcdfplot(b1, CI.output = "quantile") +plot(density(b1)) # (2) Estimation of the mean of the normal distribution # by maximum likelihood with the standard deviation fixed at 1 diff --git a/share/todolist.md b/share/todolist.md index 8a423f4e..faee778c 100644 --- a/share/todolist.md +++ b/share/todolist.md @@ -5,7 +5,7 @@ 1. [ ] take into account weights for definition of starting values 2. [ ] take into account weights in every functions and add examples in fitdist, gofstat, plotdist, plotdistcens, descdist, and all plotting functions 3. [x] ajouter des choix de valeurs initiales pour des lois de actuar, vgam (packages considérés comme centro dans la task view (mention core dans lis à la fin) et bien utilisés avec le format classique, actuar, vgam, gamlss.dist) et faire une FAQ associée avec un tableau lisant les dist prises en compte et la méthode associées (moments ou quantiles), voire la formule -4. [ ] Traiter dans la FAQ la question du choix du nombre d'itérations bootstrap - compléter avec un 4.4 en donnant un exemple où on fait varier le nb d'itérations (faut que ça se stabilise) +4. [x] Traiter dans la FAQ la question du choix du nombre d'itérations bootstrap - compléter avec un 4.4 en donnant un exemple où on fait varier le nb d'itérations (faut que ça se stabilise) 5. [ ] Add stats for fits on censored data and the corresponding gofstat function : look at recent papers 6. [ ] consider the method MSE for censored data ? 7. [ ] explore Cullen and Frey for various dist with trimmed linear moments as well as other common distributions diff --git a/tests/t-bootdist.R b/tests/t-bootdist.R index 5d906098..2cc18bb3 100644 --- a/tests/t-bootdist.R +++ b/tests/t-bootdist.R @@ -24,7 +24,7 @@ print(lapply(b1, head)) plot(b1) summary(b1) -# (1) bis test new plot arguments +# (2) new plot arguments #for new graph functions f1 <- fitdist(rgamma(nsample, 2, 3), "gamma") b1 <- bootdist(f1, niter=nbboot, silent=TRUE) @@ -299,4 +299,20 @@ xval <- sort(unique(x)) summary(bootdist(f1, niter = nbboot)) try(summary(bootdist(f2, niter = nbboot))) # not yet developed +# (18) density of bootdist() +# +x <- rlnorm(1e3) +b0 <- bootdist(fitdist(x, "lnorm"), niter = 50) +b1 <- bootdist(fitdist(x, "lnorm"), niter = 100) +b2 <- bootdist(fitdist(x, "lnorm"), niter = 200) + +#d1 <- fitdistrplus:::density.bootdist(b0, b1, b2) +d1 <- density(b0, b1, b2) +str(d1) +plot(d1) +print(d1) +d1 <- density(b1) +str(d1) +plot(d1) +print(d1) \ No newline at end of file diff --git a/vignettes/FAQ.Rmd b/vignettes/FAQ.Rmd index edec83a6..d051994a 100644 --- a/vignettes/FAQ.Rmd +++ b/vignettes/FAQ.Rmd @@ -1410,6 +1410,27 @@ we recommend the use of the package `mc2d` which aims at making such calculation easy and which gives extensive examples of use of such bootstrap samples of parameters estimated using functions of the package `fitdistrplus`. +## How do we choose the bootstrap number? + +Generally, we do not need to choose a number of bootstrap values +as high as the original sample size. +We search a number for which the mean and the standard values become +stable. +In the log-normal example below, it is enough to have 100 bootstrap values. + +```{r, fig.height=4, fig.width=7, warning = FALSE} +f.ln.MME <- fitdist(rlnorm(1000), "lnorm", method = "mme", order = 1:2) +# Bootstrap +b.ln.50 <- bootdist(f.ln.MME, niter = 50) +b.ln.100 <- bootdist(f.ln.MME, niter = 100) +b.ln.200 <- bootdist(f.ln.MME, niter = 200) +b.ln.500 <- bootdist(f.ln.MME, niter = 500) + +d1 <- density(b.ln.50, b.ln.100, b.ln.200, b.ln.500) +plot(d1) +``` + + # How to personalize plots ## Can I personalize the default plot given for an object of class `fitdist` or `fitdistcens`?