format the FAQ
dutangc committed Jan 12, 2025
1 parent 261e08c commit f167e85
Showing 1 changed file: vignettes/FAQ.Rmd (46 additions, 23 deletions)
@@ -205,8 +205,10 @@
ptexp <- function(q, rate, low, upp)
}
n <- 200
x <- rexp(n); x <- x[x > .5 & x < 3]
f1 <- fitdist(x, "texp", method="mle", start=list(rate=3), fix.arg=list(low=min(x), upp=max(x)))
f2 <- fitdist(x, "texp", method="mle", start=list(rate=3), fix.arg=list(low=.5, upp=3))
f1 <- fitdist(x, "texp", method="mle", start=list(rate=3),
fix.arg=list(low=min(x), upp=max(x)))
f2 <- fitdist(x, "texp", method="mle", start=list(rate=3),
fix.arg=list(low=.5, upp=3))
gofstat(list(f1, f2))
par(mfrow=c(1,1), mar=c(4,4,2,1))
cdfcomp(list(f1, f2), do.points = FALSE, xlim=c(0, 3.5))
@@ -259,8 +261,10 @@
ptiexp <- function(q, rate, low, upp)
n <- 100; x <- pmax(pmin(rexp(n), 3), .5)
# the loglikelihood has a discontinuous point at the solution
par(mar=c(4,4,2,1), mfrow=1:2)
llcurve(x, "tiexp", plot.arg="low", fix.arg = list(rate=2, upp=5), min.arg=0, max.arg=.5, lseq=200)
llcurve(x, "tiexp", plot.arg="upp", fix.arg = list(rate=2, low=0), min.arg=3, max.arg=4, lseq=200)
llcurve(x, "tiexp", plot.arg="low", fix.arg = list(rate=2, upp=5),
min.arg=0, max.arg=.5, lseq=200)
llcurve(x, "tiexp", plot.arg="upp", fix.arg = list(rate=2, low=0),
min.arg=3, max.arg=4, lseq=200)
```

The first method directly maximizes the log-likelihood $L(l, \theta, u)$; the second
fixes the bounds at the sample minimum and maximum and maximizes the log-likelihood
over $\theta$ only.
@@ -269,7 +273,8 @@
Inside $[0.5,3]$, the CDF is correctly estimated by both methods, but the first one
does not succeed in estimating the true values of the bounds $l, u$.
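For reference, this log-likelihood can be sketched as follows, writing $g$ and $G$ for the exponential density and CDF, and $n_l$, $n_u$ for the numbers of observations equal to the bounds (an assumption consistent with `ptiexp` above, which puts point masses at $l$ and $u$):

$$
L(l, \theta, u) = \sum_{x_i \in (l, u)} \log g(x_i; \theta) + n_l \log G(l; \theta) + n_u \log\big(1 - G(u; \theta)\big).
$$

Since $n_l$ and $n_u$ jump whenever a bound crosses an observation, $L$ is discontinuous in $l$ and $u$, which is the discontinuity visible in the curves above.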
```{r, fig.height=3.5, fig.width=7}
(f1 <- fitdist(x, "tiexp", method="mle", start=list(rate=3, low=0, upp=20)))
(f2 <- fitdist(x, "tiexp", method="mle", start=list(rate=3), fix.arg=list(low=min(x), upp=max(x))))
(f2 <- fitdist(x, "tiexp", method="mle", start=list(rate=3),
fix.arg=list(low=min(x), upp=max(x))))
gofstat(list(f1, f2))
par(mfrow=c(1,1), mar=c(4,4,2,1))
cdfcomp(list(f1, f2), do.points = FALSE, addlegend=FALSE, xlim=c(0, 3.5))
@@ -731,25 +736,28 @@
try(fitdist(danishuni$Loss, "burr", upper=1000))
```
Using another algorithm, such as L-BFGS-B, helps convergence.
```{r}
-try(fitBurr_cvg2 <- fitdist(danishuni$Loss, "burr", upper=1000, optim.method="L-BFGS-B"))
+try(fitBurr_cvg2 <- fitdist(danishuni$Loss, "burr", upper=1000,
+                            optim.method="L-BFGS-B"))
```
The fitted values have the same order of magnitude and both fits are appropriate.
```{r, fig.height=3.5, fig.width=7}
-cdfcomp(list(fitBurr_cvg1, fitBurr_cvg2), xlogscale = TRUE)
+cdfcomp(list(fitBurr_cvg1, fitBurr_cvg2), xlogscale = TRUE, fitlwd = 2)
sapply(list(fitBurr_cvg1, fitBurr_cvg2), coef)
```

The `llplot()` function helps in understanding how good the fit is.
```{r, warning=FALSE, message=FALSE, fig.height=6, fig.width=6, echo=FALSE}
llplot(fitBurr_cvg1, fit.show = TRUE)
llplot(fitBurr_cvg2, fit.show = TRUE)
```

The log-likelihood surface is rather flat around the fitted values in the shape1/shape2 space.
We observe a dependency between the two shape parameters: their product is almost constant.
```{r, warning=FALSE}
print(prod(coef(fitBurr_cvg1)[1:2]), digits=5)
print(prod(coef(fitBurr_cvg2)[1:2]), digits=5)
```
-In terms of computation time, we retrieve that the Nelder-Mead algorithm is slow.
+In terms of computation time, we observe that the Nelder-Mead algorithm is slower.
```{r}
system.time(fitdist(danishuni$Loss, "burr", upper=100))
system.time(fitdist(danishuni$Loss, "burr", upper=1000, optim.method="L-BFGS-B"))
@@ -767,11 +775,16 @@
$\delta$ a boundary parameter.
Let us fit this distribution on the dataset `y` by MLE.
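Assuming the parameterization used by the code below, that is $X + \delta \sim \mathcal{LN}(\mu, \sigma)$, the density reads

$$
f(x; \mu, \sigma, \delta) = \frac{1}{(x+\delta)\,\sigma\sqrt{2\pi}} \exp\left(-\frac{(\log(x+\delta)-\mu)^2}{2\sigma^2}\right), \qquad x > -\delta.
$$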
We define two functions for the densities with and without a `log` argument.
```{r}
-dshiftlnorm <- function(x, mean, sigma, shift, log = FALSE) dlnorm(x+shift, mean, sigma, log=log)
-pshiftlnorm <- function(q, mean, sigma, shift, log.p = FALSE) plnorm(q+shift, mean, sigma, log.p=log.p)
-qshiftlnorm <- function(p, mean, sigma, shift, log.p = FALSE) qlnorm(p, mean, sigma, log.p=log.p)-shift
-dshiftlnorm_no <- function(x, mean, sigma, shift) dshiftlnorm(x, mean, sigma, shift)
-pshiftlnorm_no <- function(q, mean, sigma, shift) pshiftlnorm(q, mean, sigma, shift)
+dshiftlnorm <- function(x, mean, sigma, shift, log = FALSE)
+  dlnorm(x+shift, mean, sigma, log=log)
+pshiftlnorm <- function(q, mean, sigma, shift, log.p = FALSE)
+  plnorm(q+shift, mean, sigma, log.p=log.p)
+qshiftlnorm <- function(p, mean, sigma, shift, log.p = FALSE)
+  qlnorm(p, mean, sigma, log.p=log.p)-shift
+dshiftlnorm_no <- function(x, mean, sigma, shift)
+  dshiftlnorm(x, mean, sigma, shift)
+pshiftlnorm_no <- function(q, mean, sigma, shift)
+  pshiftlnorm(q, mean, sigma, shift)
```
We now optimize the minus log-likelihood.
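A minimal sketch of this step (the helper `nllik` is a hypothetical name, not taken from the original code; it assumes the positive sample `y` and the `dshiftlnorm` function defined above):

```{r}
# hypothetical helper: minus log-likelihood of the shifted lognormal
nllik <- function(par, obs)
  -sum(dshiftlnorm(obs, mean=par[1], sigma=par[2], shift=par[3], log=TRUE))
# box constraints keep sigma > 0 and min(obs) + shift > 0
opt <- optim(par=c(0, 1, 0), fn=nllik, obs=y, method="L-BFGS-B",
             lower=c(-Inf, 1e-6, -min(y) + 1e-6))
opt$par
```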
```{r}
@@ -845,7 +858,8 @@
we use `constrOptim`, which wraps `optim`, to take the linear constraints into account.
This also allows the use of optimization methods other than L-BFGS-B
(limited-memory BFGS with bound constraints) used by `optim`.
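For instance, the constraints $\sigma > 0$ and $\delta > -\min(y)$ are linear in $(\mu, \sigma, \delta)$ and can be encoded as `ui %*% theta >= ci`; a sketch reusing the hypothetical `nllik` above:

```{r}
ui <- rbind(c(0, 1, 0),   # sigma >= 0
            c(0, 0, 1))   # shift >= -min(y)
ci <- c(0, -min(y))
co <- constrOptim(theta=c(0, 1, 0), f=nllik, grad=NULL,
                  ui=ui, ci=ci, obs=y, method="Nelder-Mead")
co$par
```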
```{r}
-f2 <- fitdist(y, "shiftlnorm", start=start, lower=c(-Inf, 0, -min(y)), optim.method="Nelder-Mead")
+f2 <- fitdist(y, "shiftlnorm", start=start, lower=c(-Inf, 0, -min(y)),
+              optim.method="Nelder-Mead")
summary(f2)
print(cbind(BFGS=f$estimate, NelderMead=f2$estimate))
@@ -1006,7 +1020,8 @@
require("GeneralizedHyperbolic")
myoptim <- function(fn, par, ui, ci, ...)
{
res <- constrOptim(f=fn, theta=par, method="Nelder-Mead", ui=ui, ci=ci, ...)
-  c(res, convergence=res$convergence, value=res$objective, par=res$minimum, hessian=res$hessian)
+  c(res, convergence=res$convergence, value=res$objective,
+    par=res$minimum, hessian=res$hessian)
}
x <- rnig(1000, 3, 1/2, 1/2, 1/4)
ui <- rbind(c(0,1,0,0), c(0,0,1,0), c(0,0,1,-1), c(0,0,1,1))
@@ -1075,14 +1090,17 @@
L2 <- function(p)
(qgeom(1/2, p) - median(x))^2
L2(1/3) #theoretical value
par(mfrow=c(1,1), mar=c(4,4,2,1))
-curve(L2(x), 0.10, 0.95, xlab=expression(p), ylab=expression(L2(p)), main="squared differences", n=301)
+curve(L2(x), 0.10, 0.95, xlab=expression(p), ylab=expression(L2(p)),
+      main="squared differences", type="s")
```

Any value in $[1/3, 5/9]$ minimizes the squared differences.
Therefore, `fitdist()` may be sensitive to the chosen initial value with a deterministic optimization algorithm.
```{r}
fitdist(x, "geom", method="qme", probs=1/2, start=list(prob=1/2), control=list(trace=1, REPORT=1))
fitdist(x, "geom", method="qme", probs=1/2, start=list(prob=1/20), control=list(trace=1, REPORT=1))
fitdist(x, "geom", method="qme", probs=1/2, start=list(prob=1/2),
control=list(trace=1, REPORT=1))
fitdist(x, "geom", method="qme", probs=1/2, start=list(prob=1/20),
control=list(trace=1, REPORT=1))
```
The solution is to use a stochastic algorithm such as simulated annealing (SANN).
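A sketch of such a call (assuming `optim.method` is simply forwarded to `optim`):

```{r}
fitdist(x, "geom", method="qme", probs=1/2, start=list(prob=1/20),
        optim.method="SANN")
```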
```{r}
@@ -1120,7 +1138,8 @@
x <- rpois(100, lambda=7.5)
L2 <- function(lam)
(qpois(1/2, lambda = lam) - median(x))^2
par(mfrow=c(1,1), mar=c(4,4,2,1))
-curve(L2(x), 6, 9, xlab=expression(lambda), ylab=expression(L2(lambda)), main="squared differences", n=201)
+curve(L2(x), 6, 9, xlab=expression(lambda), ylab=expression(L2(lambda)),
+      main="squared differences", type="s")
```

Therefore, `fitdist()` may be sensitive to the chosen initial value.
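As in the geometric case, a stochastic search such as SANN is a possible workaround (a sketch):

```{r}
fitdist(x, "pois", method="qme", probs=1/2, start=list(lambda=6),
        optim.method="SANN")
```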
@@ -1493,10 +1512,14 @@
data(groundbeef)
serving <- groundbeef$serving
fit <- fitdist(serving, "gamma")
par(mfrow = c(2,2), mar = c(4, 4, 1, 1))
-denscomp(fit, addlegend = FALSE, main = "", xlab = "serving sizes (g)", fitcol = "orange")
-qqcomp(fit, addlegend = FALSE, main = "", fitpch = 16, fitcol = "grey", line01lty = 2)
-cdfcomp(fit, addlegend = FALSE, main = "", xlab = "serving sizes (g)", fitcol = "orange", lines01 = TRUE)
-ppcomp(fit, addlegend = FALSE, main = "", fitpch = 16, fitcol = "grey", line01lty = 2)
+denscomp(fit, addlegend = FALSE, main = "", xlab = "serving sizes (g)",
+         fitcol = "orange")
+qqcomp(fit, addlegend = FALSE, main = "", fitpch = 16, fitcol = "grey",
+       line01lty = 2)
+cdfcomp(fit, addlegend = FALSE, main = "", xlab = "serving sizes (g)",
+        fitcol = "orange", lines01 = TRUE)
+ppcomp(fit, addlegend = FALSE, main = "", fitpch = 16, fitcol = "grey",
+       line01lty = 2)
```

In a similar way, the default plot of an object of class `fitdistcens` can be easily personalized.
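For instance (a sketch using the censored `salinity` dataset shipped with the package; the argument values are illustrative):

```{r}
data(salinity)
fitc <- fitdistcens(salinity, "lnorm")
cdfcompcens(fitc, addlegend = FALSE, main = "", fitcol = "orange")
```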
