Skip to content

Commit f167e85

Browse files
committed
format the FAQ
1 parent 261e08c commit f167e85

File tree

1 file changed

+46
-23
lines changed

1 file changed

+46
-23
lines changed

vignettes/FAQ.Rmd

Lines changed: 46 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -205,8 +205,10 @@ ptexp <- function(q, rate, low, upp)
205205
}
206206
n <- 200
207207
x <- rexp(n); x <- x[x > .5 & x < 3]
208-
f1 <- fitdist(x, "texp", method="mle", start=list(rate=3), fix.arg=list(low=min(x), upp=max(x)))
209-
f2 <- fitdist(x, "texp", method="mle", start=list(rate=3), fix.arg=list(low=.5, upp=3))
208+
f1 <- fitdist(x, "texp", method="mle", start=list(rate=3),
209+
fix.arg=list(low=min(x), upp=max(x)))
210+
f2 <- fitdist(x, "texp", method="mle", start=list(rate=3),
211+
fix.arg=list(low=.5, upp=3))
210212
gofstat(list(f1, f2))
211213
par(mfrow=c(1,1), mar=c(4,4,2,1))
212214
cdfcomp(list(f1, f2), do.points = FALSE, xlim=c(0, 3.5))
@@ -259,8 +261,10 @@ ptiexp <- function(q, rate, low, upp)
259261
n <- 100; x <- pmax(pmin(rexp(n), 3), .5)
260262
# the loglikelihood has a discontinous point at the solution
261263
par(mar=c(4,4,2,1), mfrow=1:2)
262-
llcurve(x, "tiexp", plot.arg="low", fix.arg = list(rate=2, upp=5), min.arg=0, max.arg=.5, lseq=200)
263-
llcurve(x, "tiexp", plot.arg="upp", fix.arg = list(rate=2, low=0), min.arg=3, max.arg=4, lseq=200)
264+
llcurve(x, "tiexp", plot.arg="low", fix.arg = list(rate=2, upp=5),
265+
min.arg=0, max.arg=.5, lseq=200)
266+
llcurve(x, "tiexp", plot.arg="upp", fix.arg = list(rate=2, low=0),
267+
min.arg=3, max.arg=4, lseq=200)
264268
```
265269

266270
The first method directly maximizes the log-likelihood $L(l, \theta, u)$; the second
@@ -269,7 +273,8 @@ Inside $[0.5,3]$, the CDF are correctly estimated in both methods but the first
269273
does not succeed to estimate the true value of the bounds $l,u$.
270274
```{r, fig.height=3.5, fig.width=7}
271275
(f1 <- fitdist(x, "tiexp", method="mle", start=list(rate=3, low=0, upp=20)))
272-
(f2 <- fitdist(x, "tiexp", method="mle", start=list(rate=3), fix.arg=list(low=min(x), upp=max(x))))
276+
(f2 <- fitdist(x, "tiexp", method="mle", start=list(rate=3),
277+
fix.arg=list(low=min(x), upp=max(x))))
273278
gofstat(list(f1, f2))
274279
par(mfrow=c(1,1), mar=c(4,4,2,1))
275280
cdfcomp(list(f1, f2), do.points = FALSE, addlegend=FALSE, xlim=c(0, 3.5))
@@ -731,25 +736,28 @@ try(fitdist(danishuni$Loss, "burr", upper=1000))
731736
```
732737
Using another algorithm such as the BFGS algorithm helps the convergence.
733738
```{r}
734-
try(fitBurr_cvg2 <- fitdist(danishuni$Loss, "burr", upper=1000, optim.method="L-BFGS-B"))
739+
try(fitBurr_cvg2 <- fitdist(danishuni$Loss, "burr", upper=1000,
740+
optim.method="L-BFGS-B"))
735741
```
736742
The fitted values have the same magnitude and the fits are appropriate.
737743
```{r, fig.height=3.5, fig.width=7}
738-
cdfcomp(list(fitBurr_cvg1, fitBurr_cvg2), xlogscale = TRUE)
744+
cdfcomp(list(fitBurr_cvg1, fitBurr_cvg2), xlogscale = TRUE, fitlwd = 2)
739745
sapply(list(fitBurr_cvg1, fitBurr_cvg2), coef)
740746
```
747+
741748
The `llplot()` function helps in understanding how good is the fit.
742749
```{r, warning=FALSE, message=FALSE, fig.height=6, fig.width=6, echo=FALSE}
743750
llplot(fitBurr_cvg1, fit.show = TRUE)
744751
llplot(fitBurr_cvg2, fit.show = TRUE)
745752
```
753+
746754
The log-likelihood surface is rather flat around the fitted values in shape1/shape2 spaces.
747755
We observe a certain dependency so that the product of shape parameters is almost constant.
748756
```{r, warning=FALSE}
749757
print(prod(coef(fitBurr_cvg1)[1:2]), digits=5)
750758
print(prod(coef(fitBurr_cvg2)[1:2]), digits=5)
751759
```
752-
In terms of computation time, we retrieve that the Nelder-Mead algorithm is slow.
760+
In terms of computation time, we retrieve that the Nelder-Mead algorithm is slower.
753761
```{r}
754762
system.time(fitdist(danishuni$Loss, "burr", upper=100))
755763
system.time(fitdist(danishuni$Loss, "burr", upper=1000, optim.method="L-BFGS-B"))
@@ -767,11 +775,16 @@ $\delta$ a boundary parameter.
767775
Let us fit this distribution on the dataset `y` by MLE.
768776
We define two functions for the densities with and without a `log` argument.
769777
```{r}
770-
dshiftlnorm <- function(x, mean, sigma, shift, log = FALSE) dlnorm(x+shift, mean, sigma, log=log)
771-
pshiftlnorm <- function(q, mean, sigma, shift, log.p = FALSE) plnorm(q+shift, mean, sigma, log.p=log.p)
772-
qshiftlnorm <- function(p, mean, sigma, shift, log.p = FALSE) qlnorm(p, mean, sigma, log.p=log.p)-shift
773-
dshiftlnorm_no <- function(x, mean, sigma, shift) dshiftlnorm(x, mean, sigma, shift)
774-
pshiftlnorm_no <- function(q, mean, sigma, shift) pshiftlnorm(q, mean, sigma, shift)
778+
dshiftlnorm <- function(x, mean, sigma, shift, log = FALSE)
779+
dlnorm(x+shift, mean, sigma, log=log)
780+
pshiftlnorm <- function(q, mean, sigma, shift, log.p = FALSE)
781+
plnorm(q+shift, mean, sigma, log.p=log.p)
782+
qshiftlnorm <- function(p, mean, sigma, shift, log.p = FALSE)
783+
qlnorm(p, mean, sigma, log.p=log.p)-shift
784+
dshiftlnorm_no <- function(x, mean, sigma, shift)
785+
dshiftlnorm(x, mean, sigma, shift)
786+
pshiftlnorm_no <- function(q, mean, sigma, shift)
787+
pshiftlnorm(q, mean, sigma, shift)
775788
```
776789
We now optimize the minus log-likelihood.
777790
```{r}
@@ -845,7 +858,8 @@ we use the `constrOptim` wrapping `optim` to take into account linear constraint
845858
This allows also to use other optimization methods than L-BFGS-B
846859
(low-memory BFGS bounded) used in optim.
847860
```{r}
848-
f2 <- fitdist(y, "shiftlnorm", start=start, lower=c(-Inf, 0, -min(y)), optim.method="Nelder-Mead")
861+
f2 <- fitdist(y, "shiftlnorm", start=start, lower=c(-Inf, 0, -min(y)),
862+
optim.method="Nelder-Mead")
849863
summary(f2)
850864
print(cbind(BFGS=f$estimate, NelderMead=f2$estimate))
851865
@@ -1006,7 +1020,8 @@ require("GeneralizedHyperbolic")
10061020
myoptim <- function(fn, par, ui, ci, ...)
10071021
{
10081022
res <- constrOptim(f=fn, theta=par, method="Nelder-Mead", ui=ui, ci=ci, ...)
1009-
c(res, convergence=res$convergence, value=res$objective, par=res$minimum, hessian=res$hessian)
1023+
c(res, convergence=res$convergence, value=res$objective,
1024+
par=res$minimum, hessian=res$hessian)
10101025
}
10111026
x <- rnig(1000, 3, 1/2, 1/2, 1/4)
10121027
ui <- rbind(c(0,1,0,0), c(0,0,1,0), c(0,0,1,-1), c(0,0,1,1))
@@ -1075,14 +1090,17 @@ L2 <- function(p)
10751090
(qgeom(1/2, p) - median(x))^2
10761091
L2(1/3) #theoretical value
10771092
par(mfrow=c(1,1), mar=c(4,4,2,1))
1078-
curve(L2(x), 0.10, 0.95, xlab=expression(p), ylab=expression(L2(p)), main="squared differences", n=301)
1093+
curve(L2(x), 0.10, 0.95, xlab=expression(p), ylab=expression(L2(p)),
1094+
main="squared differences", type="s")
10791095
```
10801096

10811097
Any value between [1/3, 5/9] minimizes the squared differences.
10821098
Therefore, `fitdist()` may be sensitive to the chosen initial value with deterministic optimization algorithm.
10831099
```{r}
1084-
fitdist(x, "geom", method="qme", probs=1/2, start=list(prob=1/2), control=list(trace=1, REPORT=1))
1085-
fitdist(x, "geom", method="qme", probs=1/2, start=list(prob=1/20), control=list(trace=1, REPORT=1))
1100+
fitdist(x, "geom", method="qme", probs=1/2, start=list(prob=1/2),
1101+
control=list(trace=1, REPORT=1))
1102+
fitdist(x, "geom", method="qme", probs=1/2, start=list(prob=1/20),
1103+
control=list(trace=1, REPORT=1))
10861104
```
10871105
The solution is to use a stochastic algorithm such as simulated annealing (SANN).
10881106
```{r}
@@ -1120,7 +1138,8 @@ x <- rpois(100, lambda=7.5)
11201138
L2 <- function(lam)
11211139
(qpois(1/2, lambda = lam) - median(x))^2
11221140
par(mfrow=c(1,1), mar=c(4,4,2,1))
1123-
curve(L2(x), 6, 9, xlab=expression(lambda), ylab=expression(L2(lambda)), main="squared differences", n=201)
1141+
curve(L2(x), 6, 9, xlab=expression(lambda), ylab=expression(L2(lambda)),
1142+
main="squared differences", type="s")
11241143
```
11251144

11261145
Therefore, using `fitdist()` may be sensitive to the chosen initial value.
@@ -1493,10 +1512,14 @@ data(groundbeef)
14931512
serving <- groundbeef$serving
14941513
fit <- fitdist(serving, "gamma")
14951514
par(mfrow = c(2,2), mar = c(4, 4, 1, 1))
1496-
denscomp(fit, addlegend = FALSE, main = "", xlab = "serving sizes (g)", fitcol = "orange")
1497-
qqcomp(fit, addlegend = FALSE, main = "", fitpch = 16, fitcol = "grey", line01lty = 2)
1498-
cdfcomp(fit, addlegend = FALSE, main = "", xlab = "serving sizes (g)", fitcol = "orange", lines01 = TRUE)
1499-
ppcomp(fit, addlegend = FALSE, main = "", fitpch = 16, fitcol = "grey", line01lty = 2)
1515+
denscomp(fit, addlegend = FALSE, main = "", xlab = "serving sizes (g)",
1516+
fitcol = "orange")
1517+
qqcomp(fit, addlegend = FALSE, main = "", fitpch = 16, fitcol = "grey",
1518+
line01lty = 2)
1519+
cdfcomp(fit, addlegend = FALSE, main = "", xlab = "serving sizes (g)",
1520+
fitcol = "orange", lines01 = TRUE)
1521+
ppcomp(fit, addlegend = FALSE, main = "", fitpch = 16, fitcol = "grey",
1522+
line01lty = 2)
15001523
```
15011524

15021525
In a similar way, the default plot of an object of class `fitdistcens` can be easily personalized

0 commit comments

Comments
 (0)