format the FAQ
dutangc committed Jan 12, 2025
1 parent 261e08c commit f167e85
Showing 1 changed file: vignettes/FAQ.Rmd (46 additions, 23 deletions)
@@ -205,8 +205,10 @@
ptexp <- function(q, rate, low, upp)
}
n <- 200
x <- rexp(n); x <- x[x > .5 & x < 3]
f1 <- fitdist(x, "texp", method="mle", start=list(rate=3), fix.arg=list(low=min(x), upp=max(x)))
f2 <- fitdist(x, "texp", method="mle", start=list(rate=3), fix.arg=list(low=.5, upp=3))
f1 <- fitdist(x, "texp", method="mle", start=list(rate=3),
fix.arg=list(low=min(x), upp=max(x)))
f2 <- fitdist(x, "texp", method="mle", start=list(rate=3),
fix.arg=list(low=.5, upp=3))
gofstat(list(f1, f2))
par(mfrow=c(1,1), mar=c(4,4,2,1))
cdfcomp(list(f1, f2), do.points = FALSE, xlim=c(0, 3.5))
@@ -259,8 +261,10 @@
ptiexp <- function(q, rate, low, upp)
n <- 100; x <- pmax(pmin(rexp(n), 3), .5)
# the loglikelihood has a discontinuous point at the solution
par(mar=c(4,4,2,1), mfrow=1:2)
llcurve(x, "tiexp", plot.arg="low", fix.arg = list(rate=2, upp=5), min.arg=0, max.arg=.5, lseq=200)
llcurve(x, "tiexp", plot.arg="upp", fix.arg = list(rate=2, low=0), min.arg=3, max.arg=4, lseq=200)
llcurve(x, "tiexp", plot.arg="low", fix.arg = list(rate=2, upp=5),
min.arg=0, max.arg=.5, lseq=200)
llcurve(x, "tiexp", plot.arg="upp", fix.arg = list(rate=2, low=0),
min.arg=3, max.arg=4, lseq=200)
```

The first method directly maximizes the log-likelihood $L(l, \theta, u)$; the second
fixes the bounds at the sample minimum and maximum and maximizes the log-likelihood
over $\theta$ only.
@@ -269,7 +273,8 @@
Inside $[0.5,3]$, the CDF is correctly estimated by both methods, but the first one
does not succeed in estimating the true values of the bounds $l, u$.
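For reference, this log-likelihood can be sketched as follows, writing $g$ and $G$ for the exponential density and CDF, and $n_l$, $n_u$ for the numbers of observations equal to the bounds (an assumption consistent with `ptiexp` above, which puts point masses at $l$ and $u$):

$$
L(l, \theta, u) = \sum_{x_i \in (l, u)} \log g(x_i; \theta) + n_l \log G(l; \theta) + n_u \log\big(1 - G(u; \theta)\big).
$$

Since $n_l$ and $n_u$ jump whenever a bound crosses an observation, $L$ is discontinuous in $l$ and $u$, which is the discontinuity visible in the curves above.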
```{r, fig.height=3.5, fig.width=7}
(f1 <- fitdist(x, "tiexp", method="mle", start=list(rate=3, low=0, upp=20)))
(f2 <- fitdist(x, "tiexp", method="mle", start=list(rate=3), fix.arg=list(low=min(x), upp=max(x))))
(f2 <- fitdist(x, "tiexp", method="mle", start=list(rate=3),
fix.arg=list(low=min(x), upp=max(x))))
gofstat(list(f1, f2))
par(mfrow=c(1,1), mar=c(4,4,2,1))
cdfcomp(list(f1, f2), do.points = FALSE, addlegend=FALSE, xlim=c(0, 3.5))
@@ -731,25 +736,28 @@
try(fitdist(danishuni$Loss, "burr", upper=1000))
```
Using another algorithm, such as L-BFGS-B, helps convergence.
```{r}
-try(fitBurr_cvg2 <- fitdist(danishuni$Loss, "burr", upper=1000, optim.method="L-BFGS-B"))
+try(fitBurr_cvg2 <- fitdist(danishuni$Loss, "burr", upper=1000,
+                            optim.method="L-BFGS-B"))
```
The fitted values have the same order of magnitude and both fits are appropriate.
```{r, fig.height=3.5, fig.width=7}
-cdfcomp(list(fitBurr_cvg1, fitBurr_cvg2), xlogscale = TRUE)
+cdfcomp(list(fitBurr_cvg1, fitBurr_cvg2), xlogscale = TRUE, fitlwd = 2)
sapply(list(fitBurr_cvg1, fitBurr_cvg2), coef)
```

The `llplot()` function helps in understanding how good the fit is.
```{r, warning=FALSE, message=FALSE, fig.height=6, fig.width=6, echo=FALSE}
llplot(fitBurr_cvg1, fit.show = TRUE)
llplot(fitBurr_cvg2, fit.show = TRUE)
```

The log-likelihood surface is rather flat around the fitted values in the shape1/shape2 space.
We observe a dependency between the two shape parameters: their product is almost constant.
```{r, warning=FALSE}
print(prod(coef(fitBurr_cvg1)[1:2]), digits=5)
print(prod(coef(fitBurr_cvg2)[1:2]), digits=5)
```
-In terms of computation time, we retrieve that the Nelder-Mead algorithm is slow.
+In terms of computation time, we observe that the Nelder-Mead algorithm is slower.
```{r}
system.time(fitdist(danishuni$Loss, "burr", upper=100))
system.time(fitdist(danishuni$Loss, "burr", upper=1000, optim.method="L-BFGS-B"))
@@ -767,11 +775,16 @@
$\delta$ a boundary parameter.
Let us fit this distribution on the dataset `y` by MLE.
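Assuming the parameterization used by the code below, that is $X + \delta \sim \mathcal{LN}(\mu, \sigma)$, the density reads

$$
f(x; \mu, \sigma, \delta) = \frac{1}{(x+\delta)\,\sigma\sqrt{2\pi}} \exp\left(-\frac{(\log(x+\delta)-\mu)^2}{2\sigma^2}\right), \qquad x > -\delta.
$$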
We define two functions for the densities with and without a `log` argument.
```{r}
-dshiftlnorm <- function(x, mean, sigma, shift, log = FALSE) dlnorm(x+shift, mean, sigma, log=log)
-pshiftlnorm <- function(q, mean, sigma, shift, log.p = FALSE) plnorm(q+shift, mean, sigma, log.p=log.p)
-qshiftlnorm <- function(p, mean, sigma, shift, log.p = FALSE) qlnorm(p, mean, sigma, log.p=log.p)-shift
-dshiftlnorm_no <- function(x, mean, sigma, shift) dshiftlnorm(x, mean, sigma, shift)
-pshiftlnorm_no <- function(q, mean, sigma, shift) pshiftlnorm(q, mean, sigma, shift)
+dshiftlnorm <- function(x, mean, sigma, shift, log = FALSE)
+  dlnorm(x+shift, mean, sigma, log=log)
+pshiftlnorm <- function(q, mean, sigma, shift, log.p = FALSE)
+  plnorm(q+shift, mean, sigma, log.p=log.p)
+qshiftlnorm <- function(p, mean, sigma, shift, log.p = FALSE)
+  qlnorm(p, mean, sigma, log.p=log.p)-shift
+dshiftlnorm_no <- function(x, mean, sigma, shift)
+  dshiftlnorm(x, mean, sigma, shift)
+pshiftlnorm_no <- function(q, mean, sigma, shift)
+  pshiftlnorm(q, mean, sigma, shift)
```
We now optimize the minus log-likelihood.
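A minimal sketch of this step (the helper `nllik` is a hypothetical name, not taken from the original code; it assumes the positive sample `y` and the `dshiftlnorm` function defined above):

```{r}
# hypothetical helper: minus log-likelihood of the shifted lognormal
nllik <- function(par, obs)
  -sum(dshiftlnorm(obs, mean=par[1], sigma=par[2], shift=par[3], log=TRUE))
# box constraints keep sigma > 0 and min(obs) + shift > 0
opt <- optim(par=c(0, 1, 0), fn=nllik, obs=y, method="L-BFGS-B",
             lower=c(-Inf, 1e-6, -min(y) + 1e-6))
opt$par
```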
```{r}
@@ -845,7 +858,8 @@
we use `constrOptim`, which wraps `optim`, to take the linear constraints into account.
This also allows the use of optimization methods other than L-BFGS-B
(limited-memory BFGS with bound constraints) used by `optim`.
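For instance, the constraints $\sigma > 0$ and $\delta > -\min(y)$ are linear in $(\mu, \sigma, \delta)$ and can be encoded as `ui %*% theta >= ci`; a sketch reusing the hypothetical `nllik` above:

```{r}
ui <- rbind(c(0, 1, 0),   # sigma >= 0
            c(0, 0, 1))   # shift >= -min(y)
ci <- c(0, -min(y))
co <- constrOptim(theta=c(0, 1, 0), f=nllik, grad=NULL,
                  ui=ui, ci=ci, obs=y, method="Nelder-Mead")
co$par
```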
```{r}
-f2 <- fitdist(y, "shiftlnorm", start=start, lower=c(-Inf, 0, -min(y)), optim.method="Nelder-Mead")
+f2 <- fitdist(y, "shiftlnorm", start=start, lower=c(-Inf, 0, -min(y)),
+              optim.method="Nelder-Mead")
summary(f2)
print(cbind(BFGS=f$estimate, NelderMead=f2$estimate))
@@ -1006,7 +1020,8 @@
require("GeneralizedHyperbolic")
myoptim <- function(fn, par, ui, ci, ...)
{
res <- constrOptim(f=fn, theta=par, method="Nelder-Mead", ui=ui, ci=ci, ...)
-  c(res, convergence=res$convergence, value=res$objective, par=res$minimum, hessian=res$hessian)
+  c(res, convergence=res$convergence, value=res$objective,
+    par=res$minimum, hessian=res$hessian)
}
x <- rnig(1000, 3, 1/2, 1/2, 1/4)
ui <- rbind(c(0,1,0,0), c(0,0,1,0), c(0,0,1,-1), c(0,0,1,1))
@@ -1075,14 +1090,17 @@
L2 <- function(p)
(qgeom(1/2, p) - median(x))^2
L2(1/3) #theoretical value
par(mfrow=c(1,1), mar=c(4,4,2,1))
-curve(L2(x), 0.10, 0.95, xlab=expression(p), ylab=expression(L2(p)), main="squared differences", n=301)
+curve(L2(x), 0.10, 0.95, xlab=expression(p), ylab=expression(L2(p)),
+      main="squared differences", type="s")
```

Any value in $[1/3, 5/9]$ minimizes the squared differences.
Therefore, `fitdist()` may be sensitive to the chosen initial value with a deterministic optimization algorithm.
```{r}
fitdist(x, "geom", method="qme", probs=1/2, start=list(prob=1/2), control=list(trace=1, REPORT=1))
fitdist(x, "geom", method="qme", probs=1/2, start=list(prob=1/20), control=list(trace=1, REPORT=1))
fitdist(x, "geom", method="qme", probs=1/2, start=list(prob=1/2),
control=list(trace=1, REPORT=1))
fitdist(x, "geom", method="qme", probs=1/2, start=list(prob=1/20),
control=list(trace=1, REPORT=1))
```
The solution is to use a stochastic algorithm such as simulated annealing (SANN).
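A sketch of such a call (assuming `optim.method` is simply forwarded to `optim`):

```{r}
fitdist(x, "geom", method="qme", probs=1/2, start=list(prob=1/20),
        optim.method="SANN")
```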
```{r}
@@ -1120,7 +1138,8 @@
x <- rpois(100, lambda=7.5)
L2 <- function(lam)
(qpois(1/2, lambda = lam) - median(x))^2
par(mfrow=c(1,1), mar=c(4,4,2,1))
-curve(L2(x), 6, 9, xlab=expression(lambda), ylab=expression(L2(lambda)), main="squared differences", n=201)
+curve(L2(x), 6, 9, xlab=expression(lambda), ylab=expression(L2(lambda)),
+      main="squared differences", type="s")
```

Therefore, `fitdist()` may be sensitive to the chosen initial value.
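As in the geometric case, a stochastic search such as SANN is a possible workaround (a sketch):

```{r}
fitdist(x, "pois", method="qme", probs=1/2, start=list(lambda=6),
        optim.method="SANN")
```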
@@ -1493,10 +1512,14 @@
data(groundbeef)
serving <- groundbeef$serving
fit <- fitdist(serving, "gamma")
par(mfrow = c(2,2), mar = c(4, 4, 1, 1))
-denscomp(fit, addlegend = FALSE, main = "", xlab = "serving sizes (g)", fitcol = "orange")
-qqcomp(fit, addlegend = FALSE, main = "", fitpch = 16, fitcol = "grey", line01lty = 2)
-cdfcomp(fit, addlegend = FALSE, main = "", xlab = "serving sizes (g)", fitcol = "orange", lines01 = TRUE)
-ppcomp(fit, addlegend = FALSE, main = "", fitpch = 16, fitcol = "grey", line01lty = 2)
+denscomp(fit, addlegend = FALSE, main = "", xlab = "serving sizes (g)",
+         fitcol = "orange")
+qqcomp(fit, addlegend = FALSE, main = "", fitpch = 16, fitcol = "grey",
+       line01lty = 2)
+cdfcomp(fit, addlegend = FALSE, main = "", xlab = "serving sizes (g)",
+        fitcol = "orange", lines01 = TRUE)
+ppcomp(fit, addlegend = FALSE, main = "", fitpch = 16, fitcol = "grey",
+       line01lty = 2)
```

In a similar way, the default plot of an object of class `fitdistcens` can be easily personalized.
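For instance (a sketch using the censored `salinity` dataset shipped with the package; the argument values are illustrative):

```{r}
data(salinity)
fitc <- fitdistcens(salinity, "lnorm")
cdfcompcens(fitc, addlegend = FALSE, main = "", fitcol = "orange")
```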
