@@ -205,8 +205,10 @@ ptexp <- function(q, rate, low, upp)
}
n <- 200
x <- rexp(n); x <- x[x > .5 & x < 3]
- f1 <- fitdist(x, "texp", method="mle", start=list(rate=3), fix.arg=list(low=min(x), upp=max(x)))
- f2 <- fitdist(x, "texp", method="mle", start=list(rate=3), fix.arg=list(low=.5, upp=3))
+ f1 <- fitdist(x, "texp", method="mle", start=list(rate=3),
+               fix.arg=list(low=min(x), upp=max(x)))
+ f2 <- fitdist(x, "texp", method="mle", start=list(rate=3),
+               fix.arg=list(low=.5, upp=3))
gofstat(list(f1, f2))
par(mfrow=c(1,1), mar=c(4,4,2,1))
cdfcomp(list(f1, f2), do.points = FALSE, xlim=c(0, 3.5))
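# For reference: dtexp/ptexp are presumably the standard truncation of the
# exponential to [low, upp] (a sketch, their full bodies are elided above;
# the "2" suffix marks these as hypothetical re-creations)
dtexp2 <- function(x, rate, low, upp)
{
  # density rescaled by the probability mass of [low, upp]
  dexp(x, rate) * (x >= low) * (x <= upp) / (pexp(upp, rate) - pexp(low, rate))
}
ptexp2 <- function(q, rate, low, upp)
{
  # CDF renormalized on [low, upp], clamped to [0, 1] outside
  PL <- pexp(low, rate); PU <- pexp(upp, rate)
  pmin(pmax(pexp(q, rate) - PL, 0), PU - PL) / (PU - PL)
}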
@@ -259,8 +261,10 @@ ptiexp <- function(q, rate, low, upp)
n <- 100; x <- pmax(pmin(rexp(n), 3), .5)
# the log-likelihood has a discontinuity at the solution
par(mar=c(4,4,2,1), mfrow=1:2)
- llcurve(x, "tiexp", plot.arg="low", fix.arg = list(rate=2, upp=5), min.arg=0, max.arg=.5, lseq=200)
- llcurve(x, "tiexp", plot.arg="upp", fix.arg = list(rate=2, low=0), min.arg=3, max.arg=4, lseq=200)
+ llcurve(x, "tiexp", plot.arg="low", fix.arg = list(rate=2, upp=5),
+         min.arg=0, max.arg=.5, lseq=200)
+ llcurve(x, "tiexp", plot.arg="upp", fix.arg = list(rate=2, low=0),
+         min.arg=3, max.arg=4, lseq=200)
```
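For reference, `tiexp` denotes the exponential distribution winsorized at the bounds: the `pmax`/`pmin` construction above puts point masses at $0.5$ and $3$. A minimal sketch of plausible `dtiexp()`/`ptiexp()` definitions (the vignette's own code is only partially visible in this hunk, hence the "2" suffix):

``` {r}
dtiexp2 <- function(x, rate, low, upp)
{
  # continuous part on (low, upp) plus atoms P(X <= low) at low and
  # P(X >= upp) at upp
  PL <- pexp(low, rate)
  PU <- pexp(upp, rate, lower.tail = FALSE)
  dexp(x, rate) * (x > low) * (x < upp) + PL * (x == low) + PU * (x == upp)
}
ptiexp2 <- function(q, rate, low, upp)
  pexp(q, rate) * (q >= low) * (q < upp) + 1 * (q >= upp)
```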

The first method directly maximizes the log-likelihood $L(l, \theta, u)$; the second
@@ -269,7 +273,8 @@ Inside $[0.5,3]$, the CDF is correctly estimated by both methods, but the first
fails to estimate the true values of the bounds $l, u$.
``` {r, fig.height=3.5, fig.width=7}
(f1 <- fitdist(x, "tiexp", method="mle", start=list(rate=3, low=0, upp=20)))
- (f2 <- fitdist(x, "tiexp", method="mle", start=list(rate=3), fix.arg=list(low=min(x), upp=max(x))))
+ (f2 <- fitdist(x, "tiexp", method="mle", start=list(rate=3),
+         fix.arg=list(low=min(x), upp=max(x))))
gofstat(list(f1, f2))
par(mfrow=c(1,1), mar=c(4,4,2,1))
cdfcomp(list(f1, f2), do.points = FALSE, addlegend=FALSE, xlim=c(0, 3.5))
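# quick follow-up check (not in the original chunk): compare the bounds
# estimated by the full MLE with the true values 0.5 and 3
coef(f1)[c("low", "upp")]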
@@ -731,25 +736,28 @@ try(fitdist(danishuni$Loss, "burr", upper=1000))
```
Using another algorithm, such as BFGS, helps convergence.
``` {r}
- try(fitBurr_cvg2 <- fitdist(danishuni$Loss, "burr", upper=1000, optim.method="L-BFGS-B"))
+ try(fitBurr_cvg2 <- fitdist(danishuni$Loss, "burr", upper=1000,
+                             optim.method="L-BFGS-B"))
```
The fitted values are of the same order of magnitude and both fits are appropriate.
``` {r, fig.height=3.5, fig.width=7}
- cdfcomp(list(fitBurr_cvg1, fitBurr_cvg2), xlogscale = TRUE)
+ cdfcomp(list(fitBurr_cvg1, fitBurr_cvg2), xlogscale = TRUE, fitlwd = 2)
sapply(list(fitBurr_cvg1, fitBurr_cvg2), coef)
```
+
The `llplot()` function helps in assessing how good the fit is.
``` {r, warning=FALSE, message=FALSE, fig.height=6, fig.width=6, echo=FALSE}
llplot(fitBurr_cvg1, fit.show = TRUE)
llplot(fitBurr_cvg2, fit.show = TRUE)
```
+
The log-likelihood surface is rather flat around the fitted values in the shape1/shape2 space.
We observe a dependency between the two shape parameters: their product is almost constant.
``` {r, warning=FALSE}
print(prod(coef(fitBurr_cvg1)[1:2]), digits=5)
print(prod(coef(fitBurr_cvg2)[1:2]), digits=5)
```
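To visualize this ridge, one could draw the log-likelihood surface in the shape1/shape2 plane. A sketch, assuming `llsurface()` mirrors the `llcurve()` interface used earlier (the parameter ranges are illustrative guesses; the third parameter is held at its fitted value):

``` {r}
llsurface(danishuni$Loss, "burr", plot.arg = c("shape1", "shape2"),
          min.arg = c(0.1, 0.1), max.arg = c(5, 5),
          fix.arg = as.list(coef(fitBurr_cvg2))[3], lseq = 50)
```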
- In terms of computation time, we retrieve that the Nelder-Mead algorithm is slow .
+ In terms of computation time, we find that the Nelder-Mead algorithm is slower.
``` {r}
system.time(fitdist(danishuni$Loss, "burr", upper=100))
system.time(fitdist(danishuni$Loss, "burr", upper=1000, optim.method="L-BFGS-B"))
@@ -767,11 +775,16 @@ $\delta$ a boundary parameter.
Let us fit this distribution to the dataset `y` by MLE.
We define the density and distribution functions, each with and without a `log` argument.
``` {r}
- dshiftlnorm <- function(x, mean, sigma, shift, log = FALSE) dlnorm(x+shift, mean, sigma, log=log)
- pshiftlnorm <- function(q, mean, sigma, shift, log.p = FALSE) plnorm(q+shift, mean, sigma, log.p=log.p)
- qshiftlnorm <- function(p, mean, sigma, shift, log.p = FALSE) qlnorm(p, mean, sigma, log.p=log.p)-shift
- dshiftlnorm_no <- function(x, mean, sigma, shift) dshiftlnorm(x, mean, sigma, shift)
- pshiftlnorm_no <- function(q, mean, sigma, shift) pshiftlnorm(q, mean, sigma, shift)
+ dshiftlnorm <- function(x, mean, sigma, shift, log = FALSE)
+   dlnorm(x+shift, mean, sigma, log=log)
+ pshiftlnorm <- function(q, mean, sigma, shift, log.p = FALSE)
+   plnorm(q+shift, mean, sigma, log.p=log.p)
+ qshiftlnorm <- function(p, mean, sigma, shift, log.p = FALSE)
+   qlnorm(p, mean, sigma, log.p=log.p)-shift
+ dshiftlnorm_no <- function(x, mean, sigma, shift)
+   dshiftlnorm(x, mean, sigma, shift)
+ pshiftlnorm_no <- function(q, mean, sigma, shift)
+   pshiftlnorm(q, mean, sigma, shift)
```
We now minimize the negative log-likelihood.
``` {r}
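# A minimal sketch (the body of this chunk is elided by the diff): define the
# negative log-likelihood and minimize it, e.g. with optim()
nllik <- function(par, obs)
  -sum(dshiftlnorm(obs, mean = par[1], sigma = par[2], shift = par[3], log = TRUE))
# e.g. optim(par = c(0, 1, 1), fn = nllik, obs = y, method = "BFGS")
# (the starting values here are illustrative guesses)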
@@ -845,7 +858,8 @@ we use `constrOptim`, which wraps `optim`, to take linear constraints into account.
This also makes it possible to use optimization methods other than the L-BFGS-B
(limited-memory BFGS with bound constraints) method used by `optim`.
``` {r}
- f2 <- fitdist(y, "shiftlnorm", start=start, lower=c(-Inf, 0, -min(y)), optim.method="Nelder-Mead")
+ f2 <- fitdist(y, "shiftlnorm", start=start, lower=c(-Inf, 0, -min(y)),
+               optim.method="Nelder-Mead")
summary(f2)
print(cbind(BFGS=f$estimate, NelderMead=f2$estimate))
@@ -1006,7 +1020,8 @@ require("GeneralizedHyperbolic")
myoptim <- function(fn, par, ui, ci, ...)
{
res <- constrOptim(f=fn, theta=par, method="Nelder-Mead", ui=ui, ci=ci, ...)
- c(res, convergence=res$convergence, value=res$objective, par=res$minimum, hessian=res$hessian)
+ c(res, convergence=res$convergence, value=res$objective,
+   par=res$minimum, hessian=res$hessian)
}
x <- rnig(1000, 3, 1/2, 1/2, 1/4)
ui <- rbind(c(0,1,0,0), c(0,0,1,0), c(0,0,1,-1), c(0,0,1,1))
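# Each row of ui encodes one linear constraint on the NIG parameter vector
# (mu, delta, alpha, beta); assuming the corresponding ci bounds are zero
# (ci is not shown in this hunk), the rows read: delta > 0, alpha > 0,
# alpha - beta > 0 and alpha + beta > 0, i.e. alpha > |beta|,
# which is the feasibility domain of the NIG distribution.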
@@ -1075,14 +1090,17 @@ L2 <- function(p)
(qgeom(1/2, p) - median(x))^2
L2(1/3) #theoretical value
par(mfrow=c(1,1), mar=c(4,4,2,1))
- curve(L2(x), 0.10, 0.95, xlab=expression(p), ylab=expression(L2(p)), main="squared differences", n=301)
+ curve(L2(x), 0.10, 0.95, xlab=expression(p), ylab=expression(L2(p)),
+       main="squared differences", type="s")
```

Any value in the interval $[1/3, 5/9]$ minimizes the squared differences.
Therefore, `fitdist()` may be sensitive to the chosen initial value when a deterministic optimization algorithm is used.
``` {r}
- fitdist(x, "geom", method="qme", probs=1/2, start=list(prob=1/2), control=list(trace=1, REPORT=1))
- fitdist(x, "geom", method="qme", probs=1/2, start=list(prob=1/20), control=list(trace=1, REPORT=1))
+ fitdist(x, "geom", method="qme", probs=1/2, start=list(prob=1/2),
+         control=list(trace=1, REPORT=1))
+ fitdist(x, "geom", method="qme", probs=1/2, start=list(prob=1/20),
+         control=list(trace=1, REPORT=1))
```
The solution is to use a stochastic algorithm such as simulated annealing (SANN).
``` {r}
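# A plausible call (the body of this chunk is elided by the diff): SANN is
# stochastic and does not rely on a local gradient, so it is not trapped by
# the flat, piecewise-constant objective
fitdist(x, "geom", method="qme", probs=1/2, start=list(prob=1/2),
        optim.method="SANN", control=list(maxit=1000))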
@@ -1120,7 +1138,8 @@ x <- rpois(100, lambda=7.5)
L2 <- function(lam)
(qpois(1/2, lambda = lam) - median(x))^2
par(mfrow=c(1,1), mar=c(4,4,2,1))
- curve(L2(x), 6, 9, xlab=expression(lambda), ylab=expression(L2(lambda)), main="squared differences", n=201)
+ curve(L2(x), 6, 9, xlab=expression(lambda), ylab=expression(L2(lambda)),
+       main="squared differences", type="s")
```

Therefore, `fitdist()` may again be sensitive to the chosen initial value.
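As in the geometric case, a stochastic algorithm is a possible remedy; a sketch (this call does not appear in the diff, and the starting value is illustrative):

``` {r}
fitdist(x, "pois", method="qme", probs=1/2, start=list(lambda=median(x)),
        optim.method="SANN", control=list(maxit=1000))
```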
@@ -1493,10 +1512,14 @@ data(groundbeef)
serving <- groundbeef$serving
fit <- fitdist(serving, "gamma")
par(mfrow = c(2,2), mar = c(4, 4, 1, 1))
- denscomp(fit, addlegend = FALSE, main = "", xlab = "serving sizes (g)", fitcol = "orange")
- qqcomp(fit, addlegend = FALSE, main = "", fitpch = 16, fitcol = "grey", line01lty = 2)
- cdfcomp(fit, addlegend = FALSE, main = "", xlab = "serving sizes (g)", fitcol = "orange", lines01 = TRUE)
- ppcomp(fit, addlegend = FALSE, main = "", fitpch = 16, fitcol = "grey", line01lty = 2)
+ denscomp(fit, addlegend = FALSE, main = "", xlab = "serving sizes (g)",
+          fitcol = "orange")
+ qqcomp(fit, addlegend = FALSE, main = "", fitpch = 16, fitcol = "grey",
+        line01lty = 2)
+ cdfcomp(fit, addlegend = FALSE, main = "", xlab = "serving sizes (g)",
+         fitcol = "orange", lines01 = TRUE)
+ ppcomp(fit, addlegend = FALSE, main = "", fitpch = 16, fitcol = "grey",
+        line01lty = 2)
```

In a similar way, the default plot of an object of class `fitdistcens` can be easily customized