|
301 | 301 | Identity <- ifelse(is.null(Identity), isTRUE(uni), Identity)
|
302 | 302 | x.names <- colnames(X)
|
303 | 303 | if(!multi) {
|
304 |
| - mNs <- toupper(modelNames) |
305 |
| - if(any(sX <- grepl("X", mNs))) { |
306 |
| - mNs <- gsub("X", "E", mNs) |
| 304 | + MS <- toupper(modelNames) |
| 305 | + if(any(sX <- grepl("X", MS))) { |
| 306 | + MS <- gsub("X", "E", MS) |
307 | 307 | if(verbose &&
|
308 |
| - all(is.element(mNs, mfg))) message(paste0("'modelNames' which contain 'X' coerced to ", paste(shQuote(mNs[sX]), collapse=" + "), "\n")) |
| 308 | + all(is.element(MS, mfg))) message(paste0("'modelNames' which contain 'X' coerced to ", paste(shQuote(MS[sX]), collapse=" + "), "\n")) |
309 | 309 | }
|
310 |
| - if(Gany && any(!is.element(mNs, mfg))) stop(paste0("Invalid 'modelNames'", ifelse(uni, " for univariate data", ifelse(low.dim, "", " for high-dimensional data")), "!"), call.=FALSE) |
311 |
| - mfg <- mNs |
312 |
| - if(!Gall) { |
313 |
| - if(any(sZ <- !is.element(mNs, mf1))){ |
314 |
| - mf1 <- tryCatch(unname(vapply(mNs, function(x) switch(EXPR=x, E=, V="E", EII=, VII="EII", EEI=, VEI=, EVI=, VVI="EEI", EEE=, EVE=, VEE=, VVE=, EEV=, VEV=, EVV=, VVV="EEE"), character(1L))), |
| 310 | + if(Gany && any(!is.element(MS, mfg))) stop(paste0("Invalid 'modelNames'", ifelse(uni, " for univariate data", ifelse(low.dim, "", " for high-dimensional data")), "!"), call.=FALSE) |
| 311 | + mfg <- MS |
| 312 | + if(anyg1) { |
| 313 | + if(any(sZ <- !is.element(MS, mf1))) { |
| 314 | + mf1 <- tryCatch(unname(vapply(MS, function(x) switch(EXPR=x, E=, V="E", EII=, VII="EII", EEI=, VEI=, EVI=, VVI="EEI", EEE=, EVE=, VEE=, VVE=, EEV=, VEV=, EVV=, VVV="EEE"), character(1L))), |
315 | 315 | error=function(e) { e$message <- paste0("Invalid 'modelNames' for single component models", ifelse(uni, " for univariate data", ifelse(low.dim, "", " for high-dimensional data")), "!")
|
316 | 316 | stop(e, call.=FALSE) } )
|
317 |
| - if(isTRUE(verbose)) message(paste0("'modelNames'", ifelse(any(sX), " further", ""), " coerced from ", paste(shQuote(mNs[sZ]), collapse=" + "), " to ", paste(shQuote(mf1[sZ]), collapse=" + "), " where G=1\n")) |
318 |
| - } |
319 |
| - } else mf1 <- mfg |
| 317 | + if(isTRUE(verbose)) message(paste0("'modelNames'", ifelse(any(sX), " further", ""), " coerced from ", paste(shQuote(MS[sZ]), collapse=" + "), " to ", paste(shQuote(mf1[sZ]), collapse=" + "), " where G=1\n")) |
| 318 | + } else mf1 <- mfg |
| 319 | + } |
320 | 320 | }
|
321 | 321 | mf1 <- unique(mf1)
|
322 | 322 | mfg <- unique(mfg)
|
323 |
| - all.mod <- if(all(multi, !uni, Gany)) mclust.options("emModelNames") else unique(c(if(anyg0) mf0, if(anyg1) mf1, if(any(G > 1)) mfg)) |
| 323 | + all.mod <- if(all(multi, !uni, Gany)) mclust.options("emModelNames") else unique(c(if(anyg0) mf0, if(anyg1) mf1, if(Gany) mfg)) |
324 | 324 | multi <- length(all.mod) > 1L
|
325 | 325 | if(!miss.list) {
|
326 | 326 | if(length(z.list) != len.G) stop(paste0("'z.list' must be a list of length ", len.G), call.=FALSE)
|
|
895 | 895 | ERR <- inherits(Mstep, "try-error") || attr(Mstep, "returnCode") < 0
|
896 | 896 | }
|
897 | 897 | if(g > 0 && !ERR) {
|
898 |
| - mus <- if(exp.g) muX else Mstep$parameters$mean |
| 898 | + mus <- if(init.exp) muX else Mstep$parameters$mean |
899 | 899 | vari <- Mstep$parameters$variance
|
900 | 900 | } else {
|
901 | 901 | mus <- matrix(NA, nrow=n, ncol=0L)
|
|
1653 | 1653 | #'
|
1654 | 1654 | #' If \code{model} is an object of class \code{"MoEClust"} with \code{G} components, the number of parameters for the \code{gating.pen} and \code{expert.pen} are \code{length(coef(model$gating))} and \code{G * length(coef(model$expert[[1]]))}, respectively.
|
1655 | 1655 | #'
|
1656 |
| -#' Models with a noise component are facilitated here too provided the extra number of parameters are accounted for by the user. |
| 1656 | +#' Models with a noise component are facilitated here too, provided the extra number of parameters are accounted for by the user. |
1657 | 1657 | #' @importFrom matrixStats "rowMaxs"
|
1658 | 1658 | #' @importFrom mclust "mclustModelNames" "nVarParams"
|
1659 | 1659 | #' @return A simplified array containing the BIC, AIC, number of estimated parameters (\code{df}) and, if \code{z} is supplied, also the ICL, for each of the given input arguments.
|
|
1695 | 1695 | #'
|
1696 | 1696 | #' # Make the same comparison with the known number of estimated parameters
|
1697 | 1697 | #' (bic3 <- MoE_crit(loglik=ll, n=n, df=model$df, z=z)["bic",])
|
1698 |
| -#' identical(bic3, bic2) #TRUE |
| 1698 | +#' identical(unname(bic3), bic2) #TRUE |
1699 | 1699 | MoE_crit <- Vectorize(function(modelName, loglik, n, d, G, gating.pen = G - 1L, expert.pen = G * d, z = NULL, df = NULL) {
|
1700 | 1700 | df <- ifelse(!missing(df), df, nVarParams(modelName=modelName, d=d, G=G) + expert.pen + gating.pen)
|
1701 | 1701 | double.ll <- 2 * loglik
|
@@ -3655,7 +3655,7 @@ predict.MoEClust <- function(object, newdata = list(...), resid = FALSE, discar
|
3655 | 3655 | #' @param ... Catches unused arguments.
|
3656 | 3656 | #'
|
3657 | 3657 | #' @details This function is used internally by \code{\link{MoE_gpairs}}, \code{\link{plot.MoEClust}(x, what="gpairs")}, and \code{\link[=as.Mclust.MoEClust]{as.Mclust}}, for visualisation purposes.
|
3658 |
| -#' @note The \code{modelName} of the resulting \code{variance} object may not correspond to the model name of the \code{"MoEClust"} object, in particular scale, shape, &/or orientation may no longer be constrained across clusters. Usually, the \code{modelName} of the transformed \code{variance} object will be \code{"VVV"}. |
| 3658 | +#' @note The \code{modelName} of the resulting \code{variance} object may not correspond to the model name of the \code{"MoEClust"} object, in particular \code{scale}, \code{shape}, &/or \code{orientation} may no longer be constrained across clusters, and \code{cholsigma}, if it was in the input, will be discarded from the output. Usually, the \code{modelName} of the transformed \code{variance} object will be \code{"VVV"}. Furthermore, the output will drop certain row and column names from the output. |
3659 | 3659 | #' @return The \code{variance} component only from the \code{parameters} list from the output of a call to \code{\link{MoE_clust}}, modified accordingly.
|
3660 | 3660 | #' @seealso \code{\link{MoE_clust}}, \code{\link{MoE_gpairs}}, \code{\link{plot.MoEClust}}, \code{\link[=as.Mclust.MoEClust]{as.Mclust}}
|
3661 | 3661 | #' @references Murphy, K. and Murphy, T. B. (2020). Gaussian parsimonious clustering models with covariates and a noise component. \emph{Advances in Data Analysis and Classification}, 14(2): 293-325. <\doi{10.1007/s11634-019-00373-8}>.
|
@@ -3965,7 +3965,7 @@ predict.MoEClust <- function(object, newdata = list(...), resid = FALSE, discar
|
3965 | 3965 | stop("Invalid 'resids': must be coercible to a matrix", call.=FALSE) })
|
3966 | 3966 | if(!is.numeric(resids) ||
|
3967 | 3967 | anyNA(resids)) stop("Invalid 'resids': must be numeric and contain no missing values", call.=FALSE)
|
3968 |
| - if(length(squared) > 1 || |
| 3968 | + if(length(squared) > 1 || |
3969 | 3969 | !is.logical(squared)) stop("'squared' must be a single logical indicator", call.=FALSE)
|
3970 | 3970 | identity <- ifelse(is.null(identity), isFALSE(inherits(fit, "mlm")), identity)
|
3971 | 3971 | if(length(identity) > 1 ||
|
@@ -3995,6 +3995,44 @@ predict.MoEClust <- function(object, newdata = list(...), resid = FALSE, discar
|
3995 | 3995 | }
|
3996 | 3996 | }
|
3997 | 3997 | }
|
| 3998 | + |
| 3999 | +#' Entropy of a fitted MoEClust model |
| 4000 | +#' |
| 4001 | +#' Calculates the normalised entropy of a fitted MoEClust model. |
| 4002 | +#' @param x An object of class \code{"MoEClust"} generated by \code{\link{MoE_clust}}, or an object of class \code{"MoECompare"} generated by \code{\link{MoE_compare}}. Models with gating and/or expert covariates and/or a noise component are facilitated here too. |
| 4003 | +#' |
| 4004 | +#' @details This function calculates the normalised entropy via \deqn{H=-\frac{1}{n\log(G)}\sum_{i=1}^n\sum_{g=1}^G\hat{z}_{ig}\log(\hat{z}_{ig}),} |
| 4005 | +#' where \eqn{n} and \eqn{G} are the sample size and number of components, respectively, and \eqn{\hat{z}_{ig}} is the estimated posterior probability at convergence that observation \eqn{i} belongs to component \eqn{g}. |
| 4006 | +#' @return A single number, given by \eqn{1-H}, in the range [0,1], such that \emph{larger} values indicate clearer separation of the clusters. |
| 4007 | +#' @note This function will always return a normalised entropy of \code{1} for models fitted using the \code{"CEM"} algorithm (see \code{\link{MoE_control}}), or models with only one component. |
| 4008 | +#' @seealso \code{\link{MoE_clust}}, \code{\link{MoE_control}} |
| 4009 | +#' @references Murphy, K. and Murphy, T. B. (2020). Gaussian parsimonious clustering models with covariates and a noise component. \emph{Advances in Data Analysis and Classification}, 14(2): 293-325. <\doi{10.1007/s11634-019-00373-8}>. |
| 4010 | +#' @author Keefe Murphy - <\email{keefe.murphy@@mu.ie}> |
| 4011 | +#' @keywords utility |
| 4012 | +#' @usage |
| 4013 | +#' MoE_entropy(x) |
| 4014 | +#' @export |
| 4015 | +#' |
| 4016 | +#' @examples |
| 4017 | +#' data(ais) |
| 4018 | +#' res <- MoE_clust(ais[,3:7], G=3, gating= ~ BMI + sex, |
| 4019 | +#' modelNames="EEE", network.data=ais) |
| 4020 | +#' |
| 4021 | +#' # Calculate the normalised entropy |
| 4022 | +#' MoE_entropy(res) |
| 4023 | + MoE_entropy <- function(x) { |
| 4024 | + UseMethod("MoE_entropy") |
| 4025 | + } |
| 4026 | + |
| 4027 | +#' @method MoE_entropy MoEClust |
| 4028 | +#' @export |
| 4029 | + MoE_entropy.MoEClust <- function(x) { |
| 4030 | + x <- if(inherits(x, "MoECompare")) x$optimal else x |
| 4031 | + z <- x$z |
| 4032 | + G <- ncol(z) |
| 4033 | + n <- nrow(z) |
| 4034 | + ifelse(attr(x, "Algo") == "CEM" || G == 1, 1L, pmax(0L, 1 - .entropy(z)/(n * log(G)))) |
| 4035 | + } |
3998 | 4036 |
|
3999 | 4037 | #' Approximate Hypervolume Estimate
|
4000 | 4038 | #'
|
@@ -4108,6 +4146,11 @@ predict.MoEClust <- function(object, newdata = list(...), resid = FALSE, discar
|
4108 | 4146 | unlist(lapply(seq_along(x), function(i) stats::setNames(x[[i]], paste0(names(x[i]), "|", names(x[[i]])))))
|
4109 | 4147 | }
|
4110 | 4148 |
|
| 4149 | + .entropy <- function(p) { |
| 4150 | + p <- p[p > 0] |
| 4151 | + sum(-p * log(p)) |
| 4152 | + } |
| 4153 | + |
4111 | 4154 | .listof_exp <- function(x, ...) {
|
4112 | 4155 | nn <- names(x)
|
4113 | 4156 | ll <- length(x)
|
|
0 commit comments