Skip to content

Commit 23260fb

Browse files
committed
New function MoE_entropy added. Fixed minor bug when supplying modelNames when G=1 only. Latest CRAN release.
1 parent c43f7b7 commit 23260fb

File tree

9 files changed

+126
-32
lines changed

9 files changed

+126
-32
lines changed

DESCRIPTION

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
Package: MoEClust
22
Type: Package
3-
Date: 2021-12-19
3+
Date: 2022-03-28
44
Title: Gaussian Parsimonious Clustering Models with Covariates and a Noise Component
5-
Version: 1.4.2
5+
Version: 1.5.0
66
Authors@R: c(person("Keefe", "Murphy", email = "keefe.murphy@mu.ie", role = c("aut", "cre"), comment = c(ORCID = "0000-0002-7709-3159")),
77
person("Thomas Brendan", "Murphy", email = "brendan.murphy@ucd.ie", role = "ctb", comment = c(ORCID = "0000-0002-5668-7046")))
88
Description: Clustering via parsimonious Gaussian Mixtures of Experts using the MoEClust models introduced by Murphy and Murphy (2020) <doi:10.1007/s11634-019-00373-8>. This package fits finite Gaussian mixture models with a formula interface for supplying gating and/or expert network covariates using a range of parsimonious covariance parameterisations from the GPCM family via the EM/CEM algorithm. Visualisation of the results of such models using generalised pairs plots and the inclusion of an additional noise component is also facilitated. A greedy forward stepwise search algorithm is provided for identifying the optimal model in terms of the number of components, the GPCM covariance parameterisation, and the subsets of gating/expert network covariates.

NAMESPACE

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
# Generated by roxygen2: do not edit by hand
22

33
S3method(MoE_Uncertainty,MoEClust)
4+
S3method(MoE_entropy,MoEClust)
45
S3method(MoE_gpairs,MoEClust)
56
S3method(MoE_plotCrit,MoEClust)
67
S3method(MoE_plotGate,MoEClust)
@@ -41,6 +42,7 @@ export(MoE_control)
4142
export(MoE_crit)
4243
export(MoE_cstep)
4344
export(MoE_dens)
45+
export(MoE_entropy)
4446
export(MoE_estep)
4547
export(MoE_gpairs)
4648
export(MoE_mahala)

R/Functions.R

Lines changed: 61 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -301,26 +301,26 @@
301301
Identity <- ifelse(is.null(Identity), isTRUE(uni), Identity)
302302
x.names <- colnames(X)
303303
if(!multi) {
304-
mNs <- toupper(modelNames)
305-
if(any(sX <- grepl("X", mNs))) {
306-
mNs <- gsub("X", "E", mNs)
304+
MS <- toupper(modelNames)
305+
if(any(sX <- grepl("X", MS))) {
306+
MS <- gsub("X", "E", MS)
307307
if(verbose &&
308-
all(is.element(mNs, mfg))) message(paste0("'modelNames' which contain 'X' coerced to ", paste(shQuote(mNs[sX]), collapse=" + "), "\n"))
308+
all(is.element(MS, mfg))) message(paste0("'modelNames' which contain 'X' coerced to ", paste(shQuote(MS[sX]), collapse=" + "), "\n"))
309309
}
310-
if(Gany && any(!is.element(mNs, mfg))) stop(paste0("Invalid 'modelNames'", ifelse(uni, " for univariate data", ifelse(low.dim, "", " for high-dimensional data")), "!"), call.=FALSE)
311-
mfg <- mNs
312-
if(!Gall) {
313-
if(any(sZ <- !is.element(mNs, mf1))){
314-
mf1 <- tryCatch(unname(vapply(mNs, function(x) switch(EXPR=x, E=, V="E", EII=, VII="EII", EEI=, VEI=, EVI=, VVI="EEI", EEE=, EVE=, VEE=, VVE=, EEV=, VEV=, EVV=, VVV="EEE"), character(1L))),
310+
if(Gany && any(!is.element(MS, mfg))) stop(paste0("Invalid 'modelNames'", ifelse(uni, " for univariate data", ifelse(low.dim, "", " for high-dimensional data")), "!"), call.=FALSE)
311+
mfg <- MS
312+
if(anyg1) {
313+
if(any(sZ <- !is.element(MS, mf1))) {
314+
mf1 <- tryCatch(unname(vapply(MS, function(x) switch(EXPR=x, E=, V="E", EII=, VII="EII", EEI=, VEI=, EVI=, VVI="EEI", EEE=, EVE=, VEE=, VVE=, EEV=, VEV=, EVV=, VVV="EEE"), character(1L))),
315315
error=function(e) { e$message <- paste0("Invalid 'modelNames' for single component models", ifelse(uni, " for univariate data", ifelse(low.dim, "", " for high-dimensional data")), "!")
316316
stop(e, call.=FALSE) } )
317-
if(isTRUE(verbose)) message(paste0("'modelNames'", ifelse(any(sX), " further", ""), " coerced from ", paste(shQuote(mNs[sZ]), collapse=" + "), " to ", paste(shQuote(mf1[sZ]), collapse=" + "), " where G=1\n"))
318-
}
319-
} else mf1 <- mfg
317+
if(isTRUE(verbose)) message(paste0("'modelNames'", ifelse(any(sX), " further", ""), " coerced from ", paste(shQuote(MS[sZ]), collapse=" + "), " to ", paste(shQuote(mf1[sZ]), collapse=" + "), " where G=1\n"))
318+
} else mf1 <- mfg
319+
}
320320
}
321321
mf1 <- unique(mf1)
322322
mfg <- unique(mfg)
323-
all.mod <- if(all(multi, !uni, Gany)) mclust.options("emModelNames") else unique(c(if(anyg0) mf0, if(anyg1) mf1, if(any(G > 1)) mfg))
323+
all.mod <- if(all(multi, !uni, Gany)) mclust.options("emModelNames") else unique(c(if(anyg0) mf0, if(anyg1) mf1, if(Gany) mfg))
324324
multi <- length(all.mod) > 1L
325325
if(!miss.list) {
326326
if(length(z.list) != len.G) stop(paste0("'z.list' must be a list of length ", len.G), call.=FALSE)
@@ -895,7 +895,7 @@
895895
ERR <- inherits(Mstep, "try-error") || attr(Mstep, "returnCode") < 0
896896
}
897897
if(g > 0 && !ERR) {
898-
mus <- if(exp.g) muX else Mstep$parameters$mean
898+
mus <- if(init.exp) muX else Mstep$parameters$mean
899899
vari <- Mstep$parameters$variance
900900
} else {
901901
mus <- matrix(NA, nrow=n, ncol=0L)
@@ -1653,7 +1653,7 @@
16531653
#'
16541654
#' If \code{model} is an object of class \code{"MoEClust"} with \code{G} components, the number of parameters for the \code{gating.pen} and \code{expert.pen} are \code{length(coef(model$gating))} and \code{G * length(coef(model$expert[[1]]))}, respectively.
16551655
#'
1656-
#' Models with a noise component are facilitated here too provided the extra number of parameters are accounted for by the user.
1656+
#' Models with a noise component are facilitated here too, provided the extra number of parameters are accounted for by the user.
16571657
#' @importFrom matrixStats "rowMaxs"
16581658
#' @importFrom mclust "mclustModelNames" "nVarParams"
16591659
#' @return A simplified array containing the BIC, AIC, number of estimated parameters (\code{df}) and, if \code{z} is supplied, also the ICL, for each of the given input arguments.
@@ -1695,7 +1695,7 @@
16951695
#'
16961696
#' # Make the same comparison with the known number of estimated parameters
16971697
#' (bic3 <- MoE_crit(loglik=ll, n=n, df=model$df, z=z)["bic",])
1698-
#' identical(bic3, bic2) #TRUE
1698+
#' identical(unname(bic3), bic2) #TRUE
16991699
MoE_crit <- Vectorize(function(modelName, loglik, n, d, G, gating.pen = G - 1L, expert.pen = G * d, z = NULL, df = NULL) {
17001700
df <- ifelse(!missing(df), df, nVarParams(modelName=modelName, d=d, G=G) + expert.pen + gating.pen)
17011701
double.ll <- 2 * loglik
@@ -3655,7 +3655,7 @@ predict.MoEClust <- function(object, newdata = list(...), resid = FALSE, discar
36553655
#' @param ... Catches unused arguments.
36563656
#'
36573657
#' @details This function is used internally by \code{\link{MoE_gpairs}}, \code{\link{plot.MoEClust}(x, what="gpairs")}, and \code{\link[=as.Mclust.MoEClust]{as.Mclust}}, for visualisation purposes.
3658-
#' @note The \code{modelName} of the resulting \code{variance} object may not correspond to the model name of the \code{"MoEClust"} object, in particular scale, shape, &/or orientation may no longer be constrained across clusters. Usually, the \code{modelName} of the transformed \code{variance} object will be \code{"VVV"}.
3658+
#' @note The \code{modelName} of the resulting \code{variance} object may not correspond to the model name of the \code{"MoEClust"} object, in particular \code{scale}, \code{shape}, &/or \code{orientation} may no longer be constrained across clusters, and \code{cholsigma}, if it was in the input, will be discarded from the output. Usually, the \code{modelName} of the transformed \code{variance} object will be \code{"VVV"}. Furthermore, the output will drop certain row and column names from the output.
36593659
#' @return The \code{variance} component only from the \code{parameters} list from the output of a call to \code{\link{MoE_clust}}, modified accordingly.
36603660
#' @seealso \code{\link{MoE_clust}}, \code{\link{MoE_gpairs}}, \code{\link{plot.MoEClust}}, \code{\link[=as.Mclust.MoEClust]{as.Mclust}}
36613661
#' @references Murphy, K. and Murphy, T. B. (2020). Gaussian parsimonious clustering models with covariates and a noise component. \emph{Advances in Data Analysis and Classification}, 14(2): 293-325. <\doi{10.1007/s11634-019-00373-8}>.
@@ -3965,7 +3965,7 @@ predict.MoEClust <- function(object, newdata = list(...), resid = FALSE, discar
39653965
stop("Invalid 'resids': must be coercible to a matrix", call.=FALSE) })
39663966
if(!is.numeric(resids) ||
39673967
anyNA(resids)) stop("Invalid 'resids': must be numeric and contain no missing values", call.=FALSE)
3968-
if(length(squared) > 1 ||
3968+
if(length(squared) > 1 ||
39693969
!is.logical(squared)) stop("'squared' must be a single logical indicator", call.=FALSE)
39703970
identity <- ifelse(is.null(identity), isFALSE(inherits(fit, "mlm")), identity)
39713971
if(length(identity) > 1 ||
@@ -3995,6 +3995,44 @@ predict.MoEClust <- function(object, newdata = list(...), resid = FALSE, discar
39953995
}
39963996
}
39973997
}
3998+
3999+
#' Entropy of a fitted MoEClust model
4000+
#'
4001+
#' Calculates the normalised entropy of a fitted MoEClust model.
4002+
#' @param x An object of class \code{"MoEClust"} generated by \code{\link{MoE_clust}}, or an object of class \code{"MoECompare"} generated by \code{\link{MoE_compare}}. Models with gating and/or expert covariates and/or a noise component are facilitated here too.
4003+
#'
4004+
#' @details This function calculates the normalised entropy via \deqn{H=-\frac{1}{n\log(G)}\sum_{i=1}^n\sum_{g=1}^G\hat{z}_{ig}\log(\hat{z}_{ig}),}
4005+
#' where \eqn{n} and \eqn{G} are the sample size and number of components, respectively, and \eqn{\hat{z}_{ig}} is the estimated posterior probability at convergence that observation \eqn{i} belongs to component \eqn{g}.
4006+
#' @return A single number, given by \eqn{1-H}, in the range [0,1], such that \emph{larger} values indicate clearer separation of the clusters.
4007+
#' @note This function will always return a normalised entropy of \code{1} for models fitted using the \code{"CEM"} algorithm (see \code{\link{MoE_control}}), or models with only one component.
4008+
#' @seealso \code{\link{MoE_clust}}, \code{\link{MoE_control}}
4009+
#' @references Murphy, K. and Murphy, T. B. (2020). Gaussian parsimonious clustering models with covariates and a noise component. \emph{Advances in Data Analysis and Classification}, 14(2): 293-325. <\doi{10.1007/s11634-019-00373-8}>.
4010+
#' @author Keefe Murphy - <\email{keefe.murphy@@mu.ie}>
4011+
#' @keywords utility
4012+
#' @usage
4013+
#' MoE_entropy(x)
4014+
#' @export
4015+
#'
4016+
#' @examples
4017+
#' data(ais)
4018+
#' res <- MoE_clust(ais[,3:7], G=3, gating= ~ BMI + sex,
4019+
#' modelNames="EEE", network.data=ais)
4020+
#'
4021+
#' # Calculate the normalised entropy
4022+
#' MoE_entropy(res)
4023+
MoE_entropy <- function(x) {
4024+
UseMethod("MoE_entropy")
4025+
}
4026+
4027+
#' @method MoE_entropy MoEClust
4028+
#' @export
4029+
MoE_entropy.MoEClust <- function(x) {
4030+
x <- if(inherits(x, "MoECompare")) x$optimal else x
4031+
z <- x$z
4032+
G <- ncol(z)
4033+
n <- nrow(z)
4034+
ifelse(attr(x, "Algo") == "CEM" || G == 1, 1L, pmax(0L, 1 - .entropy(z)/(n * log(G))))
4035+
}
39984036

39994037
#' Approximate Hypervolume Estimate
40004038
#'
@@ -4108,6 +4146,11 @@ predict.MoEClust <- function(object, newdata = list(...), resid = FALSE, discar
41084146
unlist(lapply(seq_along(x), function(i) stats::setNames(x[[i]], paste0(names(x[i]), "|", names(x[[i]])))))
41094147
}
41104148

4149+
.entropy <- function(p) {
4150+
p <- p[p > 0]
4151+
sum(-p * log(p))
4152+
}
4153+
41114154
.listof_exp <- function(x, ...) {
41124155
nn <- names(x)
41134156
ll <- length(x)

R/MoEClust.R

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,8 +24,8 @@
2424
#' \itemize{
2525
#' \item{Type: }{Package}
2626
#' \item{Package: }{MoEClust}
27-
#' \item{Version: }{1.4.2}
28-
#' \item{Date: }{2021-12-19 (this version), 2017-11-28 (original release)}
27+
#' \item{Version: }{1.5.0}
28+
#' \item{Date: }{2022-03-28 (this version), 2017-11-28 (original release)}
2929
#' \item{Licence: }{GPL (>=2)}
3030
#' }
3131
#'

inst/NEWS.md

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -5,15 +5,22 @@ __with Gating and Expert Network Covariates__
55
__and a Noise Component__
66
=======================================================
77

8-
### New Features, Improvements, Big Fixes, & Miscellaneous Edits
8+
## MoEClust v1.5.0 - (_15<sup>th</sup> release [minor update]: 2022-03-28_)
9+
### Significant User-Visible Changes
910
* Checks/fixes for empty components extended to components w/ `<=1` observations (or equivalent):
1011
__important__ --- some rare cases which previously would not converge will now converge!
11-
* Fixed two bugs introduced in v1.4.1:
12-
* Fixed _significant_ bug related to `exp.init$malanabis=TRUE` (the default),
13-
to restore correct behaviour whenever multiple `modelNames` are being fitted.
14-
* Allowed `G=0:X` in `MoE_clust` without adding noise for `G>0`, unless specifying models w/ noise.
12+
* Fixed _significant_ bugs related to `exp.init$malanabis=TRUE` (the default) introduced in v1.4.1,
13+
__important__ --- restored correct behaviour, especially when multiple `modelNames` are being fitted!
14+
15+
### New Features & Improvements
16+
* New function `MoE_entropy` added.
1517
* Added `summary` (and related `print`) methods for `MoECriterion` objects.
1618
* Minor speed-up to E-step for `"EEE"` & `"VVV"` models.
19+
20+
### Big Fixes & Miscellaneous Edits
21+
* Allowed `G=0:X` in `MoE_clust` without adding noise for `G>0`, unless
22+
specifying models w/ noise, undoing another bug introduced in v1.4.1.
23+
* Fixed minor bug when supplying `modelNames` when `G=1` only.
1724
* Fixed check on validity of `hc.meth` arg. in `MoE_control`.
1825
* Minor documentation clarifications re: `z.list` in `MoE_control`.
1926

man/MoEClust-package.Rd

Lines changed: 2 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/MoE_crit.Rd

Lines changed: 2 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/MoE_entropy.Rd

Lines changed: 42 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/expert_covar.Rd

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)