From 321468244514287703aae93d902b182475de7072 Mon Sep 17 00:00:00 2001 From: philchalmers Date: Sat, 9 Nov 2024 23:15:40 -0500 Subject: [PATCH] M2 no longer needs row-removal --- DESCRIPTION | 2 +- NEWS.md | 4 ++++ R/M2.R | 63 +++++++++++++++++++---------------------------------- R/utils.R | 9 ++++++++ man/M2.Rd | 7 +----- man/mirt.Rd | 2 +- 6 files changed, 39 insertions(+), 48 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 0d82c0c33..9c81394e8 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,5 +1,5 @@ Package: mirt -Version: 1.42.5 +Version: 1.42.6 Type: Package Title: Multidimensional Item Response Theory Authors@R: c( person("Phil", family="Chalmers", email = diff --git a/NEWS.md b/NEWS.md index b647045ee..db83d5801 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,9 @@ # Changes in mirt 1.43 +- `M2()` family no longer requires row-wise removal of missing data to behave + correctly. As such, the `na.rm` argument has been removed as it is no longer + required (requested by Ulrich Schroeders) + - Added support for latent regression ACOV/SE estimation with Oakes method in `mirt()` diff --git a/R/M2.R b/R/M2.R index 6707d884e..4e01a3339 100644 --- a/R/M2.R +++ b/R/M2.R @@ -24,8 +24,6 @@ #' @param quadpts number of quadrature points to use during estimation. If \code{NULL}, #' a suitable value will be chosen based #' on the rubric found in \code{\link{fscores}} -#' @param na.rm logical; remove rows with any missing values? The M2 family of statistics -#' requires a complete dataset in order to be well defined #' @param calcNull logical; calculate statistics for the null model as well? #' Allows for statistics such as the limited information TLI and CFI. Only valid when items all #' have a suitable null model (e.g., those created via \code{\link{createItem}} will not) @@ -77,8 +75,7 @@ #' # M2 with missing data present #' dat[sample(1:prod(dim(dat)), 250)] <- NA #' mod2 <- mirt(dat, 1) -#' # Compute stats by removing missing data row-wise -#' M2(mod2, na.rm = TRUE) +#' M2(mod2) #' #' # C2 statistic (useful when polytomous IRT models have too few df) #' pmod <- mirt(Science, 1) @@ -88,7 +85,7 @@ #' M2(pmod, type = 'C2') #' #' } -M2 <- function(obj, type="M2*", calcNull = TRUE, na.rm=FALSE, quadpts = NULL, theta_lim = c(-6, 6), +M2 <- function(obj, type="M2*", calcNull = TRUE, quadpts = NULL, theta_lim = c(-6, 6), CI = .9, residmat = FALSE, QMC = FALSE, suppress = 1, ...){ impute <- 0 if(is(obj, 'MixtureModel')) @@ -110,14 +107,15 @@ M2 <- function(obj, type="M2*", calcNull = TRUE, na.rm=FALSE, quadpts = NULL, th ret <- list() group <- if(is.null(attr(obj, 'MG'))) 1 else attr(obj, 'MG') nitems <- ncol(obj@Data$data) - if(any(is.na(obj@Data$data))) - stop('M2 can not be calculated for data with missing values.', call.=FALSE) + # if(any(is.na(obj@Data$data))) + # stop('M2 can not be calculated for data with missing values.', call.=FALSE) adj <- obj@Data$mins dat <- t(t(obj@Data$data) - adj) - N <- nrow(dat) - p <- colMeans(dat) - cross <- crossprod(dat, dat) - p <- c(p, cross[lower.tri(cross)]/N) + N <- colSums(!is.na(dat)) + cN <- crossprod_miss(!is.na(dat), !is.na(dat)) + p <- colMeans(dat, na.rm = TRUE) + cross <- crossprod_miss(dat, dat) + p <- c(p, cross[lower.tri(cross)]/cN[lower.tri(cross)]) prodlist <- attr(obj@ParObjects$pars, 'prodlist') K <- obj@Data$K pars <- obj@ParObjects$pars @@ -220,7 +218,7 @@ M2 <- function(obj, type="M2*", calcNull = TRUE, na.rm=FALSE, quadpts = NULL, th E2[is.na(E2)] <- 0 E2 <- E2 + t(E2) diag(E2) <- E11 - R <- cov2cor(cross/N - outer(colMeans(dat), colMeans(dat))) + R <- cov2cor(cross/cN - outer(colMeans(dat, na.rm=TRUE), colMeans(dat, na.rm=TRUE))) Kr <- cov2cor(E2 - outer(E1, E1)) SRMSR <- sqrt( sum((R[lower.tri(R)] - Kr[lower.tri(Kr)])^2) / sum(lower.tri(R))) if(residmat){ @@ -293,7 +291,7 @@ M2 <- function(obj, type="M2*", calcNull = TRUE, na.rm=FALSE, quadpts = NULL, th E2[is.na(E2)] <- 0 E2 <- E2 + t(E2) diag(E2) <- E11 - R <- cov2cor(cross/N - outer(colMeans(dat), colMeans(dat))) + R <- cov2cor(cross/cN - outer(colMeans(dat, na.rm=TRUE), colMeans(dat, na.rm=TRUE))) Kr <- cov2cor(E2 - outer(E1, E1)) SRMSR <- sqrt( sum((R[lower.tri(R)] - Kr[lower.tri(Kr)])^2) / sum(lower.tri(R))) } else SRMSR <- NULL @@ -325,8 +323,15 @@ M2 <- function(obj, type="M2*", calcNull = TRUE, na.rm=FALSE, quadpts = NULL, th } itemloc <- obj@Model$itemloc itemloc <- itemloc[-length(itemloc)] - p <- c(colMeans(obj@Data$fulldata[[1L]][,-itemloc]), - cross[lower.tri(cross)]/N) + was_na <- is.na(extract.mirt(obj, 'data')) + fulldata <- obj@Data$fulldata[[1L]] + for(i in 1:(nitems)){ + pick <- if(i == nitems) c(itemloc[i], ncol(fulldata)) + else c(itemloc[i], itemloc[i+1]-1) + fulldata[was_na[,i], pick] <- NA + } + p <- c(colMeans(fulldata[,-itemloc], na.rm=TRUE), + cross[lower.tri(cross)]/cN[lower.tri(cross)]) } else { # M2 TODO } @@ -338,7 +343,9 @@ M2 <- function(obj, type="M2*", calcNull = TRUE, na.rm=FALSE, quadpts = NULL, th } else .Call('buildXi2els_C2', nrow(delta1), nrow(delta2), ncol(PIs), nitems, PIs, EIs, EIs2, Prior, abcats, abcats2) Xi2 <- rbind(cbind(Xi2els$Xi11, Xi2els$Xi12), cbind(t(Xi2els$Xi12), Xi2els$Xi22)) - ret <- list(Xi2=Xi2, delta=delta, estpars=estpars, p=sqrt(N)*p, e=sqrt(N)*e, SRMSR=SRMSR) + Nstar <- c(N, cN[lower.tri(cN)]) + ret <- list(Xi2=Xi2, delta=delta, estpars=estpars, + p=sqrt(Nstar)*p, e=sqrt(Nstar)*e, SRMSR=SRMSR, N=Nstar) ret } @@ -355,31 +362,7 @@ M2 <- function(obj, type="M2*", calcNull = TRUE, na.rm=FALSE, quadpts = NULL, th if(is(obj, 'MixtureClass')) stop('MixtureClass objects are not yet supported', call.=FALSE) if(QMC && is.null(quadpts)) quadpts <- 5000L - if(na.rm) obj <- removeMissing(obj) - if(na.rm) message('Sample size after row-wise response data removal: ', - nrow(extract.mirt(obj, 'data'))) if(nrow(extract.mirt(obj, 'data')) == 0L) stop('No data!', call.=FALSE) - if(any(is.na(obj@Data$data))){ - if(impute == 0) - stop('Fit statistics cannot be computed when there are missing data. - Remove cases row-wise by passing na.rm=TRUE', call.=FALSE) - if(residmat) stop('residmat not supported when imputing data', call.=FALSE) - Theta <- fscores(obj, plausible.draws = impute, QMC=QMC, leave_missing=TRUE, ...) - collect <- myLapply(Theta, fn, obj=obj, calcNull=calcNull, - quadpts=quadpts, QMC=QMC, theta_lim=theta_lim) - ave <- SD <- collect[[1L]] - ave[ave!= 0] <- SD[SD!=0] <- 0 - for(i in seq_len(impute)) - ave <- ave + collect[[i]] - ave <- ave/impute - vars <- 0 - for(i in seq_len(impute)) - vars <- vars + (ave - collect[[i]])^2 - SD <- sqrt(vars/impute) - ret <- rbind(ave, SD) - rownames(ret) <- c('stats', 'SD_stats') - return(ret) - } discrete <- FALSE if(is(obj, 'DiscreteClass')){ discrete <- TRUE diff --git a/R/utils.R b/R/utils.R index 16ef9e34e..1416b6af5 100644 --- a/R/utils.R +++ b/R/utils.R @@ -2888,6 +2888,15 @@ mySapply <- function(X, FUN, progress = FALSE, ...){ } } +crossprod_miss <- function(x, y){ + mat <- matrix(0, ncol(x), ncol(x)) + for(i in 1:ncol(x)) + for(j in 1:ncol(x)) + if(i <= j) + mat[i,j] <- mat[j,i] <- sum(x[,i] * y[,j], na.rm=TRUE) + mat +} + printf <- function(...) { catf(sprintf(...)) } diff --git a/man/M2.Rd b/man/M2.Rd index 52910d136..12d3fb0a7 100644 --- a/man/M2.Rd +++ b/man/M2.Rd @@ -8,7 +8,6 @@ M2( obj, type = "M2*", calcNull = TRUE, - na.rm = FALSE, quadpts = NULL, theta_lim = c(-6, 6), CI = 0.9, @@ -30,9 +29,6 @@ M2 and M2* where only the bivariate moments are collapsed} Allows for statistics such as the limited information TLI and CFI. Only valid when items all have a suitable null model (e.g., those created via \code{\link{createItem}} will not)} -\item{na.rm}{logical; remove rows with any missing values? The M2 family of statistics -requires a complete dataset in order to be well defined} - \item{quadpts}{number of quadrature points to use during estimation. If \code{NULL}, a suitable value will be chosen based on the rubric found in \code{\link{fscores}}} @@ -85,8 +81,7 @@ summary(resids[lower.tri(resids)]) # M2 with missing data present dat[sample(1:prod(dim(dat)), 250)] <- NA mod2 <- mirt(dat, 1) -# Compute stats by removing missing data row-wise -M2(mod2, na.rm = TRUE) +M2(mod2) # C2 statistic (useful when polytomous IRT models have too few df) pmod <- mirt(Science, 1) diff --git a/man/mirt.Rd b/man/mirt.Rd index 6a56ef78e..6b38d8561 100644 --- a/man/mirt.Rd +++ b/man/mirt.Rd @@ -1343,7 +1343,7 @@ ds <- rbind(cbind(d1=NA, d2=d), cbind(d1, d2)) (pars <- data.frame(a=as, d=ds)) dat <- simdata(as, ds, 2500, itemtype = c(rep('dich', 18), rep('partcomp', 12))) -colMeans(dat) +itemstats(dat) # unconditional model syntax <- "theta1 = 1-9, 19-30