diff --git a/.github/.gitignore b/.github/.gitignore
old mode 100644
new mode 100755
diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml
old mode 100644
new mode 100755
diff --git a/.github/workflows/check-standard.yml b/.github/workflows/check-standard.yml
index 0533354..653f3a6 100644
--- a/.github/workflows/check-standard.yml
+++ b/.github/workflows/check-standard.yml
@@ -1,44 +1,44 @@
-on:
-  push:
-    branches: [main, master]
-  pull_request:
-    branches: [main, master]
-
-name: R-CMD-check
-
-jobs:
-  R-CMD-check:
-    runs-on: ${{ matrix.config.os }}
-
-    name: ${{ matrix.config.os }} (${{ matrix.config.r }})
-
-    strategy:
-      fail-fast: false
-      matrix:
-        config:
-          - {os: macOS-latest,   r: 'release'}
-          - {os: windows-latest, r: 'release'}
-          - {os: ubuntu-latest,   r: 'devel', http-user-agent: 'release'}
-          - {os: ubuntu-latest,   r: 'release'}
-          - {os: ubuntu-latest,   r: 'oldrel-1'}
-
-    env:
-      GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
-      R_KEEP_PKG_SOURCE: yes
-
-    steps:
-      - uses: actions/checkout@v2
-
-      - uses: r-lib/actions/setup-pandoc@v1
-
-      - uses: r-lib/actions/setup-r@v1
-        with:
-          r-version: ${{ matrix.config.r }}
-          http-user-agent: ${{ matrix.config.http-user-agent }}
-          use-public-rspm: true
-
-      - uses: r-lib/actions/setup-r-dependencies@v2
-        with:
-          extra-packages: rcmdcheck
-
-      - uses: r-lib/actions/check-r-package@v2
+on:
+  push:
+    branches: [main, master]
+  pull_request:
+    branches: [main, master]
+
+name: R-CMD-check
+
+jobs:
+  R-CMD-check:
+    runs-on: ${{ matrix.config.os }}
+
+    name: ${{ matrix.config.os }} (${{ matrix.config.r }})
+
+    strategy:
+      fail-fast: false
+      matrix:
+        config:
+          - {os: macOS-latest,   r: 'release'}
+          - {os: windows-latest, r: 'release'}
+          - {os: ubuntu-latest,   r: 'devel', http-user-agent: 'release'}
+          - {os: ubuntu-latest,   r: 'release'}
+          - {os: ubuntu-latest,   r: 'oldrel-1'}
+
+    env:
+      GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
+      R_KEEP_PKG_SOURCE: yes
+
+    steps:
+      - uses: actions/checkout@v2
+
+      - uses: r-lib/actions/setup-pandoc@v1
+
+      - uses: r-lib/actions/setup-r@v1
+        with:
+          r-version: ${{ matrix.config.r }}
+          http-user-agent: ${{ matrix.config.http-user-agent }}
+          use-public-rspm: true
+
+      - uses: r-lib/actions/setup-r-dependencies@v2
+        with:
+          extra-packages: rcmdcheck
+
+      - uses: r-lib/actions/check-r-package@v2
diff --git a/.github/workflows/pkgdown.yaml b/.github/workflows/pkgdown.yaml
old mode 100644
new mode 100755
diff --git a/DESCRIPTION b/DESCRIPTION
index 011e241..1f5b93b 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -26,3 +26,4 @@ URL: https://github.com/smartdata-analysis-and-statistics/metamisc,
 RoxygenNote: 7.2.3
 NeedsCompilation: no
 VignetteBuilder: knitr
+BugReports: https://github.com/smartdata-analysis-and-statistics/metamisc/issues
diff --git a/R/ccalc.r b/R/ccalc.r
index 615df05..f170396 100644
--- a/R/ccalc.r
+++ b/R/ccalc.r
@@ -1,306 +1,306 @@
-#' Calculate the concordance statistic
-#'
-#' The function calculates (transformed versions of) the concordance (c-)
-#' statistic with the corresponding sampling variance. 
-#' 
-#' @param cstat vector to specify the estimated c-statistics.
-#' @param cstat.se Optional vector to specify the corresponding standard errors.
-#' @param cstat.cilb Optional vector to specify the lower limits of the confidence interval.
-#' @param cstat.ciub Optional vector to specify the upper limits of the confidence interval.
-#' @param cstat.cilv Optional vector to specify the levels of aformentioned confidence interval limits. 
-#' (default: 0.95, which corresponds to the 95\% confidence interval).
-#' @param sd.LP Optional vector to specify the standard deviations of the linear predictor (prognostic index).
-#' @param N Optional vector to specify the sample/group sizes.
-#' @param O Optional vector to specify the total number of observed events.
-#' @param Po Optional vector to specify the observed event probabilities.
-#' @param data Optional data frame containing the variables given to the arguments above.
-#' @param slab Optional vector with labels for the studies.
-#' @param subset Optional vector indicating the subset of studies that should be used. This can be a logical vector or a numeric vector indicating the indices of the studies to include.
-#' @param g a quoted string that is the function to transform estimates of the c-statistic; see the details below.
-#' @param level Optional numeric to specify the level for the confidence interval, default \code{0.95}.
-#' @param approx.se.method integer specifying which method should be used for estimating the standard error of the
-#' c-statistic (Newcombe, 2006). So far, only method \code{2} and method \code{4} (default) have been implemented.
-#' @param \ldots Additional arguments.
-#' 
-#' @details 
-#' The c-statistic is a measure of discrimination, and indicates the ability of a prediction model to 
-#' distinguish between patients developing and not developing the outcome. The c-statistic typically ranges 
-#' from 0.5 (no discriminative ability) to 1 (perfect discriminative ability). 
-#' 
-#' By default, the function \code{ccalc} will derive the c-statistic of each study, together with 
-#' the corresponding standard error and 95\% confidence interval. However, it is also possible to calculate transformed 
-#' versions of the c-statistic. Appropriate standard errors are then derived using the Delta method. 
-#' For instance, the logit transformation can be applied by specifying \code{g="log(cstat/(1-cstat))"}. 
-#' 
-#' \subsection{Restoring the c-statistic}{
-#' For studies where the c-statistic is missing, it is estimated from the standard deviation of the linear predictor 
-#' (\code{theta.source="std.dev(LP)"}). The corresponding method is described by White et al. (2015).
-#' }
-#' 
-#' \subsection{Restoring the standard error of the c-statistic}{
-#' When missing, the standard error of the c-statistic can be estimated from the confidence interval. Alternatively, 
-#' the standard error can be approximated from a combination of the reported c-statistic, the total sample size and 
-#' the total number of events (Newcombe, 2006). This can be achieved by adopting (a modification of) the method 
-#' proposed by Hanley and McNeil, as specified in \code{approx.se.method}.
-#' }
-#' 
-#' @references 
-#' Debray TPA, Damen JAAG, Snell KIE, Ensor J, Hooft L, Reitsma JB, et al. A guide to systematic review and meta-analysis of prediction model performance. BMJ. 2017;356:i6460. 
-#' 
-#' Debray TPA, Damen JAAG, Riley R, Snell KIE, Reitsma JB, Hooft L, et al. A framework for meta-analysis of  prediction model studies with binary and time-to-event outcomes. Stat Methods Med Res. 2018; In press. 
-#' 
-#' Hanley JA, McNeil BJ. The meaning and use of the area under a receiver operating characteristic (ROC) 
-#' curve. \emph{Radiology}. 1982; 143(1):29--36.
-#' 
-#' Newcombe RG. Confidence intervals for an effect size measure based on the Mann-Whitney statistic. 
-#' Part 2: asymptotic methods and evaluation. \emph{Stat Med}. 2006; 25(4):559--73.
-#' 
-#' Snell KI, Ensor J, Debray TP, Moons KG, Riley RD. Meta-analysis of prediction model performance across 
-#' multiple studies: Which scale helps ensure between-study normality for the C -statistic and calibration measures? 
-#' \emph{Statistical Methods in Medical Research}. 2017. 
-#' 
-#' White IR, Rapsomaniki E, the Emerging Risk Factors Collaboration. Covariate-adjusted measures of discrimination 
-#' for survival data. \emph{Biom J}. 2015;57(4):592--613. 
-#' 
-#' 
-#' @return An object of class c("mm_perf","data.frame") with the following columns:
-#' \describe{
-##'  \item{"theta"}{The (transformed) c-statistics. }
-##'  \item{"theta.se"}{Standard errors of the (transformed) c-statistics.}
-##'  \item{"theta.cilb"}{Lower confidence interval of the (transformed) c-statistics. The level is specified in
-##'  \code{level}. Intervals are calculated on the same scale as \code{theta} by assuming a Normal distribution.}
-##'  \item{"theta.ciub"}{Upper confidence interval of the (transformed) c-statistics. The level is specified in
-##'  \code{level}. Intervals are calculated on the same scale as \code{theta} by assuming a Normal distribution.}
-##'  \item{"theta.source"}{Method used for calculating the (transformed) c-statistic.}
-##'  \item{"theta.se.source"}{Method used for calculating the standard error of the (transformed) c-statistic.}
-##' }
-#' 
-#' @examples 
-#' ######### Validation of prediction models with a binary outcome #########
-#' data(EuroSCORE)
-#' 
-#' # Calculate the c-statistic and its standard error
-#' est1 <- ccalc(cstat = c.index, cstat.se = se.c.index, cstat.cilb = c.index.95CIl, 
-#'               cstat.ciub = c.index.95CIu, N = n, O = n.events, data = EuroSCORE, slab = Study)
-#' est1
-#'   
-#' # Calculate the logit c-statistic and its standard error
-#' est2 <- ccalc(cstat = c.index, cstat.se = se.c.index, cstat.cilb = c.index.95CIl, 
-#'               cstat.ciub = c.index.95CIu, N = n, O = n.events, data = EuroSCORE, slab = Study, 
-#'               g = "log(cstat/(1-cstat))")
-#' est2
-#'       
-#' # Display the results of all studies in a forest plot
-#' plot(est1)
-#'                                                             
-#' @keywords meta-analysis discrimination concordance statistic performance
-#' 
-#' @author Thomas Debray <thomas.debray@gmail.com>
-#' 
-#' @export
-#' 
-ccalc <- function(cstat, cstat.se, cstat.cilb, cstat.ciub, cstat.cilv, sd.LP, N, O, Po, data, slab, subset,
-                  g = NULL, level = 0.95, approx.se.method = 4, ...) {
-  
-  ### check if data argument has been specified
-  if (missing(data))
-    data <- NULL
-  
-  ### need this at the end to check if append=TRUE can actually be done
-  no.data <- is.null(data)
-  
-  ### check if data argument has been specified
-  if (is.null(data)) {
-    data <- sys.frame(sys.parent())
-  } else {
-    if (!is.data.frame(data))
-      data <- data.frame(data)
-  }
-  
-  #######################################################################################
-  # Retrieve all data
-  #######################################################################################
-  mf <- match.call()
-  
-  mf.slab       <- mf[[match("slab",   names(mf))]]
-  slab          <- eval(mf.slab,   data, enclos=sys.frame(sys.parent()))
-  mf.subset     <- mf[[match("subset", names(mf))]]
-  subset        <- eval(mf.subset, data, enclos=sys.frame(sys.parent()))
-  mf.cstat      <- mf[[match("cstat", names(mf))]]
-  cstat         <- eval(mf.cstat, data, enclos=sys.frame(sys.parent()))
-  mf.cstat.se   <- mf[[match("cstat.se", names(mf))]]
-  cstat.se      <- eval(mf.cstat.se, data, enclos=sys.frame(sys.parent()))
-  mf.cstat.cilb <- mf[[match("cstat.cilb", names(mf))]]
-  cstat.cilb    <- eval(mf.cstat.cilb, data, enclos=sys.frame(sys.parent()))
-  mf.cstat.ciub <- mf[[match("cstat.ciub", names(mf))]]
-  cstat.ciub    <- eval(mf.cstat.ciub, data, enclos=sys.frame(sys.parent()))
-  mf.cstat.cilv <- mf[[match("cstat.cilv", names(mf))]]
-  cstat.cilv    <- eval(mf.cstat.cilv, data, enclos=sys.frame(sys.parent()))
-  mf.sd.LP      <- mf[[match("sd.LP", names(mf))]]
-  sd.LP         <- eval(mf.sd.LP, data, enclos=sys.frame(sys.parent()))
-  mf.N          <- mf[[match("N", names(mf))]]
-  N             <- eval(mf.N, data, enclos=sys.frame(sys.parent()))
-  mf.O          <- mf[[match("O", names(mf))]]
-  O             <- eval(mf.O, data, enclos=sys.frame(sys.parent()))
-  mf.Po         <- mf[[match("Po", names(mf))]]
-  Po            <- eval(mf.Po, data, enclos=sys.frame(sys.parent()))
-  
-  
-  
-  #######################################################################################
-  # Count number of studies
-  #######################################################################################
-  k <- 0
-  
-  if (!no.data) {
-    k <- dim(data)[1]
-  } else if (!is.null(cstat)) {
-    k <- length(cstat)
-  } else if (!is.null(cstat.se)) {
-    k <- length(cstat.se)
-  } else if (!is.null(cstat.cilb)) {
-    k <- length(cstat.cilb)
-  } else if (!is.null(cstat.ciub)) {
-    k <- length(cstat.ciub)
-  } else if (!is.null(sd.LP)) {
-    k <- length(sd.LP)
-  } else if (!is.null(N)) {
-    k <- length(N)
-  } else if (!is.null(O)) {
-    k <- length(O)
-  } else if (!is.null(Po)) {
-    k <- length(Po)
-  }
-  
-  if (k<1) stop("No data provided!")
-  
-  if(is.null(cstat)) {
-    cstat <- rep(NA, times=k)
-  }
-  
-  #######################################################################################
-  # Assign study labels
-  # taken from escalc
-  #######################################################################################
-  if (!is.null(slab)) {
-    
-    if (!is.null(subset))
-      slab <- slab[subset]
-    
-    if (anyNA(slab))
-      stop("NAs in study labels.")
-    
-    if (inherits(slab, "factor")) {
-      slab <- as.character(slab)
-    }
-    
-    ### check if study labels are unique; if not, make them unique
-    
-    if (anyDuplicated(slab))
-      slab <- make.unique(slab)
-    
-    if (length(slab) != k)
-      stop("Study labels not of same length as data.")
-    
-    ### add slab attribute to the cstat vector
-    attr(cstat, "slab") <- slab
-  }
-  
-  ### if a subset of studies is specified (note: subsetting of other parts already done above, so yi/vi/ni.u/slab are already subsetted)
-  if (!is.null(subset)) {
-    if (!no.data)
-      data <- data[subset,,drop=FALSE]
-  }
-  
-  #######################################################################################
-  # Prepare data
-  #######################################################################################
-  if (is.null(O))  O <- rep(NA, length=k)
-  if (is.null(Po)) Po <- rep(NA, length=k)
-  if (is.null(N))  N <- rep(NA, length=k)
-  if (is.null(cstat.cilb)) cstat.cilb <- rep(NA, length=k)
-  if (is.null(cstat.ciub)) cstat.ciub <- rep(NA, length=k)
-  if (is.null(cstat.cilv)) cstat.cilv <- rep(0.95, length=k)
-  if (is.null(cstat.se)) cstat.se <- array(NA, dim=k)
-  if (is.null(sd.LP)) sd.LP <- rep(NA, k)
-  
-  if (sum(cstat.cilv>1 | cstat.cilv<0) > 0) {
-    stop("Invalid level(s) specified for 'cstat.cilv'!")
-  }
-  if (is.na(level) | level<0 | level>1) {
-    stop("Invalid value for 'level'!")
-  }
-  if (!approx.se.method %in% c(2,4)) {
-    stop("Invalid method for restoring the SE of the c-statistic!")
-  }
-  
-  # Calculate O and N from other information if possible
-  O <- ifelse(is.na(O), Po*N, O)
-  N <- ifelse(is.na(N), O/Po, N)
-  
-  theta.cil <- theta.ciu <- rep(NA, k)
-  
-  # Restore c-statistic
-  te.method <- c("c-statistic", "std.dev(LP)")
-  te.orig   <- calculate.cstat.theta(cstat = cstat, g = g)
-  te.white  <- calculate.cstat.sdPI(sdPI = sd.LP, g = g)
-  
-  te.dat <- cbind(te.orig, te.white)
-  
-  # For each study, find the first colum without missing
-  myfun = function(dat) { which.min(is.na(dat)) }
-  
-  sel.cstat <- apply(te.dat, 1, myfun)
-  theta <- te.dat[cbind(seq_along(sel.cstat), sel.cstat)]                            
-  theta.source <-  te.method[sel.cstat]
-  
-  # Directly transform the provided confidence limits for studies where the reported level is equal to the requested level
-  if (is.null(g)) {
-    theta.cil[cstat.cilv==level] <- cstat.cilb[cstat.cilv==level]
-    theta.ciu[cstat.cilv==level] <- cstat.ciub[cstat.cilv==level]
-  } else {
-    theta.cil[cstat.cilv==level] <- eval(parse(text=g), list(cstat = cstat.cilb[cstat.cilv==level]))
-    theta.ciu[cstat.cilv==level] <- eval(parse(text=g), list(cstat = cstat.ciub[cstat.cilv==level]))
-  } 
-  
-  # Calculate all the possible variations of var(theta)
-  tv.method  <- c("Standard Error", "Confidence Interval", paste("Newcombe (Method ", approx.se.method, ")", sep=""), paste("Newcombe (Method ", approx.se.method, ")", sep=""))
-  tv.se      <- restore.c.var.se(cstat=cstat, c.se=cstat.se, g=g) # Derived from standard error
-  tv.ci      <- restore.c.var.ci(cil=cstat.cilb, ciu=cstat.ciub, level=cstat.cilv, g=g) # Derived from 95% confidence interval
-  tv.hanley  <- restore.c.var.hanley(cstat=cstat, N.subjects=N, N.events=O, restore.method=approx.se.method, g=g)
-  tv.hanley2 <- restore.c.var.hanley2(sd.LP=sd.LP, N.subjects=N, N.events=O, restore.method=approx.se.method, g=g)
-  
-  # Save all estimated variances. The order of the columns indicates the priority             
-  dat <-cbind(tv.se, tv.ci, tv.hanley, tv.hanley2)  
-  
-  sel.var <- apply(dat, 1, myfun)
-  theta.var <- dat[cbind(seq_along(sel.var), sel.var)]                            
-  theta.var.source <-  tv.method[sel.var]
-  
-  # Calculate the desired confidence intervals
-  theta.cil[is.na(theta.cil)] <- (theta+qnorm((1-level)/2)*sqrt(theta.var))[is.na(theta.cil)]
-  theta.ciu[is.na(theta.ciu)] <- (theta+qnorm((1+level)/2)*sqrt(theta.var))[is.na(theta.ciu)]
-  
-  
-  # Store results, and method for calculating SE
-  ds <- data.frame(theta=theta, theta.se=sqrt(theta.var), theta.cilb=theta.cil, theta.ciub=theta.ciu, 
-                   theta.source=theta.source, theta.se.source=theta.var.source)
-  
-  if(is.null(slab) & !no.data) {
-    slab <- rownames(data)
-    rownames(ds) <- slab
-  } else if (!is.null(slab)) {
-    slab <- make.unique(as.character(slab))
-    rownames(ds) <- slab
-  }
-  
-  # Add some attributes specifying the nature of the (untransformed) estimatess
-  attr(ds, 'estimand') <- "c-statistic"
-  attr(ds, 'theta_scale') <- g
-  attr(ds, 'plot_refline') <- 0.5
-  attr(ds, 'plot_lim') <- c(0,1)
-  
-  class(ds) <- c("mm_perf", class(ds))
-  
-  return(ds)
+#' Calculate the concordance statistic
+#'
+#' The function calculates (transformed versions of) the concordance (c-)
+#' statistic with the corresponding sampling variance. 
+#' 
+#' @param cstat vector to specify the estimated c-statistics.
+#' @param cstat.se Optional vector to specify the corresponding standard errors.
+#' @param cstat.cilb Optional vector to specify the lower limits of the confidence interval.
+#' @param cstat.ciub Optional vector to specify the upper limits of the confidence interval.
+#' @param cstat.cilv Optional vector to specify the levels of aformentioned confidence interval limits. 
+#' (default: 0.95, which corresponds to the 95\% confidence interval).
+#' @param sd.LP Optional vector to specify the standard deviations of the linear predictor (prognostic index).
+#' @param N Optional vector to specify the sample/group sizes.
+#' @param O Optional vector to specify the total number of observed events.
+#' @param Po Optional vector to specify the observed event probabilities.
+#' @param data Optional data frame containing the variables given to the arguments above.
+#' @param slab Optional vector with labels for the studies.
+#' @param subset Optional vector indicating the subset of studies that should be used. This can be a logical vector or a numeric vector indicating the indices of the studies to include.
+#' @param g a quoted string that is the function to transform estimates of the c-statistic; see the details below.
+#' @param level Optional numeric to specify the level for the confidence interval, default \code{0.95}.
+#' @param approx.se.method integer specifying which method should be used for estimating the standard error of the
+#' c-statistic (Newcombe, 2006). So far, only method \code{2} and method \code{4} (default) have been implemented.
+#' @param \ldots Additional arguments.
+#' 
+#' @details 
+#' The c-statistic is a measure of discrimination, and indicates the ability of a prediction model to 
+#' distinguish between patients developing and not developing the outcome. The c-statistic typically ranges 
+#' from 0.5 (no discriminative ability) to 1 (perfect discriminative ability). 
+#' 
+#' By default, the function \code{ccalc} will derive the c-statistic of each study, together with 
+#' the corresponding standard error and 95\% confidence interval. However, it is also possible to calculate transformed 
+#' versions of the c-statistic. Appropriate standard errors are then derived using the Delta method. 
+#' For instance, the logit transformation can be applied by specifying \code{g="log(cstat/(1-cstat))"}. 
+#' 
+#' \subsection{Restoring the c-statistic}{
+#' For studies where the c-statistic is missing, it is estimated from the standard deviation of the linear predictor 
+#' (\code{theta.source="std.dev(LP)"}). The corresponding method is described by White et al. (2015).
+#' }
+#' 
+#' \subsection{Restoring the standard error of the c-statistic}{
+#' When missing, the standard error of the c-statistic can be estimated from the confidence interval. Alternatively, 
+#' the standard error can be approximated from a combination of the reported c-statistic, the total sample size and 
+#' the total number of events (Newcombe, 2006). This can be achieved by adopting (a modification of) the method 
+#' proposed by Hanley and McNeil, as specified in \code{approx.se.method}.
+#' }
+#' 
+#' @references 
+#' Debray TPA, Damen JAAG, Snell KIE, Ensor J, Hooft L, Reitsma JB, et al. A guide to systematic review and meta-analysis of prediction model performance. BMJ. 2017;356:i6460. 
+#' 
+#' Debray TPA, Damen JAAG, Riley R, Snell KIE, Reitsma JB, Hooft L, et al. A framework for meta-analysis of  prediction model studies with binary and time-to-event outcomes. Stat Methods Med Res. 2018; In press. 
+#' 
+#' Hanley JA, McNeil BJ. The meaning and use of the area under a receiver operating characteristic (ROC) 
+#' curve. \emph{Radiology}. 1982; 143(1):29--36.
+#' 
+#' Newcombe RG. Confidence intervals for an effect size measure based on the Mann-Whitney statistic. 
+#' Part 2: asymptotic methods and evaluation. \emph{Stat Med}. 2006; 25(4):559--73.
+#' 
+#' Snell KI, Ensor J, Debray TP, Moons KG, Riley RD. Meta-analysis of prediction model performance across 
+#' multiple studies: Which scale helps ensure between-study normality for the C -statistic and calibration measures? 
+#' \emph{Statistical Methods in Medical Research}. 2017. 
+#' 
+#' White IR, Rapsomaniki E, the Emerging Risk Factors Collaboration. Covariate-adjusted measures of discrimination 
+#' for survival data. \emph{Biom J}. 2015;57(4):592--613. 
+#' 
+#' 
+#' @return An object of class c("mm_perf","data.frame") with the following columns:
+#' \describe{
+##'  \item{"theta"}{The (transformed) c-statistics. }
+##'  \item{"theta.se"}{Standard errors of the (transformed) c-statistics.}
+##'  \item{"theta.cilb"}{Lower confidence interval of the (transformed) c-statistics. The level is specified in
+##'  \code{level}. Intervals are calculated on the same scale as \code{theta} by assuming a Normal distribution.}
+##'  \item{"theta.ciub"}{Upper confidence interval of the (transformed) c-statistics. The level is specified in
+##'  \code{level}. Intervals are calculated on the same scale as \code{theta} by assuming a Normal distribution.}
+##'  \item{"theta.source"}{Method used for calculating the (transformed) c-statistic.}
+##'  \item{"theta.se.source"}{Method used for calculating the standard error of the (transformed) c-statistic.}
+##' }
+#' 
+#' @examples 
+#' ######### Validation of prediction models with a binary outcome #########
+#' data(EuroSCORE)
+#' 
+#' # Calculate the c-statistic and its standard error
+#' est1 <- ccalc(cstat = c.index, cstat.se = se.c.index, cstat.cilb = c.index.95CIl, 
+#'               cstat.ciub = c.index.95CIu, N = n, O = n.events, data = EuroSCORE, slab = Study)
+#' est1
+#'   
+#' # Calculate the logit c-statistic and its standard error
+#' est2 <- ccalc(cstat = c.index, cstat.se = se.c.index, cstat.cilb = c.index.95CIl, 
+#'               cstat.ciub = c.index.95CIu, N = n, O = n.events, data = EuroSCORE, slab = Study, 
+#'               g = "log(cstat/(1-cstat))")
+#' est2
+#'       
+#' # Display the results of all studies in a forest plot
+#' plot(est1)
+#'                                                             
+#' @keywords meta-analysis discrimination concordance statistic performance
+#' 
+#' @author Thomas Debray <thomas.debray@gmail.com>
+#' 
+#' @export
+#' 
+ccalc <- function(cstat, cstat.se, cstat.cilb, cstat.ciub, cstat.cilv, sd.LP, N, O, Po, data, slab, subset,
+                  g = NULL, level = 0.95, approx.se.method = 4, ...) {
+  
+  ### check if data argument has been specified
+  if (missing(data))
+    data <- NULL
+  
+  ### need this at the end to check if append=TRUE can actually be done
+  no.data <- is.null(data)
+  
+  ### check if data argument has been specified
+  if (is.null(data)) {
+    data <- sys.frame(sys.parent())
+  } else {
+    if (!is.data.frame(data))
+      data <- data.frame(data)
+  }
+  
+  #######################################################################################
+  # Retrieve all data
+  #######################################################################################
+  mf <- match.call()
+  
+  mf.slab       <- mf[[match("slab",   names(mf))]]
+  slab          <- eval(mf.slab,   data, enclos=sys.frame(sys.parent()))
+  mf.subset     <- mf[[match("subset", names(mf))]]
+  subset        <- eval(mf.subset, data, enclos=sys.frame(sys.parent()))
+  mf.cstat      <- mf[[match("cstat", names(mf))]]
+  cstat         <- eval(mf.cstat, data, enclos=sys.frame(sys.parent()))
+  mf.cstat.se   <- mf[[match("cstat.se", names(mf))]]
+  cstat.se      <- eval(mf.cstat.se, data, enclos=sys.frame(sys.parent()))
+  mf.cstat.cilb <- mf[[match("cstat.cilb", names(mf))]]
+  cstat.cilb    <- eval(mf.cstat.cilb, data, enclos=sys.frame(sys.parent()))
+  mf.cstat.ciub <- mf[[match("cstat.ciub", names(mf))]]
+  cstat.ciub    <- eval(mf.cstat.ciub, data, enclos=sys.frame(sys.parent()))
+  mf.cstat.cilv <- mf[[match("cstat.cilv", names(mf))]]
+  cstat.cilv    <- eval(mf.cstat.cilv, data, enclos=sys.frame(sys.parent()))
+  mf.sd.LP      <- mf[[match("sd.LP", names(mf))]]
+  sd.LP         <- eval(mf.sd.LP, data, enclos=sys.frame(sys.parent()))
+  mf.N          <- mf[[match("N", names(mf))]]
+  N             <- eval(mf.N, data, enclos=sys.frame(sys.parent()))
+  mf.O          <- mf[[match("O", names(mf))]]
+  O             <- eval(mf.O, data, enclos=sys.frame(sys.parent()))
+  mf.Po         <- mf[[match("Po", names(mf))]]
+  Po            <- eval(mf.Po, data, enclos=sys.frame(sys.parent()))
+  
+  
+  
+  #######################################################################################
+  # Count number of studies
+  #######################################################################################
+  k <- 0
+  
+  if (!no.data) {
+    k <- dim(data)[1]
+  } else if (!is.null(cstat)) {
+    k <- length(cstat)
+  } else if (!is.null(cstat.se)) {
+    k <- length(cstat.se)
+  } else if (!is.null(cstat.cilb)) {
+    k <- length(cstat.cilb)
+  } else if (!is.null(cstat.ciub)) {
+    k <- length(cstat.ciub)
+  } else if (!is.null(sd.LP)) {
+    k <- length(sd.LP)
+  } else if (!is.null(N)) {
+    k <- length(N)
+  } else if (!is.null(O)) {
+    k <- length(O)
+  } else if (!is.null(Po)) {
+    k <- length(Po)
+  }
+  
+  if (k<1) stop("No data provided!")
+  
+  if(is.null(cstat)) {
+    cstat <- rep(NA, times=k)
+  }
+  
+  #######################################################################################
+  # Assign study labels
+  # taken from escalc
+  #######################################################################################
+  if (!is.null(slab)) {
+    
+    if (!is.null(subset))
+      slab <- slab[subset]
+    
+    if (anyNA(slab))
+      stop("NAs in study labels.")
+    
+    if (inherits(slab, "factor")) {
+      slab <- as.character(slab)
+    }
+    
+    ### check if study labels are unique; if not, make them unique
+    
+    if (anyDuplicated(slab))
+      slab <- make.unique(slab)
+    
+    if (length(slab) != k)
+      stop("Study labels not of same length as data.")
+    
+    ### add slab attribute to the cstat vector
+    attr(cstat, "slab") <- slab
+  }
+  
+  ### if a subset of studies is specified (note: subsetting of other parts already done above, so yi/vi/ni.u/slab are already subsetted)
+  if (!is.null(subset)) {
+    if (!no.data)
+      data <- data[subset,,drop=FALSE]
+  }
+  
+  #######################################################################################
+  # Prepare data
+  #######################################################################################
+  if (is.null(O))  O <- rep(NA, length=k)
+  if (is.null(Po)) Po <- rep(NA, length=k)
+  if (is.null(N))  N <- rep(NA, length=k)
+  if (is.null(cstat.cilb)) cstat.cilb <- rep(NA, length=k)
+  if (is.null(cstat.ciub)) cstat.ciub <- rep(NA, length=k)
+  if (is.null(cstat.cilv)) cstat.cilv <- rep(0.95, length=k)
+  if (is.null(cstat.se)) cstat.se <- array(NA, dim=k)
+  if (is.null(sd.LP)) sd.LP <- rep(NA, k)
+  
+  if (sum(cstat.cilv>1 | cstat.cilv<0) > 0) {
+    stop("Invalid level(s) specified for 'cstat.cilv'!")
+  }
+  if (is.na(level) | level<0 | level>1) {
+    stop("Invalid value for 'level'!")
+  }
+  if (!approx.se.method %in% c(2,4)) {
+    stop("Invalid method for restoring the SE of the c-statistic!")
+  }
+  
+  # Calculate O and N from other information if possible
+  O <- ifelse(is.na(O), Po*N, O)
+  N <- ifelse(is.na(N), O/Po, N)
+  
+  theta.cil <- theta.ciu <- rep(NA, k)
+  
+  # Restore c-statistic
+  te.method <- c("c-statistic", "std.dev(LP)")
+  te.orig   <- calculate.cstat.theta(cstat = cstat, g = g)
+  te.white  <- calculate.cstat.sdPI(sdPI = sd.LP, g = g)
+  
+  te.dat <- cbind(te.orig, te.white)
+  
+  # For each study, find the first colum without missing
+  myfun = function(dat) { which.min(is.na(dat)) }
+  
+  sel.cstat <- apply(te.dat, 1, myfun)
+  theta <- te.dat[cbind(seq_along(sel.cstat), sel.cstat)]                            
+  theta.source <-  te.method[sel.cstat]
+  
+  # Directly transform the provided confidence limits for studies where the reported level is equal to the requested level
+  if (is.null(g)) {
+    theta.cil[cstat.cilv==level] <- cstat.cilb[cstat.cilv==level]
+    theta.ciu[cstat.cilv==level] <- cstat.ciub[cstat.cilv==level]
+  } else {
+    theta.cil[cstat.cilv==level] <- eval(parse(text=g), list(cstat = cstat.cilb[cstat.cilv==level]))
+    theta.ciu[cstat.cilv==level] <- eval(parse(text=g), list(cstat = cstat.ciub[cstat.cilv==level]))
+  } 
+  
+  # Calculate all the possible variations of var(theta)
+  tv.method  <- c("Standard Error", "Confidence Interval", paste("Newcombe (Method ", approx.se.method, ")", sep=""), paste("Newcombe (Method ", approx.se.method, ")", sep=""))
+  tv.se      <- restore.c.var.se(cstat=cstat, c.se=cstat.se, g=g) # Derived from standard error
+  tv.ci      <- restore.c.var.ci(cil=cstat.cilb, ciu=cstat.ciub, level=cstat.cilv, g=g) # Derived from 95% confidence interval
+  tv.hanley  <- restore.c.var.hanley(cstat=cstat, N.subjects=N, N.events=O, restore.method=approx.se.method, g=g)
+  tv.hanley2 <- restore.c.var.hanley2(sd.LP=sd.LP, N.subjects=N, N.events=O, restore.method=approx.se.method, g=g)
+  
+  # Save all estimated variances. The order of the columns indicates the priority             
+  dat <-cbind(tv.se, tv.ci, tv.hanley, tv.hanley2)  
+  
+  sel.var <- apply(dat, 1, myfun)
+  theta.var <- dat[cbind(seq_along(sel.var), sel.var)]                            
+  theta.var.source <-  tv.method[sel.var]
+  
+  # Calculate the desired confidence intervals
+  theta.cil[is.na(theta.cil)] <- (theta+qnorm((1-level)/2)*sqrt(theta.var))[is.na(theta.cil)]
+  theta.ciu[is.na(theta.ciu)] <- (theta+qnorm((1+level)/2)*sqrt(theta.var))[is.na(theta.ciu)]
+  
+  
+  # Store results, and method for calculating SE
+  ds <- data.frame(theta=theta, theta.se=sqrt(theta.var), theta.cilb=theta.cil, theta.ciub=theta.ciu, 
+                   theta.source=theta.source, theta.se.source=theta.var.source)
+  
+  if(is.null(slab) & !no.data) {
+    slab <- rownames(data)
+    rownames(ds) <- slab
+  } else if (!is.null(slab)) {
+    slab <- make.unique(as.character(slab))
+    rownames(ds) <- slab
+  }
+  
+  # Add some attributes specifying the nature of the (untransformed) estimatess
+  attr(ds, 'estimand') <- "c-statistic"
+  attr(ds, 'theta_scale') <- g
+  attr(ds, 'plot_refline') <- 0.5
+  attr(ds, 'plot_lim') <- c(0,1)
+  
+  class(ds) <- c("mm_perf", class(ds))
+  
+  return(ds)
 }
\ No newline at end of file
diff --git a/R/valmeta_utils.r b/R/valmeta_utils.r
index 08649ba..9ad33e1 100644
--- a/R/valmeta_utils.r
+++ b/R/valmeta_utils.r
@@ -1,249 +1,249 @@
-
-
-# Adapted from R package 'car' to avoid package loading issues in R-forge
-deltaMethod <- function(object, g, vcov., func = g, constants, level = 0.95, ...) {
-  if (!is.character(g)) 
-    stop("The argument 'g' must be a character string")
-  
-  para <- object         
-  g <- parse(text = g)
-  q <- length(para)
-  for (i in 1:q) {
-    assign(names(para)[i], para[i])
-  }
-  if (!missing(constants)) {
-    for (i in seq_along(constants)) assign(names(constants[i]), constants[[i]])}
-  est <- eval(g)
-  names(est) <- NULL
-  gd <- rep(0, q)
-  for (i in 1:q) {
-    gd[i] <- eval(D(g, names(para)[i]))
-  }
-  se.est <- as.vector(sqrt(t(gd) %*% vcov. %*% gd))
-  result <- data.frame(Estimate = est, SE = se.est, row.names = c(func))
-  result
-}
-
-# Adapted from matrixcalc
-hadamard.prod <- function (x, y) 
-{
-  if (!is.numeric(x)) {
-    stop("argument x is not numeric")
-  }
-  if (!is.numeric(y)) {
-    stop("argument y is not numeric")
-  }
-  if (is.matrix(x)) {
-    Xmat <- x
-  }
-  else {
-    if (is.vector(x)) {
-      Xmat <- matrix(x, nrow = length(x), ncol = 1)
-    }
-    else {
-      stop("argument x is neither a matrix or a vector")
-    }
-  }
-  if (is.matrix(y)) {
-    Ymat <- y
-  }
-  else {
-    if (is.vector(y)) {
-      Ymat <- matrix(y, nrow = length(x), ncol = 1)
-    }
-    else {
-      stop("argument x is neither a matrix or a vector")
-    }
-  }
-  if (nrow(Xmat) != nrow(Ymat)) 
-    stop("argumentx x and y do not have the same row order")
-  if (ncol(Xmat) != ncol(Ymat)) 
-    stop("arguments x and y do not have the same column order")
-  return(Xmat * Ymat)
-}
-
-calcPredInt <- function(x, sigma2, tau2, k, level = 0.95) {
-  pi.lb <- x + qt((1-level)/2, df=(k-2))*sqrt(tau2+sigma2)
-  pi.ub <- x + qt((1+level)/2, df=(k-2))*sqrt(tau2+sigma2)
-  out <- list(lower = as.numeric(pi.lb), upper = as.numeric(pi.ub))
-  return(out)
-}
-
-
-
-
-generateHyperparametersMA <- function(x, ...) {
-  # Generate initial values from the relevant distributions
-  model.pars <- list()
-  model.pars[[1]] <- list(param = "mu.tobs", 
-                          param.f = rnorm, 
-                          param.args = list(n = 1, 
-                                            mean = x$hp.mu.mean, 
-                                            sd = sqrt(x$hp.mu.var)))
-  
-  if (x$hp.tau.dist == "dunif") {
-    model.pars[[2]] <- list(param = "bsTau", 
-                            param.f = runif, 
-                            param.args = list(n = 1, 
-                                              min = x$hp.tau.min, 
-                                              max = x$hp.tau.max))
-  } else if (x$hp.tau.dist == "dhalft") {
-    model.pars[[2]] <- list(param = "bsTau", 
-                            param.f = rstudentt, 
-                            param.args = list(n = 1, 
-                                              mean = x$hp.tau.mean, 
-                                              sigma = x$hp.tau.sigma, 
-                                              df = x$hp.tau.df,
-                                              lower = x$hp.tau.min, 
-                                              upper = x$hp.tau.max))
-  } else {
-    stop("Invalid distribution for 'hp.tau.dist'!")
-  }
-  model.pars
-}
-
-
-restore.c.var.hanley <- function(cstat, N.subjects, N.events, restore.method=4, g=NULL) {
-  
-  fHanley <- function(cstat, nstar, mstar, m, n) {
-    ((cstat*(1-cstat)*(1+nstar*(1-cstat)/(2-cstat) + mstar*cstat/(1+cstat)))/(m*n))
-  }
-  
-  n <- N.events #Number of events
-  m <- N.subjects-N.events #Number of non-events
-  
-  if (restore.method==2) {
-    mstar <- m-1
-    nstar <- n-1
-  } else if (restore.method==4) {
-    mstar <- nstar <- N.subjects/2-1
-  } else {
-    stop ("Method not implemented yet!")
-  }
-  
-  # Crude estimate
-  cstat.var <- fHanley(cstat=cstat, m=m, n=n, mstar=mstar, nstar=nstar)
-  if (is.null(g)) {
-    return(cstat.var)
-  }
-  
-  # Apply delta method if a transformation for c is defined
-  ti <- rep(NA, length(cstat))
-  for (i in 1:length(cstat)) {
-    ci <- cstat[i]
-    vi <- cstat.var[i]
-    names(ci) <- names(vi) <- "cstat"
-    ti[i] <- as.numeric((deltaMethod(object=ci, g=g, vcov.=vi))["SE"])**2
-  }
-  return(ti)
-}
-
-
-restore.c.var.se <- function(cstat, c.se, g=NULL) {
-  if(is.null(g)) {
-    return (c.se**2)
-  }
-  
-  ti <- rep(NA, length(cstat))
-  
-  for (i in 1:length(cstat)) {
-    ci <- cstat[i]
-    vi <- c.se[i]**2
-    names(ci) <- names(vi) <- "cstat"
-    ti[i] <- as.numeric((deltaMethod(object=ci, g=g, vcov.=vi))["SE"])**2
-  }
-  return(ti)
-}
-
-restore.c.var.hanley2<- function(sd.LP, N.subjects, N.events, restore.method=4, g=NULL) {
-  cstat <- calculate.cstat.sdPI(sd.LP, g=g)
-  return(restore.c.var.hanley(cstat=cstat, N.subjects=N.subjects, N.events=N.events, restore.method=restore.method, g=g))
-}
-
-restore.c.var.se <- function(cstat, c.se, g=NULL) {
-  if(is.null(g)) {
-    return (c.se**2)
-  }
-  
-  ti <- rep(NA, length(cstat))
-  
-  for (i in 1:length(cstat)) {
-    ci <- cstat[i]
-    vi <- c.se[i]**2
-    names(ci) <- names(vi) <- "cstat"
-    ti[i] <- as.numeric((deltaMethod(object=ci, g=g, vcov.=vi))["SE"])**2
-  }
-  return(ti)
-}
-
-restore.c.var.ci <- function(cil, ciu, level, g=NULL) {
-  if (!is.null(g)) {
-    lower <- eval(parse(text = g), list(cstat = cil))
-    upper <- eval(parse(text = g), list(cstat = ciu))
-  } else {
-    lower <- cil
-    upper <- ciu
-  }
-  if (missing(level)) level <- rep(0.95, length(cil))
-  
-  return(((upper - lower)/(2*qnorm((1 - level)/2)))**2)
-}
-
-calculate.cstat.theta <- function(cstat, g = NULL) {
-  if (is.null(g)) {
-    return(cstat)
-  }
-  return(eval(parse(text = g), list(cstat = cstat)))
-}
-
-calculate.cstat.sdPI <- function (sdPI, g=NULL) {
-  myfun <- function(x, sd.lp) {
-    inv.logit(sqrt(2)*sd.lp*x)*dnorm(x, mean=0, sd=1)
-  }
-  cstat <- rep(NA, length(sdPI))
-  for (i in 1:length(sdPI)) {
-    cstat[i] <- ifelse(is.na(sdPI[i]), NA, 2*(integrate(myfun, lower=0, upper=+Inf, sd.lp=sdPI[i]))$value)
-  }
-  if(is.null(g)) {
-    return(cstat)
-  }
-  return(eval(parse(text=g), list(cstat = cstat)))
-}
-
-
-# See params of geom_smooth for more details
-# Smoothed calibration plot: use  formula = obsy ~ splines::bs(predy, 3)
-plotCalibration <- function(predy, obsy, modelname="Model", 
-                            formula = obsy ~ predy, 
-                            method="glm", se = se, 
-                            level=0.95, 
-                            fam=binomial, ...) {
-  #require(ggplot2)
-  #require(ggExtra) ## Do the extra outside the package
-  
-  # Use formula instead
-  
-  # Add density plot under gg plot
-  #predy<-rnorm(300)
-  #obsy<-rt(300,df=10)
-  #family <- gaussian
-  
-  xy <- data.frame(predy, obsy)
-  
-  scatter <- ggplot(xy, aes(x=predy, y=obsy)) +
-    labs(x = "Predicted", y="Observed") + 
-    scale_x_continuous(limits=c(min(predy),max(obsy))) + 
-    scale_y_continuous(limits=c(min(predy),max(obsy))) #+ 
-  
-  
-  # Add reference line for perfect calibration
-  scatter <- scatter + geom_abline(aes(slope=1, intercept=0, linetype="Perfect calibration"), size=1)
-  scatter <- scatter + geom_smooth(aes(linetype=modelname), method = method, 
-                                   se = se, level=level, method.args = list(family = fam), ...)
-
-  
-  #scatter <- scatter + ggMarginal(data = xy, x = "predy", y = "obsy", margins = "x", type="histogram", size=4)
-  #scatter <- scatter + labs(x = "Predicted", y="Observed") 
-  scatter
-}
-
+
+
+# Adapted from R package 'car' to avoid package loading issues in R-forge
+deltaMethod <- function(object, g, vcov., func = g, constants, level = 0.95, ...) {
+  if (!is.character(g)) 
+    stop("The argument 'g' must be a character string")
+  
+  para <- object         
+  g <- parse(text = g)
+  q <- length(para)
+  for (i in 1:q) {
+    assign(names(para)[i], para[i])
+  }
+  if (!missing(constants)) {
+    for (i in seq_along(constants)) assign(names(constants[i]), constants[[i]])}
+  est <- eval(g)
+  names(est) <- NULL
+  gd <- rep(0, q)
+  for (i in 1:q) {
+    gd[i] <- eval(D(g, names(para)[i]))
+  }
+  se.est <- as.vector(sqrt(t(gd) %*% vcov. %*% gd))
+  result <- data.frame(Estimate = est, SE = se.est, row.names = c(func))
+  result
+}
+
+# Adapted from matrixcalc
+hadamard.prod <- function (x, y) 
+{
+  if (!is.numeric(x)) {
+    stop("argument x is not numeric")
+  }
+  if (!is.numeric(y)) {
+    stop("argument y is not numeric")
+  }
+  if (is.matrix(x)) {
+    Xmat <- x
+  }
+  else {
+    if (is.vector(x)) {
+      Xmat <- matrix(x, nrow = length(x), ncol = 1)
+    }
+    else {
+      stop("argument x is neither a matrix or a vector")
+    }
+  }
+  if (is.matrix(y)) {
+    Ymat <- y
+  }
+  else {
+    if (is.vector(y)) {
+      Ymat <- matrix(y, nrow = length(x), ncol = 1)
+    }
+    else {
+      stop("argument x is neither a matrix or a vector")
+    }
+  }
+  if (nrow(Xmat) != nrow(Ymat)) 
+    stop("argumentx x and y do not have the same row order")
+  if (ncol(Xmat) != ncol(Ymat)) 
+    stop("arguments x and y do not have the same column order")
+  return(Xmat * Ymat)
+}
+
+calcPredInt <- function(x, sigma2, tau2, k, level = 0.95) {
+  pi.lb <- x + qt((1-level)/2, df=(k-2))*sqrt(tau2+sigma2)
+  pi.ub <- x + qt((1+level)/2, df=(k-2))*sqrt(tau2+sigma2)
+  out <- list(lower = as.numeric(pi.lb), upper = as.numeric(pi.ub))
+  return(out)
+}
+
+
+
+
+generateHyperparametersMA <- function(x, ...) {
+  # Generate initial values from the relevant distributions
+  model.pars <- list()
+  model.pars[[1]] <- list(param = "mu.tobs", 
+                          param.f = rnorm, 
+                          param.args = list(n = 1, 
+                                            mean = x$hp.mu.mean, 
+                                            sd = sqrt(x$hp.mu.var)))
+  
+  if (x$hp.tau.dist == "dunif") {
+    model.pars[[2]] <- list(param = "bsTau", 
+                            param.f = runif, 
+                            param.args = list(n = 1, 
+                                              min = x$hp.tau.min, 
+                                              max = x$hp.tau.max))
+  } else if (x$hp.tau.dist == "dhalft") {
+    model.pars[[2]] <- list(param = "bsTau", 
+                            param.f = rstudentt, 
+                            param.args = list(n = 1, 
+                                              mean = x$hp.tau.mean, 
+                                              sigma = x$hp.tau.sigma, 
+                                              df = x$hp.tau.df,
+                                              lower = x$hp.tau.min, 
+                                              upper = x$hp.tau.max))
+  } else {
+    stop("Invalid distribution for 'hp.tau.dist'!")
+  }
+  model.pars
+}
+
+
+restore.c.var.hanley <- function(cstat, N.subjects, N.events, restore.method=4, g=NULL) {
+  
+  fHanley <- function(cstat, nstar, mstar, m, n) {
+    ((cstat*(1-cstat)*(1+nstar*(1-cstat)/(2-cstat) + mstar*cstat/(1+cstat)))/(m*n))
+  }
+  
+  n <- N.events #Number of events
+  m <- N.subjects-N.events #Number of non-events
+  
+  if (restore.method==2) {
+    mstar <- m-1
+    nstar <- n-1
+  } else if (restore.method==4) {
+    mstar <- nstar <- N.subjects/2-1
+  } else {
+    stop ("Method not implemented yet!")
+  }
+  
+  # Crude estimate
+  cstat.var <- fHanley(cstat=cstat, m=m, n=n, mstar=mstar, nstar=nstar)
+  if (is.null(g)) {
+    return(cstat.var)
+  }
+  
+  # Apply delta method if a transformation for c is defined
+  ti <- rep(NA, length(cstat))
+  for (i in 1:length(cstat)) {
+    ci <- cstat[i]
+    vi <- cstat.var[i]
+    names(ci) <- names(vi) <- "cstat"
+    ti[i] <- as.numeric((deltaMethod(object=ci, g=g, vcov.=vi))["SE"])**2
+  }
+  return(ti)
+}
+
+
+restore.c.var.se <- function(cstat, c.se, g=NULL) {
+  if(is.null(g)) {
+    return (c.se**2)
+  }
+  
+  ti <- rep(NA, length(cstat))
+  
+  for (i in 1:length(cstat)) {
+    ci <- cstat[i]
+    vi <- c.se[i]**2
+    names(ci) <- names(vi) <- "cstat"
+    ti[i] <- as.numeric((deltaMethod(object=ci, g=g, vcov.=vi))["SE"])**2
+  }
+  return(ti)
+}
+
+restore.c.var.hanley2<- function(sd.LP, N.subjects, N.events, restore.method=4, g=NULL) {
+  cstat <- calculate.cstat.sdPI(sd.LP, g=g)
+  return(restore.c.var.hanley(cstat=cstat, N.subjects=N.subjects, N.events=N.events, restore.method=restore.method, g=g))
+}
+
+restore.c.var.se <- function(cstat, c.se, g=NULL) {
+  if(is.null(g)) {
+    return (c.se**2)
+  }
+  
+  ti <- rep(NA, length(cstat))
+  
+  for (i in 1:length(cstat)) {
+    ci <- cstat[i]
+    vi <- c.se[i]**2
+    names(ci) <- names(vi) <- "cstat"
+    ti[i] <- as.numeric((deltaMethod(object=ci, g=g, vcov.=vi))["SE"])**2
+  }
+  return(ti)
+}
+
+restore.c.var.ci <- function(cil, ciu, level, g=NULL) {
+  if (!is.null(g)) {
+    lower <- eval(parse(text = g), list(cstat = cil))
+    upper <- eval(parse(text = g), list(cstat = ciu))
+  } else {
+    lower <- cil
+    upper <- ciu
+  }
+  if (missing(level)) level <- rep(0.95, length(cil))
+  
+  return(((upper - lower)/(2*qnorm((1 - level)/2)))**2)
+}
+
+calculate.cstat.theta <- function(cstat, g = NULL) {
+  if (is.null(g)) {
+    return(cstat)
+  }
+  return(eval(parse(text = g), list(cstat = cstat)))
+}
+
+calculate.cstat.sdPI <- function (sdPI, g=NULL) {
+  myfun <- function(x, sd.lp) {
+    inv.logit(sqrt(2)*sd.lp*x)*dnorm(x, mean=0, sd=1)
+  }
+  cstat <- rep(NA, length(sdPI))
+  for (i in 1:length(sdPI)) {
+    cstat[i] <- ifelse(is.na(sdPI[i]), NA, 2*(integrate(myfun, lower=0, upper=+Inf, sd.lp=sdPI[i]))$value)
+  }
+  if(is.null(g)) {
+    return(cstat)
+  }
+  return(eval(parse(text=g), list(cstat = cstat)))
+}
+
+
+# See params of geom_smooth for more details
+# Smoothed calibration plot: use  formula = obsy ~ splines::bs(predy, 3)
+plotCalibration <- function(predy, obsy, modelname="Model", 
+                            formula = obsy ~ predy, 
+                            method="glm", se = se, 
+                            level=0.95, 
+                            fam=binomial, ...) {
+  #require(ggplot2)
+  #require(ggExtra) ## Do the extra outside the package
+  
+  # Use formula instead
+  
+  # Add density plot under gg plot
+  #predy<-rnorm(300)
+  #obsy<-rt(300,df=10)
+  #family <- gaussian
+  
+  xy <- data.frame(predy, obsy)
+  
+  scatter <- ggplot(xy, aes(x=predy, y=obsy)) +
+    labs(x = "Predicted", y="Observed") + 
+    scale_x_continuous(limits=c(min(predy),max(obsy))) + 
+    scale_y_continuous(limits=c(min(predy),max(obsy))) #+ 
+  
+  
+  # Add reference line for perfect calibration
+  scatter <- scatter + geom_abline(aes(slope=1, intercept=0, linetype="Perfect calibration"), size=1)
+  scatter <- scatter + geom_smooth(aes(linetype=modelname), method = method, 
+                                   se = se, level=level, method.args = list(family = fam), ...)
+
+  
+  #scatter <- scatter + ggMarginal(data = xy, x = "predy", y = "obsy", margins = "x", type="histogram", size=4)
+  #scatter <- scatter + labs(x = "Predicted", y="Observed") 
+  scatter
+}
+
diff --git a/README.Rmd b/README.Rmd
old mode 100644
new mode 100755
index 0806d7c..dc98bf1
--- a/README.Rmd
+++ b/README.Rmd
@@ -1,58 +1,58 @@
----
-output: github_document
----
-
-<!-- README.md is generated from README.Rmd. Please edit that file -->
-
-```{r, include = FALSE}
-knitr::opts_chunk$set(
-  collapse = TRUE,
-  comment = "#>",
-  fig.path = "man/figures/README-",
-  out.width = "100%"
-)
-```
-
-# metamisc
-
-<!-- badges: start -->
-[![CRAN_Status_Badge](https://www.r-pkg.org/badges/version/metamisc)](https://cran.r-project.org/package=metamisc)
-[![metacran
-downloads](https://cranlogs.r-pkg.org/badges/last-month/precmed)](https://cran.r-project.org/package=metamisc)
-[![R-CMD-check](https://github.com/smartdata-analysis-and-statistics/metamisc/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/smartdata-analysis-and-statistics/metamisc/actions/workflows/R-CMD-check.yaml)
-<!-- badges: end -->
-
-
-
-This is the official repository of the R package **metamisc**, which was developed to facilitate meta-analysis of diagnosis and prognosis research studies. The package includes functions for the following tasks:
-
-* To develop and validate multivariable prediction models from datasets with clustering (de Jong et al., 2021)
-* To summarize multiple estimates of prediction model discrimination and calibration performance (Debray et al., 2019)
-* To evaluate funnel plot asymmetry (Debray et al., 2018)
-
-
-## Installation
-
-The `metamisc` package can be installed from CRAN as follows:
-
-``` r
-install.packages("metamisc")
-```
-
-You can install the development version of metamisc from [GitHub](https://github.com/) with:
-
-``` r
-# install.packages("devtools")
-devtools::install_github("smartdata-analysis-and-statistics/metamisc")
-```
-
-## JASP
-A visual interface to the software has been implemented by JASP <https://jasp-stats.org/>
-
-## References
-de Jong VMT, Moons KGM, Eijkemans MJC, Riley RD, Debray TPA. Developing more generalizable prediction models from pooled studies and large clustered data sets. *Stat Med*. 2021 May 5;40(15):3533–59. 
-
-Debray TPA, Moons KGM, Riley RD. Detecting small-study effects and funnel plot asymmetry in meta-analysis of survival data: a comparison of new and existing tests. *Res Syn Meth*. 2018;9(1):41–50. 
-
-Debray TPA, Damen JAAG, Riley R, Snell KIE, Reitsma JB, Hooft L, et al. A framework for meta-analysis of prediction model studies with binary and time-to-event outcomes. *Stat Methods Med Res*. 2019 Sep;28(9):2768–86. 
-
+---
+output: github_document
+---
+
+<!-- README.md is generated from README.Rmd. Please edit that file -->
+
+```{r, include = FALSE}
+knitr::opts_chunk$set(
+  collapse = TRUE,
+  comment = "#>",
+  fig.path = "man/figures/README-",
+  out.width = "100%"
+)
+```
+
+# metamisc
+
+<!-- badges: start -->
+[![CRAN_Status_Badge](https://www.r-pkg.org/badges/version/metamisc)](https://cran.r-project.org/package=metamisc)
+[![metacran
+downloads](https://cranlogs.r-pkg.org/badges/last-month/precmed)](https://cran.r-project.org/package=metamisc)
+[![R-CMD-check](https://github.com/smartdata-analysis-and-statistics/metamisc/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/smartdata-analysis-and-statistics/metamisc/actions/workflows/R-CMD-check.yaml)
+<!-- badges: end -->
+
+
+
+This is the official repository of the R package **metamisc**, which was developed to facilitate meta-analysis of diagnosis and prognosis research studies. The package includes functions for the following tasks:
+
+* To develop and validate multivariable prediction models from datasets with clustering (de Jong et al., 2021)
+* To summarize multiple estimates of prediction model discrimination and calibration performance (Debray et al., 2019)
+* To evaluate funnel plot asymmetry (Debray et al., 2018)
+
+
+## Installation
+
+The `metamisc` package can be installed from CRAN as follows:
+
+``` r
+install.packages("metamisc")
+```
+
+You can install the development version of metamisc from [GitHub](https://github.com/) with:
+
+``` r
+# install.packages("devtools")
+devtools::install_github("smartdata-analysis-and-statistics/metamisc")
+```
+
+## JASP
+A visual interface to the software has been implemented by JASP <https://jasp-stats.org/>
+
+## References
+de Jong VMT, Moons KGM, Eijkemans MJC, Riley RD, Debray TPA. Developing more generalizable prediction models from pooled studies and large clustered data sets. *Stat Med*. 2021 May 5;40(15):3533–59. 
+
+Debray TPA, Moons KGM, Riley RD. Detecting small-study effects and funnel plot asymmetry in meta-analysis of survival data: a comparison of new and existing tests. *Res Syn Meth*. 2018;9(1):41–50. 
+
+Debray TPA, Damen JAAG, Riley R, Snell KIE, Reitsma JB, Hooft L, et al. A framework for meta-analysis of prediction model studies with binary and time-to-event outcomes. *Stat Methods Med Res*. 2019 Sep;28(9):2768–86. 
+
diff --git a/README.md b/README.md
index 7ed9a36..2d7823b 100644
--- a/README.md
+++ b/README.md
@@ -1,59 +1,59 @@
-
-<!-- README.md is generated from README.Rmd. Please edit that file -->
-
-# metamisc
-
-<!-- badges: start -->
-
-[![CRAN_Status_Badge](https://www.r-pkg.org/badges/version/metamisc)](https://cran.r-project.org/package=metamisc)
-[![metacran
-downloads](https://cranlogs.r-pkg.org/badges/last-month/precmed)](https://cran.r-project.org/package=metamisc)
-[![R-CMD-check](https://github.com/smartdata-analysis-and-statistics/metamisc/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/smartdata-analysis-and-statistics/metamisc/actions/workflows/R-CMD-check.yaml)
-<!-- badges: end -->
-
-This is the official repository of the R package **metamisc**, which was
-developed to facilitate meta-analysis of diagnosis and prognosis
-research studies. The package includes functions for the following
-tasks:
-
-- To develop and validate multivariable prediction models from datasets
-  with clustering (de Jong et al., 2021)
-- To summarize multiple estimates of prediction model discrimination and
-  calibration performance (Debray et al., 2019)
-- To evaluate funnel plot asymmetry (Debray et al., 2018)
-
-## Installation
-
-The `metamisc` package can be installed from CRAN as follows:
-
-``` r
-install.packages("metamisc")
-```
-
-You can install the development version of metamisc from
-[GitHub](https://github.com/) with:
-
-``` r
-# install.packages("devtools")
-devtools::install_github("smartdata-analysis-and-statistics/metamisc")
-```
-
-## JASP
-
-A visual interface to the software has been implemented by JASP
-<https://jasp-stats.org/>
-
-## References
-
-de Jong VMT, Moons KGM, Eijkemans MJC, Riley RD, Debray TPA. Developing
-more generalizable prediction models from pooled studies and large
-clustered data sets. *Stat Med*. 2021 May 5;40(15):3533–59.
-
-Debray TPA, Moons KGM, Riley RD. Detecting small-study effects and
-funnel plot asymmetry in meta-analysis of survival data: a comparison of
-new and existing tests. *Res Syn Meth*. 2018;9(1):41–50.
-
-Debray TPA, Damen JAAG, Riley R, Snell KIE, Reitsma JB, Hooft L, et
-al. A framework for meta-analysis of prediction model studies with
-binary and time-to-event outcomes. *Stat Methods Med Res*. 2019
-Sep;28(9):2768–86.
+
+<!-- README.md is generated from README.Rmd. Please edit that file -->
+
+# metamisc
+
+<!-- badges: start -->
+
+[![CRAN_Status_Badge](https://www.r-pkg.org/badges/version/metamisc)](https://cran.r-project.org/package=metamisc)
+[![metacran
+downloads](https://cranlogs.r-pkg.org/badges/last-month/precmed)](https://cran.r-project.org/package=metamisc)
+[![R-CMD-check](https://github.com/smartdata-analysis-and-statistics/metamisc/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/smartdata-analysis-and-statistics/metamisc/actions/workflows/R-CMD-check.yaml)
+<!-- badges: end -->
+
+This is the official repository of the R package **metamisc**, which was
+developed to facilitate meta-analysis of diagnosis and prognosis
+research studies. The package includes functions for the following
+tasks:
+
+- To develop and validate multivariable prediction models from datasets
+  with clustering (de Jong et al., 2021)
+- To summarize multiple estimates of prediction model discrimination and
+  calibration performance (Debray et al., 2019)
+- To evaluate funnel plot asymmetry (Debray et al., 2018)
+
+## Installation
+
+The `metamisc` package can be installed from CRAN as follows:
+
+``` r
+install.packages("metamisc")
+```
+
+You can install the development version of metamisc from
+[GitHub](https://github.com/) with:
+
+``` r
+# install.packages("devtools")
+devtools::install_github("smartdata-analysis-and-statistics/metamisc")
+```
+
+## JASP
+
+A visual interface to the software has been implemented by JASP
+<https://jasp-stats.org/>
+
+## References
+
+de Jong VMT, Moons KGM, Eijkemans MJC, Riley RD, Debray TPA. Developing
+more generalizable prediction models from pooled studies and large
+clustered data sets. *Stat Med*. 2021 May 5;40(15):3533–59.
+
+Debray TPA, Moons KGM, Riley RD. Detecting small-study effects and
+funnel plot asymmetry in meta-analysis of survival data: a comparison of
+new and existing tests. *Res Syn Meth*. 2018;9(1):41–50.
+
+Debray TPA, Damen JAAG, Riley R, Snell KIE, Reitsma JB, Hooft L, et
+al. A framework for meta-analysis of prediction model studies with
+binary and time-to-event outcomes. *Stat Methods Med Res*. 2019
+Sep;28(9):2768–86.
diff --git a/_pkgdown.yml b/_pkgdown.yml
old mode 100644
new mode 100755
diff --git a/data/Tzoulaki.rda b/data/Tzoulaki.rda
old mode 100644
new mode 100755
diff --git a/man/Tzoulaki.Rd b/man/Tzoulaki.Rd
old mode 100644
new mode 100755
index 41dd74c..115954c
--- a/man/Tzoulaki.Rd
+++ b/man/Tzoulaki.Rd
@@ -1,48 +1,48 @@
-\name{Tzoulaki}
-\alias{Tzoulaki}
-\docType{data}
-\title{
-The incremental value of cardiovascular risk factors
-}
-\description{
-%%  ~~ A concise (1-5 lines) description of the dataset. ~~
-Tzoulaki et al. (2009) reviewed studies that evaluated various candidate prognostic factors in their ability to improve prediction of coronary heart disease (CHD) or other outcomes beyond what the Framingham risk score (FRS) can achieve.
-
-}
-\usage{data("Tzoulaki")}
-\format{
-  A data frame containing data from 27 studies on the following 2 variables.
-  \describe{
-    \item{\code{PubmedID}}{a character vector with the Pubmed ID of the study}
-    \item{\code{N}}{a numeric vector describing the study size}
-    \item{\code{N.events}}{a numeric vector describing the observed number of events}
-    \item{\code{FRS.orig.refitted}}{a boolean vector describing whether the coefficients of the original Framingham Risk Score (FRS) were re-estimated}
-    \item{\code{FRS.modif.refitted}}{a boolean vector describing whether the coefficients of the modified Framingham Risk Score were re-estimated}
-    \item{\code{predictors}}{a character vector indicating which new risk factor(s) were included in the modified FRS }
-    \item{\code{outcome}}{a character vector indicating the primary outcome being predicted}
-    \item{\code{AUC.orig}}{a numeric vector describing the Area under the ROC curve (AUC) of the original FRS model}
-    \item{\code{AUC.orig.CIl}}{a numeric vector describing the lower boundary of the 95\% confidence interval of the AUC of the original FRS model}
-    \item{\code{AUC.orig.CIu}}{a numeric vector describing the upper boundary of the 95\% confidence interval of the AUC of the original FRS model}
-    \item{\code{AUC.modif}}{a numeric vector describing the Area under the ROC curve (AUC) of the modified FRS model that includes one or more new risk factors}
-    \item{\code{AUC.modif.CIl}}{a numeric vector describing the lower boundary of the 95\% confidence interval of the AUC of the modified FRS model}
-    \item{\code{AUC.modif.CIu}}{a numeric vector describing the upper boundary of the 95\% confidence interval of the AUC of the modified FRS model}
-    \item{\code{pval.AUCdiff}}{a numeric vector with the p-value of the difference between \code{AUC.orig} and \code{AUC.modif}}
-    \item{\code{sign.AUCdiff}}{a boolean vector indicating whether the difference between \code{AUC.orig} and \code{AUC.modif} is below 0.05}
-  }
-}
-\details{
-%%  ~~ If necessary, more details than the __description__ above ~~
-}
-\source{
-%%  ~~ reference to a publication or URL from which the data were obtained ~~
-Tzoulaki I, Liberopoulos G, Ioannidis JPA. Assessment of claims of improved prediction beyond the Framingham risk score.  \emph{JAMA}. 2009 Dec 2;302(21):2345–52. 
-
-}
-\references{
-%%  ~~ possibly secondary sources and usages ~~
-}
-\examples{
-data(Tzoulaki)
-## maybe str(Tzoulaki) ; plot(Tzoulaki) ...
-}
-\keyword{datasets}
+\name{Tzoulaki}
+\alias{Tzoulaki}
+\docType{data}
+\title{
+The incremental value of cardiovascular risk factors
+}
+\description{
+%%  ~~ A concise (1-5 lines) description of the dataset. ~~
+Tzoulaki et al. (2009) reviewed studies that evaluated various candidate prognostic factors in their ability to improve prediction of coronary heart disease (CHD) or other outcomes beyond what the Framingham risk score (FRS) can achieve.
+
+}
+\usage{data("Tzoulaki")}
+\format{
+  A data frame containing data from 27 studies on the following 2 variables.
+  \describe{
+    \item{\code{PubmedID}}{a character vector with the Pubmed ID of the study}
+    \item{\code{N}}{a numeric vector describing the study size}
+    \item{\code{N.events}}{a numeric vector describing the observed number of events}
+    \item{\code{FRS.orig.refitted}}{a boolean vector describing whether the coefficients of the original Framingham Risk Score (FRS) were re-estimated}
+    \item{\code{FRS.modif.refitted}}{a boolean vector describing whether the coefficients of the modified Framingham Risk Score were re-estimated}
+    \item{\code{predictors}}{a character vector indicating which new risk factor(s) were included in the modified FRS }
+    \item{\code{outcome}}{a character vector indicating the primary outcome being predicted}
+    \item{\code{AUC.orig}}{a numeric vector describing the Area under the ROC curve (AUC) of the original FRS model}
+    \item{\code{AUC.orig.CIl}}{a numeric vector describing the lower boundary of the 95\% confidence interval of the AUC of the original FRS model}
+    \item{\code{AUC.orig.CIu}}{a numeric vector describing the upper boundary of the 95\% confidence interval of the AUC of the original FRS model}
+    \item{\code{AUC.modif}}{a numeric vector describing the Area under the ROC curve (AUC) of the modified FRS model that includes one or more new risk factors}
+    \item{\code{AUC.modif.CIl}}{a numeric vector describing the lower boundary of the 95\% confidence interval of the AUC of the modified FRS model}
+    \item{\code{AUC.modif.CIu}}{a numeric vector describing the upper boundary of the 95\% confidence interval of the AUC of the modified FRS model}
+    \item{\code{pval.AUCdiff}}{a numeric vector with the p-value of the difference between \code{AUC.orig} and \code{AUC.modif}}
+    \item{\code{sign.AUCdiff}}{a boolean vector indicating whether the difference between \code{AUC.orig} and \code{AUC.modif} is below 0.05}
+  }
+}
+\details{
+%%  ~~ If necessary, more details than the __description__ above ~~
+}
+\source{
+%%  ~~ reference to a publication or URL from which the data were obtained ~~
+Tzoulaki I, Liberopoulos G, Ioannidis JPA. Assessment of claims of improved prediction beyond the Framingham risk score.  \emph{JAMA}. 2009 Dec 2;302(21):2345–52. 
+
+}
+\references{
+%%  ~~ possibly secondary sources and usages ~~
+}
+\examples{
+data(Tzoulaki)
+## maybe str(Tzoulaki) ; plot(Tzoulaki) ...
+}
+\keyword{datasets}
diff --git a/vignettes/.gitignore b/vignettes/.gitignore
old mode 100644
new mode 100755
diff --git a/vignettes/ma-pf.Rmd b/vignettes/ma-pf.Rmd
old mode 100644
new mode 100755
diff --git a/vignettes/ma-pf.knit.md b/vignettes/ma-pf.knit.md
deleted file mode 100644
index 4d35f38..0000000
--- a/vignettes/ma-pf.knit.md
+++ /dev/null
@@ -1,485 +0,0 @@
----
-title: "Meta-analysis of prognostic factors"
-author:
-  - name: Thomas Debray
-    orcid: 0000-0002-1790-2719
-output: rmarkdown::html_vignette
-vignette: >
-  %\VignetteIndexEntry{Meta-analysis of prognostic factors}
-  %\VignetteEngine{knitr::rmarkdown}
-  %\VignetteEncoding{UTF-8}
-bibliography: "https://api.citedrive.com/bib/093a08ac-0337-479e-a147-17929fa7f7b0/references.bib?x=eyJpZCI6ICIwOTNhMDhhYy0wMzM3LTQ3OWUtYTE0Ny0xNzkyOWZhN2Y3YjAiLCAidXNlciI6ICI0MDA5IiwgInNpZ25hdHVyZSI6ICIwOWJiZjg2ZDJiNjk1NmQ1ODVhZTMxZmZiODI0OTgyNWYxYTEzNTQ5MzdiZjBiNzJiZjY3MTlhMTE2NGJhNzQ3In0=/bibliography.bib"
----
-
-
-
-
-
-
-# Introduction
-
-An important task in medical research is the identification and assessment of prognostic factors. A prognostic factor is any measure that, among people with a given health condition (that is, a startpoint), is associated with a subsequent clinical outcome (an endpoint) [@riley_prognosis_2013]. Commonly investigated prognostic factors include simple measures such as age, sex, and size of tumor, but they can also include more complex factors such as abnormal levels of proteins or catecholamines and unusual genetic mutations [@sauerbrei_evidence-based_2006]. They can be useful as modifiable targets for interventions to improve outcomes, building blocks for prognostic models, or predictors of differential treatment response.
-
-Over the past few decades, numerous prognostic factor studies have been published in the medical literature. For example, @riley_reporting_2003 identified 260 studies reporting associations for 130 different tumour markers for neuroblastoma. More recently, @tzoulaki_assessment_2009 identified 79 studies reporting the added value of 86 different markers when added to the Framingham risk score. Despite this huge research effort, the prognostic value of most traditional factors under discussion is uncertain and the usefulness of many specific markers, prognostic indices, and classification schemes is still unproven [@sauerbrei_evidence-based_2006].
-
-This vignette aims to illustrate how the results from multiple prognostic factor studies can be summarized and how sources of between-study heterogeneity can be examined. Hereto, we will use the R packages  **metamisc** and **metafor**. The [https://cran.r-project.org/package=metafor](metafor) package provides a comprehensive collection of functions for conducting meta-analyses in R. The [https://cran.r-project.org/package=metamisc](metamisc) package provides additional functions to facilitate meta-analysis of prognosis studies. We can load these packages as follows:
-
-
-```r
-library(metafor)
-library(metamisc)
-```
-
-# Case Study
-
-Endometrial cancer (EC) is the fourth most common malignancy in women and the most common gynecologic cancer. Overall, the 5-year survival rates for EC are approximately 78--90% for stage I, 74% for stage II, 36--57% for stage III, and 20% for stage IV. Such poor outcomes raise an urgent requirement that more accurate prognosis and predictive markers should be applied for EC to guide the therapy and monitor the disease progress for individual patients.
-
-Several biological molecules have been proposed as prognostic biomarkers in EC. Among them, hormone receptors such as estrogen receptors (ER), progesterone receptors (PR), and human epidermal growth factor receptor 2 (HER2) are attractive because of their physiological functions. Recently, Zhang \emph{et al.} conducted a systematic review to evaluate the overall risk of these hormone receptors for endometrial cancer survival [@zhang_prognostic_2015]. This review included 16 studies recruiting 1764 patients for HER2. For each study, estimates of effect were retrieved as follows. The simplest method consisted in the direct collection of HR, odds ratio or risk ratio, and their 95% CI from the original article, with an HR of less than 1 being associated with a better outcome. If not available, the total numbers of observed deaths/cancer recurrences and the numbers of patients in each group were extracted to calculate HR. When data were only available as Kaplan-Meier curves, data were extracted from the graphical survival plots, and estimation of the HR was then performed using the described method.
-
-We can load the data from all 16 studies in R as follows:
-
-
-```r
-data(Zhang)
-```
-
-This creates an object `Zhang` that contains the summary data from 14 studies reporting on overall survival (OS) and from 6 studies reporting on progression-free survival (PFS). A total of 14 studies assessed the relation between HER2 and overall survival. The corresponding hazard
-ratios (HR) are given below:
-
-
-|Study                  |   Country   | Disease |   N| Hazard Ratio (95% CI) |
-|:----------------------|:-----------:|:-------:|---:|:---------------------:|
-|Gonzalez-Rodilla, 2013 |    Spain    |   EC    | 126|   1.14 (0.52; 2.52)   |
-|Togami, 2012           |    Japan    |  UPSC   |  71|    3 (1.15; 7.01)     |
-|Mori, 2010             |    Japan    |   EEC   |  63|   2.58 (1.03; 7.92)   |
-|Jongen, 2009           | Netherlands |   EEC   | 315|   0.66 (0.1; 4.28)    |
-|Konecny, 2009          |     USA     |   EC    | 273|   2.13 (1.65; 2.73)   |
-|Odicino, 2008          |    Italy    |  UPSC   |  10|   1.6 (0.89; 2.89)    |
-|Santin, 2005           |     USA     |  UPSC   |  30|  5.19 (1.52; 17.64)   |
-|Santin, 2005           |     USA     |  UPSC   |  27|  12.43 (1.57; 98.28)  |
-|Backe, 1997            |   Germany   |   EC    | 222|   1.17 (0.93; 1.49)   |
-|Kohlberger, 1996       |  Australia  |   EC    | 100|  3.95 (1.07; 14.69)   |
-|Saffari, 1995          |     USA     |   EC    |  75|  5.25 (1.33; 20.74)   |
-|Peiro, 2004            |   Germany   |   EC    |  10|   1.64 (1.04; 2.57)   |
-|Cianciulli, 2003       |    Italy    |   EC    |  73|   1.9 (0.83; 4.36)    |
-|Gates, 2006            |     USA     |   EC    |  99|    4 (0.77; 20.8)     |
-
-Results suggest that the hormone receptor HER2 has prognostic value for survival but is prone to substantial between-study heterogeneity. For example, hazard ratios appear much larger for studies that were conducted in the USA. Possibly, the variation in treatment effect estimates is caused by differences in the baseline characteristics of patients (age, tumor stage, race), differences in the cutoff value of HER2, differences in received treatments, or differences in the duration of follow-up. Importantly, because estimated hazard ratios have not been adjusted for any patient-level covariates, they are particularly prone to heterogeneity in patient spectrum.
-
-
-
-# First steps
-
-To facilitate any quantitative analysis, information on the standard error of the different study effect sizes is needed. Estimates for the standard error can be obtained from the reported 95% confidence intervals [@altman_how_2011]. It is then commonly assumed that the log hazard ratio follows a Normal distribution, such that he standard error (SE) of the log hazard ratio is given as:
-
-$\mathrm{SE}=(\log(u)-\log(l))/(2*1.96)$
-
-where the upper and lower limits of the 95% CI are $u$ and $l$ respectively. We can implement this calculation as follows:
-
-
-```r
-Zhang <- Zhang %>% mutate(logHR = log(HR), 
-                          se.logHR =  log(HR.975/HR.025) / (2*qnorm(0.975))
-                          )
-```
-
-It is often helpful to display the effect sizes of all studies in a forest plot. An advantage of the forest plot is that allows a visual inspection of the available evidence and facilitates the identification of potential between-study heterogeneity. The forest plot for overall survival can then be generated using the `forest` function in **metamisc**. This requires to provide information on the estimated hazard ratios (via the argument `theta`), as well as the bounds of their 95% confidence interval (via `theta.ci.lb` and `theta.ci.ub`). 
-
-
-```r
-library(dplyr)
-
-# Select the 14 studies investigating overall survival
-dat_os <- Zhang %>% filter(outcome == "OS")
-
-# Generate a forest plot of the log hazard ratio
-metamisc::forest(theta = dat_os$HR, 
-                 theta.ci.lb = dat_os$HR.025, 
-                 theta.ci.ub = dat_os$HR.975, 
-                 theta.slab = dat_os$Study, 
-                 xlab = "Hazard ratio of HER2 versus OS", 
-                 refline = 1)
-```
-
-![](/private/var/folders/k_/zk47l71x1z7dz5knt5bzg3c00000gn/T/Rtmp6sGVII/preview-13f596bdd6d2d.dir/ma-pf_files/figure-html/unnamed-chunk-6-1.png)<!-- -->
-
-We can also generate a forest plot using **metafor**:
-
-
-```r
-metafor::forest(x = dat_os$HR, 
-                ci.lb = dat_os$HR.025, 
-                ci.ub = dat_os$HR.975, 
-                slab = dat_os$Study,
-                xlab = "Hazard ratio of HER2 versus OS", 
-                refline = 1)
-```
-
-![](/private/var/folders/k_/zk47l71x1z7dz5knt5bzg3c00000gn/T/Rtmp6sGVII/preview-13f596bdd6d2d.dir/ma-pf_files/figure-html/unnamed-chunk-7-1.png)<!-- -->
-
-# Assessment of publication bias
-
-The presence of small-study effects is a common threat to systematic reviews and meta-analyses. Small-study effects is a generic term for the phenomenon that sometimes smaller studies show different, often stronger, effects than the large ones [@debray_detecting_2018]. One possible reason is publication bias. Previously, @altman_systematic_2001 argued that it is probable that studies showing a strong (often statistically significant) prognostic ability are more likely to be published. For this reason, it is important to evaluate the potential presence of small-study effects, which can be verified by visual inspection of the funnel plot. In this plot, the estimate of the reported effect size is plotted against a measure of precision or sample size for each included study of the meta-analysis. The premise is that the scatter of plots should reflect a funnel shape, if small-study effects do not exist (provided that effect sizes are not substantially affected by the presence of between-study heterogeneity). However, when small studies are predominately in one direction (usually the direction of larger effect sizes), asymmetry will ensue.
-
-A common approach to construct the funnel plot is to display the individual observed effect sizes on the x-axis against the corresponding standard errors on the y-axis, and to use the fixed effect summary estimate as reference value. In the absence of publication bias and heterogeneity, one would then expect to see the points forming a funnel shape, with the majority of the points falling inside of the pseudo-confidence region of the summary estimate.
-
-
-```r
-res <- rma(yi = logHR, sei = se.logHR, method = "FE", data = dat_os)
-funnel(res, xlab = "Log Hazard Ratio")
-```
-
-![](/private/var/folders/k_/zk47l71x1z7dz5knt5bzg3c00000gn/T/Rtmp6sGVII/preview-13f596bdd6d2d.dir/ma-pf_files/figure-html/unnamed-chunk-8-1.png)<!-- -->
-
-In the case study, most study estimates fall within the pseudo-confidence region, hence there appears limited evidence for publication bias.
-
-We can formally test the presence of asymmetry in the funnel plot by evaluating whether there is an association between the estimated standard error and the estimated effect size.
-
-
-```r
-regtest(x = dat_os$logHR, 
-        sei = dat_os$se.logHR, 
-        model = "lm", 
-        predictor = "sei")
-```
-
-```
-## 
-## Regression Test for Funnel Plot Asymmetry
-## 
-## Model:     weighted regression with multiplicative dispersion
-## Predictor: standard error
-## 
-## Test for Funnel Plot Asymmetry: t = 2.1622, df = 12, p = 0.0515
-## Limit Estimate (as sei -> 0):   b = 0.2590 (CI: -0.0760, 0.5939)
-```
-
-
-
-It is common to use a 10% level of significance because the number of studies in a meta-analysis is usually low. In the case study, the P-value is 0.052, which implies there is evidence for funnel plot asymmetry . 
-
-Funnel plot asymmetry tests can also be performed using **metamisc** as follows:
-
-
-```r
-regfit <- fat(b = dat_os$logHR, 
-              b.se = dat_os$se.logHR, 
-              method = "E-FIV")
-```
-
-which yields
-
-
-```
-## Call: fat(b = dat_os$logHR, b.se = dat_os$se.logHR, method = "E-FIV")
-## 
-## Fixed effect summary estimate:  0.5193  
-## 
-## test for funnel plot asymmetry: t =2.1622, df = 12, p = 0.0515
-```
-
-Again, we can construct a funnel plot:
-
-
-```r
-plot(regfit)
-```
-
-![](/private/var/folders/k_/zk47l71x1z7dz5knt5bzg3c00000gn/T/Rtmp6sGVII/preview-13f596bdd6d2d.dir/ma-pf_files/figure-html/unnamed-chunk-13-1.png)<!-- -->
-
-Some caution is warranted when interpreting the results for funnel plot asymmetry tests [@debray_detecting_2018]. First, the power to detect asymmetry is often limited because meta-analyses usually do not include many studies. Second, many tests are known to yield inadequate type-I error rates or to suffer from low power. For instance, it has been demonstrated that aforementioned test to evaluate the association between the estimated standard error and effect size tends to yield type-I error rates that are too high. Finally, funnel plot asymmetry may rather be caused by heterogeneity than by publication bias. We therefore adjust aforementioned regression test to use inverse of the total sample size (rather than the standard error) as predictor.
-
-
-```r
-regtest(x = dat_os$logHR, 
-        sei = dat_os$se.logHR, 
-        ni = dat_os$N, 
-        model = "lm", 
-        predictor = "ninv")
-```
-
-```
-## 
-## Regression Test for Funnel Plot Asymmetry
-## 
-## Model:     weighted regression with multiplicative dispersion
-## Predictor: inverse of the sample size
-## 
-## Test for Funnel Plot Asymmetry: t = 0.1552, df = 12, p = 0.8793
-## Limit Estimate (as ni -> inf):  b = 0.5088 (CI: 0.2226, 0.7950)
-```
-
-From here onwards, we will assume that the potential for publication bias is negligible, and proceed with standard meta-analysis methods.
-
-# Meta-analysis of the prognostic value of HER2
-
-Meta-analysis is an option when the identified studies are considered sufficiently robust and comparable, and requires there are at least two estimates of the same statistic across studies. A random effects approach is often essential to allow for unexplained heterogeneity across studies due to differences in their methods, time-scale, populations, cut-points, adjustment factors, and treatments.
-
-A standard random effects meta-analysis combines the study estimates of the statistic of interest (here given as the log HR of HER2) in order to estimate the average effect (denoted by $\mu$) and its standard deviation (denoted by $\tau$) across studies. If $\hat b_i$ and $\mathrm{var}(\hat b_i)$ denote the estimate and its variance in study $i$, then a general random effects meta-analysis model can be specified as:
-
-$\hat b_i \sim N(\mu, \mathrm{var}(\hat b_i) + \tau^2)$
-
-It is common to first estimate the heterogeneity parameter $\tau$ and to use the resulting value to estimate $\mu$. However, such approach does not adequately reflect the error associated with parameter estimation, especially when the number of studies is small. For this reason, alternative estimators have been proposed that simultaneously estimate $\mu$ and $\tau$. Here, we will focus on Restricted Maximum Likelihood Estimation (REML), which is implemented as default in `metafor`.
-
-
-```r
-resREML <- rma(yi = logHR, sei = se.logHR, method = "REML", 
-               slab = Study, data = dat_os)
-resREML
-```
-
-```
-## 
-## Random-Effects Model (k = 14; tau^2 estimator: REML)
-## 
-## tau^2 (estimated amount of total heterogeneity): 0.0883 (SE = 0.0854)
-## tau (square root of estimated tau^2 value):      0.2972
-## I^2 (total heterogeneity / total variability):   49.17%
-## H^2 (total variability / sampling variability):  1.97
-## 
-## Test for Heterogeneity:
-## Q(df = 13) = 28.9214, p-val = 0.0067
-## 
-## Model Results:
-## 
-## estimate      se    zval    pval   ci.lb   ci.ub      
-##   0.6669  0.1354  4.9251  <.0001  0.4015  0.9324  *** 
-## 
-## ---
-## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
-```
-
-The pooled estimate for the log hazard ratio is 0.667 with a standard error of 0.135. The between-study standard deviation of the log hazard ratio is 0.297. We can extract the key statistics as follows:
-
-
-```r
-# Summary estimate of the log hazard ratio for HER2
-mu <- resREML$b
-
-# 95% confidence interval of the pooled log hazard ratio
-mu.ci <- c(resREML$ci.lb, resREML$ci.ub)
-
-# Between-study variance of the log hazard ratio
-tau2 <- resREML$tau2
-
-# Error variance of the pooled log hazard ratio
-sigma2 <- as.numeric(vcov(resREML))
-
-# Number of studies contributing to the meta-analyis
-numstudies <- resREML$k.all
-```
-
-We can use the information above to derive the summary estimate for the hazard ratio and its corresponding 95% confidence interval:
-
-
-```r
-exp(mu)
-```
-
-```
-##             [,1]
-## intrcpt 1.948268
-```
-
-```r
-exp(mu.ci)
-```
-
-```
-## [1] 1.494104 2.540483
-```
-
-The summary HR of HER2 is statistically significant, indicating that increased levels of HER2 are associated with poorer survival. We can also obtain the summary estimate and 95% CI for the HR of HER2 by simply using the `predict` function:
-
-
-```r
-predict(resREML, transf = exp)
-```
-
-```
-## 
-##    pred  ci.lb  ci.ub  pi.lb  pi.ub 
-##  1.9483 1.4941 2.5405 1.0272 3.6954
-```
-
-Although the summary result ($\hat \mu$) is usually the main focus of a meta-analysis, this reflects some average across studies and it may be hard to translate to clinical practice when there is large between-study heterogeneity. We can quantify the impact of between-study heterogeneity by constructing a $100(1-\alpha/2)$% prediction interval, which gives the potential true prognostic effect in a new population conditional on the meta-analysis results [@riley_interpretation_2011]. An approximate prediction interval (PI) is given as follows:
-
-$\hat \mu \pm t_{\alpha, N-2} \sqrt{\hat \tau^2 + \hat \sigma^2}$
-
-where $t_{\alpha, N-2}$ is the $100(1-\alpha/2)$% percentile of the t-distribution for $N-2$ degrees of freedom, $N$ is the number of studies, $\hat \sigma$ is the estimated standard error of $\hat \mu$, and $\hat \tau$ is the estimated between-study standard deviation. In `R`, can calculate the approximate 95% PI for $\hat \mu$ as follows:
-
-
-```r
-level <- 0.05
-crit <- qt(c(level/2, 1-(level/2)), df = (numstudies - 2))
-
-pi_lower <- exp(mu + crit[1] * sqrt(tau2 + sigma2))
-pi_upper <- exp(mu + crit[2] * sqrt(tau2 + sigma2))
-
-c(pi_lower, pi_upper)
-```
-
-```
-## [1] 0.9563084 3.9691662
-```
-
-The 95% prediction interval ranges from 0.956 to 3.969, and suggests there is substantial heterogeneity in the prognostic value of HER2. In particular, although increased levels of HER2 are generally associated with poorer survival, they may also lead to improved survival (HR < 1) in certain settings. We can add the summary estimate and prediction interval to the forest plot:
-
-
-```r
-# Generate a forest plot of the log hazard ratio
-metamisc::forest(theta = dat_os$HR, 
-                 theta.ci.lb = dat_os$HR.025, 
-                 theta.ci.ub = dat_os$HR.975, 
-                 theta.slab = dat_os$Study, 
-                 theta.summary = exp(mu),
-                 theta.summary.ci.lb = exp(mu.ci[1]),
-                 theta.summary.ci.ub = exp(mu.ci[2]),
-                 theta.summary.pi.lb = pi_lower,
-                 theta.summary.pi.ub = pi_upper,
-                 xlab = "Hazard ratio of HER2 versus OS", 
-                 refline = 1)
-```
-
-![](/private/var/folders/k_/zk47l71x1z7dz5knt5bzg3c00000gn/T/Rtmp6sGVII/preview-13f596bdd6d2d.dir/ma-pf_files/figure-html/unnamed-chunk-20-1.png)<!-- -->
-
-A possible approach to enhance the interpretation of meta-analysis results is to calculate the probability that the prognostic effect of HER2 will be above some useful value (e.g. a HR \> 1.5 for a binary factor, which indicates risk is increased by at least 50%) in a new setting. We can calculate this probability as follows:
-
-$Pr(\mathrm{HR} > 1.5) = Pr(\hat \mu > \log(1.5)) = 1 - Pr(\hat \mu \leq \log(1.5))$
-
-where $Pr(\hat \mu \leq \log(1.5))$ is approximated using a scaled Student-$t$ distribution (similar to the calculation of our prediction interval):
-
-
-```r
-probOS <- 1 - pt((log(1.5) - mu)/sqrt(tau2 + sigma2), df = (numstudies - 2))
-probOS
-```
-
-```
-##              [,1]
-## intrcpt 0.7805314
-```
-
-The probability that HER2 will yield a hazard ratio for overall survival of at least 1.5 in a new setting is 78%. This means that despite the presence of between-study heterogeneity, it is likely that HER2 will provide substantial discriminative ability when used as a single prognostic factor in a new setting. We can also estimate this probability by means of simulation:
-
-
-```r
-# Simulate 100000 new studies
-Nsim <- 1000000
-
-# Random draws from a Student T distribution
-rnd_t <- rt(Nsim, df = (numstudies - 2))
-
-# Generate 1,000,000 hazard ratios
-HRsim <- exp(c(mu) + rnd_t*sqrt(tau2 + sigma2))
-
-# Calculate the proportion of hazard ratios greater than  1.5
-mean(HRsim > 1.5)
-```
-
-```
-## [1] 0.780081
-```
-
-Again, the probability that HER2 will yield a hazard ratio for overall survival of >1.5 in a new setting is 78%.
-
-# Multivariate meta-analysis
-
-In previous section, we used 14 of the 16 identified studies to evaluate the prognostic effect of HER2 on overall survival. Two studies were excluded from the meta-analysis because they did not provide direct evidence about overall survival. This is unwelcome, especially if the participants are otherwise representative of the population, clinical settings, and condition of interest [@riley_multivariate_2017]. For this reason, we here discuss how multivariate meta-analysis methods can be used to borrow strength from studies that do not investigate the primary outcome of interest. Briefly, multivariate meta-analysis methods simultaneously summarize the effect size across multiple outcomes whilst accounting for their correlation. For example, six studies in the review of @zhang_prognostic_2015 assessed the hazard ratio of HER2 for progression-free survival, four of which also assessed overall survival. Hence, by conducting a multivariate meta-analysis we can borrow strength from two additional studies when estimating the hazard ratio for overall survival. The hazard ratios for progression free survival are depicted below:
-
-
-|Study          |   Country   | Disease |   N| Hazard Ratio (95% CI) |
-|:--------------|:-----------:|:-------:|---:|:---------------------:|
-|Togami, 2012   |    Japan    |  UPSC   |  71|   3.43 (1.5; 7.23)    |
-|Mori, 2010     |    Japan    |   EEC   |  63|   3.8 (1.29; 12.35)   |
-|Jongen, 2009   | Netherlands |   EEC   | 315|  1.75 (0.23; 13.08)   |
-|Coronado, 2001 |    Spain    |   EC    | 114|   2.69 (1.35; 5.37)   |
-|Backe, 1997    |   Germany   |   EC    | 222|   1.36 (0.65; 2.85)   |
-|Vos, 2011      |   England   |   EC    | 156|   1.41 (0.51; 3.88)   |
-
-We first conduct a univariate meta-analysis of the six studies investigating progression-free survival:
-
-
-```r
-dat_pfs <- Zhang %>% filter(outcome == "PFS")
-resPFS <- rma(yi = logHR, 
-              sei = se.logHR, 
-              method = "REML", 
-              slab = Study, 
-              data = dat_pfs)
-```
-
-![](/private/var/folders/k_/zk47l71x1z7dz5knt5bzg3c00000gn/T/Rtmp6sGVII/preview-13f596bdd6d2d.dir/ma-pf_files/figure-html/unnamed-chunk-24-1.png)<!-- -->
-
-Results indicate that the hormone receptor HER2 also has prognostic value for progression-free survival. Furthermore, the reported HRs appear to be much more homogeneous across studies, since the between-study standard deviation is 0.17 for progression-free survival whereas it was  0.30 for overall survival. Note that the univariate meta-analysis for progression-free survival is based on merely 6 studies, and that the univariate meta-analysis for overall survival was based on 14 studies. We can now employ multivariate meta-analysis to borrow information from the 4 studies that report prognostic effects for both endpoints. This, in turn, allows all studies to contribute on the summary effect for HER2 in both outcomes.
-
-We first need to define the within-study covariance matrix of the estimated log hazard ratios for progression-free survival and overall survival. We here assume that estimates for the hazard ratio are independent within studies and construct a block diagonal matrix that only considers the error variance of each estimate:
-
-
-```r
-V <- diag(Zhang$se.logHR**2)
-```
-
-A multivariate random-effects model can now be used to simultaneously meta-analyze the hazard ratios for overall and progression-free survival:
-
-
-```r
-res.MV <- rma.mv(yi = logHR, 
-                 V = V, 
-                 mods = ~ outcome - 1, 
-                 random = ~ outcome | Study, 
-                 struct = "UN", 
-                 data = Zhang, 
-                 method = "REML")
-res.MV
-```
-
-```
-## 
-## Multivariate Meta-Analysis Model (k = 20; method: REML)
-## 
-## Variance Components:
-## 
-## outer factor: Study   (nlvls = 16)
-## inner factor: outcome (nlvls = 2)
-## 
-##             estim    sqrt  k.lvl  fixed  level 
-## tau^2.1    0.0865  0.2942     14     no     OS 
-## tau^2.2    0.0770  0.2775      6     no    PFS 
-## 
-##      rho.OS  rho.PFS    OS  PFS 
-## OS        1              -    4 
-## PFS  1.0000        1    no    - 
-## 
-## Test for Residual Heterogeneity:
-## QE(df = 18) = 33.7664, p-val = 0.0135
-## 
-## Test of Moderators (coefficients 1:2):
-## QM(df = 2) = 35.6315, p-val < .0001
-## 
-## Model Results:
-## 
-##             estimate      se    zval    pval   ci.lb   ci.ub      
-## outcomeOS     0.6704  0.1318  5.0868  <.0001  0.4121  0.9287  *** 
-## outcomePFS    0.8734  0.2151  4.0606  <.0001  0.4518  1.2950  *** 
-## 
-## ---
-## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
-```
-
-The summary estimate of the log hazard ratio for overall survival is 0.670 (multivariate meta-analysis) versus 0.667 (univariate meta-analysis) with an SE of 0.132 and, respectively, 0.135. Hence, we gained some precision by including evidence from 2 additional studies that only evaluated progression-free survival.
-
-
-|Outcome                   |Model                      | $\mu$ | SE($\mu$) | $\tau$ |HR   |95% CI     |
-|:-------------------------|:--------------------------|:-----:|:---------:|:------:|:----|:----------|
-|Overall Survival          |Univariate meta-analysis   | 0.667 |   0.135   | 0.297  |1.95 |1.49; 2.54 |
-|Overall Survival          |Multivariate meta-analysis | 0.670 |   0.132   | 0.294  |1.95 |1.51; 2.53 |
-|Progression-free Survival |Univariate meta-analysis   | 0.815 |   0.201   | 0.168  |2.26 |1.52; 3.35 |
-|Progression-free Survival |Multivariate meta-analysis | 0.873 |   0.215   | 0.277  |2.40 |1.57; 3.65 |
-
-Note that estimation of between-study heterogeneity was difficult for progression-free survival due to the limited number of studies. In particular, we found $\tau^2$= 0.028 with an SE of 0.145. In the multivariate meta-analysis, the estimated between-study variance for PFS was much larger ($\tau^2$=0.077), and based on all 16 rather than merely 6 studies.
-
-In summary, the multivariate meta-analysis approach is often helpful as it reduces the need to exclude relevant studies from a meta-analysis, thereby decreasing the risk of bias (e.g. due to selective outcome reporting) and potentially improving precision. As indicated by @riley_multivariate_2017, a multivariate meta-analysis of multiple outcomes is most beneficial when the outcomes are highly correlated and the percentage of studies with missing outcomes is large.
-
-
-# References
diff --git a/vignettes/ma-pm.Rmd b/vignettes/ma-pm.Rmd
old mode 100644
new mode 100755
index 043a2f7..b58bc58
--- a/vignettes/ma-pm.Rmd
+++ b/vignettes/ma-pm.Rmd
@@ -36,22 +36,20 @@ In this case study, we summarize the results from these 22 studies, as well as t
 We can load the data from all 23 validation studies as follows:
 
 ```{r}
-library(metafor)
 library(metamisc)
 data(EuroSCORE)
 ```
 
 ```{r, echo = F}
-out <- EuroSCORE %>% mutate(mortality = sprintf("%.2f", 100*n.events/n),
-                            euroscore = sprintf("%.2f", 100*Pe)) %>%
-  select(Study, n, mortality, euroscore, c.index)
-kable(out, col.names = c("Study", "Patients (n)", "Mortality (%)", "euroSCORE II (%)", "c-index"),
+out <- EuroSCORE %>% mutate(mortality = sprintf("%.2f", 100*n.events/n)) %>%
+  select(Study, n, mortality, c.index)
+kable(out, col.names = c("Study", "Patients (n)", "Mortality (%)", "c-index"),
       align = "lrcc")
 ```
 
 
 # Meta-analysis of calibration performance
-Calibration refers to a model's accuracy of predicted risk probabilities, and indicates the extent to which expected outcomes (predicted from the model) and observed outcomes agree. Summarizing estimates of calibration performance is challenging because calibration plots are most often not presented, and because studies tend to report different types of summary statistics in calibration. For example, in the case study, calibration was assessed using the Hosmer-Lemeshow test, calibration plots or by comparing the observed mortality to the predicted EuroSCORE II (either overall or for groups of patients). Within each validation study, we can compare the total	number	of observed events (O) with the total number of expected (predicted)	events by deriving their ratio O:E. The total O:E ratio provides a rough indication of the overall model calibration (across the entire range of predicted risks). It	describes	whether	more	(O:E	>	1)	or	fewer	(O:E	<	1)	events	occurred		than	expected	 based	on	the	model. Whilst the O:E ratio itself was not explicitly reported in all studies, it can be calculated from other reported information:
+Calibration refers to a model's accuracy of predicted risk probabilities, and indicates the extent to which expected outcomes (predicted from the model) and observed outcomes agree. Summarising estimates of calibration performance is challenging because calibration plots are most often not presented, and because studies tend to report different types of summary statistics in calibration. For example, in the case study, calibration was assessed using the Hosmer-Lemeshow test, calibration plots or by comparing the observed mortality to the predicted EuroSCORE II (either overall or for groups of patients). Within each validation study, we can compare the total	number	of observed events (O) with the total number of expected (predicted)	events by deriving their ratio O:E. The total O:E ratio provides a rough indication of the overall model calibration (across the entire range of predicted risks). It	describes	whether	more	(O:E	>	1)	or	fewer	(O:E	<	1)	events	occurred		than	expected	 based	on	the	model. Whilst the O:E ratio itself was not explicitly reported in all studies, it can be calculated from other reported information:
 
 ```{r}
 EuroSCORE <- EuroSCORE %>% mutate(oe = n.events/e.events)
@@ -63,176 +61,10 @@ The O:E ratio can also be derived from the observed and predicted mortality risk
 EuroSCORE %>% select(Po, Pe) %>% mutate(oe = Po/Pe)
 ```
 
-It is recommended to first transform extracted O:E ratios to	the	log (natural logarithm) scale before applying a meta-analysis [@snell_meta-analysis_2017].
+It is recommended to first transform extracted O:E ratios to	the	log (natural logarithm) scale before applying a meta-analysis.
 
 ```{r}
-EuroSCORE <- EuroSCORE %>% mutate(logOE = log(oe))
+EuroSCORE <- EuroSCORE %>% mutate(logoe = log(oe))
 ```
 
-Meta-analysis requires a standard error for each extracted estimate of model performance. If standard errors cannot be retrieved from original articles, they should be calculated using reported information. For example, the standard error of the log O:E ratio, which is approximately given as 
-
-$$\sqrt{(1/O) - (1/N)} \approx \sqrt{1/O}$$
-
-We can implement this as follows:
-
-```{r}
-EuroSCORE <- EuroSCORE %>% mutate(se.logOE = sqrt((1/n.events - 1/n)))
-```
-
-We can now visualise the calibration performance of EuroSCORE II across the included studies by generating a forest plot:
-
-```{r, echo = FALSE}
-metafor::forest(x = EuroSCORE$logOE, sei = EuroSCORE$se.logOE, slab = EuroSCORE$Study, 
-                transf = exp, xlab = "O:E ratio", refline = 1, cex = 0.8)
-```
-
-For each validation study, the O:E ratio is provided with its 95% confidence interval. The size of the boxes indicates the	sample size of the corresponding validation study. In this forest plot, O:E	ratios appear to vary	substantially	across the included validation	studies. These results suggest that the calibration of EuroSCORE II is prone to substantial between-study heterogeneity.
-
-In **metamisc**, we can combine the aforementioned steps as follows:
-
-```{r}
-oe.ad <- oecalc(N = n, O = n.events, E = e.events, slab = Study, 
-                data = EuroSCORE)
-plot(oe.ad, refline = 1, sort = "no")
-```
-
-Note that if we want to derive the log O:E ratio instead, we should use:
-
-```{r}
-logoe.ad <- oecalc(N = n, O = n.events, E = e.events, 
-                   slab = Study, g = "log(OE)", data = EuroSCORE)
-```
-
-
-## Fixed effect meta-analysis
-We can now summarize the results from the included validation studies. A simple approach is to calculate a weighted average, where the estimate of each study is simply weighted by its precision. This approach is also known as fixed effect meta-analysis, where the weighted average $\mu$ is given by:
-
-$$\mu = \dfrac{ \sum_{i=1}^{K}\left( \hat \theta_i / \left(\mathrm{SE}(\hat \theta_i)\right)^2 \right)}{ \sum_{i=1}^{K}\left( 1 /  \left(\mathrm{SE}(\hat \theta_i)\right)^2 \right) }\\$$
-In this expression, $\hat \theta_i$ represent the study specific estimates for the log O:E ratio, and $K$ represents the total number of included studies. Aforementioned estimate for $\mu$ corresponds to the maximum likelihood of a fixed effect meta-analysis that assumes normality of the log O:E ratio within studies:
-
-$$\hat \theta_i \sim \mathcal{N}\left(\mu, \left(\mathrm{SE}(\hat \theta_i)\right)^2\right)$$
-We can directly calculate the pooled log O:E ratio as follows:
-
-```{r}
-mu <- sum(EuroSCORE$logOE/EuroSCORE$se.logOE**2)/sum(1/EuroSCORE$se.logOE**2)
-```
-
-Since the meta-analysis is performed using log-transformed estimates, the summary estimate for the total O:E ratio is given as $\exp(\hat \mu)$:
-
-```{r}
-exp(mu)
-```
-
-The	fixed	effect	meta-analysis	of the total O:E ratio can also be performed using **metafor**:
-
-```{r,message=F,warning=F,echo=T,eval=T}
-fit <- rma(yi = logOE, sei = se.logOE, data = EuroSCORE, method = "FE")
-exp(fit$beta)
-```
-
-Alternatively, we can directly implement all of the aforementioned data preparation and meta-analysis steps using the `valmeta()` command. Hereto, we will use information on the total number of observed and expected events from each validation study, as well as the total sample size. Note that unless specified otherwise, `valmeta()` will apply the log transformation to summarize estimates of the total O:E ratio, and back-transform the results to the original scale.
-
-```{r,message=F,warning=F,echo=T,eval=T}
-valmeta(measure = "OE", O = n.events, E = e.events,  N = n, method = "FE", 
-        data = EuroSCORE)
-```
-
-The pooled O:E ratio is `r sprintf("%.2f", exp(mu))`, which implies that, on average, EuroSCORE II tends to over-estimate the risk of peri-operative mortality.
-
-The previous meta-analysis models derived a summary estimate for the total O:E ratio from information on the total number of observed events, the total number of expected events, and the total sample size. In practice, however, the total sample size may not always be reported, and in such situations approximations can be used for estimating the standard error of the total O:E ratio. To illustrate this, we can re-run our fixed effect meta-analysis by omitting the sample size from the `valmeta()` command:
-
-```{r,message=F,warning=F,echo=T,eval=T}
-valmeta(measure = "OE", O = n.events, E = e.events, method = "FE", 
-        data = EuroSCORE)
-```
-
-Results are nearly identical to the analyses where we utilized information on the sample size of the validation studies.
-
-
-## Random effects meta-analysis
-The discrimination and calibration of a prediction model are highly likely to vary between validation studies due to differences between the studied populations [@riley_external_2016]. A major reason is different case mix variation, which generally occurs when studies differ in the distribution of predictor values, other relevant participant or setting characteristics (such as treatment received), and the outcome prevalence (diagnosis) or incidence (prognosis). Case mix variation across different settings or populations can lead to genuine differences in the performance of a prediction model, even when the true (underlying) predictor effects are consistent (that is, when the effect of a particular predictor on outcome risk is the same regardless of the study population). For this reason, it is often more appropriate to adopt random effects meta-analysis models when summarizing estimates of prediction model performance. Briefly, this approach considers two sources of variability in study results:
-
-* The estimated effect $\hat \theta_i$ for any study (i) may differ from that study's true effect ($\theta_i$) due to estimation error, $\mathrm{SE}(\hat \theta_i)$. 
-* The true effect ($\theta_i$) for each study differs from $\mu$ because of between-study variance ($\tau^2$). 
-
-The weighted average $\mu$ is then given as:
-
-$$\mu = \dfrac{ \sum_{i=1}^{K}\left( \hat \theta_i / \left(\tau^2 + \left(\mathrm{SE}(\hat \theta_i)\right)^2\right) \right)}{ \sum_{i=1}^{K}\left( 1 / \left(\tau^2 + \left(\mathrm{SE}(\hat \theta_i)\right)^2\right) \right) }$$
-
-The heterogeneity parameter $\tau^2$ is often estimated separately and then inserted as known value in the equations above to obtain an estimate for $\mu$. It is, however, often more reliable to simultaneously estimate $\mu$ and $\tau$ and directly account for their respective estimation error. The random effects model can then more generally be described as follows, and requires iterative estimation procedures.
-
-$$
-\begin{aligned} \hat \theta_i &\sim \mathcal{N}\left(\theta_i, \left(\mathrm{SE}(\hat \theta_i)\right)^2\right)  \\ \theta_i  &\sim \mathcal{N}\left(\mu, \tau^2\right) \end{aligned}
-$$
-
-As indicated, the random effects meta-analysis model assumes normality of the performance statistic (log O:E ratio), both at the within-study and between-study levels [@snell_meta-analysis_2017]. Within each study, the estimated performance statistic is assumed to be normally distributed around some *true* performance for that study ($\theta_i$) with *known* standard deviation $\mathrm{SE}(\hat \theta_i)$. Between studies, the *true* performance statistic from each study is also assumed to be drawn from a normal distribution with mean performance $\mu$ and between-study variance $\tau^2$.
-
-The random effects model can be implemented as follows. In line with previous recommendations [@debray_guide_2017], we will adopt restricted maximum likelihood estimation and use the method by @knapp_improved_2003 for calculating 95\% confidence intervals.
-
-```{r}
-fit.REML <- valmeta(measure = "OE", O = n.events, E = e.events,  N = n, 
-                    method = "REML", slab = Study, data = EuroSCORE)
-fit.REML
-```
-
-More detailed information on the meta-analysis model (summarizing estimates of the log O:E ratio) can be explored as follows:
-
-```{r} 
-fit.REML$fit
-```
-
-Results from the random effects meta-analysis suggest that, on average, EuroSCORE II tends to slightly underestimate the risk of mortality. There was a substantial amount of between-study heterogeneity, I2 = `r sprintf("%.0f%%",fit.REML$fit$I2)` ($\hat \tau$ = `r sprintf("%.2f",sqrt(fit.REML$fit$tau2))`). To facilitate the interpretation of the summary estimate, it is often helpful to calculate an (approximate) 95\% prediction interval (PI) [@riley_interpretation_2011]. This interval provides a range for the predicted model performance in a new validation of the model. A 95\% PI for the summary estimate in a new setting is approximately given as:
-
-$$\hat \mu \pm t_{K-2} \,\sqrt{\hat \tau^2 + \left(\widehat{\mathrm{SE}}(\hat \mu)\right)^2}$$
-
-where the Student-$t$ (rather than the Normal) distribution is used to help account for the uncertainty of $\hat \tau$. We can extract the (approximate) prediction interval for the total O:E ratio as follows:
-
-```{r}
-c(fit.REML$pi.lb, fit.REML$pi.ub)
-```
-
-This wide prediction interval contains values well above and below the value of 1, indicating that EuroSCORE II yields predicted probabilities that are systematically too low in some populations (O:E >> 1), but also yields predicted probabilities that are systematically too high in other populations (O:E << 1). The wide prediction interval illustrates the weakness of focussing solely on average performance, as calibration is good on average but is poor in some populations. This issue is also visible in the forest plot:
-
-```{r}
-plot(fit.REML)
-```
-
-An alternative approach to assess the influence of between-study heterogeneity is to calculate the probability of *good* performance. We can, for instance, calculate the	probability	that the	total O:E	ratio	of	the	EuroSCORE II model	in	a	new	study	will	be	between	 0.8	and	1.2.	
-
-One approach to estimate this probability is by means of simulation. In particular, we can use the prediction interval to generate new validation study results:
-
-```{r}
-dfr <- fit.REML$numstudies - 2 #number of included studies minus 2
-tau2 <- as.numeric(fit.REML$fit$tau2)
-sigma2 <- as.numeric(vcov(fit.REML$fit))
-
-Nsim <- 1000000  # Simulate 100000 new validation studies
-OEsim <- exp(mu + rt(Nsim, df = dfr)*sqrt(tau2 + sigma2))
-sum(OEsim > 0.8 & OEsim < 1.2)/Nsim
-```
-
-Hence, the empirical probability that the	 *true* total O:E	ratio	of	EuroSCORE II in	a	new	study	will	be	between	 0.8	and	1.2 is `r sprintf("%.0f%%", (sum(OEsim > 0.8 & OEsim < 1.2)/Nsim)*100)`. 
-
-A more formal approach to calculate this probability is given below:
-
-$$
-\begin{aligned} 
-\mathrm{Pr}(0.8 \leq \mathrm{O:E} \leq 1.2) &= \mathrm{Pr}(\log(0.8) \leq \log(\mathrm{O:E}) \leq \log(1.2)) \\
-&= \mathrm{Pr}(\log(0.8) \leq \hat \mu \leq \log(1.2))  \\
-&= \mathrm{Pr}(\hat \mu < \log(1.2)) -  \mathrm{Pr}(\hat \mu < \log(0.8)) \\ 
-&\approx  \mathrm{Pr}(\hat \mu \leq \log(1.2)) -  \mathrm{Pr}(\hat \mu \leq \log(0.8))
-\end{aligned}
-$$
-
-Again, we use the Student-$t$ distribution to calculate $\mathrm{Pr}(\hat \mu \leq \log(1.2))$ and $\mathrm{Pr}(\hat \mu \leq \log(0.8))$:
-
-```{r,message=F,warning=F,echo=T,eval=T}
-prob1 <- pt((log(1.2) - mu)/sqrt(tau2+sigma2), df=dfr)
-prob2 <- pt((log(0.8) - mu)/sqrt(tau2+sigma2), df=dfr)
-prob1 - prob2
-```
-
-Due to the substantial miscalibration across studies, updating EuroSCORE II (in particular its intercept term) may be necessary before implementation.
-
-
 # References
diff --git a/vignettes/mapf.html b/vignettes/mapf.html
old mode 100644
new mode 100755
diff --git a/vignettes/mapf.qmd b/vignettes/mapf.qmd
old mode 100644
new mode 100755
index 229c0e2..ab388c0
--- a/vignettes/mapf.qmd
+++ b/vignettes/mapf.qmd
@@ -1,304 +1,304 @@
----
-title: "Meta-analysis of prognostic factor studies"
-author:
-  - name: Thomas Debray
-    orcid: 0000-0002-1790-2719
-date: last-modified
-date-format: "[Last Updated on] DD MMMM, YYYY"
-params:
-  dir_data: "data"
-  dir_r: "R_TD"
-format:
-  html:
-    toc: true
-    number-sections: true
-    css: styles.css
-bibliography: "https://api.citedrive.com/bib/093a08ac-0337-479e-a147-17929fa7f7b0/references.bib?x=eyJpZCI6ICIwOTNhMDhhYy0wMzM3LTQ3OWUtYTE0Ny0xNzkyOWZhN2Y3YjAiLCAidXNlciI6ICI0MDA5IiwgInNpZ25hdHVyZSI6ICIwOWJiZjg2ZDJiNjk1NmQ1ODVhZTMxZmZiODI0OTgyNWYxYTEzNTQ5MzdiZjBiNzJiZjY3MTlhMTE2NGJhNzQ3In0=/bibliography.bib"
----
-
-```{r}
-#| echo: false
-#| message: false
-#| warning: false
-library(knitr)
-
-# Use GitHub actions to automatically build and publish the site every time you make a change. The easiest way to set this up is to run usethis::use_pkgdown_github_pages().
-
-show.text = TRUE
-```
-
-# Introduction
-
-An important task in medical research is the identification and assessment of prognostic factors. A prognostic factor is any measure that, among people with a given health condition (that is, a startpoint), is associated with a subsequent clinical outcome (an endpoint) [@riley_prognosis_2013]. Commonly investigated prognostic factors include simple measures such as age, sex, and size of tumor, but they can also include more complex factors such as abnormal levels of proteins or catecholamines and unusual genetic mutations [@sauerbrei_evidence-based_2006]. They can be useful as modifiable targets for interventions to improve outcomes, building blocks for prognostic models, or predictors of differential treatment response.
-
-Over the past few decades, numerous prognostic factor studies have been published in the medical literature. For example, @riley_reporting_2003 identified 260 studies reporting associations for 130 different tumour markers for neuroblastoma. More recently, @tzoulaki_assessment_2009 identified 79 studies reporting the added value of 86 different markers when added to the Framingham risk score. Despite this huge research effort, the prognostic value of most traditional factors under discussion is uncertain and the usefulness of many specific markers, prognostic indices, and classification schemes is still unproven [@sauerbrei_evidence-based_2006].
-
-The aim of this introduction is to illustrate how to summarize the results from multiple prognostic factor studies and how to investigate sources of between-study heterogeneity. 
-
-In this practical we will make use of the R packages `metamisc` and `metafor`. The [https://cran.r-project.org/package=metafor](metafor) package provides a comprehensive collection of functions for conducting meta-analyses in R. The [https://cran.r-project.org/package=metamisc](metamisc) package provides additional functions to facilitate meta-analysis of prognosis studies. We can load these packages as follows:
-
-```{r}
-#| message: false
-#| warning: false
-library(metafor)
-library(metamisc)
-```
-
-# Meta-analysis of prognostic effect estimates
-
-## Background information
-
-Endometrial cancer (EC) is the fourth most common malignancy in women and the most common gynecologic cancer. Overall, the 5-year survival rates for EC are approximately 78--90% for stage I, 74% for stage II, 36--57% for stage III, and 20% for stage IV. Such poor outcomes raise an urgent requirement that more accurate prognosis and predictive markers should be applied for EC to guide the therapy and monitor the disease progress for individual patients.
-
-Several biological molecules have been proposed as prognostic biomarkers in EC. Among them, hormone receptors such as estrogen receptors (ER), progesterone receptors (PR), and human epidermal growth factor receptor 2 (HER2) are attractive because of their physiological functions. Recently, Zhang \emph{et al.} conducted a systematic review to evaluate the overall risk of these hormone receptors for endometrial cancer survival [@zhang_prognostic_2015]. This review included 16 studies recruiting 1764 patients for HER2. We can load the data from all 16 studies in R as follows:
-
-```{r}
-data(Zhang)
-```
-
-This creates an object `Zhang` that contains the summary data from 14 studies reporting on overall survival (OS) and from 6 studies reporting on progression-free survival (PFS). As a result, a total of 20 estimates are available for the hazard ratio of HER2:
-
-```{r}
-Zhang
-```
-
-For each study, estimates of effect were retrieved as follows. The simplest method consisted in the direct collection of HR, odds ratio or risk ratio, and their 95% CI from the original article, with an HR of less than 1 being associated with a better outcome. If not available, the total numbers of observed deaths/cancer recurrences and the numbers of patients in each group were extracted to calculate HR. When data were only available as Kaplan-Meier curves, data were extracted from the graphical survival plots, and estimation of the HR was then performed using the described method.
-
-A total of 14 studies assessed the relation between HER2 and overall survival. The corresponding hazard ratios (HR) are given below:
-
-```{r kableOS}
-Zhang
-```
- EEC endometrioid endometrial cancer, UPSC uterine papillary serous carcinoma
-
-The hormone receptor HER2 has prognostic value for survival but is prone to substantial between-study heterogeneity. Hazard ratios appear much larger for studies conducted in the USA
-
-The heterogeneity of the population was probably due to the difference in the baseline characteristics of patients (age, tumor stage, race, methodology for assessing HRs expression, or country), the cutoff value of markers, the undergoing treatment, the duration of follow-up, and others. Also, HRs are unadjusted, and therefore highly prone to heterogeneity in patient spectrum.
-
-
-Note that the total number of studies is indeed 16:
-
-```{r}
-length(unique(Zhang$Study))
-```
-
-## Exploratory analyses
-
-It is often helpful to display the effect sizes of all studies in a forest plot. The forest plot for overall survival can then be generated as follows. Briefly, we here provide information on the estimated hazard ratios (via the argument `theta`), as well as the bounds of their 95% confidence interval (via `theta.ci.lb` and `theta.ci.ub`). Further, we only include the 14 estimates for overall survival.
-
-```{r,message=F,warning=F,echo=T,eval=F}
-ds <- subset(Zhang, outcome=="OS")
-with(ds, forest(theta = HR, theta.ci.lb = HR.025, theta.ci.ub = HR.975, 
-                theta.slab = Study, xlab = "Hazard ratio of HER2 versus OS", refline = 1))
-```
-
-We can also generate the same plot using `metafor`:
-
-```{r,message=F,warning=F,echo=T,eval=T}
-metafor::forest(x = Zhang$HR, ci.lb = Zhang$HR.025, ci.ub = Zhang$HR.975, slab = 
-                  Zhang$Study, subset = (Zhang$outcome=="OS"),  xlab = 
-                  "Hazard ratio of HER2 versus OS", refline = 1)
-```
-
-It is easier to assess the summary effect and the presence of statistical heterogeneity.
-
-## Data preparation
-
-To facilitate any further analysis, information on the standard error of the different study effect sizes is needed. Estimates for the standard error can be obtained from the reported 95% confidence intervals [@altman_how_2011]. However, rather than estimating the standard error of the HR, we will calculate the log HR and estimate its corresponding standard error. The main reason is that the HR is a ratio measure, for which the (within-study) sampling variation can be approximated with a Normal distribution if the log scale is used.
-
-The standard error (SE) of the log hazard ratio is simply given as:
-
-$\mathrm{SE}=(\log(u)-\log(l))/(2*1.96)$
-
-where the upper and lower limits of the 95% CI are $u$ and $l$ respectively. In `R`, we have:
-
-```{r,message=F,warning=F,echo=T,eval=T}
-Zhang$logHR <- log(Zhang$HR)
-Zhang$se.logHR <- (log(Zhang$HR.975)-log(Zhang$HR.025))/(2*qnorm((1-0.05/2)))
-head(Zhang)
-```
-
-## Assessment of publication bias
-
-The presence of small-study effects is a common threat to systematic reviews and meta-analyses. Small-study effects is a generic term for the phenomenon that sometimes smaller studies show different, often stronger, effects than the large ones [@debray_detecting_2018]. One possible reason is publication bias. Previously, @altman_systematic_2001 argued that it is probable that studies showing a strong (often statistically significant) prognostic ability are more likely to be published. For this reason, it is important to evaluate the potential presence of small-study effects, which can be verified by visual inspection of the funnel plot. In this plot, the estimate of the reported effect size is plotted against a measure of precision or sample size for each included study of the meta-analysis. The premise is that the scatter of plots should reflect a funnel shape, if small-study effects do not exist (provided that effect sizes are not substantially affected by the presence of between-study heterogeneity). However, when small studies are predominately in one direction (usually the direction of larger effect sizes), asymmetry will ensue.
-
-A common approach to construct the funnel plot is to display the individual observed effect sizes on the x-axis against the corresponding standard errors on the y-axis, and to use the fixed effect summary estimate as reference value. In the absence of publication bias and heterogeneity, one would then expect to see the points forming a funnel shape, with the majority of the points falling inside of the pseudo-confidence region of the summary estimate.
-
-```{r,message=F,warning=F,echo=T,eval=T}
-res <- rma(yi = logHR, sei = se.logHR, subset = (outcome=="OS"), method = "FE", data = Zhang)
-funnel(res, xlab="Log Hazard Ratio")
-```
-
-Most study estimates fall within the pseudo-confidence region, hence there appears limited evidence for publication bias.
-
-We can formally test the presence of asymmetry in the funnel plot by evaluating whether there is an association between the estimated standard error and the estimated effect size.
-
-```{r,message=F,warning=F,echo=T,eval=T}
-regtest(x = Zhang$logHR, sei = Zhang$se.logHR, subset = (Zhang$outcome=="OS"), 
-        model = "lm", predictor = "sei")
-```
-
-```{r,message=F,warning=F,echo=F,eval=T}
-regfit = regtest(x=Zhang$logHR, sei=Zhang$se.logHR, subset=(Zhang$outcome=="OS"), model="lm", predictor="sei")
-```
-
-The P-value for funnel plot asymmetry is given as `r sprintf("%.3f", regfit$pval)`. Note that it is common to use a 10% level of significance because the number of studies in a meta-analysis is usually low and also there is evidence for funnel plot asymmetry, as the P-value is below 0.10.
-
-Funnel plot asymmetry tests can also be performed using [metamisc](https://cran.r-project.org/package=metamisc)
-
-```{r}
-ds <- subset(Zhang, outcome=="OS")
-regfit <- with(ds, fat(b=logHR, b.se=se.logHR, method="E-FIV"))
-regfit
-```
-
-Again, we can construct a funnel plot:
-
-```{r}
-plot(regfit)
-```
-
-Some caution is warranted when interpreting the results for funnel plot asymmetry tests [@debray_detecting_2018]. First, the power to detect asymmetry is often limited because meta-analyses usually do not include many studies. Second, many tests are known to yield inadequate type-I error rates or to suffer from low power. For instance, it has been demonstrated that aforementioned test to evaluate the association between the estimated standard error and effect size tends to yield type-I error rates that are too high. Finally, funnel plot asymmetry may rather be caused by heterogeneity than by publication bias.
-
-Adjust aformentioned regression test to use inverse of the total sample size (rather than the standard error) as predictor.
-
-```{r,message=F,warning=F}
-regtest(x = Zhang$logHR, sei = Zhang$se.logHR, ni = Zhang$N, 
-        subset = (Zhang$outcome=="OS"), model = "lm", 
-        predictor = "ninv")
-```
-
-From here onwards, we will assume that the potential for publication bias is neglegible, and proceed with standard meta-analysis methods.
-
-## Meta-analysis of the prognostic value of HER2
-
-Meta-analysis is an option when the identified studies are considered sufficiently robust and comparable, and requires there are at least two estimates of the same statistic across studies. A random effects approach is often essential to allow for unexplained heterogeneity across studies due to differences in their methods, time-scale, populations, cut-points, adjustment factors, and treatments.
-
-A standard random effects meta-analysis combines the study estimates of the statistic of interest (here given as the log HR of HER2) in order to estimate the average effect (denoted by $\mu$) and its standard deviation (denoted by $\tau$) across studies. If $\hat b_i$ and $\mathrm{var}(\hat b_i)$ denote the estimate and its variance in study $i$, then a general random effects meta-analysis model can be specified as:
-
-$\hat b_i \sim N(\mu, \mathrm{var}(\hat b_i) + \tau^2)$
-
-It is common to first estimate the heterogeneity parameter $\tau$ and to use the resulting value to estimate $\mu$. However, such approach does not adequately reflect the error associated with parameter estimation, especially when the number of studies is small. For this reason, alternative estimators have been proposed that simultaneously estimate $\mu$ and $\tau$. Here, we will focus on Restricted Maximum Likelihood Estimation (REML), which is implemented as default in `metafor`.
-
-```{r,message=F,warning=F,echo=T,eval=T}
-resREML <- rma(yi = logHR, sei = se.logHR, subset = (outcome=="OS"), 
-               method = "REML", slab = Study, data = Zhang)
-resREML
-# The following approach yields the same results
-# rma(yi=logHR, sei=se.logHR, subset=(outcome=="OS"), data=Zhang)
-```
-
-`r if(show.text){paste("The summary effect size is ", sprintf("%.3f", resREML$b), " and the between-study standard deviation is ", sprintf("%.3f", sqrt(resREML$tau2)), "")}`
-
-`r if(show.text){paste("The standard error of the summary effect size is ", sprintf("%.3f", sqrt(vcov(resREML))), " and the standard error of the between-study variance is ", sprintf("%.3f", resREML$se.tau2), "")}`
-
-We can calculate the summary HR and the corresponding 95% CI by back-transforming the estimate for $\mu$ and its confidence bounds:
-
-```{r,message=F,warning=F,echo=T,eval=T}
-exp(resREML$b)
-exp(c(resREML$ci.lb, resREML$ci.ub))
-```
-
-Note that the confidence interval for $\hat \mu$ is usually based on a Student T distribution to account for estimation error in $\hat \tau$.
-
-The summary HR of HER2 is statistically significant, so the hormone receptor appears to be predictive on average. In particular, an increased level of HER2 was associated with poorer survival, on average.
-
-We can also obtain the summary estimate and 95% CI for the HR of HER2 by simply using the `predict` function:
-
-```{r,message=F,warning=F,echo=T,eval=T}
-predict(resREML, transf = exp)
-```
-
-Although the summary result ($\hat \mu$) is usually the main focus of a meta-analysis, this reflects some average across studies and it may be hard to translate to clinical practice when there is large between-study heterogeneity. We can quantify the impact of between-study heterogeneity by constructing a $100(1-\alpha/2)$% prediction interval, which gives the potential true prognostic effect in a new population conditional on the meta-analysis results [@riley_interpretation_2011]. An approximate prediction interval (PI) is given as follows:
-
-$\hat \mu \pm t_{\alpha, N-2} \sqrt{\hat \tau^2 + \hat \sigma^2}$
-
-where $t_{\alpha, N-2}$ is the $100(1-\alpha/2)$% percentile of the t-distribution for $N-2$ degrees of freedom, $N$ is the number of studies, $\hat \sigma$ is the estimated standard error of $\hat \mu$, and $\hat \tau$ is the estimated between-study standard deviation. In `R`, can calculate the approximate 95% PI for $\hat \mu$ as follows:
-
-```{r,message=F,warning=F,echo=T,eval=T}
-level <- 0.05
-crit <- qt(c(level/2, 1-(level/2)), df = (resREML$k-2))
-mu <- resREML$b
-tau2 <- resREML$tau2
-sigma2 <- vcov(resREML)
-mu + crit * c(sqrt(tau2 + sigma2))
-```
-
-`r if(show.text){paste("The boundaries of the approximate 95% PI are given as ", sprintf("%.3f", exp(resREML$b + qt(level/2, df=(resREML$k-2)) * sqrt(tau2 + sigma2))), " to ", sprintf("%.3f", exp(resREML$b + qt(1-level/2, df=(resREML$k-2)) * sqrt(tau2 + sigma2))), ".")}`
-
-There is substantial heterogeneity in the prognostic value of HER 2 and its usefulness may be very limited in certain populations.
-
-A possible approach to enhance the interpretation of meta-analysis results is to calculate the probability that the prognostic effect of HER 2 will be above some useful value (e.g. a HR \> 1.5 for a binary factor, which indicates risk is increased by at least 50%). We can calculate this probability as follows:
-
-$Pr(\mathrm{HR} > 1.5) = Pr(\hat \mu > \log(1.5)) = 1 - Pr(\hat \mu \leq \log(1.5))$
-
-where $Pr(\hat \mu \leq \log(1.5))$ is approximated using a scaled Student-$t$ distribution (similar to the calculation of our prediction interval):
-
-```{r,message=F,warning=F,echo=T,eval=T}
-1-pt((log(1.5) - mu)/sqrt(tau2+sigma2), df=(resREML$k-2))
-```
-
-```{r,message=F,warning=F,echo=T,eval=T}
-probOS <- 1-pt((log(1.5) - mu)/sqrt(tau2+sigma2), df=(resREML$k-2))
-```
-
-We can also estimate this probability by means of simulation:
-
-```{r,message=F,warning=F,echo=T,eval=T}
-# Simulate 100000 new studies
-Nsim <- 1000000 
-HRsim <- exp(mu + rt(Nsim, df=(resREML$k-2))*sqrt(tau2+sigma2))
-sum(HRsim>1.5)/Nsim
-```
-
-`r if(show.text){paste("The probability is ", sprintf("%.0f%%", probOS*100), ", suggesting that using HER2 will often provide adequate discrimination.")}`
-
-## Multivariate meta-analysis
-
-In previous section, we used 14 of the 16 identified studies to evaluate the prognostic effect of HER2 on overall survival. Two studies were excluded from the meta-analysis because they did not provide direct evidence about overall survival. This is unwelcome, especially if the participants are otherwise representative of the population, clinical settings, and condition of interest [@riley_multivariate_2017]. For this reason, we here discuss how multivariate meta-analysis methods can be used to include studies that do not necessarliy provide direct evidence on the outcome of interest. Briefly, multivariate meta-analysis methods simultaneously analyse multiple outcomes, which allows more studies to contribute towards each outcome.
-
-For instance, 6 studies in the review of @zhang_prognostic_2015 assessed the relation between HER2 and PFS. The corresponding hazard ratios are given below:
-
-```{r kablePFS, echo=F}
-#table.zhang.PFS
-```
-
-We can therefore combine this evidence with the HRs on overall survival to obtain more accurate estimates for the prognostic value of the hormone receptor HER2 with respect to PFS *and* OS.
-
-The hormone receptor HER2 also has prognostic value for progression-free survival. Furthermore, the reported HRs appear to be much more homogeneous across studies.
-
-Perform a random-effects meta-analysis and construct a forest plot.
-
-```{r,message=F,warning=F,echo=T,eval=T}
-resPFS <- rma(yi = Zhang$logHR, sei = Zhang$se.logHR, subset = (Zhang$outcome=="PFS"), 
-              method = "REML", slab = Zhang$Study)
-resPFS
-metafor::forest(resPFS, transf = exp, xlab = "Hazard Ratio", refline = 1)
-```
-
-Note that the univariate meta-analysis for PFS is based on merely 6 studies, and that the univariate meta-analysis for OS was based on 14 studies. We can now employ multivariate meta-analysis to ''borrow information'' from the 4 studies that report prognostic effects for both PFS and OS (Togami 2012, Mori 2010, Jongen 2009-2 and Backe 1997). This, in turn, allows all studies to contribute on the summary effect for HER2 in both outcomes.
-
-Before we can proceed with the model fitting, we need to construct the full (block-diagonal) variance-covariance for all studies from the two outcomes. We can do this using the `diag()` function in one line of code:
-
-```{r,message=F,warning=F,echo=T,eval=T}
-V <- diag(Zhang$se.logHR**2)
-```
-
-A multivariate random-effects model can now be used to meta-analyze the two outcomes simultaneously.
-
-```{r,message=F,warning=F,echo=T,eval=T}
-res.MV <- rma.mv(yi = logHR, V, mods = ~ outcome - 1, random = ~ outcome | Study, 
-                 struct = "UN", data = Zhang, method = "REML")
-res.MV
-```
-
-`r if(show.text){paste("The new summary estimate is ", sprintf("%.3f", (res.MV$beta["outcomeOS",])), " (versus ", sprintf("%.3f", resREML$b), ") with an SE of ", sprintf("%.3f", sqrt(vcov(res.MV)["outcomeOS","outcomeOS"])), "(versus ", sprintf("%.3f", sqrt(vcov(resREML))), "). Hence, we have gained some precision. ")}`
-
-`r if(show.text){paste("The new summary estimate is ", sprintf("%.3f", (res.MV$beta["outcomePFS",])), " (versus ", sprintf("%.3f", resPFS$b), ") with an SE of ", sprintf("%.3f", sqrt(vcov(res.MV)["outcomePFS","outcomePFS"])), "(versus ", sprintf("%.3f", sqrt(vcov(resPFS))), "). We have lost precision, possibly because the presence of between-study heterogeneity is now more apparent, thereby complicating estimation of the summary effect.")}`
-
-Note that estimation of between-study heterogeneity was difficult for progression-free survival due to the limited number of studies. In particular, we found $\tau^2$= `r sprintf("%.3f", resPFS$tau2)` with an SE of `r sprintf("%.3f", resPFS$se.tau2)`. In the multivariate meta-analysis, the estimated between-study variance for PFS was much larger ($\tau^2$=`r sprintf("%.3f", res.MV$tau2[2])`), and based on all 16 rather than merely 6 studies.
-
-In summary, the multivariate meta-analysis approach is often helpful as it reduces the need to exclude relevant studies from a meta-analysis, thereby decreasing the risk of bias (e.g. due to selective outcome reporting) and potentially improving precision. As indicated by @riley_multivariate_2017, a multivariate meta-analysis of multiple outcomes is most beneficial when the outcomes are highly correlated and the percentage of studies with missing outcomes is large.
-
-
-# References
+---
+title: "Meta-analysis of prognostic factor studies"
+author:
+  - name: Thomas Debray
+    orcid: 0000-0002-1790-2719
+date: last-modified
+date-format: "[Last Updated on] DD MMMM, YYYY"
+params:
+  dir_data: "data"
+  dir_r: "R_TD"
+format:
+  html:
+    toc: true
+    number-sections: true
+    css: styles.css
+bibliography: "https://api.citedrive.com/bib/093a08ac-0337-479e-a147-17929fa7f7b0/references.bib?x=eyJpZCI6ICIwOTNhMDhhYy0wMzM3LTQ3OWUtYTE0Ny0xNzkyOWZhN2Y3YjAiLCAidXNlciI6ICI0MDA5IiwgInNpZ25hdHVyZSI6ICIwOWJiZjg2ZDJiNjk1NmQ1ODVhZTMxZmZiODI0OTgyNWYxYTEzNTQ5MzdiZjBiNzJiZjY3MTlhMTE2NGJhNzQ3In0=/bibliography.bib"
+---
+
+```{r}
+#| echo: false
+#| message: false
+#| warning: false
+library(knitr)
+
+# Use GitHub actions to automatically build and publish the site every time you make a change. The easiest way to set this up is to run usethis::use_pkgdown_github_pages().
+
+show.text = TRUE
+```
+
+# Introduction
+
+An important task in medical research is the identification and assessment of prognostic factors. A prognostic factor is any measure that, among people with a given health condition (that is, a startpoint), is associated with a subsequent clinical outcome (an endpoint) [@riley_prognosis_2013]. Commonly investigated prognostic factors include simple measures such as age, sex, and size of tumor, but they can also include more complex factors such as abnormal levels of proteins or catecholamines and unusual genetic mutations [@sauerbrei_evidence-based_2006]. They can be useful as modifiable targets for interventions to improve outcomes, building blocks for prognostic models, or predictors of differential treatment response.
+
+Over the past few decades, numerous prognostic factor studies have been published in the medical literature. For example, @riley_reporting_2003 identified 260 studies reporting associations for 130 different tumour markers for neuroblastoma. More recently, @tzoulaki_assessment_2009 identified 79 studies reporting the added value of 86 different markers when added to the Framingham risk score. Despite this huge research effort, the prognostic value of most traditional factors under discussion is uncertain and the usefulness of many specific markers, prognostic indices, and classification schemes is still unproven [@sauerbrei_evidence-based_2006].
+
+The aim of this introduction is to illustrate how to summarize the results from multiple prognostic factor studies and how to investigate sources of between-study heterogeneity. 
+
+In this practical we will make use of the R packages `metamisc` and `metafor`. The [https://cran.r-project.org/package=metafor](metafor) package provides a comprehensive collection of functions for conducting meta-analyses in R. The [https://cran.r-project.org/package=metamisc](metamisc) package provides additional functions to facilitate meta-analysis of prognosis studies. We can load these packages as follows:
+
+```{r}
+#| message: false
+#| warning: false
+library(metafor)
+library(metamisc)
+```
+
+# Meta-analysis of prognostic effect estimates
+
+## Background information
+
+Endometrial cancer (EC) is the fourth most common malignancy in women and the most common gynecologic cancer. Overall, the 5-year survival rates for EC are approximately 78--90% for stage I, 74% for stage II, 36--57% for stage III, and 20% for stage IV. Such poor outcomes raise an urgent requirement that more accurate prognosis and predictive markers should be applied for EC to guide the therapy and monitor the disease progress for individual patients.
+
+Several biological molecules have been proposed as prognostic biomarkers in EC. Among them, hormone receptors such as estrogen receptors (ER), progesterone receptors (PR), and human epidermal growth factor receptor 2 (HER2) are attractive because of their physiological functions. Recently, Zhang \emph{et al.} conducted a systematic review to evaluate the overall risk of these hormone receptors for endometrial cancer survival [@zhang_prognostic_2015]. This review included 16 studies recruiting 1764 patients for HER2. We can load the data from all 16 studies in R as follows:
+
+```{r}
+data(Zhang)
+```
+
+This creates an object `Zhang` that contains the summary data from 14 studies reporting on overall survival (OS) and from 6 studies reporting on progression-free survival (PFS). As a result, a total of 20 estimates are available for the hazard ratio of HER2:
+
+```{r}
+Zhang
+```
+
+For each study, estimates of effect were retrieved as follows. The simplest method consisted in the direct collection of HR, odds ratio or risk ratio, and their 95% CI from the original article, with an HR of less than 1 being associated with a better outcome. If not available, the total numbers of observed deaths/cancer recurrences and the numbers of patients in each group were extracted to calculate HR. When data were only available as Kaplan-Meier curves, data were extracted from the graphical survival plots, and estimation of the HR was then performed using the described method.
+
+A total of 14 studies assessed the relation between HER2 and overall survival. The corresponding hazard ratios (HR) are given below:
+
+```{r kableOS}
+Zhang
+```
+ EEC endometrioid endometrial cancer, UPSC uterine papillary serous carcinoma
+
+The hormone receptor HER2 has prognostic value for survival but is prone to substantial between-study heterogeneity. Hazard ratios appear much larger for studies conducted in the USA
+
+The heterogeneity of the population was probably due to the difference in the baseline characteristics of patients (age, tumor stage, race, methodology for assessing HRs expression, or country), the cutoff value of markers, the undergoing treatment, the duration of follow-up, and others. Also, HRs are unadjusted, and therefore highly prone to heterogeneity in patient spectrum.
+
+
+Note that the total number of studies is indeed 16:
+
+```{r}
+length(unique(Zhang$Study))
+```
+
+## Exploratory analyses
+
+It is often helpful to display the effect sizes of all studies in a forest plot. The forest plot for overall survival can then be generated as follows. Briefly, we here provide information on the estimated hazard ratios (via the argument `theta`), as well as the bounds of their 95% confidence interval (via `theta.ci.lb` and `theta.ci.ub`). Further, we only include the 14 estimates for overall survival.
+
+```{r,message=F,warning=F,echo=T,eval=F}
+ds <- subset(Zhang, outcome=="OS")
+with(ds, forest(theta = HR, theta.ci.lb = HR.025, theta.ci.ub = HR.975, 
+                theta.slab = Study, xlab = "Hazard ratio of HER2 versus OS", refline = 1))
+```
+
+We can also generate the same plot using `metafor`:
+
+```{r,message=F,warning=F,echo=T,eval=T}
+metafor::forest(x = Zhang$HR, ci.lb = Zhang$HR.025, ci.ub = Zhang$HR.975, slab = 
+                  Zhang$Study, subset = (Zhang$outcome=="OS"),  xlab = 
+                  "Hazard ratio of HER2 versus OS", refline = 1)
+```
+
+It is easier to assess the summary effect and the presence of statistical heterogeneity.
+
+## Data preparation
+
+To facilitate any further analysis, information on the standard error of the different study effect sizes is needed. Estimates for the standard error can be obtained from the reported 95% confidence intervals [@altman_how_2011]. However, rather than estimating the standard error of the HR, we will calculate the log HR and estimate its corresponding standard error. The main reason is that the HR is a ratio measure, for which the (within-study) sampling variation can be approximated with a Normal distribution if the log scale is used.
+
+The standard error (SE) of the log hazard ratio is simply given as:
+
+$\mathrm{SE}=(\log(u)-\log(l))/(2*1.96)$
+
+where the upper and lower limits of the 95% CI are $u$ and $l$ respectively. In `R`, we have:
+
+```{r,message=F,warning=F,echo=T,eval=T}
+Zhang$logHR <- log(Zhang$HR)
+Zhang$se.logHR <- (log(Zhang$HR.975)-log(Zhang$HR.025))/(2*qnorm((1-0.05/2)))
+head(Zhang)
+```
+
+## Assessment of publication bias
+
+The presence of small-study effects is a common threat to systematic reviews and meta-analyses. Small-study effects is a generic term for the phenomenon that sometimes smaller studies show different, often stronger, effects than the large ones [@debray_detecting_2018]. One possible reason is publication bias. Previously, @altman_systematic_2001 argued that it is probable that studies showing a strong (often statistically significant) prognostic ability are more likely to be published. For this reason, it is important to evaluate the potential presence of small-study effects, which can be verified by visual inspection of the funnel plot. In this plot, the estimate of the reported effect size is plotted against a measure of precision or sample size for each included study of the meta-analysis. The premise is that the scatter of plots should reflect a funnel shape, if small-study effects do not exist (provided that effect sizes are not substantially affected by the presence of between-study heterogeneity). However, when small studies are predominately in one direction (usually the direction of larger effect sizes), asymmetry will ensue.
+
+A common approach to construct the funnel plot is to display the individual observed effect sizes on the x-axis against the corresponding standard errors on the y-axis, and to use the fixed effect summary estimate as reference value. In the absence of publication bias and heterogeneity, one would then expect to see the points forming a funnel shape, with the majority of the points falling inside of the pseudo-confidence region of the summary estimate.
+
+```{r,message=F,warning=F,echo=T,eval=T}
+res <- rma(yi = logHR, sei = se.logHR, subset = (outcome=="OS"), method = "FE", data = Zhang)
+funnel(res, xlab="Log Hazard Ratio")
+```
+
+Most study estimates fall within the pseudo-confidence region, hence there appears limited evidence for publication bias.
+
+We can formally test the presence of asymmetry in the funnel plot by evaluating whether there is an association between the estimated standard error and the estimated effect size.
+
+```{r,message=F,warning=F,echo=T,eval=T}
+regtest(x = Zhang$logHR, sei = Zhang$se.logHR, subset = (Zhang$outcome=="OS"), 
+        model = "lm", predictor = "sei")
+```
+
+```{r,message=F,warning=F,echo=F,eval=T}
+regfit = regtest(x=Zhang$logHR, sei=Zhang$se.logHR, subset=(Zhang$outcome=="OS"), model="lm", predictor="sei")
+```
+
+The P-value for funnel plot asymmetry is given as `r sprintf("%.3f", regfit$pval)`. Note that it is common to use a 10% level of significance because the number of studies in a meta-analysis is usually low and also there is evidence for funnel plot asymmetry, as the P-value is below 0.10.
+
+Funnel plot asymmetry tests can also be performed using [metamisc](https://cran.r-project.org/package=metamisc)
+
+```{r}
+ds <- subset(Zhang, outcome=="OS")
+regfit <- with(ds, fat(b=logHR, b.se=se.logHR, method="E-FIV"))
+regfit
+```
+
+Again, we can construct a funnel plot:
+
+```{r}
+plot(regfit)
+```
+
+Some caution is warranted when interpreting the results for funnel plot asymmetry tests [@debray_detecting_2018]. First, the power to detect asymmetry is often limited because meta-analyses usually do not include many studies. Second, many tests are known to yield inadequate type-I error rates or to suffer from low power. For instance, it has been demonstrated that aforementioned test to evaluate the association between the estimated standard error and effect size tends to yield type-I error rates that are too high. Finally, funnel plot asymmetry may rather be caused by heterogeneity than by publication bias.
+
+Adjust aformentioned regression test to use inverse of the total sample size (rather than the standard error) as predictor.
+
+```{r,message=F,warning=F}
+regtest(x = Zhang$logHR, sei = Zhang$se.logHR, ni = Zhang$N, 
+        subset = (Zhang$outcome=="OS"), model = "lm", 
+        predictor = "ninv")
+```
+
+From here onwards, we will assume that the potential for publication bias is neglegible, and proceed with standard meta-analysis methods.
+
+## Meta-analysis of the prognostic value of HER2
+
+Meta-analysis is an option when the identified studies are considered sufficiently robust and comparable, and requires there are at least two estimates of the same statistic across studies. A random effects approach is often essential to allow for unexplained heterogeneity across studies due to differences in their methods, time-scale, populations, cut-points, adjustment factors, and treatments.
+
+A standard random effects meta-analysis combines the study estimates of the statistic of interest (here given as the log HR of HER2) in order to estimate the average effect (denoted by $\mu$) and its standard deviation (denoted by $\tau$) across studies. If $\hat b_i$ and $\mathrm{var}(\hat b_i)$ denote the estimate and its variance in study $i$, then a general random effects meta-analysis model can be specified as:
+
+$\hat b_i \sim N(\mu, \mathrm{var}(\hat b_i) + \tau^2)$
+
+It is common to first estimate the heterogeneity parameter $\tau$ and to use the resulting value to estimate $\mu$. However, such approach does not adequately reflect the error associated with parameter estimation, especially when the number of studies is small. For this reason, alternative estimators have been proposed that simultaneously estimate $\mu$ and $\tau$. Here, we will focus on Restricted Maximum Likelihood Estimation (REML), which is implemented as default in `metafor`.
+
+```{r,message=F,warning=F,echo=T,eval=T}
+resREML <- rma(yi = logHR, sei = se.logHR, subset = (outcome=="OS"), 
+               method = "REML", slab = Study, data = Zhang)
+resREML
+# The following approach yields the same results
+# rma(yi=logHR, sei=se.logHR, subset=(outcome=="OS"), data=Zhang)
+```
+
+`r if(show.text){paste("The summary effect size is ", sprintf("%.3f", resREML$b), " and the between-study standard deviation is ", sprintf("%.3f", sqrt(resREML$tau2)), "")}`
+
+`r if(show.text){paste("The standard error of the summary effect size is ", sprintf("%.3f", sqrt(vcov(resREML))), " and the standard error of the between-study variance is ", sprintf("%.3f", resREML$se.tau2), "")}`
+
+We can calculate the summary HR and the corresponding 95% CI by back-transforming the estimate for $\mu$ and its confidence bounds:
+
+```{r,message=F,warning=F,echo=T,eval=T}
+exp(resREML$b)
+exp(c(resREML$ci.lb, resREML$ci.ub))
+```
+
+Note that the confidence interval for $\hat \mu$ is usually based on a Student T distribution to account for estimation error in $\hat \tau$.
+
+The summary HR of HER2 is statistically significant, so the hormone receptor appears to be predictive on average. In particular, an increased level of HER2 was associated with poorer survival, on average.
+
+We can also obtain the summary estimate and 95% CI for the HR of HER2 by simply using the `predict` function:
+
+```{r,message=F,warning=F,echo=T,eval=T}
+predict(resREML, transf = exp)
+```
+
+Although the summary result ($\hat \mu$) is usually the main focus of a meta-analysis, this reflects some average across studies and it may be hard to translate to clinical practice when there is large between-study heterogeneity. We can quantify the impact of between-study heterogeneity by constructing a $100(1-\alpha/2)$% prediction interval, which gives the potential true prognostic effect in a new population conditional on the meta-analysis results [@riley_interpretation_2011]. An approximate prediction interval (PI) is given as follows:
+
+$\hat \mu \pm t_{\alpha, N-2} \sqrt{\hat \tau^2 + \hat \sigma^2}$
+
+where $t_{\alpha, N-2}$ is the $100(1-\alpha/2)$% percentile of the t-distribution for $N-2$ degrees of freedom, $N$ is the number of studies, $\hat \sigma$ is the estimated standard error of $\hat \mu$, and $\hat \tau$ is the estimated between-study standard deviation. In `R`, can calculate the approximate 95% PI for $\hat \mu$ as follows:
+
+```{r,message=F,warning=F,echo=T,eval=T}
+level <- 0.05
+crit <- qt(c(level/2, 1-(level/2)), df = (resREML$k-2))
+mu <- resREML$b
+tau2 <- resREML$tau2
+sigma2 <- vcov(resREML)
+mu + crit * c(sqrt(tau2 + sigma2))
+```
+
+`r if(show.text){paste("The boundaries of the approximate 95% PI are given as ", sprintf("%.3f", exp(resREML$b + qt(level/2, df=(resREML$k-2)) * sqrt(tau2 + sigma2))), " to ", sprintf("%.3f", exp(resREML$b + qt(1-level/2, df=(resREML$k-2)) * sqrt(tau2 + sigma2))), ".")}`
+
+There is substantial heterogeneity in the prognostic value of HER 2 and its usefulness may be very limited in certain populations.
+
+A possible approach to enhance the interpretation of meta-analysis results is to calculate the probability that the prognostic effect of HER 2 will be above some useful value (e.g. a HR \> 1.5 for a binary factor, which indicates risk is increased by at least 50%). We can calculate this probability as follows:
+
+$Pr(\mathrm{HR} > 1.5) = Pr(\hat \mu > \log(1.5)) = 1 - Pr(\hat \mu \leq \log(1.5))$
+
+where $Pr(\hat \mu \leq \log(1.5))$ is approximated using a scaled Student-$t$ distribution (similar to the calculation of our prediction interval):
+
+```{r,message=F,warning=F,echo=T,eval=T}
+1-pt((log(1.5) - mu)/sqrt(tau2+sigma2), df=(resREML$k-2))
+```
+
+```{r,message=F,warning=F,echo=T,eval=T}
+probOS <- 1-pt((log(1.5) - mu)/sqrt(tau2+sigma2), df=(resREML$k-2))
+```
+
+We can also estimate this probability by means of simulation:
+
+```{r,message=F,warning=F,echo=T,eval=T}
+# Simulate 100000 new studies
+Nsim <- 1000000 
+HRsim <- exp(mu + rt(Nsim, df=(resREML$k-2))*sqrt(tau2+sigma2))
+sum(HRsim>1.5)/Nsim
+```
+
+`r if(show.text){paste("The probability is ", sprintf("%.0f%%", probOS*100), ", suggesting that using HER2 will often provide adequate discrimination.")}`
+
+## Multivariate meta-analysis
+
+In previous section, we used 14 of the 16 identified studies to evaluate the prognostic effect of HER2 on overall survival. Two studies were excluded from the meta-analysis because they did not provide direct evidence about overall survival. This is unwelcome, especially if the participants are otherwise representative of the population, clinical settings, and condition of interest [@riley_multivariate_2017]. For this reason, we here discuss how multivariate meta-analysis methods can be used to include studies that do not necessarliy provide direct evidence on the outcome of interest. Briefly, multivariate meta-analysis methods simultaneously analyse multiple outcomes, which allows more studies to contribute towards each outcome.
+
+For instance, 6 studies in the review of @zhang_prognostic_2015 assessed the relation between HER2 and PFS. The corresponding hazard ratios are given below:
+
+```{r kablePFS, echo=F}
+#table.zhang.PFS
+```
+
+We can therefore combine this evidence with the HRs on overall survival to obtain more accurate estimates for the prognostic value of the hormone receptor HER2 with respect to PFS *and* OS.
+
+The hormone receptor HER2 also has prognostic value for progression-free survival. Furthermore, the reported HRs appear to be much more homogeneous across studies.
+
+Perform a random-effects meta-analysis and construct a forest plot.
+
+```{r,message=F,warning=F,echo=T,eval=T}
+resPFS <- rma(yi = Zhang$logHR, sei = Zhang$se.logHR, subset = (Zhang$outcome=="PFS"), 
+              method = "REML", slab = Zhang$Study)
+resPFS
+metafor::forest(resPFS, transf = exp, xlab = "Hazard Ratio", refline = 1)
+```
+
+Note that the univariate meta-analysis for PFS is based on merely 6 studies, and that the univariate meta-analysis for OS was based on 14 studies. We can now employ multivariate meta-analysis to ''borrow information'' from the 4 studies that report prognostic effects for both PFS and OS (Togami 2012, Mori 2010, Jongen 2009-2 and Backe 1997). This, in turn, allows all studies to contribute on the summary effect for HER2 in both outcomes.
+
+Before we can proceed with the model fitting, we need to construct the full (block-diagonal) variance-covariance for all studies from the two outcomes. We can do this using the `diag()` function in one line of code:
+
+```{r,message=F,warning=F,echo=T,eval=T}
+V <- diag(Zhang$se.logHR**2)
+```
+
+A multivariate random-effects model can now be used to meta-analyze the two outcomes simultaneously.
+
+```{r,message=F,warning=F,echo=T,eval=T}
+res.MV <- rma.mv(yi = logHR, V, mods = ~ outcome - 1, random = ~ outcome | Study, 
+                 struct = "UN", data = Zhang, method = "REML")
+res.MV
+```
+
+`r if(show.text){paste("The new summary estimate is ", sprintf("%.3f", (res.MV$beta["outcomeOS",])), " (versus ", sprintf("%.3f", resREML$b), ") with an SE of ", sprintf("%.3f", sqrt(vcov(res.MV)["outcomeOS","outcomeOS"])), "(versus ", sprintf("%.3f", sqrt(vcov(resREML))), "). Hence, we have gained some precision. ")}`
+
+`r if(show.text){paste("The new summary estimate is ", sprintf("%.3f", (res.MV$beta["outcomePFS",])), " (versus ", sprintf("%.3f", resPFS$b), ") with an SE of ", sprintf("%.3f", sqrt(vcov(res.MV)["outcomePFS","outcomePFS"])), "(versus ", sprintf("%.3f", sqrt(vcov(resPFS))), "). We have lost precision, possibly because the presence of between-study heterogeneity is now more apparent, thereby complicating estimation of the summary effect.")}`
+
+Note that estimation of between-study heterogeneity was difficult for progression-free survival due to the limited number of studies. In particular, we found $\tau^2$= `r sprintf("%.3f", resPFS$tau2)` with an SE of `r sprintf("%.3f", resPFS$se.tau2)`. In the multivariate meta-analysis, the estimated between-study variance for PFS was much larger ($\tau^2$=`r sprintf("%.3f", res.MV$tau2[2])`), and based on all 16 rather than merely 6 studies.
+
+In summary, the multivariate meta-analysis approach is often helpful as it reduces the need to exclude relevant studies from a meta-analysis, thereby decreasing the risk of bias (e.g. due to selective outcome reporting) and potentially improving precision. As indicated by @riley_multivariate_2017, a multivariate meta-analysis of multiple outcomes is most beneficial when the outcomes are highly correlated and the percentage of studies with missing outcomes is large.
+
+
+# References
diff --git a/vignettes/mapf_files/figure-html/unnamed-chunk-12-1.png b/vignettes/mapf_files/figure-html/unnamed-chunk-12-1.png
old mode 100644
new mode 100755
diff --git a/vignettes/mapf_files/figure-html/unnamed-chunk-13-1.png b/vignettes/mapf_files/figure-html/unnamed-chunk-13-1.png
old mode 100644
new mode 100755
diff --git a/vignettes/mapf_files/figure-html/unnamed-chunk-22-1.png b/vignettes/mapf_files/figure-html/unnamed-chunk-22-1.png
old mode 100644
new mode 100755
diff --git a/vignettes/mapf_files/figure-html/unnamed-chunk-6-1.png b/vignettes/mapf_files/figure-html/unnamed-chunk-6-1.png
old mode 100644
new mode 100755
diff --git a/vignettes/mapf_files/figure-html/unnamed-chunk-7-1.png b/vignettes/mapf_files/figure-html/unnamed-chunk-7-1.png
old mode 100644
new mode 100755
diff --git a/vignettes/mapf_files/figure-html/unnamed-chunk-8-1.png b/vignettes/mapf_files/figure-html/unnamed-chunk-8-1.png
old mode 100644
new mode 100755
diff --git a/vignettes/mapf_files/figure-html/unnamed-chunk-9-1.png b/vignettes/mapf_files/figure-html/unnamed-chunk-9-1.png
old mode 100644
new mode 100755
diff --git a/vignettes/mapf_files/libs/bootstrap/bootstrap-icons.css b/vignettes/mapf_files/libs/bootstrap/bootstrap-icons.css
old mode 100644
new mode 100755
diff --git a/vignettes/mapf_files/libs/bootstrap/bootstrap-icons.woff b/vignettes/mapf_files/libs/bootstrap/bootstrap-icons.woff
old mode 100644
new mode 100755
diff --git a/vignettes/mapf_files/libs/bootstrap/bootstrap.min.css b/vignettes/mapf_files/libs/bootstrap/bootstrap.min.css
old mode 100644
new mode 100755
diff --git a/vignettes/mapf_files/libs/bootstrap/bootstrap.min.js b/vignettes/mapf_files/libs/bootstrap/bootstrap.min.js
old mode 100644
new mode 100755
diff --git a/vignettes/mapf_files/libs/clipboard/clipboard.min.js b/vignettes/mapf_files/libs/clipboard/clipboard.min.js
old mode 100644
new mode 100755
diff --git a/vignettes/mapf_files/libs/quarto-html/anchor.min.js b/vignettes/mapf_files/libs/quarto-html/anchor.min.js
old mode 100644
new mode 100755
diff --git a/vignettes/mapf_files/libs/quarto-html/popper.min.js b/vignettes/mapf_files/libs/quarto-html/popper.min.js
old mode 100644
new mode 100755
diff --git a/vignettes/mapf_files/libs/quarto-html/quarto-syntax-highlighting.css b/vignettes/mapf_files/libs/quarto-html/quarto-syntax-highlighting.css
old mode 100644
new mode 100755
diff --git a/vignettes/mapf_files/libs/quarto-html/quarto.js b/vignettes/mapf_files/libs/quarto-html/quarto.js
old mode 100644
new mode 100755
diff --git a/vignettes/mapf_files/libs/quarto-html/tippy.css b/vignettes/mapf_files/libs/quarto-html/tippy.css
old mode 100644
new mode 100755
diff --git a/vignettes/mapf_files/libs/quarto-html/tippy.umd.min.js b/vignettes/mapf_files/libs/quarto-html/tippy.umd.min.js
old mode 100644
new mode 100755