Merge branch 'main' into devKim

OHDSI · Sep 11, 2024 · 1658306 · 1658306
2 parents 5a08988 + 9bbb94f
commit 1658306
Show file tree

Hide file tree

Showing 10 changed files with 868 additions and 330 deletions.
diff --git a/.gitignore b/.gitignore
@@ -10,3 +10,4 @@ inst/doc
 docs
 /sql
 /sql_1
+extras/data/*
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -55,7 +55,12 @@ Suggests:
     odbc,
     CohortCharacteristics,
     ggplot2,
-    DiagrammeR
+    DiagrammeR,
+    visOmopResults,
+    gt,
+    scales,
+    here,
+    ggpubr
 Config/testthat/edition: 3
 Config/testthat/parallel: true
 VignetteBuilder: knitr

diff --git a/R/intersectCohorts.R b/R/intersectCohorts.R
@@ -67,14 +67,15 @@ intersectCohorts <- function(cohort,
 
   # get intersections between cohorts
   tblName <- omopgenerics::uniqueTableName(prefix = uniquePrefix)
+  lowerWindow <- ifelse(gap != 0, -gap, gap)
   cohortOut <- cohort %>%
     dplyr::filter(.data$cohort_definition_id %in% .env$cohortId) %>%
     dplyr::select(-"cohort_definition_id") %>%
     splitOverlap(by = "subject_id", name = tblName, tmp = paste0(tblName)) |>
     PatientProfiles::addCohortIntersectFlag(
       targetCohortTable = omopgenerics::tableName(cohort),
       targetCohortId = cohortId,
-      window = c(0, 0),
+      window = c(lowerWindow, gap),
       nameStyle = "{cohort_name}",
       name = tblName
     )
@@ -131,12 +132,12 @@ intersectCohorts <- function(cohort,
     dplyr::inner_join(cdm[[setName]], by = cohortNames) %>%
     dplyr::select("cohort_definition_id", "subject_id",
                   "cohort_start_date", "cohort_end_date") %>%
-    dplyr::compute(name = name, temporary = FALSE)
+    dplyr::compute(name = tblName, temporary = FALSE)
   if (cohortOut |> dplyr::tally() |> dplyr::pull("n") > 0) {
     class(cohortOut) <- c(class(cohortOut), "cohort_table")
     cohortOut <- cohortOut %>%
-      dplyr::compute(name = name, temporary = FALSE) |>
-      joinOverlap(name = name, gap = gap)
+      dplyr::compute(name = tblName, temporary = FALSE) |>
+      joinOverlap(name = tblName, gap = gap)
   }
 
   # attributes
@@ -192,19 +193,29 @@ intersectCohorts <- function(cohort,
     )))
 
   # intersect cohort
-  cdm[[name]] <- omopgenerics::newCohortTable(
-    table = cohortOut,
-    cohortSetRef = cohSet,
-    cohortAttritionRef = intersectAttrition,
-    cohortCodelistRef = intersectCodelist,
-    .softValidation = FALSE
-  )
-
   if (keepOriginalCohorts) {
-    cdm <- bind(cdm[[name]], originalCohorts, name = name)
+    cohortOut <- omopgenerics::newCohortTable(
+      table = cohortOut,
+      cohortSetRef = cohSet,
+      cohortAttritionRef = intersectAttrition,
+      cohortCodelistRef = intersectCodelist,
+      .softValidation = FALSE
+    )
+    cdm <- bind(originalCohorts, cohortOut, name = name)
+  } else {
+    cohortOut <- cohortOut %>%
+      dplyr::compute(name = name, temporary = FALSE)
+    cdm[[name]] <- omopgenerics::newCohortTable(
+      table = cohortOut,
+      cohortSetRef = cohSet,
+      cohortAttritionRef = intersectAttrition,
+      cohortCodelistRef = intersectCodelist,
+      .softValidation = FALSE
+    )
   }
 
   CDMConnector::dropTable(cdm, name = dplyr::starts_with(uniquePrefix))
+  CDMConnector::dropTable(cdm, name = tblName)
 
   return(cdm[[name]])
 }

diff --git a/R/sysdata.rda b/R/sysdata.rda
diff --git a/data-raw/domainsData.R b/data-raw/domainsData.R
@@ -10,5 +10,3 @@ domainsData <- dplyr::tribble(
   "visit",       "visit_occurrence",     "visit_concept_id",       "visit_source_concept_id",        "visit_start_date",           "visit_end_date",
   "device",      "device_exposure",      "device_concept_id",      "device_source_concept_id",       "device_exposure_start_date", "device_exposure_end_date"
 )
-
-usethis::use_data(domainsData, internal = TRUE, overwrite = TRUE)
diff --git a/data-raw/getBenchmarkResults.R b/data-raw/getBenchmarkResults.R
@@ -0,0 +1,54 @@
+library(readr)
+library(omopgenerics)
+library(here)
+library(dplyr)
+library(tidyr)
+
+readData <- function(path) {
+  zipFiles <- list.files(path = path, pattern = ".zip")
+  tempfolder <- tempdir()
+  data <- NULL
+  for (file in zipFiles) {
+    file <- file.path(path, file)
+    fname = unzip(file, list = TRUE)$Name
+    fname <- fname[tools::file_ext(fname) == "csv"]
+    unzip(file, files = fname, exdir = tempfolder, overwrite = TRUE)
+    files <- file.path(tempfolder, fname)
+    data <- c(data, readFiles(files))
+  }
+  return(data)
+}
+
+readFiles <- function(files) {
+  data <- list()
+  for (file in files) {
+    data[[file]] <- readr::read_csv(file, col_types = readr::cols(.default = readr::col_character()))
+    if (all(colnames(data[[file]]) %in% omopgenerics::resultColumns()) & "settings" %in% data[[file]]$variable_name) {
+      data[[file]] <- data[[file]] |> omopgenerics::newSummarisedResult()
+    }
+  }
+  names(data) <- basename(tools::file_path_sans_ext(names(data)))
+  return(data)
+}
+
+mergeData <- function(data, patterns) {
+  x <- list()
+  for (pat in patterns) {
+    dataSubset <- data[grepl(pat, names(data))]
+    srExp <- length(dataSubset)
+    srObs <- sum(lapply(data[grepl(pat, names(data))], class) |> unlist() == "summarised_result")
+    if (srObs > 0) {
+      if (srObs == srExp) {
+        x[[pat]] <- dataSubset %>% omopgenerics::bind()
+      } else {
+        cli::cli_abort("Not all results with pattern {pat} have class summarised result.")
+      }
+    }  else {
+      x[[pat]] <- dataSubset %>% dplyr::bind_rows() %>% distinct()
+    }
+  }
+  return(x)
+}
+
+resultPatterns <- c("time", "comparison", "details", "omop", "index_counts", "sql_indexes")
+benchmarkData <- readData(here::here("data-raw", "data")) %>% mergeData(resultPatterns)
diff --git a/data-raw/internalData.R b/data-raw/internalData.R
@@ -0,0 +1,3 @@
+source(here::here("data-raw", "domainsData.R"))
+source(here::here("data-raw", "getBenchmarkResults.R"))
+usethis::use_data(domainsData, benchmarkData, internal = TRUE, overwrite = TRUE)