diff --git a/lib/R/R/context.R b/lib/R/R/context.R index b43c9dbc0a..9565afbc8a 100644 --- a/lib/R/R/context.R +++ b/lib/R/R/context.R @@ -14,29 +14,34 @@ # limitations under the License. -#' The type of storage to use for the terminology cache +#' Terminology cache storage type +#' +#' The type of storage to use for the terminology cache. +#' +#' The following values are supported: +#' \itemize{ +#' \item \code{MEMORY} - Use an in-memory cache +#' \item \code{DISK} - Use a disk-based cache +#' } +#' #' @export StorageType <- list( MEMORY = "memory", DISK = "disk" ) - -#' Pathling context lifecycle management functions. +#' Create or retrieve the Pathling context #' -#' @rdname pathling_context -#' @name pathling_context -#' -#' @description -#' \code{pathling_connect()} creates a PathlingContext with the given configuration options. +#' Creates a Pathling context with the given configuration options. #' -#' @details -#' If no SparkSession is provided, and there is not one already present in this process - a new -#' SparkSession will be created. +#' If no Spark session is provided and there is not one already present in this process, a new +#' one will be created. #' #' If a SparkSession is not provided, and one is already running within the current process, it -#' will be reused - and it is assumed that the Pathling library API JAR is already on the -#' classpath. If you are running your own cluster, make sure it is on the list of packages. +#' will be reused. +#' +#' It is assumed that the Pathling library API JAR is already on the classpath. If you are running +#' your own cluster, make sure it is on the list of packages. 
#' #' @param spark A pre-configured SparkSession instance, use this if you need to control the way #' that the session is set up @@ -75,9 +80,11 @@ StorageType <- list( #' @param accept_language The default value of the Accept-Language HTTP header passed to the #' terminology server #' -#' @return \code{pathling_connect()} returns a PathlingContext instance initialized with the specified configuration. +#' @return A Pathling context instance initialized with the specified configuration #' #' @importFrom sparklyr j_invoke_static j_invoke +#' +#' @family context lifecycle functions #' #' @export #' @@ -207,27 +214,30 @@ pathling_connect <- function( encoders_config, terminology_config) } -#' @description -#' \code{pathling_spark()} obtains the Spark connection associated with a Pathling context. +#' Get the Spark session +#' +#' Returns the Spark connection associated with a Pathling context. #' #' @param pc The PathlingContext object. #' -#' @return \code{pathling_spark()} returns spark connection associated with this Pathling context. -#' -#' @rdname pathling_context +#' @return The Spark connection associated with this Pathling context. +#' +#' @family context lifecycle functions #' #' @export pathling_spark <- function(pc) { sparklyr::spark_connection(pc) } -#' @description -#' \code{pathling_disconnect()} disconnects the Spark connection associated with a Pathling context. +#' Disconnect from the Spark session +#' +#' Disconnects the Spark connection associated with a Pathling context. +#' #' @param pc The PathlingContext object. #' -#' @return NULL +#' @return No return value, called for side effects only. #' -#' @rdname pathling_context +#' @family context lifecycle functions #' #' @export pathling_disconnect <- function(pc) { @@ -235,16 +245,14 @@ pathling_disconnect <- function(pc) { invisible(NULL) } -#' @description -#' \code{pathling_disconnect_all()} disconnects all Spark connections. 
+#' Disconnect all Spark connections #' -#' @return NULL -#' -#' @rdname pathling_context +#' @return No return value, called for side effects only. +#' +#' @family context lifecycle functions #' #' @export pathling_disconnect_all <- function() { sparklyr::spark_disconnect_all() invisible(NULL) } - diff --git a/lib/R/R/data.R b/lib/R/R/data.R index 3499989ce2..2a6e57b0c5 100644 --- a/lib/R/R/data.R +++ b/lib/R/R/data.R @@ -1,6 +1,4 @@ -# Copyright 2023 Commonwealth Scientific and Industrial Research -# Organisation (CSIRO) ABN 41 687 119 230. -# +# Copyright 2023 Commonwealth Scientific and Industrial Research # Organisation (CSIRO) ABN 41 687 119 230. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -18,15 +16,14 @@ #' #' A synthetic data set of simplified and flattened FHIR Condition resources generated by Synthea. #' -#' @format ## `conditions` #' A data frame with 19 rows and 6 columns: -#' \describe{ -#' \item{START}{The onset date} -#' \item{STOP}{The atabement date} -#' \item{PATIENT}{The id of the patient} -#' \item{ENCOUNTER}{The id of the encounter} -#' \item{CODE}{The SNOMED CT code of the condition} -#' \item{DESCRIPTION}{The display name of the condition} +#' \itemize{ +#' \item \code{START} - The onset date +#' \item \code{STOP} - The abatement date +#' \item \code{PATIENT} - The ID of the patient +#' \item \code{ENCOUNTER} - The ID of the encounter +#' \item \code{CODE} - The SNOMED CT code of the condition +#' \item \code{DESCRIPTION} - The display name of the condition #' } "conditions" diff --git a/lib/R/R/datasource.R b/lib/R/R/datasource.R index 0b17deb006..9f96c65405 100644 --- a/lib/R/R/datasource.R +++ b/lib/R/R/datasource.R @@ -13,7 +13,6 @@ # See the License for the specific language governing permissions and # limitations under the License. 
- #'@importFrom sparklyr j_invoke data_sources <- function(pc) { j_invoke(pc, "read") @@ -26,12 +25,25 @@ invoke_datasource <- function(pc, name, ...) { j_invoke(name, ...) } -#' Creates a data source from a directory containing NDJSON files. +#' ImportMode #' -#' @description -#' \code{pathling_read_ndjson()} creates a data source from a directory containing NDJSON files. -#' The files must be named with the resource type code and must have the ".ndjson" extension, -#' e.g. "Patient.ndjson" or "Observation.ndjson". +#' The following import modes are supported: +#' \itemize{ +#' \item{\code{OVERWRITE}: Overwrite any existing data.} +#' \item{\code{MERGE}: Merge the new data with the existing data based on resource ID.} +#' } +#' +#' @export +ImportMode <- list( + OVERWRITE = "overwrite", + MERGE = "merge" +) + +#' Create a data source from NDJSON +#' +#' Creates a data source from a directory containing NDJSON files. The files must be named with the +#' resource type code and must have the ".ndjson" extension, e.g. "Patient.ndjson" or +#' "Observation.ndjson". #' #' @param pc The PathlingContext object. #' @param path The URI of the directory containing the NDJSON files. @@ -39,10 +51,12 @@ invoke_datasource <- function(pc, name, ...) { #' @param file_name_mapper An optional function that maps a filename to the set of resource types #' that it contains. Currently not implemented. #' @return A DataSource object that can be used to run queries against the data. 
+#' +#' @seealso \href{https://pathling.csiro.au/docs/libraries/fhirpath-query#ndjson}{Pathling documentation - Reading NDJSON} #' #' @export #' -#' @family Pathling data sources +#' @family data source functions #' #' @examplesIf pathling_is_spark_installed() #' pc <- pathling_connect() @@ -55,10 +69,9 @@ pathling_read_ndjson <- function(pc, path, extension = "ndjson", file_name_mappe pc %>% invoke_datasource("ndjson", as.character(path), as.character(extension)) } -#' Creates a data source from a directory containing FHIR bundles. +#' Create a data source from FHIR bundles #' -#' @description -#' \code{pathling_read_bundles()} creates a data source from a directory containing FHIR bundles. +#' Creates a data source from a directory containing FHIR bundles. #' #' @param pc The PathlingContext object. #' @param path The URI of the directory containing the bundles. @@ -66,9 +79,11 @@ pathling_read_ndjson <- function(pc, path, extension = "ndjson", file_name_mappe #' @param mime_type The MIME type of the bundles. Defaults to "application/fhir+json". #' @return A DataSource object that can be used to run queries against the data. #' +#' @seealso \href{https://pathling.csiro.au/docs/libraries/fhirpath-query#fhir-bundles}{Pathling documentation - Reading Bundles} +#' #' @export #' -#' @family Pathling data sources +#' @family data source functions #' #' @examplesIf pathling_is_spark_installed() #' pc <- pathling_connect() @@ -83,20 +98,21 @@ pathling_read_bundles <- function(pc, path, resource_types, mime_type = MimeType as.character(mime_type)) } -#' Creates an immutable data source from provided resource Spark DataFrames. +#' Create a data source from datasets #' -#' @description -#' \code{pathling_read_datasets()} creates an immutable, ad-hoc data source from a named list of Spark DataFrames indexed with +#' Creates an immutable, ad-hoc data source from a named list of Spark datasets indexed with #' resource type codes. #' #' @param pc The PathlingContext object. 
-#' @param resources A name list of Spark DataFrames, where the keys are resource type codes +#' @param resources A named list of Spark datasets, where the keys are resource type codes #' and the values are the data frames containing the resource data. #' @return A DataSource object that can be used to run queries against the data. #' +#' @seealso \href{https://pathling.csiro.au/docs/libraries/fhirpath-query#datasets}{Pathling documentation - Reading datasets} +#' #' @export #' -#' @family Pathling data sources +#' @family data source functions #' #' @examplesIf pathling_is_spark_installed() #' pc <- pathling_connect() @@ -115,7 +131,7 @@ pathling_read_datasets <- function(pc, resources) { ds } -#' Creates a data source from a directory containing Parquet tables. +#' Create a data source from Parquet tables #' #' @description #' \code{pathling_read_parquet()} creates a data source from a directory containing Parquet tables. @@ -125,9 +141,11 @@ pathling_read_datasets <- function(pc, resources) { #' @param path The URI of the directory containing the Parquet tables. #' @return A DataSource object that can be used to run queries against the data. #' +#' @seealso \href{https://pathling.csiro.au/docs/libraries/fhirpath-query#parquet}{Pathling documentation - Reading Parquet} +#' #' @export #' -#' @family Pathling data sources +#' @family data source functions #' #' @examplesIf pathling_is_spark_installed() #' pc <- pathling_connect() @@ -138,7 +156,7 @@ pathling_read_parquet <- function(pc, path) { pc %>% invoke_datasource("parquet", as.character(path)) } -#' Creates a data source from a directory containing Delta tables. +#' Create a data source from Delta tables #' #' @description #' \code{pathling_read_delta()} creates a data source from a directory containing Delta tables. #' #' @param pc The PathlingContext object. #' @param path The URI of the directory containing the Delta tables. 
#' @return A DataSource object that can be used to run queries against the data. #' +#' @seealso \href{https://pathling.csiro.au/docs/libraries/fhirpath-query#delta-lake}{Pathling documentation - Reading Delta} +#' #' @export #' -#' @family Pathling data sources +#' @family data source functions #' #' @examplesIf pathling_is_spark_installed() #' pc <- pathling_connect() @@ -161,7 +181,7 @@ pathling_read_delta <- function(pc, path) { pc %>% invoke_datasource("delta", as.character(path)) } -#' Creates a data source from a set of Spark tables. +#' Create a data source from managed tables #' #' \code{pathling_read_tables()} creates a data source from a set of Spark tables, #' where the table names are the resource type codes. @@ -170,9 +190,11 @@ pathling_read_delta <- function(pc, path) { #' @param schema An optional schema name that should be used to qualify the table names. #' @return A DataSource object that can be used to run queries against the data. #' +#' @seealso \href{https://pathling.csiro.au/docs/libraries/fhirpath-query#managed-tables}{Pathling documentation - Reading managed tables} +#' #' @export #' -#' @family Pathling data sources +#' @family data source functions #' #' @examplesIf pathling_is_spark_installed() #' pc <- pathling_connect() @@ -188,8 +210,7 @@ pathling_read_tables <- function(pc, schema = NULL) { } } - -#' Reads the data for the given resource type from the data source. +#' Get data for a resource type from a data source #' #' @param ds The DataSource object. #' @param resource_code A string representing the type of FHIR resource to read data from. @@ -222,63 +243,31 @@ invoke_datasink <- function(ds, name, ...) { } -#' Import modes. +#' Write FHIR data to NDJSON files +#' +#' Writes the data from a data source to a directory of NDJSON files. The files will be named using +#' the resource type and the ".ndjson" extension. 
#' -#' @export -ImportMode <- list( - OVERWRITE = "overwrite", - MERGE = "merge" -) - -#' Functions to perists data sources in various formats. -#' @rdname ds_write -#' @name ds_write_xxxx +#' @param ds The DataSource object. +#' @param path The URI of the directory to write the files to. +#' @param file_name_mapper An optional function that can be used to customise the mapping +#' of the resource type to the file name. Currently not implemented. +#' +#' @return No return value, called for side effects only. +#' +#' @seealso \href{https://pathling.csiro.au/docs/libraries/fhirpath-query#ndjson-1}{Pathling documentation - Writing NDJSON} #' #' @examplesIf pathling_is_spark_installed() -#' # Create a temporary warehouse location, which will be used when we call ds_write_tables(). -#' temp_dir_path <- tempfile() -#' dir.create(temp_dir_path) -#' sc <- sparklyr::spark_connect(master = "local[*]", config = list( -#' "sparklyr.shell.conf" = c( -#' paste0("spark.sql.warehouse.dir=", temp_dir_path), -#' "spark.sql.extensions=io.delta.sql.DeltaSparkSessionExtension", -#' "spark.sql.catalog.spark_catalog=org.apache.spark.sql.delta.catalog.DeltaCatalog" -#' ) -#' ), version = pathling_spark_info()$spark_version) -#' #' pc <- pathling_connect(sc) #' data_source <- pc %>% pathling_read_ndjson(pathling_examples('ndjson')) #' #' # Write the data to a directory of NDJSON files. #' data_source %>% ds_write_ndjson(file.path(tempdir(), 'ndjson')) #' -#' # Write the data to a directory of Parquet files. -#' data_source %>% ds_write_parquet(file.path(tempdir(), 'parquet')) -#' -#' # Write the data to a directory of Delta files. -#' data_source %>% ds_write_delta(file.path(tempdir(), 'delta'), import_mode = ImportMode$OVERWRITE) -#' -#' # Write the data to a set of Spark tables in 'fhir' database. 
-#' data_source %>% ds_write_tables("default", import_mode = ImportMode$MERGE) -#' #' pathling_disconnect(pc) -#' unlink(temp_dir_path, recursive = TRUE) -NULL - #' -#' @description -#' \code{ds_write_ndjson()} writes the data to a directory of NDJSON files. -#' The files will be named using the resource type and the ".ndjson" extension. -#' -#' @param ds The DataSource object. -#' @param path The URI of the directory to write the files to. -#' @param file_name_mapper An optional function that can be used to customise the mapping -#' of the resource type to the file name. Currently not implemented. -#' -#' @return NULL -#' -#' @rdname ds_write -#' +#' @family data sink functions +#' #' @export ds_write_ndjson <- function(ds, path, file_name_mapper = NULL) { #See: issue #1601 (Implement file_name_mappers in R sparkly API) @@ -286,49 +275,101 @@ ds_write_ndjson <- function(ds, path, file_name_mapper = NULL) { invoke_datasink(ds, "ndjson", path) } -#' @description -#' \code{ds_write_parquet()} writes the data to a directory of Parquet files. +#' Write FHIR data to Parquet files +#' +#' Writes the data from a data source to a directory of Parquet files. #' #' @param ds The DataSource object. #' @param path The URI of the directory to write the files to. #' -#' @return NULL +#' @return No return value, called for side effects only. #' -#' @rdname ds_write +#' @seealso \href{https://pathling.csiro.au/docs/libraries/fhirpath-query#parquet-1}{Pathling documentation - Writing Parquet} +#' +#' @examplesIf pathling_is_spark_installed() +#' pc <- pathling_connect() +#' data_source <- pc %>% pathling_read_ndjson(pathling_examples('ndjson')) +#' +#' # Write the data to a directory of Parquet files. 
+#' data_source %>% ds_write_parquet(file.path(tempdir(), 'parquet')) +#' +#' pathling_disconnect(pc) +#' +#' @family data sink functions #' #' @export ds_write_parquet <- function(ds, path) { invoke_datasink(ds, "parquet", path) } -#' @description -#' \code{ds_write_delta()} writes the data to a directory of Delta files. +#' Write FHIR data to Delta files +#' +#' Writes the data from a data source to a directory of Delta files. #' #' @param ds The DataSource object. #' @param path The URI of the directory to write the files to. #' @param import_mode The import mode to use when writing the data - "overwrite" will overwrite any #' existing data, "merge" will merge the new data with the existing data based on resource ID. #' -#' @return NULL +#' @return No return value, called for side effects only. #' -#' @rdname ds_write +#' @seealso \href{https://pathling.csiro.au/docs/libraries/fhirpath-query#delta-lake-1}{Pathling documentation - Writing Delta} +#' +#' @seealso \code{\link{ImportMode}} +#' +#' @examplesIf pathling_is_spark_installed() +#' pc <- pathling_connect() +#' data_source <- pc %>% pathling_read_ndjson(pathling_examples('ndjson')) +#' +#' # Write the data to a directory of Delta files. +#' data_source %>% ds_write_delta(file.path(tempdir(), 'delta'), import_mode = ImportMode$OVERWRITE) +#' +#' pathling_disconnect(pc) +#' +#' @family data sink functions #' #' @export ds_write_delta <- function(ds, path, import_mode = ImportMode$OVERWRITE) { invoke_datasink(ds, "delta", path, import_mode) } -#' @description -#' \code{ds_write_tables()} writes the data to a set of tables in the Spark catalog. +#' Write FHIR data to managed tables +#' +#' Writes the data from a data source to a set of tables in the Spark catalog. #' #' @param ds The DataSource object. #' @param schema The name of the schema to write the tables to. 
#' @param import_mode The import mode to use when writing the data - "overwrite" will overwrite any #' existing data, "merge" will merge the new data with the existing data based on resource ID. +#' +#' @return No return value, called for side effects only. +#' +#' @seealso \href{https://pathling.csiro.au/docs/libraries/fhirpath-query#managed-tables-1}{Pathling documentation - Writing managed tables} +#' +#' @seealso \code{\link{ImportMode}} +#' +#' @examplesIf pathling_is_spark_installed() +#' # Create a temporary warehouse location, which will be used when we call ds_write_tables(). +#' temp_dir_path <- tempfile() +#' dir.create(temp_dir_path) +#' sc <- sparklyr::spark_connect(master = "local[*]", config = list( +#' "sparklyr.shell.conf" = c( +#' paste0("spark.sql.warehouse.dir=", temp_dir_path), +#' "spark.sql.extensions=io.delta.sql.DeltaSparkSessionExtension", +#' "spark.sql.catalog.spark_catalog=org.apache.spark.sql.delta.catalog.DeltaCatalog" +#' ) +#' ), version = pathling_spark_info()$spark_version) #' -#' @return NULL +#' pc <- pathling_connect(sc) +#' data_source <- pc %>% pathling_read_ndjson(pathling_examples('ndjson')) +#' +#' # Write the data to a set of Spark tables in the 'default' database. +#' data_source %>% ds_write_tables("default", import_mode = ImportMode$MERGE) +#' +#' pathling_disconnect(pc) +#' unlink(temp_dir_path, recursive = TRUE) #' -#' @rdname ds_write +#' @family data sink functions #' #' @export ds_write_tables <- function(ds, schema = NULL, import_mode = ImportMode$OVERWRITE) { diff --git a/lib/R/R/encoding.R b/lib/R/R/encoding.R index b3aafe85a4..560e0d20c2 100644 --- a/lib/R/R/encoding.R +++ b/lib/R/R/encoding.R @@ -15,20 +15,20 @@ library(purrr) -#' Encode FHIR Resources +#' Encode FHIR JSON or XML to a dataframe #' #' Takes a Spark DataFrame with string representations of FHIR resources in the given column and #' encodes the resources of the given types as Spark DataFrame. #' -#' @param pc The PathlingContext object. 
+#' @param pc The Pathling context object. #' @param df A Spark DataFrame containing the resources to encode. #' @param resource_name The name of the FHIR resource to extract (e.g., "Condition", "Observation"). -#' @param input_type The mime type of input string encoding. Defaults to "application/fhir+json". +#' @param input_type The MIME type of input string encoding. Defaults to "application/fhir+json". #' @param column The column in which the resources to encode are stored. If set to NULL, the input #' DataFrame is assumed to have one column of type string. #' @return A Spark DataFrame containing the given type of resources encoded into Spark columns. #' -#' @family Pathling encoding +#' @family encoding functions #' #' @importFrom rlang `%||%` #' @importFrom sparklyr sdf_register spark_dataframe j_invoke @@ -46,12 +46,12 @@ pathling_encode <- function(pc, df, resource_name, input_type = NULL, column = N input_type %||% MimeType$FHIR_JSON, column)) } -#' Encodes FHIR Bundles into Spark DataFrame +#' Encode FHIR Bundles to a dataframe #' -#' Takes a dataframe with string representations of FHIR bundles in the given column and encodes -#' the resources of the given types as Spark DataFrame. +#' Takes a dataframe with string representations of FHIR bundles in the given column and outputs +#' a dataframe of encoded resources. #' -#' @param pc A PathlingContext instance. +#' @param pc A Pathling context object. #' @param df A Spark DataFrame containing the bundles with the resources to encode. #' @param resource_name The name of the FHIR resource to extract (Condition, Observation, etc.). #' @param input_type The MIME type of the input string encoding. Defaults to 'application/fhir+json'. @@ -60,7 +60,7 @@ pathling_encode <- function(pc, df, resource_name, input_type = NULL, column = N #' #' @return A Spark DataFrame containing the given type of resources encoded into Spark columns. 
#' -#' @family Pathling encoding +#' @family encoding functions #' #' @importFrom rlang `%||%` #' @importFrom sparklyr sdf_register spark_dataframe j_invoke diff --git a/lib/R/R/etc.R b/lib/R/R/etc.R index 7159e3786d..b9d89e90f2 100644 --- a/lib/R/R/etc.R +++ b/lib/R/R/etc.R @@ -25,9 +25,21 @@ package_info <- function(pkgname) { read_dcf(path) } -#' Returns the Spark and Hadoop versions used by the Pathling R library. +#' Get versions of Spark and other dependencies +#' +#' Returns the versions of Spark and Spark packages used by the Pathling R library. +#' +#' @return A list containing the following keys: +#' \itemize{ +#' \item{\code{spark_version}: The version of Spark used by Pathling.} +#' \item{\code{scala_version}: The version of Scala used by Pathling.} +#' \item{\code{hadoop_version}: The version of Hadoop used by Pathling.} +#' \item{\code{hadoop_major_version}: The major version of Hadoop used by Pathling.} +#' \item{\code{delta_version}: The version of Delta used by Pathling.} +#' } +#' +#' @family installation functions #' -#' @return A list containing the Spark and Hadoop versions, under the keys 'spark_version' and 'hadoop_version' respectively. #' @export pathling_spark_info <- function() { metadata <- package_info("pathling") @@ -40,32 +52,43 @@ pathling_spark_info <- function() { ) } -#' Returns the version of the Pathling R library. +#' Get version of Pathling #' #' @return The version of the Pathling R library. +#' +#' @family installation functions +#' #' @export pathling_version <- function() { metadata <- package_info("pathling") metadata[["Config/pathling/Version"]] } -#' Installs the version of Spark/Hadoop required by pathling. +#' Install Spark #' -#' @description -#' Installs the version of Spark/Hadoop defined in the package metadata -#' using the sparklyr package \code{spark_install} function. +#' Installs the version of Spark/Hadoop defined in the package metadata using the +#' \code{\link[sparklyr]{spark_install}} function. 
#' #' @return List with information about the installed version. +#' +#' @family installation functions +#' #' @export pathling_install_spark <- function() { spark_info <- pathling_spark_info() sparklyr::spark_install(version = spark_info$spark_version, hadoop_version = spark_info$hadoop_major_version) } -#' Checks if the version of Spark/Hadoop reuired by pathling is installed. -#' @return TRUE if the required version of Spark/Hadoop is installed, FALSE otherwise. +#' Check if Spark is installed +#' +#' Checks if the version of Spark/Hadoop required by Pathling is installed. +#' +#' @return \code{TRUE} if the required version of Spark/Hadoop is installed, \code{FALSE} otherwise. #' #' @importFrom rlang .data +#' +#' @family installation functions +#' #' @export pathling_is_spark_installed <- function() { spark_info <- pathling_spark_info() @@ -75,14 +98,14 @@ pathling_is_spark_installed <- function() { } -#' Constructs the path to the package example data. +#' Get path to Pathling example data #' -#' Construct the path to the package example data from components in a platform-independent way. +#' Construct the path to the package example data in a platform-independent way. #' #' @param ... character vector of the path components. #' @return The path to the examples data. #' -#' @family pathling examples +#' @family example functions #' #' @export #' @@ -92,21 +115,19 @@ pathling_examples <- function(...) { system.file("extdata", ..., package = "pathling") } -#' Reads example FHIR resource data frame. +#' Read resource from Pathling example data #' -#' @description -#' \code{pathling_example_resource()} reads a FHIR resource dataframe from the package example data. +#' Reads a FHIR resource dataframe from the package example data. +#' +#' The resources are read from the package example data in the \code{extdata/parquet} directory. +#' Currently the following resources are available: 'Patient' and 'Condition'. #' #' @param pc The PathlingContext object. 
#' @param resource_name The name of the resource to read. #' -#' @details -#' The resorces are read from the package example data in the \code{extdata/parquet} directory. -#' Currently the following resources are available: 'Patient' and 'Condition'. -#' #' @return A Spark DataFrame containing the resource data. #' -#' @family pathling examples +#' @family example functions #' #' @export #' diff --git a/lib/R/R/fhir.R b/lib/R/R/fhir.R index a8ed33a924..cc9bf50677 100644 --- a/lib/R/R/fhir.R +++ b/lib/R/R/fhir.R @@ -15,7 +15,15 @@ -#' Constants for FHIR encoding mime types +#' FHIR MIME types +#' +#' The following MIME types are supported: +#' \itemize{ +#' \item{\code{FHIR_JSON}: FHIR resources encoded as JSON} +#' \item{\code{FHIR_XML}: FHIR resources encoded as XML} +#' } +#' +#' @seealso \href{https://hl7.org/fhir/R4/formats.html}{FHIR R4 - Resource Formats} #' #' @export MimeType <- list( @@ -23,7 +31,12 @@ MimeType <- list( FHIR_XML = "application/fhir+xml" ) -#' Constants for FHIR versions +#' FHIR versions +#' +#' The following FHIR versions are supported: +#' \itemize{ +#' \item{\code{R4}: FHIR R4} +#' } #' #' @export Version <- list( diff --git a/lib/R/R/functions.R b/lib/R/R/functions.R index 40a4cd70d1..0f53e82335 100644 --- a/lib/R/R/functions.R +++ b/lib/R/R/functions.R @@ -13,53 +13,55 @@ # See the License for the specific language governing permissions and # limitations under the License. -#' The URI of the SNOMED code system. +#' SNOMED CT system URI +#' +#' The URI of the SNOMED CT code system: \code{http://snomed.info/sct}. +#' +#' @seealso \href{https://terminology.hl7.org/SNOMEDCT.html}{Using SNOMED CT with HL7 Standards} +#' #' @export SNOMED_URI <- "http://snomed.info/sct" -#' The URI of the LOINC code system. +#' LOINC system URI +#' +#' The URI of the LOINC code system: \code{http://loinc.org}. 
+#' +#' @seealso \href{https://terminology.hl7.org/LOINC.html}{Using LOINC with HL7 Standards} +#' #' @export LOINC_URI <- "http://loinc.org" - -#' Functions converting codes into a Column that contains a Coding struct. -#' -#' @rdname tx_to_coding -#' @name tx_to_xxxx_coding -#' -#' @details -#' The Coding struct Column can be used as an input to terminology functions such -#' as \code{\link{tx_member_of}} and \code{\link{tx_translate}}. -#' Please note that inside \code{sparklyr} verbs such as \code{mutate} the functions calls need to -#' be preceeded with \code{!!}, e.g: \code{!!tx_to_coding(CODE, SNOMED_URI)}. -#' +#' Convert codes to Coding structures +#' +#' Converts a Column containing codes into a Column that contains a Coding struct. +#' +#' The Coding struct Column can be used as an input to terminology functions such as +#' \code{\link{tx_member_of}} and \code{\link{tx_translate}}. Please note that inside +#' \code{sparklyr} verbs such as \code{mutate} the functions calls need to be preceded with +#' \code{!!}, e.g: \code{!!tx_to_coding(CODE, SNOMED_URI)}. +#' #' @param coding_column The Column containing the codes. #' @param system The URI of the system the codes belong to. #' @param version The version of the code system. #' #' @return A Column containing a Coding struct. +#' +#' @seealso \href{https://hl7.org/fhir/R4/datatypes.html#Coding}{FHIR R4 - Coding} +#' +#' @family terminology helpers #' +#' @export +#' #' @examplesIf pathling_is_spark_installed() #' pc <- pathling_connect() #' condition_df <- pathling_spark(pc) %>% sparklyr::copy_to(conditions) #' -#' # Convert codes to codins with explicit system -#' condition_df %>% sparklyr::mutate(snomedCoding = !!tx_to_coding(CODE, SNOMED_URI), .keep = 'none') -#' -#' # Convert codes to SNOMED codings -#' condition_df %>% sparklyr::mutate(snomedCoding = !!tx_to_snomed_coding(CODE), .keep = 'none') +#' # Convert codes to ICD-10 codings. 
+#' condition_df %>% sparklyr::mutate( +#' icdCoding = !!tx_to_coding(CODE, "http://hl7.org/fhir/sid/icd-10"), .keep = 'none' +#' ) #' #' pathling_disconnect(pc) -NULL - -#' @rdname tx_to_coding -#' -#' @family terminology helpers -#' -#' @description -#' \code{tx_to_coding()} converts a Column containing codes into a Column that contains a Coding struct. -#' -#' @export tx_to_coding <- function(coding_column, system, version = NULL) { rlang::expr(if (!is.null({ { coding_column } })) struct( @@ -72,39 +74,78 @@ tx_to_coding <- function(coding_column, system, version = NULL) { ) else NULL) } -#' @description -#' \code{tx_to_snomed_coding()} converts a Column containing codes into a Column that -#' contains a SNOMED Coding struct. +#' Convert SNOMED CT codes to Coding structures +#' +#' Converts a Column containing codes into a Column that contains a SNOMED Coding struct. +#' +#' The Coding struct Column can be used as an input to terminology functions such as +#' \code{\link{tx_member_of}} and \code{\link{tx_translate}}. Please note that inside +#' \code{sparklyr} verbs such as \code{mutate} the functions calls need to be preceded with +#' \code{!!}, e.g: \code{!!tx_to_coding(CODE, SNOMED_URI)}. +#' +#' @param coding_column The Column containing the codes. +#' @param version The version of the code system. +#' +#' @return A Column containing a Coding struct. #' #' @family terminology helpers -#' @rdname tx_to_coding # #' @export +#' +#' @examplesIf pathling_is_spark_installed() +#' pc <- pathling_connect() +#' condition_df <- pathling_spark(pc) %>% sparklyr::copy_to(conditions) +#' +#' # Convert codes to SNOMED CT codings. 
+#' # Equivalent to: tx_to_coding(CODE, "http://snomed.info/sct") +#' condition_df %>% sparklyr::mutate(snomedCoding = !!tx_to_snomed_coding(CODE), .keep = 'none') +#' +#' pathling_disconnect(pc) tx_to_snomed_coding <- function(coding_column, version = NULL) { tx_to_coding({ { coding_column } }, SNOMED_URI, { { version } }) } -#' @description -#' \code{tx_to_loinc_coding()} converts a Column containing codes into a Column that -#' contains a LOINC Coding struct. +#' Convert LOINC codes to Coding structures +#' +#' Converts a Column containing codes into a Column that contains a LOINC Coding struct. +#' +#' The Coding struct Column can be used as an input to terminology functions such as +#' \code{\link{tx_member_of}} and \code{\link{tx_translate}}. Please note that inside +#' \code{sparklyr} verbs such as \code{mutate} the functions calls need to be preceded with +#' \code{!!}, e.g: \code{!!tx_to_coding(CODE, SNOMED_URI)}. +#' +#' @param coding_column The Column containing the codes. +#' @param version The version of the code system. +#' +#' @return A Column containing a Coding struct. #' #' @family terminology helpers -#' @rdname tx_to_coding #' #' @export +#' +#' @examplesIf pathling_is_spark_installed() +#' pc <- pathling_connect() +#' condition_df <- pathling_spark(pc) %>% sparklyr::copy_to(conditions) +#' +#' # Convert codes to LOINC codings. +#' # Equivalent to: tx_to_coding(CODE, "http://loinc.org") +#' condition_df %>% sparklyr::mutate(loincCoding = !!tx_to_loinc_coding(CODE), .keep = 'none') +#' +#' pathling_disconnect(pc) tx_to_loinc_coding <- function(coding_column, version = NULL) { tx_to_coding({ { coding_column } }, LOINC_URI, { { version } }) } -#' Terminology helper functions +#' Convert a SNOMED CT ECL expression to a ValueSet URI #' -#' @description -#' \code{tx_to_ecl_value_set} converts a SNOMED CT ECL expression into a FHIR ValueSet URI. -#' It can be used with the `\code{\link{tx_member_of}} function. 
+#' Converts a SNOMED CT ECL expression into a FHIR ValueSet URI. It can be used with the +#' \code{\link{tx_member_of}} function. #' #' @param ecl The ECL expression. #' #' @return The ValueSet URI. +#' +#' @seealso \href{https://terminology.hl7.org/SNOMEDCT.html#snomed-ct-implicit-value-sets}{Using SNOMED CT with HL7 Standards - Implicit Value Sets} #' #' @family terminology helpers #' @@ -118,7 +159,3 @@ tx_to_loinc_coding <- function(coding_column, version = NULL) { tx_to_ecl_value_set <- function(ecl) { paste0(SNOMED_URI, "?fhir_vs=ecl/", URLencode(ecl, reserved = TRUE)) } - - - - diff --git a/lib/R/R/query.R b/lib/R/R/query.R index 659c54c86c..ef5038e704 100644 --- a/lib/R/R/query.R +++ b/lib/R/R/query.R @@ -23,9 +23,10 @@ for_each_with_name <-function(sequence, FUN, ...) { } } -#' Executes an aggregate query for FHIR data. +#' Execute an aggregate query #' -#' The query calculates summary values based on aggregations and groupings of FHIR resources. +#' Executes an aggregate query over FHIR data. The query calculates summary values based on +#' aggregations and groupings of FHIR resources. #' #' @param ds The DataSource object containing the data to be queried. #' @param subject_resource A string representing the type of FHIR resource to aggregate data from. @@ -38,9 +39,11 @@ for_each_with_name <-function(sequence, FUN, ...) { #' Boolean value. Multiple filters are combined using logical AND operation. #' @return A Spark DataFrame containing the aggregated data. #' -#' @family Pathling queries +#' @family FHIRPath queries #' #' @importFrom sparklyr j_invoke sdf_register +#' +#' @seealso \href{https://pathling.csiro.au/docs/libraries/fhirpath-query#aggregate}{Pathling documentation - Aggregate} #' #' @export #' @examplesIf pathling_is_spark_installed() @@ -80,9 +83,10 @@ ds_aggregate <- function(ds, subject_resource, aggregations, groupings = NULL, f sdf_register(j_invoke(q, "execute")) } -#' Executes an extract query for FHIR data.
+#' Execute an extract query #' -#' The query extracts specified columns from FHIR resources in the tabular format. +#' Executes an extract query over FHIR data. This type of query extracts specified columns from +#' FHIR resources in a tabular format. #' #' @param ds The DataSource object containing the data to be queried. #' @param subject_resource A string representing the type of FHIR resource to extract data from. @@ -92,10 +96,12 @@ ds_aggregate <- function(ds, subject_resource, aggregations, groupings = NULL, f #' Boolean value. Multiple filters are combined using AND logic. #' @return A Spark DataFrame containing the extracted data. #' -#' @family Pathling queries +#' @family FHIRPath queries #' #' @importFrom sparklyr j_invoke sdf_register #' +#' @seealso \href{https://pathling.csiro.au/docs/libraries/fhirpath-query#extract}{Pathling documentation - Extract} +#' #' @export #' @examplesIf pathling_is_spark_installed() #' pc <- pathling_connect() @@ -124,7 +130,3 @@ ds_extract <- function(ds, subject_resource, columns, filters = NULL) { sdf_register(j_invoke(q, "execute")) } - - - - diff --git a/lib/R/R/udfs.R b/lib/R/R/udfs.R index 6952d42fa2..fd3cdc0e0e 100644 --- a/lib/R/R/udfs.R +++ b/lib/R/R/udfs.R @@ -14,7 +14,7 @@ # limitations under the License. # -# Placeholders for SQL functions and UDFs +# Placeholders for SQL functions and UDFs. # designation <- function(...) { } display <- function(...) { } @@ -30,9 +30,14 @@ property_decimal <- function(...) { } property_integer <- function(...) { } property_string <- function(...) { } -#' Converts a vector to an expression with the corresponding SQL array litera. +#' Convert a vector to a SQL array literal +#' +#' Converts a vector to an expression with the corresponding SQL array literal. +#' #' @param value A character or numeric vector to be converted -#' @return The `quosure` with the SQL array literal that can be used in dplyr::mutate. 
+#' +#' @return The \code{\link[rlang:topic-quosure]{quosure}} with the SQL array literal that can be used in +#' \code{\link[dplyr]{mutate}}. to_array <- function(value) { if (!is.null(value)) { rlang::new_quosure(rlang::expr(array(!!!value))) @@ -41,8 +46,20 @@ to_array <- function(value) { } } - -#' Allowed property types. +#' Coding property data types +#' +#' The following data types are supported: +#' \itemize{ +#' \item \code{STRING} - A string value. +#' \item \code{INTEGER} - An integer value. +#' \item \code{BOOLEAN} - A boolean value. +#' \item \code{DECIMAL} - A decimal value. +#' \item \code{DATETIME} - A date/time value. +#' \item \code{CODE} - A code value. +#' \item \code{CODING} - A Coding value. +#' } +#' +#' @seealso \href{https://hl7.org/fhir/R4/datatypes.html}{FHIR R4 - Data Types} #' #' @export PropertyType <- list( @@ -55,7 +72,23 @@ PropertyType <- list( CODING = "Coding" ) -#' Concept map equivalences. +#' Concept map equivalence types +#' +#' The following values are supported: +#' \itemize{ +#' \item \code{RELATEDTO} - The concepts are related to each other, and have at least some overlap in meaning, but the exact relationship is not known. +#' \item \code{EQUIVALENT} - The definitions of the concepts mean the same thing (including when structural implications of meaning are considered) (i.e. extensionally identical). +#' \item \code{EQUAL} - The definitions of the concepts are exactly the same (i.e. only grammatical differences) and structural implications of meaning are identical or irrelevant (i.e. intensionally identical). +#' \item \code{WIDER} - The target mapping is wider in meaning than the source concept. +#' \item \code{SUBSUMES} - The target mapping subsumes the meaning of the source concept (e.g. the source is-a target). +#' \item \code{NARROWER} - The target mapping is narrower in meaning than the source concept. 
The sense in which the mapping is narrower SHALL be described in the comments in this case, and applications should be careful when attempting to use these mappings operationally. +#' \item \code{SPECIALIZES} - The target mapping specializes the meaning of the source concept (e.g. the target is-a source). +#' \item \code{INEXACT} - There is some similarity between the concepts, but the exact relationship is not known. +#' \item \code{UNMATCHED} - This is an explicit assertion that there is no mapping between the source and target concept. +#' \item \code{DISJOINT} - This is an explicit assertion that the target concept is not in any way related to the source concept. +#' } +#' +#' @seealso \href{https://hl7.org/fhir/R4/valueset-concept-map-equivalence.html}{FHIR R4 - ConceptMapEquivalence} #' #' @export Equivalence <- list( @@ -71,17 +104,20 @@ Equivalence <- list( DISJOINT = "disjoint" ) -#' Checks if Coding is a member of ValueSet. +#' Test membership within a value set #' -#' \code{tx_member_of()} takes a Coding or array of Codings column as its input. Returns the column which contains a +#' Takes a Coding or array of Codings column as its input. Returns the column which contains a #' Boolean value, indicating whether any of the input Codings is a member of the specified FHIR #' ValueSet. #' -#' @param codings A Column containing a struct representation of a Coding or an array of such structs. +#' @param codings A Column containing a struct representation of a Coding or an array of such +#' structs. #' @param value_set_uri An identifier for a FHIR ValueSet. #' #' @return A Column containing the result of the operation. 
#' +#' @seealso \href{https://pathling.csiro.au/docs/libraries/terminology#value-set-membership}{Pathling documentation - Value set membership} +#' #' @family terminology functions #' #' @export @@ -89,7 +125,8 @@ #' @examplesIf pathling_is_spark_installed() #' pc <- pathling_connect() #' -#' # Test the codings of the Condition `code` for membership in a SNOMED CT ValueSet. +#' # Test the Condition codings for membership in the SNOMED CT 'Lateralisable body structure +#' # reference set' (723264001). #' pc %>% pathling_example_resource('Condition') %>% #' sparklyr::mutate( #' id, @@ -102,13 +139,12 @@ tx_member_of <- function(codings, value_set_uri) { rlang::expr(member_of({ { codings } }, { { value_set_uri } })) } -#' Translates a Coding column. +#' Translate between value sets #' -#' \code{tx_translate()} a Coding column as input. Returns the Column which contains an array of +#' Takes a Coding column as input. Returns the Column which contains an array of #' Coding value with translation targets from the specified FHIR ConceptMap. There #' may be more than one target concept for each input concept. Only the translation with #' the specified equivalences are returned. -#' See also \code{\link{Equivalence}}. #' #' @param codings A Column containing a struct representation of a Coding. #' @param concept_map_uri An identifier for a FHIR ConceptMap. #' @param reverse The direction to traverse the map. FALSE results in "source to target" #' mappings, while TRUE results in "target to source". #' @param equivalences A value of a collection of values from the ConceptMapEquivalence ValueSet. #' @param target Identifies the value set in which a translation is sought. If there's no #' target specified, the server should return all known translations. #' @return A Column containing the result of the operation (an array of Coding structs).
+#' +#' @seealso \code{\link{Equivalence}} +#' @seealso \href{https://pathling.csiro.au/docs/libraries/terminology#concept-translation}{Pathling documentation - Concept translation} #' #' @family terminology functions #' @@ -140,10 +179,9 @@ tx_translate <- function(codings, concept_map_uri, reverse = FALSE, equivalences !!to_array(equivalences), { { target } })) } -#' Checks if left Coding subsumes right Coding. +#' Test subsumption between codings #' -#' \code{tx_subsumes()} two Coding columns as input. Returns the Column, -#' which contains a Boolean value, +#' Takes two Coding columns as input. Returns a Column that contains a Boolean value, #' indicating whether the left Coding subsumes the right Coding. #' #' @param left_codings A Column containing a struct representation of a Coding or an array of Codings. @@ -151,6 +189,8 @@ tx_translate <- function(codings, concept_map_uri, reverse = FALSE, equivalences #' #' @return A Column containing the result of the operation (boolean). #' +#' @seealso \href{https://pathling.csiro.au/docs/libraries/terminology#subsumption-testing}{Pathling documentation - Subsumption testing} +#' #' @family terminology functions #' #' @export @@ -170,16 +210,17 @@ tx_subsumes <- function(left_codings, right_codings) { rlang::expr(subsumes({ { left_codings } }, { { right_codings } }, FALSE)) } -#' Checks if left Coding is subsumed by right Coding. +#' Test subsumption between codings #' -#' \code{tx_subsumed_by()} takes two Coding columns as input. Returns the Column, -#' which contains a Boolean value, +#' Takes two Coding columns as input. Returns a Column that contains a Boolean value, #' indicating whether the left Coding is subsumed by the right Coding. #' #' @param left_codings A Column containing a struct representation of a Coding or an array of Codings. #' @param right_codings A Column containing a struct representation of a Coding or an array of Codings. #' #' @return A Column containing the result of the operation (boolean). 
+#' +#' @seealso \href{https://pathling.csiro.au/docs/libraries/terminology#subsumption-testing}{Pathling documentation - Subsumption testing} #' #' @family terminology functions #' @@ -201,17 +242,18 @@ tx_subsumed_by <- function(left_codings, right_codings) { rlang::expr(subsumes({ { left_codings } }, { { right_codings } }, TRUE)) } -#' Retrieves the canonical display name for a Coding. +#' Get the display text for codings #' -#' \code{tx_display()} takes a Coding column as its input. Returns the Column, which contains the canonical display +#' Takes a Coding column as its input. Returns a Column that contains the canonical display #' name associated with the given code. #' #' @param coding A Column containing a struct representation of a Coding. #' @param accept_language The optional language preferences for the returned display name. -#' Overrides the parameter `accept_language` in -#' `pathling_connect`. +#' Overrides the parameter `accept_language` in \code{\link{pathling_connect}}. #' #' @return A Column containing the result of the operation (String). +#' +#' @seealso \href{https://pathling.csiro.au/docs/libraries/terminology#multi-language-support}{Pathling documentation - Multi-language support} #' #' @family terminology functions #' @@ -220,7 +262,7 @@ tx_subsumed_by <- function(left_codings, right_codings) { #' @examplesIf pathling_is_spark_installed() #' pc <- pathling_connect() #' -#' # Get the display nane of the first coding of the Condition resource code with default language +#' # Get the display name of the first coding of the Condition resource, with the default language. #' pc %>% pathling_example_resource('Condition') %>% #' sparklyr::mutate( #' id, @@ -232,15 +274,12 @@ tx_display <- function(coding, accept_language = NULL) { rlang::expr(display({ { coding } }, { { accept_language } })) } -#' Retrieves the values of properties for a Coding. +#' Get properties for codings #' -#' \code{tx_property_of()} takes a Coding column as its input. 
-#' Returns the Column, which contains the values of properties +#' Takes a Coding column as its input. Returns a Column that contains the values of properties #' for this coding with specified names and types. The type of the result column depends on the #' types of the properties. Primitive FHIR types are mapped to their corresponding SQL primitives. -#' Complex types are mapped to their corresponding structs. The allowed property types are: -#' code | Coding | string | integer | boolean | dateTime | decimal. -#' See also \code{\link{PropertyType}}. +#' Complex types are mapped to their corresponding structs. #' #' @param coding A Column containing a struct representation of a Coding. #' @param property_code The code of the property to retrieve. @@ -250,6 +289,9 @@ tx_display <- function(coding, accept_language = NULL) { #' #' @return The Column containing the result of the operation (array of property values). #' +#' @seealso \code{\link{PropertyType}} +#' @seealso \href{https://pathling.csiro.au/docs/libraries/terminology#retrieving-properties}{Pathling documentation - Retrieving properties} +#' #' @family terminology functions #' #' @export @@ -257,7 +299,7 @@ tx_display <- function(coding, accept_language = NULL) { #' @examplesIf pathling_is_spark_installed() #' pc <- pathling_connect() #' -#' # Get the (first) value of `inactive` property of the first coding of the Condition resource code +#' # Get the (first) value of the `inactive` property of the first coding of the Condition resource. #' pc %>% pathling_example_resource('Condition') %>% #' sparklyr::mutate(id, #' is_inavtive = (!!tx_property_of(code[['coding']][[0]], @@ -287,11 +329,11 @@ tx_property_of <- function(coding, property_code, property_type = "string", acce } } -#' Retrieves the values of designations for a Coding. +#' Get designations for codings #' -#' \code{tx_designation()} takes a Coding column as its input. 
Returns the Column, which contains the values of -#' designations (strings) for this coding for the specified use and language. If the language is -#' not provided (is null), then all designations with the specified type are returned regardless of +#' Takes a Coding column as its input. Returns a Column that contains the values of designations +#' (strings) for this coding that match the specified use and language. If the language is +#' not provided, then all designations with the specified type are returned regardless of #' their language. #' #' @param coding A Column containing a struct representation of a Coding. @@ -299,6 +341,8 @@ tx_property_of <- function(coding, property_code, property_type = "string", acce #' @param language The language of the designations. #' #' @return The Column containing the result of the operation (array of strings with designation values). +#' +#' @seealso \href{https://pathling.csiro.au/docs/libraries/terminology#retrieving-designations}{Pathling documentation - Retrieving designations} #' #' @family Terminology functions #' @@ -307,8 +351,8 @@ tx_property_of <- function(coding, property_code, property_type = "string", acce #' @examplesIf pathling_is_spark_installed() #' pc <- pathling_connect() #' -#' # Get the (first) value of the SNONED-CD designation code '900000000000003001' -#' # for the first coding of the Condition resource code for language 'en'. +#' # Get the (first) SNOMED CT "Fully specified name" ('900000000000003001') +#' # for the first coding of the Condition resource, in the 'en' language. #' pc %>% pathling_example_resource('Condition') %>% #' sparklyr::mutate( #' id,