From 71a7b52661d67f246e92552a7ceed92193ede90c Mon Sep 17 00:00:00 2001 From: hsonne Date: Sun, 24 Sep 2023 08:04:07 +0200 Subject: [PATCH 01/43] Add arguments "type", "debug" to get_stations() "type" allows to specify the output type(s) --- R/get_stations.R | 121 +++++++++++++------- tests/testthat/test-function-get_stations.R | 60 ++++++++-- 2 files changed, 130 insertions(+), 51 deletions(-) diff --git a/R/get_stations.R b/R/get_stations.R index 89644c6f..f7504590 100644 --- a/R/get_stations.R +++ b/R/get_stations.R @@ -3,6 +3,12 @@ #' @param run_parallel default: TRUE #' @param n_cores number of cores to use if \code{run_parallel = TRUE}. #' Default: one less than the detected number of cores. +#' @param debug logical indicating whether or not to show debug messages +#' @param type vector of character describing the type(s) of output(s) to be +#' returned. Expected values (and default): \code{c("list", "data.frame", +#' "crosstable")}. If only one value is given the data is returned in the +#' expected type. If more than one values are given, a list is returned with +#' one list element per type. #' @return list with general station "overview" (either as list "overview_list" #' or as data.frame "overview_df") and a crosstable with information which #' parameters is available per station ("x" if available, NA if not) @@ -18,16 +24,19 @@ #' str(stations) #' get_stations <- function( - run_parallel = TRUE, n_cores = parallel::detectCores() - 1L + run_parallel = TRUE, + n_cores = parallel::detectCores() - 1L, + debug = TRUE, + type = c("list", "data.frame", "crosstable") ) { - overview_options <- unlist(get_overview_options()) + expected_types <- c("list", "data.frame", "crosstable") - # Prepare message text for console output - messageText <- sprintf( - "Importing %d station overviews from Wasserportal Berlin", - length(overview_options) - ) + stopifnot(is.character(type)) + stopifnot(all(type %in% expected_types)) + stopifnot(!anyDuplicated(type)) + + overview_options <- unlist(get_overview_options()) # Prepare parallel processing if desired if (run_parallel) { @@ -41,44 +50,76 @@ get_stations <- function( } # Loop through overview_options, either in parallel or sequentially - overview_list <- kwb.utils::catAndRun(messageText, expr = { - if (run_parallel) { - parallel::parLapply(cl, overview_options, FUN) - } else { - lapply(overview_options, FUN) - } - }) - - overview_df <- data.table::rbindlist( - overview_list, - fill = TRUE, - idcol = "key" - ) - - metadata <- tidyr::separate( - data.frame( - key = names(overview_options), - station_type = as.vector(overview_options) + overview_list <- kwb.utils::catAndRun( + sprintf( + "Importing %d station overviews from Wasserportal Berlin", + length(overview_options) ), - .data$key, - into = c("water_body", "variable"), - sep = "\\.", - remove = FALSE + dbg = debug, + expr = { + if (run_parallel) { + parallel::parLapply(cl, overview_options, FUN) + } else { + lapply(overview_options, FUN) + } + } ) - overview_df <- dplyr::left_join(overview_df, metadata, by = "key") + # Return the list if only the list is requested + if (identical(type, "list")) { + return(overview_list) + } - crosstable <- overview_df %>% - dplyr::select("Messstellennummer", "Messstellenname", "station_type") %>% - dplyr::mutate(value = "x") %>% - tidyr::pivot_wider( - names_from = "station_type", - values_from = "value" + # Function to convert overview_options to a data frame + overview_options_to_df <- function(overview_options) { + tidyr::separate( + data.frame( + key = names(overview_options), + station_type = as.vector(overview_options) + ), + .data$key, + into = c("water_body", "variable"), + sep = "\\.", + remove = FALSE ) + } + + # Convert overview_list to a data frame and append metadata from options + overview_df <- overview_list %>% + data.table::rbindlist(fill = TRUE, idcol = "key") %>% + dplyr::left_join(overview_options_to_df(overview_options), by = "key") + + # Return the data frame if only the data frame is requested + if (identical(type, "data.frame")) { + return(overview_df) + } + + # Create crosstable if requested + crosstable <- if ("crosstable" %in% type) { + overview_df %>% + dplyr::select("Messstellennummer", "Messstellenname", "station_type") %>% + dplyr::mutate(value = "x") %>% + tidyr::pivot_wider(names_from = "station_type", values_from = "value") + } # else NULL - list( - overview_list = overview_list, - overview_df = overview_df, - crosstable = crosstable + # Return the crosstable if only the crosstable is requested + if (identical(type, "crosstable")) { + return(crosstable) + } + + # If we arrive here, there are at least two types of output requested + stopifnot(length(type) > 1L) + + # Return a list with all requested types of output + c( + if ("list" %in% type) { + list(overview_list = overview_list) + }, + if ("data.frame" %in% type) { + list(overview_df = overview_df) + }, + if (!is.null(crosstable)) { + list(crosstable = crosstable) + } ) } diff --git a/tests/testthat/test-function-get_stations.R b/tests/testthat/test-function-get_stations.R index 945d4f6a..fdf86b87 100644 --- a/tests/testthat/test-function-get_stations.R +++ b/tests/testthat/test-function-get_stations.R @@ -1,16 +1,54 @@ -# -# This test file has been generated by kwb.test::create_test_files() -# launched by user hsonne on 2023-09-23 23:10:01.710311. -# Your are strongly encouraged to modify the dummy functions -# so that real cases are tested. You should then delete this comment. -# - test_that("get_stations() works", { - expect_output(result <- wasserportal:::get_stations()) + f <- wasserportal:::get_stations + + expect_error( + f(type = 1), + regexp = "is.character\\(type\\)" + ) + + expect_error( + f(type = "unsupported-type"), + regexp = "all\\(type %in% expected_types\\)" + ) + + expect_error( + f(type = c("list", "list")), + regexp = "!anyDuplicated" + ) + + # Check output type "list" - expect_identical( - names(result), - c("overview_list", "overview_df", "crosstable") + expect_output(result_list <- f(type = "list")) + expect_type(result_list, "list") + expect_true(all(grepl("^(surface_|ground)water", names(result_list)))) + + # Check output type "data.frame" + + expected_names <- c( + "key", + "Messstellennummer", + "Betreiber", + "stammdaten_link" ) + + expect_output(result_df <- f(type = "data.frame")) + expect_true("data.frame" %in% class(result_df)) + expect_true(all(expected_names %in% names(result_df))) + + # Check output type "crosstable" + + expect_output(result_crosstable <- f(type = "crosstable")) + expect_true("data.frame" %in% class(result_crosstable)) + expect_identical(unique(na.omit(unlist(result_crosstable[, -(1:2)]))), "x") + + # Check output of all types + + expect_output(result_all <- f()) + expect_identical(result_all, list( + overview_list = result_list, + overview_df = result_df, + crosstable = result_crosstable + )) + }) From 1db01fc3d4a50e2937971b651b4a40a9bfa8b6e2 Mon Sep 17 00:00:00 2001 From: hsonne Date: Sun, 24 Sep 2023 08:08:14 +0200 Subject: [PATCH 02/43] Make "type" the main (first) argument --- DESCRIPTION | 2 +- R/get_stations.R | 12 ++++++------ man/get_stations.Rd | 15 ++++++++++++++- 3 files changed, 21 insertions(+), 8 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 18acf69a..418b81b5 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -70,6 +70,6 @@ Remotes: Encoding: UTF-8 LazyData: true Roxygen: list(markdown = TRUE) -RoxygenNote: 7.2.1 +RoxygenNote: 7.2.3 VignetteBuilder: knitr Config/testthat/edition: 3 diff --git a/R/get_stations.R b/R/get_stations.R index f7504590..73e4e4d1 100644 --- a/R/get_stations.R +++ b/R/get_stations.R @@ -1,14 +1,14 @@ #' Get Stations #' -#' @param run_parallel default: TRUE -#' @param n_cores number of cores to use if \code{run_parallel = TRUE}. -#' Default: one less than the detected number of cores. -#' @param debug logical indicating whether or not to show debug messages #' @param type vector of character describing the type(s) of output(s) to be #' returned. Expected values (and default): \code{c("list", "data.frame", #' "crosstable")}. If only one value is given the data is returned in the #' expected type. If more than one values are given, a list is returned with #' one list element per type. +#' @param run_parallel default: TRUE +#' @param n_cores number of cores to use if \code{run_parallel = TRUE}. +#' Default: one less than the detected number of cores. +#' @param debug logical indicating whether or not to show debug messages #' @return list with general station "overview" (either as list "overview_list" #' or as data.frame "overview_df") and a crosstable with information which #' parameters is available per station ("x" if available, NA if not) @@ -24,10 +24,10 @@ #' str(stations) #' get_stations <- function( + type = c("list", "data.frame", "crosstable"), run_parallel = TRUE, n_cores = parallel::detectCores() - 1L, - debug = TRUE, - type = c("list", "data.frame", "crosstable") + debug = TRUE ) { expected_types <- c("list", "data.frame", "crosstable") diff --git a/man/get_stations.Rd b/man/get_stations.Rd index 2f7a1bd2..30b65e4b 100644 --- a/man/get_stations.Rd +++ b/man/get_stations.Rd @@ -4,13 +4,26 @@ \alias{get_stations} \title{Get Stations} \usage{ -get_stations(run_parallel = TRUE, n_cores = parallel::detectCores() - 1L) +get_stations( + type = c("list", "data.frame", "crosstable"), + run_parallel = TRUE, + n_cores = parallel::detectCores() - 1L, + debug = TRUE +) } \arguments{ +\item{type}{vector of character describing the type(s) of output(s) to be +returned. Expected values (and default): \code{c("list", "data.frame", + "crosstable")}. If only one value is given the data is returned in the +expected type. If more than one values are given, a list is returned with +one list element per type.} + \item{run_parallel}{default: TRUE} \item{n_cores}{number of cores to use if \code{run_parallel = TRUE}. Default: one less than the detected number of cores.} + +\item{debug}{logical indicating whether or not to show debug messages} } \value{ list with general station "overview" (either as list "overview_list" From 0809baa398209587d78e535196177872cd056b97 Mon Sep 17 00:00:00 2001 From: hsonne Date: Sun, 24 Sep 2023 08:26:29 +0200 Subject: [PATCH 03/43] Add test to check column names! Hopefully this helps decreasing the time required to fix a broken workflow due to changes on the Wasserportal platform --- ...t-function-get_wasserportal_masters_data.R | 41 ++++++++++++++----- 1 file changed, 30 insertions(+), 11 deletions(-) diff --git a/tests/testthat/test-function-get_wasserportal_masters_data.R b/tests/testthat/test-function-get_wasserportal_masters_data.R index d8e292f5..a48a097d 100644 --- a/tests/testthat/test-function-get_wasserportal_masters_data.R +++ b/tests/testthat/test-function-get_wasserportal_masters_data.R @@ -1,16 +1,35 @@ -# -# This test file has been generated by kwb.test::create_test_files() -# launched by user hsonne on 2023-09-23 23:10:07.960625. -# Your are strongly encouraged to modify the dummy functions -# so that real cases are tested. You should then delete this comment. -# +#library(testthat) test_that("get_wasserportal_masters_data() works", { - expect_error( - wasserportal:::get_wasserportal_masters_data() - # argument "master_urls" is missing, with no default - ) + f <- wasserportal:::get_wasserportal_masters_data -}) + expect_error(f()) + + # Ask for a non-existing URL + expect_message(capture.output(result <- f("no-such-url")), "Failed") + expect_identical(dim(result), c(0L, 0L)) + + # Find URLs for testing + # urls <- wasserportal::get_stations("list") %>% + # kwb.utils::selectElements("surface_water.water_level") %>% + # dplyr::filter(Betreiber == "Land Berlin") %>% + # dplyr::pull(stammdaten_link) + + url <- "https://wasserportal.berlin.de/station.php?anzeige=i&thema=ows&station=5866301" + expect_output(result <- f(url), "Importing 1 station metadata") + + expect_identical(names(result), c( + "Nummer", + "Name", + "Gewaesser", + "Betreiber", + "Auspraegung", + "Flusskilometer", + "Pegelnullpunkt_m_NHN", + "Rechtswert_UTM_33_N", + "Hochwert_UTM_33_N" + )) + +}) From c58db1692b9d02affeac0643a19a71a10e3ca782 Mon Sep 17 00:00:00 2001 From: hsonne Date: Sun, 24 Sep 2023 08:47:56 +0200 Subject: [PATCH 04/43] Add tests for three functions --- .../test-function-add_wasserportal_metadata.R | 18 ++++++--------- tests/testthat/test-function-as_date_de.R | 22 ++++++++++--------- tests/testthat/test-function-assert_date.R | 18 +++++---------- 3 files changed, 25 insertions(+), 33 deletions(-) diff --git a/tests/testthat/test-function-add_wasserportal_metadata.R b/tests/testthat/test-function-add_wasserportal_metadata.R index e7528965..0a992a6c 100644 --- a/tests/testthat/test-function-add_wasserportal_metadata.R +++ b/tests/testthat/test-function-add_wasserportal_metadata.R @@ -1,16 +1,12 @@ -# -# This test file has been generated by kwb.test::create_test_files() -# launched by user hsonne on 2023-09-23 23:10:33.492441. -# Your are strongly encouraged to modify the dummy functions -# so that real cases are tested. You should then delete this comment. -# - test_that("add_wasserportal_metadata() works", { - expect_error( - wasserportal:::add_wasserportal_metadata() - # argument "x" is missing, with no default + f <- wasserportal:::add_wasserportal_metadata + + expect_error(f()) + + expect_identical( + f("anything", c("one", "two", "three")), + structure("anything", metadata = "three") ) }) - diff --git a/tests/testthat/test-function-as_date_de.R b/tests/testthat/test-function-as_date_de.R index a79515f6..ead07607 100644 --- a/tests/testthat/test-function-as_date_de.R +++ b/tests/testthat/test-function-as_date_de.R @@ -1,15 +1,17 @@ -# -# This test file has been generated by kwb.test::create_test_files() -# launched by user hsonne on 2023-09-23 23:10:44.744923. -# Your are strongly encouraged to modify the dummy functions -# so that real cases are tested. You should then delete this comment. -# - test_that("as_date_de() works", { - expect_error( - wasserportal:::as_date_de() - # argument "x" is missing, with no default + f <- wasserportal:::as_date_de + + expect_error(f()) + + expect_identical( + f("31.12.2023"), + as.Date("2023-12-31") + ) + + expect_identical( + f(c("30.12.2023", "31.12.2023")), + as.Date(c("2023-12-30", "2023-12-31")) ) }) diff --git a/tests/testthat/test-function-assert_date.R b/tests/testthat/test-function-assert_date.R index 36548b3f..a3167d09 100644 --- a/tests/testthat/test-function-assert_date.R +++ b/tests/testthat/test-function-assert_date.R @@ -1,16 +1,10 @@ -# -# This test file has been generated by kwb.test::create_test_files() -# launched by user hsonne on 2023-09-23 23:10:44.744923. -# Your are strongly encouraged to modify the dummy functions -# so that real cases are tested. You should then delete this comment. -# - test_that("assert_date() works", { - expect_error( - wasserportal:::assert_date() - # argument "x" is missing, with no default - ) + f <- wasserportal:::assert_date -}) + expect_error(f()) + expect_identical(f(1), as.Date(1)) + + expect_error(f("a")) +}) From e67b87e2929652f53bf346e02b192a13e683d3ca Mon Sep 17 00:00:00 2001 From: hsonne Date: Sun, 24 Sep 2023 08:49:19 +0200 Subject: [PATCH 05/43] Improve/fix error message of assert_date() and return early --- R/utils.R | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/R/utils.R b/R/utils.R index d3644e39..8bf3e233 100644 --- a/R/utils.R +++ b/R/utils.R @@ -8,18 +8,21 @@ as_date_de <- function(x) #' @importFrom kwb.utils isTryError assert_date <- function(x) { - if (! inherits(x, "Date")) { + if (inherits(x, "Date")) { + return(x) + } - x <- try(as.Date(x)) + result <- try(as.Date(x), silent = TRUE) - if (kwb.utils::isTryError(x)) { - stop(call. = FALSE, sprintf( - "%s cannot be converted to a Date object!", deparse(substitute(x)) - )) - } + if (kwb.utils::isTryError(result)) { + stop(call. = FALSE, sprintf( + "%s cannot be converted to a Date object: %s", + deparse(substitute(x)), + as.character(result) + )) } - x + result } # columns_to_labels ------------------------------------------------------------ From 563f3267dff0d78a02226217844ebd5a9c722d81 Mon Sep 17 00:00:00 2001 From: hsonne Date: Sun, 24 Sep 2023 09:24:32 +0200 Subject: [PATCH 06/43] Add test for clean_timestamp_columns() --- .../test-function-clean_timestamp_columns.R | 27 ++++++++++--------- 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/tests/testthat/test-function-clean_timestamp_columns.R b/tests/testthat/test-function-clean_timestamp_columns.R index 8fc2e3fc..eccd176d 100644 --- a/tests/testthat/test-function-clean_timestamp_columns.R +++ b/tests/testthat/test-function-clean_timestamp_columns.R @@ -1,16 +1,19 @@ -# -# This test file has been generated by kwb.test::create_test_files() -# launched by user hsonne on 2023-09-23 23:10:33.492441. -# Your are strongly encouraged to modify the dummy functions -# so that real cases are tested. You should then delete this comment. -# - test_that("clean_timestamp_columns() works", { - expect_error( - wasserportal:::clean_timestamp_columns() - # argument "data" is missing, with no default - ) + f <- wasserportal:::clean_timestamp_columns -}) + expect_error(f()) + expect_error(f(data.frame(no_such_column = 1))) + expect_error(f(data.frame(Datum = 1))) + + data <- data.frame(Datum = "24.09.2023 12:00") + result <- f(data, include_raw_time = FALSE) + + expect_identical(result, data.frame( + LocalDateTime = data$Datum %>% + as.POSIXct(format = "%d.%m.%Y %H:%M", tz = "Etc/GMT-1") %>% + structure(tzone = "Europe/Berlin") + )) + +}) From 16f83e84695dbbb04bcb05aec5586715e703fc5f Mon Sep 17 00:00:00 2001 From: hsonne Date: Sun, 24 Sep 2023 09:56:31 +0200 Subject: [PATCH 07/43] Clean test script --- R/.test-surface-water_download.R | 106 ++++++++++++++++--------------- 1 file changed, 56 insertions(+), 50 deletions(-) diff --git a/R/.test-surface-water_download.R b/R/.test-surface-water_download.R index 0df84800..35a30146 100644 --- a/R/.test-surface-water_download.R +++ b/R/.test-surface-water_download.R @@ -1,51 +1,57 @@ -if (FALSE) { -library(wasserportal) - -stations <- wasserportal::get_stations() -stations_crosstable <- stations$crosstable - -stations_crosstable_bb <- stations_crosstable %>% - dplyr::filter(stringr::str_detect(.data$Messstellennummer, - pattern = "^[A-Z]{2}_")) - -stations_crosstable_berlin <- stations_crosstable %>% - dplyr::filter(stringr::str_detect(.data$Messstellennummer, - pattern = "^[A-Z]{2}_", - negate = TRUE)) - - - -station_crosstable_berlin <- stations_crosstable_berlin[1,] -stations_crosstable_berlin -from_date <- "1900-01-01" -sw_station_berlin_daily <- wasserportal::read_wasserportal_raw( - station = station_crosstable_berlin$Messstellennummer, - variable = get_station_variables(stations_crosstable_berlin)[1], - type = "daily", - from_date = from_date, - include_raw_time = TRUE, - stations_crosstable = stations_crosstable -) - -str(sw_station_berlin_daily) - - - -sw_stations_berlin_daily <- stats::setNames(lapply(stations_crosstable_berlin$Messstellennummer, - function(station) { - msg <- sprintf("Fetching data for station '%s'", station) - kwb.utils::catAndRun(msg, expr = { - wasserportal::read_wasserportal( - station = station, - type = "daily", - from_date = from_date, - include_raw_time = TRUE, - stations_crosstable = stations_crosstable -)})} -), nm = stations_crosstable$Messstellennummer) - -str(sw_stations_daily) - - +if (FALSE) +{ + `%>%` <- magrittr::`%>%` + + stations <- wasserportal::get_stations() + + stations_crosstable <- kwb.utils::selectElements(stations, "crosstable") + + stations_crosstable_bb <- stations_crosstable %>% + dplyr::filter(stringr::str_detect( + .data$Messstellennummer, + pattern = "^[A-Z]{2}_" + )) + + stations_crosstable_berlin <- stations_crosstable %>% + dplyr::filter(stringr::str_detect( + .data$Messstellennummer, + pattern = "^[A-Z]{2}_", + negate = TRUE + )) + + station_crosstable_berlin <- stations_crosstable_berlin[1L, ] + + stations_crosstable_berlin + + from_date <- "1900-01-01" + + sw_station_berlin_daily <- wasserportal::read_wasserportal_raw( + station = station_crosstable_berlin %>% + kwb.utils::selectColumns("Messstellennummer"), + variable = wasserportal::get_station_variables(stations_crosstable_berlin)[1], + type = "daily", + from_date = from_date, + include_raw_time = TRUE, + stations_crosstable = stations_crosstable + ) + + str(sw_station_berlin_daily) + + sw_stations_berlin_daily <- stations_crosstable_berlin %>% + kwb.utils::selectColumns("Messstellennummer") %>% + lapply(function(station) kwb.utils::catAndRun( + sprintf("Fetching data for station '%s'", station), + expr = wasserportal::read_wasserportal( + station = station, + type = "daily", + from_date = from_date, + include_raw_time = TRUE, + stations_crosstable = stations_crosstable + ) + )) %>% + stats::setNames( + kwb.utils::selectColumns(stations_crosstable, "Messstellennummer") + ) + + str(sw_stations_daily) } - From d304fc405350e72ce738b2d0d6973c3dfc5437c8 Mon Sep 17 00:00:00 2001 From: hsonne Date: Sun, 24 Sep 2023 09:59:46 +0200 Subject: [PATCH 08/43] Use new argument "type", do not use variable "station_crosstable_berlin" --- R/.test-surface-water_download.R | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/R/.test-surface-water_download.R b/R/.test-surface-water_download.R index 35a30146..816e4544 100644 --- a/R/.test-surface-water_download.R +++ b/R/.test-surface-water_download.R @@ -2,9 +2,7 @@ if (FALSE) { `%>%` <- magrittr::`%>%` - stations <- wasserportal::get_stations() - - stations_crosstable <- kwb.utils::selectElements(stations, "crosstable") + stations_crosstable <- wasserportal::get_stations(type = "crosstable") stations_crosstable_bb <- stations_crosstable %>% dplyr::filter(stringr::str_detect( @@ -19,14 +17,12 @@ if (FALSE) negate = TRUE )) - station_crosstable_berlin <- stations_crosstable_berlin[1L, ] - stations_crosstable_berlin from_date <- "1900-01-01" sw_station_berlin_daily <- wasserportal::read_wasserportal_raw( - station = station_crosstable_berlin %>% + station = stations_crosstable_berlin[1L, ] %>% kwb.utils::selectColumns("Messstellennummer"), variable = wasserportal::get_station_variables(stations_crosstable_berlin)[1], type = "daily", From a0972122b9d9d57f94dbb6ccea4e440708760f27 Mon Sep 17 00:00:00 2001 From: hsonne Date: Sun, 24 Sep 2023 10:53:10 +0200 Subject: [PATCH 09/43] Add arg "stations_list" to get_groundwater_data() --- R/get_groundwater_data.R | 17 ++++++++++++----- man/get_groundwater_data.Rd | 9 +++++++-- 2 files changed, 19 insertions(+), 7 deletions(-) diff --git a/R/get_groundwater_data.R b/R/get_groundwater_data.R index 474d6d0e..09573292 100644 --- a/R/get_groundwater_data.R +++ b/R/get_groundwater_data.R @@ -2,10 +2,12 @@ #' #' @description wrapper function to scrape all available raw data, i.e. groundwater #' level and quality data and save in list -#' @param stations stations list as retrieved by \code{\link{get_stations}} +#' @param stations list as retrieved by \code{\link{get_stations}}. +#' Deprecated. Please use \code{stations_list} instead #' @param groundwater_options as retrieved by \code{\link{get_groundwater_options}} #' @param debug print debug messages (default: TRUE) -#' +#' @param stations_list list of station metadata as returned by +#' \code{\link{get_stations}(type = "list")} #' @return list with elements "groundwater.level" and "groundwater.quality" data #' frames #' @export @@ -21,10 +23,16 @@ get_groundwater_data <- function( stations, groundwater_options = get_groundwater_options(), - debug = TRUE + debug = TRUE, + stations_list = NULL ) { #kwb.utils::assignPackageObjects("wasserportal") + + if (is.null(stations_list)) { + stations_list <- kwb.utils::selectElements(stations, "overview_list") + } + result <- lapply( X = seq_along(groundwater_options), FUN = function(i) { @@ -37,8 +45,7 @@ get_groundwater_data <- function( ), dbg = debug, expr = { - ids <- stations %>% - kwb.utils::selectElements("overview_list") %>% + ids <- stations_list %>% kwb.utils::selectElements(option_name) %>% kwb.utils::selectColumns("Messstellennummer") lapply( diff --git a/man/get_groundwater_data.Rd b/man/get_groundwater_data.Rd index 29565eae..524d65a4 100644 --- a/man/get_groundwater_data.Rd +++ b/man/get_groundwater_data.Rd @@ -7,15 +7,20 @@ get_groundwater_data( stations, groundwater_options = get_groundwater_options(), - debug = TRUE + debug = TRUE, + stations_list = NULL ) } \arguments{ -\item{stations}{stations list as retrieved by \code{\link{get_stations}}} +\item{stations}{list as retrieved by \code{\link{get_stations}}. +Deprecated. Please use \code{stations_list} instead} \item{groundwater_options}{as retrieved by \code{\link{get_groundwater_options}}} \item{debug}{print debug messages (default: TRUE)} + +\item{stations_list}{list of station metadata as returned by +\code{\link{get_stations}(type = "list")}} } \value{ list with elements "groundwater.level" and "groundwater.quality" data From 1de92a30810a101ba8d6f3b03fdd321e4021bd18 Mon Sep 17 00:00:00 2001 From: hsonne Date: Sun, 24 Sep 2023 10:55:10 +0200 Subject: [PATCH 10/43] Use get_stations() with arg "type" if possible --- R/get_wasserportal_masters_data.R | 41 ++++++++++++++++------------ R/list_masters_data_to_csv.R | 6 ++-- R/list_timeseries_data_to_zip.R | 2 ++ R/read_wasserportal.R | 12 ++++---- R/read_wasserportal_raw.R | 4 +-- R/wp_masters_data_to_list.R | 7 ++--- R/wp_timeseries_data_to_list.R | 7 ++--- man/get_wasserportal_master_data.Rd | 24 ++++++++++------ man/get_wasserportal_masters_data.Rd | 17 ++++++------ man/list_masters_data_to_csv.Rd | 6 ++-- man/list_timeseries_data_to_zip.Rd | 2 ++ man/read_wasserportal.Rd | 12 ++++---- man/read_wasserportal_raw.Rd | 4 +-- man/wp_masters_data_to_list.Rd | 7 ++--- man/wp_timeseries_data_to_list.Rd | 7 ++--- vignettes/groundwater.Rmd | 21 ++++++++------ vignettes/surface-water.Rmd | 7 +++-- vignettes/tutorial.Rmd | 29 ++++++++++++-------- 18 files changed, 119 insertions(+), 96 deletions(-) diff --git a/R/get_wasserportal_masters_data.R b/R/get_wasserportal_masters_data.R index 7033a210..5db9501b 100644 --- a/R/get_wasserportal_masters_data.R +++ b/R/get_wasserportal_masters_data.R @@ -1,8 +1,8 @@ #' Wasserportal Berlin: get master data for a multiple stations #' -#' @param master_urls urls with master data as retrieved by -#' \code{\link{get_stations}} and one of "overview_list" sublist elements -#' column name "stammdaten_link" +#' @param master_urls URLs to master data as found in column "stammdaten_link" +#' of the data frame returned by +#' \code{\link{get_stations}}\code{(type = "list")} #' @param run_parallel default: TRUE #' #' @return data frame with metadata for selected master urls @@ -11,11 +11,12 @@ #' @importFrom data.table rbindlist #' @examples #' \dontrun{ -#' stations <- wasserportal::get_stations() -#' ### Reduce to monitoring stations maintained by Berlin -#' master_urls <- stations$overview_list$surface_water.water_level %>% -#' dplyr::filter(.data$Betreiber == "Land Berlin") %>% -#' dplyr::pull(.data$stammdaten_link) +#' stations_list <- wasserportal::get_stations(type = "list") +#' +#' # Reduce to monitoring stations maintained by Berlin +#' master_urls <- stations_list$surface_water.water_level %>% +#' dplyr::filter(.data$Betreiber == "Land Berlin") %>% +#' dplyr::pull(.data$stammdaten_link) #' #' system.time(master_parallel <- get_wasserportal_masters_data( #' master_urls @@ -77,18 +78,24 @@ get_wasserportal_masters_data <- function( #' @export #' @examples #' \dontrun{ -#' stations <- wasserportal::get_stations() +#' stations_list <- wasserportal::get_stations(type = "list") +#' +#' # GW Station +#' master_url <- stations_list %>% +#' kwb.utils::selectElements("groundwater.level") %>% +#' kwb.utils::selectColumns("stammdaten_link")[1L] #' -#' ## GW Station -#' master_url <- stations$overview_list$groundwater.level$stammdaten_link[1] #' get_wasserportal_master_data(master_url) #' -#' ## SW Station -#' ### Reduce to monitoring stations maintained by Berlin -#' master_urls <- stations$overview_list$surface_water.water_level %>% -#' dplyr::filter(.data$Betreiber == "Land Berlin") %>% -#' dplyr::pull(.data$stammdaten_link) -#' get_wasserportal_master_data(master_urls[1]) +#' # SW Station +#' +#' # Reduce to monitoring stations maintained by Berlin +#' master_urls <- stations_list %>% +#' kwb.utils::selectElements("surface_water.water_level") %>% +#' dplyr::filter(.data$Betreiber == "Land Berlin") %>% +#' dplyr::pull(.data$stammdaten_link) +#' +#' get_wasserportal_master_data(master_urls[1L]) #' } #' get_wasserportal_master_data <- function(master_url) diff --git a/R/list_masters_data_to_csv.R b/R/list_masters_data_to_csv.R index 962d71b8..f221426c 100644 --- a/R/list_masters_data_to_csv.R +++ b/R/list_masters_data_to_csv.R @@ -1,15 +1,15 @@ #' Helper function: list masters data to csv #' #' @param masters_data_list masters data in list form as retrieved by -#' \code{\link{get_stations}} sublist element "overview_list" +#' \code{\link{get_stations}}\code{(type = "list")} #' @return loops through list of data frames and uses list names as filenames #' @export #' @importFrom readr write_csv #' @importFrom stringr str_replace #' @examples #' \dontrun{ -#' stations <- wasserportal::get_stations() -#' masters_data_csv_files <- wasserportal:list_masters_data_to_csv(stations$overview_list) +#' stations_list <- get_stations(type = "list") +#' masters_data_csv_files <- list_masters_data_to_csv(stations_list) #' masters_data_csv_files #' } list_masters_data_to_csv <- function(masters_data_list) diff --git a/R/list_timeseries_data_to_zip.R b/R/list_timeseries_data_to_zip.R index 79bcfef1..52267375 100644 --- a/R/list_timeseries_data_to_zip.R +++ b/R/list_timeseries_data_to_zip.R @@ -9,9 +9,11 @@ #' @examples #' \dontrun{ #' stations <- wasserportal::get_stations() +#' #' # Groundwater Time Series #' gw_tsdata_list <- wasserportal::get_groundwater_data(stations) #' gw_tsdata_files <- wasserportal::list_timeseries_data_to_zip(gw_tsdata_list) +#' #' # Surface Water Time Series #' sw_tsdata_list <- wasserportal::get_daily_surfacewater_data(stations) #' sw_tsdata_files <- wasserportal::list_timeseries_data_to_zip(sw_tsdata_list) diff --git a/R/read_wasserportal.R b/R/read_wasserportal.R index 75ffa05a..7d0eb398 100644 --- a/R/read_wasserportal.R +++ b/R/read_wasserportal.R @@ -12,7 +12,8 @@ #' together with the additional information on the UTC offset (column #' \code{UTCOffset}, 1 in winter, 2 in summer). #' -#' @param station station number, as returned by \code{\link{get_stations}} +#' @param station station number, as found in column "Messstellennummer" of the +#' data frame returned by \code{\link{get_stations}(type = "crosstable")} #' @param variables vector of variable identifiers, as returned by #' \code{\link{get_station_variables}} #' @param from_date \code{Date} object (or string in format "yyyy-mm-dd" that @@ -22,8 +23,8 @@ #' @param include_raw_time if \code{TRUE} the original time column and the #' column with the corrected winter time are included in the output. The #' default is \code{FALSE}. -#' @param stations_crosstable sublist `crosstable` as retrieved from -#' \code{\link{get_stations}} i.e. `get_stations()$crosstable` +#' @param stations_crosstable data frame as returned by +#' \code{\link{get_stations}(type = "crosstable")} #' @return data frame read from the CSV file that the download provides. #' IMPORTANT: It is not yet clear how to interpret the timestamp, see example #' @importFrom httr POST content @@ -32,15 +33,14 @@ #' @examples #' \dontrun{ #' # Get a list of available water quality stations and variables -#' stations <- wasserportal::get_stations() -#' stations_crosstable <- stations$crosstable +#' stations_crosstable <- wasserportal::get_stations(type = "crosstable") #' #' # Set the start date #' from_date <- "2021-03-01" #' #' # Read the timeseries (multiple variables for one station) #' water_quality <- wasserportal::read_wasserportal( -#' station = stations_crosstable$Messstellennummer[1], +#' station = stations_crosstable$Messstellennummer[1L], #' from_date = from_date, #' include_raw_time = TRUE, #' stations_crosstable = stations_crosstable diff --git a/R/read_wasserportal_raw.R b/R/read_wasserportal_raw.R index 0dfe56c1..c6d79404 100644 --- a/R/read_wasserportal_raw.R +++ b/R/read_wasserportal_raw.R @@ -8,8 +8,8 @@ #' @param type one of "single", "daily", "monthly" (default: "single") #' @param include_raw_time TRUE or FALSE (default: FALSE) #' @param handle handle (default: NULL) -#' @param stations_crosstable sublist `crosstable` as retrieved from \code{\link{get_stations}} -#' i.e. `get_stations()$crosstable` +#' @param stations_crosstable data frame as returned by +#' \code{\link{get_stations}(type = "crosstable")} #' @param api_version 1 integer number representing the version of #' wasserportal's API. 1L: before 2023, 2L: since 2023. Default: 2L #' @return ???? diff --git a/R/wp_masters_data_to_list.R b/R/wp_masters_data_to_list.R index 4d7c888c..ad79833b 100644 --- a/R/wp_masters_data_to_list.R +++ b/R/wp_masters_data_to_list.R @@ -1,7 +1,7 @@ #' Wasserportal Master Data: download and Import in R List #' -#' @param overview_list_names names of "overview_list" as retrieved by -#' \code{\link{get_stations}} +#' @param overview_list_names names of elements in the list returned by +#' \code{\link{get_stations}(type = "list")} #' @param target_dir target directory for downloading data (default: #' tempdir()) #' @param file_prefix prefix given to file names @@ -18,8 +18,7 @@ #' @importFrom withr with_dir #' @examples #' \dontrun{ -#' stations <- wasserportal::get_stations() -#' overview_list_names <- names(stations$overview_list) +#' overview_list_names <- names(wasserportal::get_stations(type = "list")) #' wp_masters_data_list <- wp_masters_data_to_list(overview_list_names) #' } wp_masters_data_to_list <- function( diff --git a/R/wp_timeseries_data_to_list.R b/R/wp_timeseries_data_to_list.R index f30363c3..31ffda8b 100644 --- a/R/wp_timeseries_data_to_list.R +++ b/R/wp_timeseries_data_to_list.R @@ -1,7 +1,7 @@ #' Wasserportal Time Series Data: download and Import in R List #' -#' @param overview_list_names names of "overview_list" as retrieved by -#' \code{\link{get_stations}} +#' @param overview_list_names names of elements in the list returned by +#' \code{\link{get_stations}(type = "list")} #' @param target_dir target directory for downloading data (default: #' tempdir()) #' @param is_zipped are the data to be downloaded zipped (default: @@ -17,8 +17,7 @@ #' @importFrom withr with_dir #' @examples #' \dontrun{ -#' stations <- wasserportal::get_stations() -#' overview_list_names <- names(stations$overview_list) +#' overview_list_names <- names(wasserportal::get_stations(type = "list")) #' wp_timeseries_data_list <- wp_timeseries_data_to_list(overview_list_names) #' } wp_timeseries_data_to_list <- function( diff --git a/man/get_wasserportal_master_data.Rd b/man/get_wasserportal_master_data.Rd index 5037b2c4..e321020c 100644 --- a/man/get_wasserportal_master_data.Rd +++ b/man/get_wasserportal_master_data.Rd @@ -18,18 +18,24 @@ Wasserportal Berlin: get master data for a single station } \examples{ \dontrun{ -stations <- wasserportal::get_stations() +stations_list <- wasserportal::get_stations(type = "list") + +# GW Station +master_url <- stations_list \%>\% + kwb.utils::selectElements("groundwater.level") \%>\% + kwb.utils::selectColumns("stammdaten_link")[1L] -## GW Station -master_url <- stations$overview_list$groundwater.level$stammdaten_link[1] get_wasserportal_master_data(master_url) -## SW Station -### Reduce to monitoring stations maintained by Berlin -master_urls <- stations$overview_list$surface_water.water_level \%>\% -dplyr::filter(.data$Betreiber == "Land Berlin") \%>\% -dplyr::pull(.data$stammdaten_link) -get_wasserportal_master_data(master_urls[1]) +# SW Station + +# Reduce to monitoring stations maintained by Berlin +master_urls <- stations_list \%>\% + kwb.utils::selectElements("surface_water.water_level") \%>\% + dplyr::filter(.data$Betreiber == "Land Berlin") \%>\% + dplyr::pull(.data$stammdaten_link) + +get_wasserportal_master_data(master_urls[1L]) } } diff --git a/man/get_wasserportal_masters_data.Rd b/man/get_wasserportal_masters_data.Rd index 23b4d418..f1763ac5 100644 --- a/man/get_wasserportal_masters_data.Rd +++ b/man/get_wasserportal_masters_data.Rd @@ -7,9 +7,9 @@ get_wasserportal_masters_data(master_urls, run_parallel = TRUE) } \arguments{ -\item{master_urls}{urls with master data as retrieved by -\code{\link{get_stations}} and one of "overview_list" sublist elements -column name "stammdaten_link"} +\item{master_urls}{URLs to master data as found in column "stammdaten_link" +of the data frame returned by +\code{\link{get_stations}}\code{(type = "list")}} \item{run_parallel}{default: TRUE} } @@ -21,11 +21,12 @@ Wasserportal Berlin: get master data for a multiple stations } \examples{ \dontrun{ -stations <- wasserportal::get_stations() -### Reduce to monitoring stations maintained by Berlin -master_urls <- stations$overview_list$surface_water.water_level \%>\% -dplyr::filter(.data$Betreiber == "Land Berlin") \%>\% -dplyr::pull(.data$stammdaten_link) +stations_list <- wasserportal::get_stations(type = "list") + +# Reduce to monitoring stations maintained by Berlin +master_urls <- stations_list$surface_water.water_level \%>\% + dplyr::filter(.data$Betreiber == "Land Berlin") \%>\% + dplyr::pull(.data$stammdaten_link) system.time(master_parallel <- get_wasserportal_masters_data( master_urls diff --git a/man/list_masters_data_to_csv.Rd b/man/list_masters_data_to_csv.Rd index ed841928..a3416ddb 100644 --- a/man/list_masters_data_to_csv.Rd +++ b/man/list_masters_data_to_csv.Rd @@ -8,7 +8,7 @@ list_masters_data_to_csv(masters_data_list) } \arguments{ \item{masters_data_list}{masters data in list form as retrieved by -\code{\link{get_stations}} sublist element "overview_list"} +\code{\link{get_stations}}\code{(type = "list")}} } \value{ loops through list of data frames and uses list names as filenames @@ -18,8 +18,8 @@ Helper function: list masters data to csv } \examples{ \dontrun{ -stations <- wasserportal::get_stations() -masters_data_csv_files <- wasserportal:list_masters_data_to_csv(stations$overview_list) +stations_list <- get_stations(type = "list") +masters_data_csv_files <- list_masters_data_to_csv(stations_list) masters_data_csv_files } } diff --git a/man/list_timeseries_data_to_zip.Rd b/man/list_timeseries_data_to_zip.Rd index 7ef81c6e..65238923 100644 --- a/man/list_timeseries_data_to_zip.Rd +++ b/man/list_timeseries_data_to_zip.Rd @@ -19,9 +19,11 @@ Helper function: list timeseries data to zip \examples{ \dontrun{ stations <- wasserportal::get_stations() + # Groundwater Time Series gw_tsdata_list <- wasserportal::get_groundwater_data(stations) gw_tsdata_files <- wasserportal::list_timeseries_data_to_zip(gw_tsdata_list) + # Surface Water Time Series sw_tsdata_list <- wasserportal::get_daily_surfacewater_data(stations) sw_tsdata_files <- wasserportal::list_timeseries_data_to_zip(sw_tsdata_list) diff --git a/man/read_wasserportal.Rd b/man/read_wasserportal.Rd index da707033..689264bb 100644 --- a/man/read_wasserportal.Rd +++ b/man/read_wasserportal.Rd @@ -14,7 +14,8 @@ read_wasserportal( ) } \arguments{ -\item{station}{station number, as returned by \code{\link{get_stations}}} +\item{station}{station number, as found in column "Messstellennummer" of the +data frame returned by \code{\link{get_stations}(type = "crosstable")}} \item{variables}{vector of variable identifiers, as returned by \code{\link{get_station_variables}}} @@ -29,8 +30,8 @@ which to request data. Default: \code{as.character(Sys.Date() - 90L)}} column with the corrected winter time are included in the output. The default is \code{FALSE}.} -\item{stations_crosstable}{sublist \code{crosstable} as retrieved from -\code{\link{get_stations}} i.e. \code{get_stations()$crosstable}} +\item{stations_crosstable}{data frame as returned by +\code{\link{get_stations}(type = "crosstable")}} } \value{ data frame read from the CSV file that the download provides. @@ -51,15 +52,14 @@ together with the additional information on the UTC offset (column \examples{ \dontrun{ # Get a list of available water quality stations and variables -stations <- wasserportal::get_stations() -stations_crosstable <- stations$crosstable +stations_crosstable <- wasserportal::get_stations(type = "crosstable") # Set the start date from_date <- "2021-03-01" # Read the timeseries (multiple variables for one station) water_quality <- wasserportal::read_wasserportal( - station = stations_crosstable$Messstellennummer[1], + station = stations_crosstable$Messstellennummer[1L], from_date = from_date, include_raw_time = TRUE, stations_crosstable = stations_crosstable diff --git a/man/read_wasserportal_raw.Rd b/man/read_wasserportal_raw.Rd index 9d752392..11d7d212 100644 --- a/man/read_wasserportal_raw.Rd +++ b/man/read_wasserportal_raw.Rd @@ -28,8 +28,8 @@ read_wasserportal_raw( \item{handle}{handle (default: NULL)} -\item{stations_crosstable}{sublist \code{crosstable} as retrieved from \code{\link{get_stations}} -i.e. \code{get_stations()$crosstable}} +\item{stations_crosstable}{data frame as returned by +\code{\link{get_stations}(type = "crosstable")}} \item{api_version}{1 integer number representing the version of wasserportal's API. 1L: before 2023, 2L: since 2023. Default: 2L} diff --git a/man/wp_masters_data_to_list.Rd b/man/wp_masters_data_to_list.Rd index fd5027a8..e5b7c809 100644 --- a/man/wp_masters_data_to_list.Rd +++ b/man/wp_masters_data_to_list.Rd @@ -12,8 +12,8 @@ wp_masters_data_to_list( ) } \arguments{ -\item{overview_list_names}{names of "overview_list" as retrieved by -\code{\link{get_stations}}} +\item{overview_list_names}{names of elements in the list returned by +\code{\link{get_stations}(type = "list")}} \item{target_dir}{target directory for downloading data (default: tempdir())} @@ -31,8 +31,7 @@ Wasserportal Master Data: download and Import in R List } \examples{ \dontrun{ -stations <- wasserportal::get_stations() -overview_list_names <- names(stations$overview_list) +overview_list_names <- names(wasserportal::get_stations(type = "list")) wp_masters_data_list <- wp_masters_data_to_list(overview_list_names) } } diff --git a/man/wp_timeseries_data_to_list.Rd b/man/wp_timeseries_data_to_list.Rd index 6c748b15..740d7d47 100644 --- a/man/wp_timeseries_data_to_list.Rd +++ b/man/wp_timeseries_data_to_list.Rd @@ -11,8 +11,8 @@ wp_timeseries_data_to_list( ) } \arguments{ -\item{overview_list_names}{names of "overview_list" as retrieved by -\code{\link{get_stations}}} +\item{overview_list_names}{names of elements in the list returned by +\code{\link{get_stations}(type = "list")}} \item{target_dir}{target directory for downloading data (default: tempdir())} @@ -28,8 +28,7 @@ Wasserportal Time Series Data: download and Import in R List } \examples{ \dontrun{ -stations <- wasserportal::get_stations() -overview_list_names <- names(stations$overview_list) +overview_list_names <- names(wasserportal::get_stations(type = "list")) wp_timeseries_data_list <- wp_timeseries_data_to_list(overview_list_names) } } diff --git a/vignettes/groundwater.Rmd b/vignettes/groundwater.Rmd index 134aacf4..d82918d5 100644 --- a/vignettes/groundwater.Rmd +++ b/vignettes/groundwater.Rmd @@ -50,11 +50,11 @@ cat_file_enumeration <- function(base_url, files) { ## Master Data ```{r master_data} -stations <- wasserportal::get_stations() +stations_list <- wasserportal::get_stations(type = "list") -is_gw <- stringr::str_detect(names(stations$overview_list), "groundwater") +is_gw <- stringr::str_detect(names(stations_list), "groundwater") -files <- wasserportal::list_masters_data_to_csv(stations$overview_list[is_gw]) +files <- wasserportal::list_masters_data_to_csv(stations_list[is_gw]) ``` The following groundwater master data `.csv` files are available for download: @@ -67,14 +67,17 @@ cat_file_enumeration(urls$gh_wasserportal, files) ```{r groundwater_data_raw_export} if (use_random_subset_of_stations) { - stations_bak <- stations - x <- stations$overview_list$groundwater.level[sample(876, 10), ] - stations$overview_list$groundwater.level <- x - x <- stations$overview_list$groundwater.quality[sample(208, 10), ] - stations$overview_list$groundwater.quality <- x + stations_list_bak <- stations_list + x <- stations_list$groundwater.level[sample(876, 10), ] + stations_list$groundwater.level <- x + x <- stations_list$groundwater.quality[sample(208, 10), ] + stations_list$groundwater.quality <- x } -gw_data_list <- wasserportal::get_groundwater_data(stations, debug = TRUE) +gw_data_list <- wasserportal::get_groundwater_data( + stations_list = stations_list, + debug = TRUE +) files <- wasserportal::list_timeseries_data_to_zip(gw_data_list) diff --git a/vignettes/surface-water.Rmd b/vignettes/surface-water.Rmd index 7d9ed22a..962592a5 100644 --- a/vignettes/surface-water.Rmd +++ b/vignettes/surface-water.Rmd @@ -37,10 +37,11 @@ cat_file_enumeration <- function(files) { library(wasserportal) stations <- wasserportal::get_stations() +stations_list <- kwb.utils::selectElements(stations, "overview_list") -is_sw <- stringr::str_detect(names(stations$overview_list), "surface") +is_sw <- stringr::str_detect(names(stations_list), "surface") -files <- wasserportal::list_masters_data_to_csv(stations$overview_list[is_sw]) +files <- wasserportal::list_masters_data_to_csv(stations_list[is_sw]) ``` The following surface water master data `.csv` files are available for download: @@ -85,7 +86,7 @@ cat_file_enumeration(files) ```{r surface_waterlevel} swl_master <- wasserportal::get_wasserportal_masters_data( - master_urls = stations$overview_list$surface_water.water_level %>% + master_urls = stations_list$surface_water.water_level %>% dplyr::filter(.data$Betreiber == "Land Berlin") %>% dplyr::pull(.data$stammdaten_link) ) diff --git a/vignettes/tutorial.Rmd b/vignettes/tutorial.Rmd index 112d1600..1bd59fb5 100644 --- a/vignettes/tutorial.Rmd +++ b/vignettes/tutorial.Rmd @@ -74,16 +74,21 @@ overview_options <- wasserportal::get_overview_options() str(overview_options) -system.time(stations <- wasserportal::get_stations()) +system.time( + stations <- wasserportal::get_stations(type = c("list", "crosstable")) +) str(stations) -write_pretty_json(stations$crosstable, "stations_crosstable.json") +stations_list <- kwb.utils::selectElements(stations, "list") +stations_crosstable <- kwb.utils::selectElements(stations, "crosstable") + +write_pretty_json(stations_crosstable, "stations_crosstable.json") ``` ```{r stations_crosstable} top_filter_datatable( - stations$crosstable, + stations_crosstable, "Data availabilty per monitoring station" ) ``` @@ -106,14 +111,14 @@ also_available( Overview data of GW level stations can be requested as shown below: ```{r stations_gwl_table_overview} -top_filter_datatable(stations$overview_list$groundwater.level) +top_filter_datatable(stations_list$groundwater.level) ``` Master data of GW level stations can be requested as shown below: ```{r stations_gwl_table_master} stations_gwl_master <- wasserportal::get_wasserportal_masters_data( - master_urls = stations$overview_list$groundwater.level$stammdaten_link + master_urls = stations_list$groundwater.level$stammdaten_link ) write_pretty_json(stations_gwl_master, "stations_gwl_master.json") @@ -136,7 +141,7 @@ GW level trend classification (provided by SenWeb) is visualized below. ##### Trend Classification Histogramm ```{r stations_gwl_trend} -gwl <- stations$overview_list$groundwater.level %>% +gwl <- stations_list$groundwater.level %>% dplyr::mutate(Datum = as.Date(Datum, format = "%d.%m.%Y")) text_low_levels <- c("extrem niedrig", "sehr niedrig", "niedrig") @@ -285,7 +290,7 @@ also_available( for total period available. ```{r test_gwl_download_single, eval = FALSE} -station_gwl <- stations$overview_list$groundwater.level[1L, ] +station_gwl <- stations_list$groundwater.level[1L, ] ncols <- 2:ncol(station_gwl) @@ -315,7 +320,7 @@ plotly::ggplotly(g) %>% debug <- FALSE gw_level_multi <- data.table::rbindlist(lapply( - stations$overview_list$groundwater.level$Messstellennummer, + stations_list$groundwater.level$Messstellennummer, function(id) kwb.utils::catAndRun( sprintf("Downloading Messstellennummer == '%s'", id), wasserportal::read_wasserportal_raw_gw(station = id, stype = "gwl"), @@ -326,7 +331,7 @@ gw_level_multi <- data.table::rbindlist(lapply( readr::write_csv(gw_level_multi, file = "groundwater_level.csv") # Plot 10 GW level -selected_stations <- stations$overview_list$groundwater.level$Messstellennummer[1:10] +selected_stations <- stations_list$groundwater.level$Messstellennummer[1:10] g <- gw_level_multi %>% dplyr::filter(Messstellennummer %in% selected_stations) %>% @@ -381,7 +386,7 @@ also_available( #### GW Quality: Download and Plotting One Station ```{r test_gwq_download_single, eval = FALSE} -station_gwq <- stations$overview_list$groundwater.quality[1L, ] +station_gwq <- stations_list$groundwater.quality[1L, ] ncols <- 2:ncol(station_gwq) @@ -413,7 +418,7 @@ plotly::ggplotly(g) %>% debug <- FALSE gw_quality_multi <- data.table::rbindlist(lapply( - stations$overview_list$groundwater.quality$Messstellennummer, + stations_list$groundwater.quality$Messstellennummer, function(id) kwb.utils::catAndRun( sprintf("Downloading Messstellennummer == '%s'", id), wasserportal::read_wasserportal_raw_gw(station = id, stype = "gwq"), @@ -424,7 +429,7 @@ gw_quality_multi <- data.table::rbindlist(lapply( readr::write_csv(gw_quality_multi, "groundwater_quality.csv") # Plot 10 GW quality -selected_stations <- stations$overview_list$groundwater.quality$Messstellennummer[1:10] +selected_stations <- stations_list$groundwater.quality$Messstellennummer[1:10] g <- gw_quality_multi %>% dplyr::filter(Messstellennummer %in% selected_stations) %>% From 548c93d2a61659212e9f8dbc516f7fea3fa56314 Mon Sep 17 00:00:00 2001 From: hsonne Date: Mon, 25 Sep 2023 03:15:06 +0200 Subject: [PATCH 11/43] Set origin in as.Date() --- R/utils.R | 2 +- tests/testthat/test-function-assert_date.R | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/R/utils.R b/R/utils.R index 8bf3e233..409aaaaa 100644 --- a/R/utils.R +++ b/R/utils.R @@ -12,7 +12,7 @@ assert_date <- function(x) return(x) } - result <- try(as.Date(x), silent = TRUE) + result <- try(as.Date(x, origin = "1970-01-01"), silent = TRUE) if (kwb.utils::isTryError(result)) { stop(call. = FALSE, sprintf( diff --git a/tests/testthat/test-function-assert_date.R b/tests/testthat/test-function-assert_date.R index a3167d09..c1efa108 100644 --- a/tests/testthat/test-function-assert_date.R +++ b/tests/testthat/test-function-assert_date.R @@ -4,7 +4,7 @@ test_that("assert_date() works", { expect_error(f()) - expect_identical(f(1), as.Date(1)) + expect_identical(f(1), as.Date(1, origin = "1970-01-01")) expect_error(f("a")) }) From 44554c59ddedd32304044255c0b6dd812140399b Mon Sep 17 00:00:00 2001 From: hsonne Date: Mon, 25 Sep 2023 03:57:24 +0200 Subject: [PATCH 12/43] Add more tests --- .../test-function-base_url_download.R | 8 ----- .../test-function-columns_to_labels.R | 19 +++++------- tests/testthat/test-function-date_string_de.R | 16 +++------- ...est-function-get_daily_surfacewater_data.R | 31 ++++++++++++------- 4 files changed, 32 insertions(+), 42 deletions(-) diff --git a/tests/testthat/test-function-base_url_download.R b/tests/testthat/test-function-base_url_download.R index 730abc81..d3b7901d 100644 --- a/tests/testthat/test-function-base_url_download.R +++ b/tests/testthat/test-function-base_url_download.R @@ -1,10 +1,3 @@ -# -# This test file has been generated by kwb.test::create_test_files() -# launched by user hsonne on 2023-09-23 23:10:46.168353. -# Your are strongly encouraged to modify the dummy functions -# so that real cases are tested. You should then delete this comment. -# - test_that("base_url_download() works", { result <- wasserportal:::base_url_download() @@ -13,4 +6,3 @@ test_that("base_url_download() works", { expect_type(result, "character") expect_true(startsWith(result, "https://")) }) - diff --git a/tests/testthat/test-function-columns_to_labels.R b/tests/testthat/test-function-columns_to_labels.R index 508f44ac..afd8cfae 100644 --- a/tests/testthat/test-function-columns_to_labels.R +++ b/tests/testthat/test-function-columns_to_labels.R @@ -1,16 +1,13 @@ -# -# This test file has been generated by kwb.test::create_test_files() -# launched by user hsonne on 2023-09-23 23:10:44.744923. -# Your are strongly encouraged to modify the dummy functions -# so that real cases are tested. You should then delete this comment. -# +#library(testthat) test_that("columns_to_labels() works", { - expect_error( - wasserportal:::columns_to_labels() - # argument "columns" is missing, with no default - ) + f <- wasserportal:::columns_to_labels -}) + expect_error(f()) + + result <- f(data.frame(a = 1, b = "x"), c("a", "b")) + expect_identical(result, c("a: 1, b: x")) + +}) diff --git a/tests/testthat/test-function-date_string_de.R b/tests/testthat/test-function-date_string_de.R index 0df950c4..5b74922e 100644 --- a/tests/testthat/test-function-date_string_de.R +++ b/tests/testthat/test-function-date_string_de.R @@ -1,16 +1,8 @@ -# -# This test file has been generated by kwb.test::create_test_files() -# launched by user hsonne on 2023-09-23 23:10:44.744923. -# Your are strongly encouraged to modify the dummy functions -# so that real cases are tested. You should then delete this comment. -# - test_that("date_string_de() works", { - expect_error( - wasserportal:::date_string_de() - # argument "x" is missing, with no default - ) + f <- wasserportal:::date_string_de -}) + expect_error(f()) + expect_identical(f(as.Date("2023-09-25")), "25.09.2023") +}) diff --git a/tests/testthat/test-function-get_daily_surfacewater_data.R b/tests/testthat/test-function-get_daily_surfacewater_data.R index 96675e60..b2fae620 100644 --- a/tests/testthat/test-function-get_daily_surfacewater_data.R +++ b/tests/testthat/test-function-get_daily_surfacewater_data.R @@ -1,16 +1,25 @@ -# -# This test file has been generated by kwb.test::create_test_files() -# launched by user hsonne on 2023-09-23 23:09:31.879282. -# Your are strongly encouraged to modify the dummy functions -# so that real cases are tested. You should then delete this comment. -# - test_that("get_daily_surfacewater_data() works", { - expect_error( - wasserportal:::get_daily_surfacewater_data() - # argument "stations" is missing, with no default + f <- wasserportal:::get_daily_surfacewater_data + + expect_error(f()) + + stations <- wasserportal::get_stations( + type = c("list", "crosstable"), + debug = FALSE ) -}) + tmp <- stations$overview_list$surface_water.water_level[1L, ] + stations$overview_list$surface_water.water_level <- tmp + expect_warning(capture.output(result <- f( + stations, + variables = c(surface_water.water_level = "ows") + ))) + + expect_identical( + names(result$surface_water.water_level), + c("Messstellennummer", "Datum", "Tagesmittelwert", "Parameter", "Einheit") + ) + +}) From b7b72984f4c3b62b5ba09bd2b4fdef281bbedba4 Mon Sep 17 00:00:00 2001 From: hsonne Date: Tue, 26 Sep 2023 07:54:42 +0200 Subject: [PATCH 13/43] Add more tests or clean existing test files (remove header) --- .../test-function-get_groundwater_data.R | 1 + .../test-function-get_groundwater_options.R | 7 ---- ...st-function-get_non_external_station_ids.R | 23 ++++++------ .../test-function-get_overview_options.R | 7 ---- .../test-function-get_station_variables.R | 21 ++++++----- ...test-function-get_surfacewater_variables.R | 7 ---- ...l_and_body_for_groundwater_data_download.R | 20 +++++------ ...st-function-get_wasserportal_master_data.R | 36 ++++++++++++------- .../test-function-get_wasserportal_stations.R | 7 ---- ...function-get_wasserportal_stations_table.R | 7 ---- .../test-function-get_wasserportal_text.R | 23 ++++++------ .../test-function-get_wasserportal_url.R | 18 ++++------ ...test-function-get_wasserportal_variables.R | 7 ---- .../testthat/test-function-is_external_link.R | 17 +++------ .../test-function-merge_raw_results_daily.R | 8 ----- .../test-function-print_invalid_hrefs.R | 20 +++++------ ...-function-repair_wasserportal_timestamps.R | 14 ++------ .../testthat/test-function-to_base_filename.R | 18 ++++------ .../test-function-warning_not_implemented.R | 16 +++------ .../testthat/test-function-wp_data_to_list.R | 22 ++++++------ ...test-function-wp_timeseries_data_to_list.R | 15 +++----- 21 files changed, 114 insertions(+), 200 deletions(-) diff --git a/tests/testthat/test-function-get_groundwater_data.R b/tests/testthat/test-function-get_groundwater_data.R index 659edfea..ff17af42 100644 --- a/tests/testthat/test-function-get_groundwater_data.R +++ b/tests/testthat/test-function-get_groundwater_data.R @@ -30,4 +30,5 @@ test_that("get_groundwater_data() works", { expect_true(all( sapply(result, kwb.utils::mainClass) == "data.table" )) + }) diff --git a/tests/testthat/test-function-get_groundwater_options.R b/tests/testthat/test-function-get_groundwater_options.R index 8e164b4b..cdc67e50 100644 --- a/tests/testthat/test-function-get_groundwater_options.R +++ b/tests/testthat/test-function-get_groundwater_options.R @@ -1,10 +1,3 @@ -# -# This test file has been generated by kwb.test::create_test_files() -# launched by user hsonne on 2023-09-23 23:09:38.376262. -# Your are strongly encouraged to modify the dummy functions -# so that real cases are tested. You should then delete this comment. -# - test_that("get_groundwater_options() works", { result <- wasserportal:::get_groundwater_options() diff --git a/tests/testthat/test-function-get_non_external_station_ids.R b/tests/testthat/test-function-get_non_external_station_ids.R index e7abcac0..1eaeb24e 100644 --- a/tests/testthat/test-function-get_non_external_station_ids.R +++ b/tests/testthat/test-function-get_non_external_station_ids.R @@ -1,16 +1,19 @@ -# -# This test file has been generated by kwb.test::create_test_files() -# launched by user hsonne on 2023-09-23 23:09:31.879282. -# Your are strongly encouraged to modify the dummy functions -# so that real cases are tested. You should then delete this comment. -# +#library(testthat) test_that("get_non_external_station_ids() works", { - expect_error( - wasserportal:::get_non_external_station_ids() - # argument "station_data" is missing, with no default + f <- wasserportal:::get_non_external_station_ids + + expect_error(f()) + + portal_url <- "https://wasserportal.berlin.de" + + station_data <- kwb.utils::noFactorDataFrame( + Messstellennummer = as.character(1:4), + Betreiber = c("any", "any", "Land Berlin", "Land Berlin"), + stammdaten_link = c("any", portal_url, "any", portal_url) ) -}) + expect_identical(f(station_data), "4") +}) diff --git a/tests/testthat/test-function-get_overview_options.R b/tests/testthat/test-function-get_overview_options.R index a5c48a9e..881356c7 100644 --- a/tests/testthat/test-function-get_overview_options.R +++ b/tests/testthat/test-function-get_overview_options.R @@ -1,10 +1,3 @@ -# -# This test file has been generated by kwb.test::create_test_files() -# launched by user hsonne on 2023-09-23 23:09:38.527944. -# Your are strongly encouraged to modify the dummy functions -# so that real cases are tested. You should then delete this comment. -# - test_that("get_overview_options() works", { result <- wasserportal:::get_overview_options() diff --git a/tests/testthat/test-function-get_station_variables.R b/tests/testthat/test-function-get_station_variables.R index 5a165915..f231aa25 100644 --- a/tests/testthat/test-function-get_station_variables.R +++ b/tests/testthat/test-function-get_station_variables.R @@ -1,16 +1,15 @@ -# -# This test file has been generated by kwb.test::create_test_files() -# launched by user hsonne on 2023-09-23 23:09:41.860028. -# Your are strongly encouraged to modify the dummy functions -# so that real cases are tested. You should then delete this comment. -# - test_that("get_station_variables() works", { - expect_error( - wasserportal:::get_station_variables() - # argument "station_df" is missing, with no default + f <- wasserportal:::get_station_variables + + expect_error(f()) + + station_df <- data.frame( + Messstellennummer = 1:3, + Messstellenname = LETTERS[1:3], + a = 2:4, + b = 3:5 ) + expect_identical(f(station_df), c("a", "b")) }) - diff --git a/tests/testthat/test-function-get_surfacewater_variables.R b/tests/testthat/test-function-get_surfacewater_variables.R index 81f73f8c..9c5477d9 100644 --- a/tests/testthat/test-function-get_surfacewater_variables.R +++ b/tests/testthat/test-function-get_surfacewater_variables.R @@ -1,10 +1,3 @@ -# -# This test file has been generated by kwb.test::create_test_files() -# launched by user hsonne on 2023-09-23 23:09:31.879282. -# Your are strongly encouraged to modify the dummy functions -# so that real cases are tested. You should then delete this comment. -# - test_that("get_surfacewater_variables() works", { result <- wasserportal:::get_surfacewater_variables() diff --git a/tests/testthat/test-function-get_url_and_body_for_groundwater_data_download.R b/tests/testthat/test-function-get_url_and_body_for_groundwater_data_download.R index 566ef5a1..e3bdae14 100644 --- a/tests/testthat/test-function-get_url_and_body_for_groundwater_data_download.R +++ b/tests/testthat/test-function-get_url_and_body_for_groundwater_data_download.R @@ -1,16 +1,12 @@ -# -# This test file has been generated by kwb.test::create_test_files() -# launched by user hsonne on 2023-09-23 23:10:43.430779. -# Your are strongly encouraged to modify the dummy functions -# so that real cases are tested. You should then delete this comment. -# - test_that("get_url_and_body_for_groundwater_data_download() works", { - expect_error( - wasserportal:::get_url_and_body_for_groundwater_data_download() - # argument "stype" is missing, with no default - ) + f <- wasserportal:::get_url_and_body_for_groundwater_data_download -}) + expect_error(f()) + result <- f(stype = 1, type = "daily", from_date = "2000", station = 1) + + expect_type(result, "list") + expect_identical(names(result), c("url", "body")) + expect_identical(result$body, list()) +}) diff --git a/tests/testthat/test-function-get_wasserportal_master_data.R b/tests/testthat/test-function-get_wasserportal_master_data.R index a0f46d3f..bca51122 100644 --- a/tests/testthat/test-function-get_wasserportal_master_data.R +++ b/tests/testthat/test-function-get_wasserportal_master_data.R @@ -1,16 +1,28 @@ -# -# This test file has been generated by kwb.test::create_test_files() -# launched by user hsonne on 2023-09-23 23:10:07.960625. -# Your are strongly encouraged to modify the dummy functions -# so that real cases are tested. You should then delete this comment. -# - test_that("get_wasserportal_master_data() works", { - expect_error( - wasserportal:::get_wasserportal_master_data() - # argument "master_url" is missing, with no default - ) + f <- wasserportal::get_wasserportal_master_data -}) + expect_error(f()) + expect_error(f("no-such-url"), "refers to an external") + expect_error(f("https://wasserportal.berlin.de/no-such-url"), "error 404") + + # wasserportal::get_wasserportal_stations_table()$stammdaten_link[1L] + url <- "https://wasserportal.berlin.de/station.php?anzeige=i&thema=gws&station=1" + result <- f(url) + expect_identical(names(result), c( + "Nummer", + "Bezirk", + "Betreiber", + "Auspraegung", + "Grundwasserleiter", + "Gelaendeoberkante_GOK_m_ue_NHN", + "Rohroberkante_m_ue_NHN", + "Filteroberkante_m_u_GOK", + "Filterunterkante_m_u_GOK", + "Rechtswert_UTM_33_N", + "Hochwert_UTM_33_N" + )) + + expect_identical(nrow(result), 1L) +}) diff --git a/tests/testthat/test-function-get_wasserportal_stations.R b/tests/testthat/test-function-get_wasserportal_stations.R index ddb26156..e0ca666c 100644 --- a/tests/testthat/test-function-get_wasserportal_stations.R +++ b/tests/testthat/test-function-get_wasserportal_stations.R @@ -1,10 +1,3 @@ -# -# This test file has been generated by kwb.test::create_test_files() -# launched by user hsonne on 2023-09-23 23:10:08.05047. -# Your are strongly encouraged to modify the dummy functions -# so that real cases are tested. You should then delete this comment. -# - test_that("get_wasserportal_stations() works", { result <- wasserportal:::get_wasserportal_stations() diff --git a/tests/testthat/test-function-get_wasserportal_stations_table.R b/tests/testthat/test-function-get_wasserportal_stations_table.R index f633b2e8..926fea5f 100644 --- a/tests/testthat/test-function-get_wasserportal_stations_table.R +++ b/tests/testthat/test-function-get_wasserportal_stations_table.R @@ -1,10 +1,3 @@ -# -# This test file has been generated by kwb.test::create_test_files() -# launched by user hsonne on 2023-09-23 23:10:16.145095. -# Your are strongly encouraged to modify the dummy functions -# so that real cases are tested. You should then delete this comment. -# - test_that("get_wasserportal_stations_table() works", { result <- wasserportal:::get_wasserportal_stations_table() diff --git a/tests/testthat/test-function-get_wasserportal_text.R b/tests/testthat/test-function-get_wasserportal_text.R index 75d89b56..3a8fce07 100644 --- a/tests/testthat/test-function-get_wasserportal_text.R +++ b/tests/testthat/test-function-get_wasserportal_text.R @@ -1,16 +1,17 @@ -# -# This test file has been generated by kwb.test::create_test_files() -# launched by user hsonne on 2023-09-23 23:10:33.492441. -# Your are strongly encouraged to modify the dummy functions -# so that real cases are tested. You should then delete this comment. -# - test_that("get_wasserportal_text() works", { - expect_error( - wasserportal:::get_wasserportal_text() - # argument "variable_ids" is missing, with no default + f <- wasserportal:::get_wasserportal_text + + expect_error(f()) + + expect_identical( + "Reading 'my_variable' for station 1 (my_station)", + f( + station = 1L, + variable = 2L, + station_ids = c(my_station = 1L), + variable_ids = c(my_variable = 2L) + ) ) }) - diff --git a/tests/testthat/test-function-get_wasserportal_url.R b/tests/testthat/test-function-get_wasserportal_url.R index 32c68346..9c2959b9 100644 --- a/tests/testthat/test-function-get_wasserportal_url.R +++ b/tests/testthat/test-function-get_wasserportal_url.R @@ -1,16 +1,12 @@ -# -# This test file has been generated by kwb.test::create_test_files() -# launched by user hsonne on 2023-09-23 23:10:33.492441. -# Your are strongly encouraged to modify the dummy functions -# so that real cases are tested. You should then delete this comment. -# - test_that("get_wasserportal_url() works", { - expect_error( - wasserportal:::get_wasserportal_url() - # argument "station" is missing, with no default + f <- wasserportal:::get_wasserportal_url + + expect_error(f()) + + expect_identical( + f(123, 456), + "https://wasserportal.berlin.de/station.php?sstation=123&anzeige=456d" ) }) - diff --git a/tests/testthat/test-function-get_wasserportal_variables.R b/tests/testthat/test-function-get_wasserportal_variables.R index 0c4a8862..43329280 100644 --- a/tests/testthat/test-function-get_wasserportal_variables.R +++ b/tests/testthat/test-function-get_wasserportal_variables.R @@ -1,10 +1,3 @@ -# -# This test file has been generated by kwb.test::create_test_files() -# launched by user hsonne on 2023-09-23 23:10:16.872839. -# Your are strongly encouraged to modify the dummy functions -# so that real cases are tested. You should then delete this comment. -# - test_that("get_wasserportal_variables() works", { result <- wasserportal:::get_wasserportal_variables() diff --git a/tests/testthat/test-function-is_external_link.R b/tests/testthat/test-function-is_external_link.R index 75be19d4..95cb928b 100644 --- a/tests/testthat/test-function-is_external_link.R +++ b/tests/testthat/test-function-is_external_link.R @@ -1,16 +1,9 @@ -# -# This test file has been generated by kwb.test::create_test_files() -# launched by user hsonne on 2023-09-23 23:10:17.601624. -# Your are strongly encouraged to modify the dummy functions -# so that real cases are tested. You should then delete this comment. -# - test_that("is_external_link() works", { - expect_error( - wasserportal:::is_external_link() - # argument "url" is missing, with no default - ) + f <- wasserportal:::is_external_link -}) + expect_error(f()) + expect_true(f("is-not-wasserportal-url")) + expect_false(f(wasserportal:::wasserportal_base_url())) +}) diff --git a/tests/testthat/test-function-merge_raw_results_daily.R b/tests/testthat/test-function-merge_raw_results_daily.R index da7eb2d3..e8600482 100644 --- a/tests/testthat/test-function-merge_raw_results_daily.R +++ b/tests/testthat/test-function-merge_raw_results_daily.R @@ -1,10 +1,3 @@ -# -# This test file has been generated by kwb.test::create_test_files() -# launched by user hsonne on 2023-09-23 23:10:31.576169. -# Your are strongly encouraged to modify the dummy functions -# so that real cases are tested. You should then delete this comment. -# - test_that("merge_raw_results_daily() works", { f <- wasserportal:::merge_raw_results_daily @@ -20,4 +13,3 @@ test_that("merge_raw_results_daily() works", { expect_identical(result, dfs) }) - diff --git a/tests/testthat/test-function-print_invalid_hrefs.R b/tests/testthat/test-function-print_invalid_hrefs.R index 871918c6..b896e843 100644 --- a/tests/testthat/test-function-print_invalid_hrefs.R +++ b/tests/testthat/test-function-print_invalid_hrefs.R @@ -1,16 +1,12 @@ -# -# This test file has been generated by kwb.test::create_test_files() -# launched by user hsonne on 2023-09-23 23:10:16.145095. -# Your are strongly encouraged to modify the dummy functions -# so that real cases are tested. You should then delete this comment. -# - test_that("print_invalid_hrefs() works", { - expect_error( - wasserportal:::print_invalid_hrefs() - # argument "hrefs" is missing, with no default - ) + f <- wasserportal:::print_invalid_hrefs -}) + expect_error(f()) + + expect_null(f(1)) + invalid <- c("a", "b", "c") + + expect_message(capture.output(f(structure(1, invalid = invalid)))) +}) diff --git a/tests/testthat/test-function-repair_wasserportal_timestamps.R b/tests/testthat/test-function-repair_wasserportal_timestamps.R index 55e75acc..d1f4eacb 100644 --- a/tests/testthat/test-function-repair_wasserportal_timestamps.R +++ b/tests/testthat/test-function-repair_wasserportal_timestamps.R @@ -1,16 +1,6 @@ -# -# This test file has been generated by kwb.test::create_test_files() -# launched by user hsonne on 2023-09-23 23:10:33.492441. -# Your are strongly encouraged to modify the dummy functions -# so that real cases are tested. You should then delete this comment. -# - test_that("repair_wasserportal_timestamps() works", { - expect_error( - wasserportal:::repair_wasserportal_timestamps() - # argument "timestamps" is missing, with no default - ) + f <- wasserportal:::repair_wasserportal_timestamps + expect_error(f()) }) - diff --git a/tests/testthat/test-function-to_base_filename.R b/tests/testthat/test-function-to_base_filename.R index 9af65de9..97d85b16 100644 --- a/tests/testthat/test-function-to_base_filename.R +++ b/tests/testthat/test-function-to_base_filename.R @@ -1,16 +1,10 @@ -# -# This test file has been generated by kwb.test::create_test_files() -# launched by user hsonne on 2023-09-23 23:10:17.601624. -# Your are strongly encouraged to modify the dummy functions -# so that real cases are tested. You should then delete this comment. -# - test_that("to_base_filename() works", { - expect_error( - wasserportal:::to_base_filename() - # argument "x" is missing, with no default - ) + f <- wasserportal:::to_base_filename -}) + expect_error(f()) + expect_identical(f("a_b"), "a-b") + expect_identical(f("a.b"), "a_b") + expect_identical(f("a_b.c"), "a-b_c") +}) diff --git a/tests/testthat/test-function-warning_not_implemented.R b/tests/testthat/test-function-warning_not_implemented.R index 7e174d33..b57ae6b9 100644 --- a/tests/testthat/test-function-warning_not_implemented.R +++ b/tests/testthat/test-function-warning_not_implemented.R @@ -1,16 +1,8 @@ -# -# This test file has been generated by kwb.test::create_test_files() -# launched by user hsonne on 2023-09-23 23:10:31.576169. -# Your are strongly encouraged to modify the dummy functions -# so that real cases are tested. You should then delete this comment. -# - test_that("warning_not_implemented() works", { - expect_error( - wasserportal:::warning_not_implemented() - # argument "x" is missing, with no default - ) + f <- wasserportal:::warning_not_implemented -}) + expect_error(f()) + expect_warning(f("abc")) +}) diff --git a/tests/testthat/test-function-wp_data_to_list.R b/tests/testthat/test-function-wp_data_to_list.R index 240c5425..771072b9 100644 --- a/tests/testthat/test-function-wp_data_to_list.R +++ b/tests/testthat/test-function-wp_data_to_list.R @@ -1,16 +1,14 @@ -# -# This test file has been generated by kwb.test::create_test_files() -# launched by user hsonne on 2023-09-23 23:10:46.168353. -# Your are strongly encouraged to modify the dummy functions -# so that real cases are tested. You should then delete this comment. -# - test_that("wp_data_to_list() works", { - expect_error( - wasserportal:::wp_data_to_list() - # argument "target_dir" is missing, with no default - ) + f <- wasserportal:::wp_data_to_list -}) + expect_error(f()) + # f( + # overview_list_names = "no-such-file", + # target_dir = "abc", + # modify_filenames = identity, + # is_zipped = FALSE + # ) + +}) diff --git a/tests/testthat/test-function-wp_timeseries_data_to_list.R b/tests/testthat/test-function-wp_timeseries_data_to_list.R index 3d014858..48135f1c 100644 --- a/tests/testthat/test-function-wp_timeseries_data_to_list.R +++ b/tests/testthat/test-function-wp_timeseries_data_to_list.R @@ -1,16 +1,9 @@ -# -# This test file has been generated by kwb.test::create_test_files() -# launched by user hsonne on 2023-09-23 23:10:58.111249. -# Your are strongly encouraged to modify the dummy functions -# so that real cases are tested. You should then delete this comment. -# +#library(testthat) test_that("wp_timeseries_data_to_list() works", { - expect_error( - wasserportal:::wp_timeseries_data_to_list() - # argument "overview_list_names" is missing, with no default - ) + f <- wasserportal:::wp_timeseries_data_to_list + expect_error(f()) + expect_error(suppressWarnings(f("no-such-file"))) }) - From 4e53e8fc70b6488696dcd7b1ff92eb6042b94998 Mon Sep 17 00:00:00 2001 From: hsonne Date: Mon, 2 Oct 2023 12:50:40 +0200 Subject: [PATCH 14/43] Fix :bug: in tutorial.Rmd: correct element name --- vignettes/tutorial.Rmd | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/vignettes/tutorial.Rmd b/vignettes/tutorial.Rmd index 1bd59fb5..8a858014 100644 --- a/vignettes/tutorial.Rmd +++ b/vignettes/tutorial.Rmd @@ -68,7 +68,6 @@ ggplot2_date_value <- function(data, col) { ```{r stations_overview} # install.packages("remotes") # remotes::install_github("kwb-r/wasserportal", upgrade = "never", force = TRUE) -library(wasserportal) overview_options <- wasserportal::get_overview_options() @@ -80,7 +79,7 @@ system.time( str(stations) -stations_list <- kwb.utils::selectElements(stations, "list") +stations_list <- kwb.utils::selectElements(stations, "overview_list") stations_crosstable <- kwb.utils::selectElements(stations, "crosstable") write_pretty_json(stations_crosstable, "stations_crosstable.json") From cc755a2685e81d4a211f1cdc17133d06ab8b7323 Mon Sep 17 00:00:00 2001 From: hsonne Date: Mon, 2 Oct 2023 13:07:11 +0200 Subject: [PATCH 15/43] Load the pipe operator --- vignettes/tutorial.Rmd | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/vignettes/tutorial.Rmd b/vignettes/tutorial.Rmd index 8a858014..46adb13f 100644 --- a/vignettes/tutorial.Rmd +++ b/vignettes/tutorial.Rmd @@ -20,9 +20,11 @@ knitr::opts_chunk$set( is_ghactions <- identical(Sys.getenv("CI"), "true") ``` -## Define Helper Functions +## Load Pipe Operator and Define Helper Functions ```{r define_helpers} +`%>%` <- magrittr::`%>%` + write_pretty_json <- function(x, path) { jsonlite::write_json(x, path = path, pretty = TRUE) } From a9e44f4b30edc028704ce77bcc92b3cdc7213cc6 Mon Sep 17 00:00:00 2001 From: hsonne Date: Mon, 2 Oct 2023 13:07:45 +0200 Subject: [PATCH 16/43] Separate failing test into three single tests --- tests/testthat/test-function-get_stations.R | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/tests/testthat/test-function-get_stations.R b/tests/testthat/test-function-get_stations.R index fdf86b87..61bbe48c 100644 --- a/tests/testthat/test-function-get_stations.R +++ b/tests/testthat/test-function-get_stations.R @@ -1,3 +1,5 @@ +#library(testthat) + test_that("get_stations() works", { f <- wasserportal:::get_stations @@ -45,10 +47,16 @@ test_that("get_stations() works", { # Check output of all types expect_output(result_all <- f()) - expect_identical(result_all, list( - overview_list = result_list, - overview_df = result_df, - crosstable = result_crosstable - )) + expect_identical( + names(result_all), + c("overview_list", "overview_df", "crosstable") + ) + + # It is possible that new data arrived since the two calls of the function... + # Which check fails? + + expect_identical(result_all[["overview_list"]], result_list) + expect_identical(result_all[["overview_df"]], result_df) + expect_identical(result_all[["crosstable"]], result_crosstable) }) From f3efeb9c987ec82d23b40c1bfe87d0c88ab5639f Mon Sep 17 00:00:00 2001 From: hsonne Date: Mon, 2 Oct 2023 13:23:25 +0200 Subject: [PATCH 17/43] Check for missing header row assuming that "start_line:length(textlines)" caused the "argument of length 0" error that occurred during CI --- R/read_wasserportal_raw_gw.R | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/R/read_wasserportal_raw_gw.R b/R/read_wasserportal_raw_gw.R index fa81dbbb..09ed62d5 100644 --- a/R/read_wasserportal_raw_gw.R +++ b/R/read_wasserportal_raw_gw.R @@ -47,7 +47,16 @@ read_wasserportal_raw_gw <- function( # Split the text into separate lines textlines <- strsplit(text, "\n")[[1L]] - start_line <- which(startsWith(textlines, "Datum")) + date_pattern <- "Datum" + start_line <- which(startsWith(textlines, date_pattern)) + + if (length(start_line) == 0L) { + kwb.utils::stopFormatted( + "Could not find the header row (starting with '%s')", + date_pattern + ) + } + textlines <- textlines[start_line:length(textlines)] # Split the header row into fields From b84a98c85d3f3afbe212935bbb21a034dc3c8720 Mon Sep 17 00:00:00 2001 From: hsonne Date: Mon, 2 Oct 2023 13:55:41 +0200 Subject: [PATCH 18/43] Ignore measurement columns in test because they may differ when the time difference between two function calls is too big --- tests/testthat/test-function-get_stations.R | 35 +++++++++++++++++++-- 1 file changed, 33 insertions(+), 2 deletions(-) diff --git a/tests/testthat/test-function-get_stations.R b/tests/testthat/test-function-get_stations.R index 61bbe48c..23abd755 100644 --- a/tests/testthat/test-function-get_stations.R +++ b/tests/testthat/test-function-get_stations.R @@ -56,7 +56,38 @@ test_that("get_stations() works", { # It is possible that new data arrived since the two calls of the function... # Which check fails? - expect_identical(result_all[["overview_list"]], result_list) - expect_identical(result_all[["overview_df"]], result_df) + remove_measurements <- function(x) { + position_date <- which(names(x) == "Datum") + x[, -c(position_date, position_date + 1L)] + } + + # Compare the list versions (without measurement columns) + x <- result_all[["overview_list"]] + y <- result_list + + expect_identical(names(x), names(y)) + + expect_true(all(sapply(names(x), function(name) identical( + remove_measurements(x[[name]]), + remove_measurements(y[[name]]) + )))) + + # Compare the data frame versions + x <- result_all[["overview_df"]] + y <- result_df + + expect_identical(names(x), names(y)) + + expect_identical(x, y) # may fail + + skip_columns <- c("Datum", "Wasserstand") + + for (column in setdiff(names(x), skip_columns)) { + if (!identical(x[[column]], y[[column]])) { + stop("difference in column '", column, "'") + } + } + + # Compare crosstable versions expect_identical(result_all[["crosstable"]], result_crosstable) }) From a0fb7993509bf730e3bf9fb1caa64994ca7245da Mon Sep 17 00:00:00 2001 From: hsonne Date: Mon, 2 Oct 2023 14:04:28 +0200 Subject: [PATCH 19/43] Remove the failing test, use alternative below! --- tests/testthat/test-function-get_stations.R | 2 -- 1 file changed, 2 deletions(-) diff --git a/tests/testthat/test-function-get_stations.R b/tests/testthat/test-function-get_stations.R index 23abd755..3cf8dc8b 100644 --- a/tests/testthat/test-function-get_stations.R +++ b/tests/testthat/test-function-get_stations.R @@ -78,8 +78,6 @@ test_that("get_stations() works", { expect_identical(names(x), names(y)) - expect_identical(x, y) # may fail - skip_columns <- c("Datum", "Wasserstand") for (column in setdiff(names(x), skip_columns)) { From 2d7fe905fbdd89991152aa3a7b5db480761d5aa0 Mon Sep 17 00:00:00 2001 From: hsonne Date: Tue, 10 Oct 2023 14:14:52 +0200 Subject: [PATCH 20/43] Handle NA in get_non_external_station_ids() and improve the readability of the test --- R/get_daily_surfacewater_data.R | 2 +- .../test-function-get_non_external_station_ids.R | 16 ++++++++++++---- 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/R/get_daily_surfacewater_data.R b/R/get_daily_surfacewater_data.R index d8bd402b..f4456491 100644 --- a/R/get_daily_surfacewater_data.R +++ b/R/get_daily_surfacewater_data.R @@ -97,7 +97,7 @@ get_non_external_station_ids <- function(station_data) pull <- kwb.utils::createAccessor(station_data) is_external <- is_external_link(pull("stammdaten_link")) - is_berlin <- pull("Betreiber") == "Land Berlin" + is_berlin <- kwb.utils::defaultIfNA(pull("Betreiber"), "") == "Land Berlin" # Identifiers of monitoring stations to loop through as.character(pull("Messstellennummer")[is_berlin & !is_external]) diff --git a/tests/testthat/test-function-get_non_external_station_ids.R b/tests/testthat/test-function-get_non_external_station_ids.R index 1eaeb24e..80293c29 100644 --- a/tests/testthat/test-function-get_non_external_station_ids.R +++ b/tests/testthat/test-function-get_non_external_station_ids.R @@ -8,12 +8,20 @@ test_that("get_non_external_station_ids() works", { portal_url <- "https://wasserportal.berlin.de" - station_data <- kwb.utils::noFactorDataFrame( - Messstellennummer = as.character(1:4), - Betreiber = c("any", "any", "Land Berlin", "Land Berlin"), - stammdaten_link = c("any", portal_url, "any", portal_url) + station_data <- read.table(sep = ",", header = TRUE, text = " + Messstellennummer,Betreiber,stammdaten_link + 1,any,any + 2,any,https://wasserportal.berlin.de + 3,Land Berlin,any + 4,Land Berlin,https://wasserportal.berlin.de + 5,,https://wasserportal.berlin.de" ) + is_empty <- station_data$Betreiber == "" + expect_identical(f(station_data), "4") + station_data$Betreiber[is_empty] <- NA + + expect_identical(f(station_data), "4") }) From 1a1fa7f253f45cc3129c804fdea1fcd83a4ed070 Mon Sep 17 00:00:00 2001 From: hsonne Date: Tue, 10 Oct 2023 16:40:39 +0200 Subject: [PATCH 21/43] Allow read_wasserportal_raw() to fail and give the names of the corresponding variables in the error message --- R/read_wasserportal.R | 49 ++++++++++++++++++++++++++++--------------- 1 file changed, 32 insertions(+), 17 deletions(-) diff --git a/R/read_wasserportal.R b/R/read_wasserportal.R index 7d0eb398..6a36ed88 100644 --- a/R/read_wasserportal.R +++ b/R/read_wasserportal.R @@ -87,11 +87,22 @@ read_wasserportal <- function( ) { #kwb.utils::assignPackageObjects("wasserportal") - #station=get_wasserportal_stations(type = "flow")$Tiefwerder - #variables = get_wasserportal_variables(station);from_date = "2019-01-01";include_raw_time = FALSE - station_crosstable <- stations_crosstable[stations_crosstable$Messstellennummer == station,] + + #station <- "5825500" + #variables <- c("ows", "odf") + #from_date <- as.character(Sys.Date() - 90L) + #type = "single" + #include_raw_time = FALSE + #stations_crosstable <- get_stations(type = "crosstable") + + station_crosstable <- stations_crosstable[stations_crosstable$Messstellennummer == station, ] + variable_ids <- get_station_variables(station_crosstable) - if(is.null(variables)) variables <- variable_ids + + if (is.null(variables)) { + variables <- variable_ids + } + station_ids <- stations_crosstable[["Messstellennummer"]] stopifnot(all(station %in% station_ids)) @@ -101,26 +112,30 @@ read_wasserportal <- function( handle <- httr::handle_find(get_wasserportal_url(0, 0)) - dfs <- lapply( - X = variables, - FUN = read_wasserportal_raw, - station = station, - from_date = from_date, - type = type, - include_raw_time = include_raw_time, - handle = handle, - stations_crosstable = stations_crosstable - - ) + dfs <- lapply(variables, function(variable) { + try(read_wasserportal_raw( + variable, + station = station, + from_date = from_date, + type = type, + include_raw_time = include_raw_time, + handle = handle, + stations_crosstable = stations_crosstable + )) + }) # Remove elements of class "response" that are returned in case of an error failed <- sapply(dfs, function(df) { - inherits(df, "response") || length(df) == 0 + kwb.utils::isTryError(df) || inherits(df, "response") || length(df) == 0 }) if (any(failed)) { kwb.utils::catAndRun( - sprintf("Removing %d elements that are empty or failed", sum(failed)), + sprintf( + "Removing %d elements that are empty or failed (variables: %s)", + sum(failed), + kwb.utils::stringList(variables[failed]) + ), expr = { failures <- dfs[failed] dfs <- dfs[! failed] From 59f52c059ca001a7ff237d278c358447841a0900 Mon Sep 17 00:00:00 2001 From: ma-z-am <43271536+ma-z-am@users.noreply.github.com> Date: Mon, 11 Mar 2024 23:38:23 +0100 Subject: [PATCH 22/43] Add variables as list names in merge_raw_results_single(), so they can be used by mergeAll() --- R/read_wasserportal.R | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/R/read_wasserportal.R b/R/read_wasserportal.R index 75ffa05a..38b0a3d3 100644 --- a/R/read_wasserportal.R +++ b/R/read_wasserportal.R @@ -182,7 +182,9 @@ merge_raw_results_single <- function(dfs, variables, include_raw_time) backbone$row <- seq_len(nrow(backbone)) - data_frames <- c(list(base = backbone), dfs) + data_frames <- c(list(backbone), dfs) + + names(data_frames) <- c("base", variables) result <- kwb.utils::mergeAll( data_frames, by = keys, all.x = TRUE, dbg = FALSE From 4e0a7307f3a190e8e322b7fe9c4c8021b34d5418 Mon Sep 17 00:00:00 2001 From: mrustl Date: Wed, 3 Apr 2024 12:15:40 +0200 Subject: [PATCH 23/43] Fix GW level and quality --- DESCRIPTION | 60 ++++++++++++++------------------- LICENSE | 2 +- LICENSE.md | 2 +- NEWS.md | 7 ++++ R/get_groundwater_data.R | 3 +- R/get_overview_options.R | 3 +- R/read_wasserportal_raw_gw.R | 18 +++++----- _pkgdown.yml | 4 +++ man/read_wasserportal_raw_gw.Rd | 8 ++--- 9 files changed, 55 insertions(+), 52 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 418b81b5..d5380e23 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,29 +1,20 @@ Package: wasserportal -Title: R Package with Functions for Scraping Data of - Wasserportal Berlin -Version: 0.3.0 -Authors@R: - c(person(given = "Hauke", - family = "Sonnenberg", - role = "aut", - email = "hauke.sonnenberg@kompetenz-wasser.de", - comment = c(ORCID = "0000-0001-9134-2871")), - person(given = "Michael", - family = "Rustler", - role = c("aut","cre"), - email = "michael.rustler@kompetenz-wasser.de", - comment = c(ORCID = "0000-0003-0647-7726")), - person(given = "DWC", - role = "fnd"), - person(given = "IMPETUS", - role = "fnd"), - person(given = "PROMISCES", - role = "fnd"), - person(given = "Kompetenzzentrum Wasser Berlin gGmbH (KWB)", - role = "cph")) -Description: R Package with Functions for Scraping Data of - Wasserportal Berlin (https://wasserportal.berlin.de), which contains - real-time data of surface water and groundwater monitoring stations. +Title: R Package with Functions for Scraping Data of Wasserportal Berlin +Version: 0.3.1 +Authors@R: c( + person("Hauke", "Sonnenberg", , "hauke.sonnenberg@kompetenz-wasser.de", role = "aut", + comment = c(ORCID = "0000-0001-9134-2871")), + person("Michael", "Rustler", , "michael.rustler@kompetenz-wasser.de", role = c("aut", "cre"), + comment = c(ORCID = "0000-0003-0647-7726")), + person("AD4GD", role = "fnd"), + person("DWC", role = "fnd"), + person("IMPETUS", role = "fnd"), + person("PROMISCES", role = "fnd"), + person("Kompetenzzentrum Wasser Berlin gGmbH (KWB)", role = "cph") + ) +Description: R Package with Functions for Scraping Data of Wasserportal + Berlin (https://wasserportal.berlin.de), which contains real-time data + of surface water and groundwater monitoring stations. License: MIT + file LICENSE URL: https://github.com/KWB-R/wasserportal BugReports: https://github.com/KWB-R/wasserportal/issues @@ -48,28 +39,29 @@ Suggests: covr, DT, forcats, - htmlwidgets, - janitor, - jsonlite, - leaflet, ggplot2, gridExtra, htmltools, + htmlwidgets, + janitor, + jsonlite, knitr, kwb.pkgbuild, + leaflet, openxlsx, plotly, rmarkdown, sf, - tidyselect, - testthat (>= 3.0.0) + testthat (>= 3.0.0), + tidyselect +VignetteBuilder: + knitr Remotes: github::kwb-r/kwb.datetime, github::kwb-r/kwb.pkgbuild, github::kwb-r/kwb.utils +Config/testthat/edition: 3 Encoding: UTF-8 LazyData: true Roxygen: list(markdown = TRUE) -RoxygenNote: 7.2.3 -VignetteBuilder: knitr -Config/testthat/edition: 3 +RoxygenNote: 7.3.1 diff --git a/LICENSE b/LICENSE index 6ea097a0..80b34b19 100644 --- a/LICENSE +++ b/LICENSE @@ -1,6 +1,6 @@ MIT License -Copyright (c) 2021-2022 Kompetenzzentrum Wasser Berlin gGmbH (KWB) +Copyright (c) 2021-2024 Kompetenzzentrum Wasser Berlin gGmbH (KWB) Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/LICENSE.md b/LICENSE.md index a385d77a..484ddea3 100644 --- a/LICENSE.md +++ b/LICENSE.md @@ -1,6 +1,6 @@ # MIT License -Copyright (c) 2021-2022 Kompetenzzentrum Wasser Berlin gGmbH (KWB) +Copyright (c) 2021-2024 Kompetenzzentrum Wasser Berlin gGmbH (KWB) Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/NEWS.md b/NEWS.md index 33d3f17d..0e48cf17 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,10 @@ +# [wasserportal 0.3.1](https://github.com/KWB-R/wasserportal/releases/tag/v0.3.1) 2024-04-04 + +* Bugfix for groundwater level and quality due to new Wasserportal API +* Add project [AD4GD](https://www.kompetenz-wasser.de/de/forschung/projekte/ad4gd) +as funder + + # [wasserportal 0.3.0](https://github.com/KWB-R/wasserportal/releases/tag/v0.3.0) 2023-02-19 * Fix errors in GitHub actions: use actions from branches `v2`, `v3`, not from diff --git a/R/get_groundwater_data.R b/R/get_groundwater_data.R index 09573292..c3c2c622 100644 --- a/R/get_groundwater_data.R +++ b/R/get_groundwater_data.R @@ -82,6 +82,5 @@ get_groundwater_options <- function () is_groundwater <- startsWith(names(overview_options), "groundwater") - overview_options[is_groundwater] %>% - gsub(pattern = "gws", replacement = "gwl") + overview_options[is_groundwater] } diff --git a/R/get_overview_options.R b/R/get_overview_options.R index 80159343..d631108d 100644 --- a/R/get_overview_options.R +++ b/R/get_overview_options.R @@ -17,7 +17,8 @@ get_overview_options <- function() conductivity = "olf", ph = "oph", oxygen_concentration = "oog", - oxygen_saturation = "oos" + oxygen_saturation = "oos" #, + #quality = "opq" ), groundwater = list( level = "gws", diff --git a/R/read_wasserportal_raw_gw.R b/R/read_wasserportal_raw_gw.R index 09ed62d5..d1f7e5a2 100644 --- a/R/read_wasserportal_raw_gw.R +++ b/R/read_wasserportal_raw_gw.R @@ -3,25 +3,25 @@ #' read_wasserportal_raw_gw #' #' @param station station id -#' @param stype "gwl" or "gwq" +#' @param stype "gws" or "gwq" #' @param type "single" or "single_all" (if stype = "gwq") #' @param from_date (default: "") #' @param include_raw_time default: FALSE #' @param handle default: NULL #' -#' @return data.frame with values (currently only if stype == "gwl") +#' @return data.frame with values #' @export #' @importFrom stringr str_remove str_extract #' @importFrom tidyr pivot_longer #' @importFrom dplyr select filter mutate #' @examples #' \dontrun{ -#' read_wasserportal_raw_gw(station = 149, stype = "gwl") +#' read_wasserportal_raw_gw(station = 149, stype = "gws") #' read_wasserportal_raw_gw(station = 149, stype = "gwq") #' } read_wasserportal_raw_gw <- function( station = 149, - stype = "gwl", + stype = "gws", type = "single_all", from_date = "", include_raw_time = FALSE, @@ -72,7 +72,7 @@ read_wasserportal_raw_gw <- function( data <- read(text, header = FALSE, skip = start_line) # Get the numbers of the data columns - if (type != "monthly" && stype == "gwl") { + if (type != "monthly" && stype == "gws") { stopifnot(ncol(data) == 2L) } @@ -80,7 +80,7 @@ read_wasserportal_raw_gw <- function( names(data) <- header_fields[seq_len(ncol(data))] stype_options <- list( - gwl = list( + gws = list( par_remove_pattern = "\\s+\\(.*\\)", unit_extract_pattern = "\\(.*\\)", unit_remove_pattern = "\\(|\\)" @@ -148,7 +148,7 @@ get_url_and_body_for_groundwater_data_download <- function( ) } - download_shortcuts <- list(gwl = "g", gwq = "q") + download_shortcuts <- list(gws = "g", gwq = "q") download_shortcut <- if (stype %in% names(download_shortcuts)) { download_shortcuts[[stype]] @@ -193,9 +193,9 @@ get_url_and_body_for_groundwater_data_download <- function( "/station.php?", "anzeige=d", # download "&station=", station, - "&sreihe=ew", + "&sreihe=", sreihe, "&smode=c", # data format (= csv?) - "&thema=gws", + "&thema=", stype, "&exportthema=gw", "&sdatum=", sdatum, "&senddatum=", senddatum diff --git a/_pkgdown.yml b/_pkgdown.yml index dd461709..22b3b5ab 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -5,6 +5,10 @@ authors: href: https://github.com/hsonne Michael Rustler: href: https://mrustl.de + AD4GD: + href: https://www.kompetenz-wasser.de/en/forschung/projekte/ad4gd + html: Project AD4GD DWC: href: https://www.kompetenz-wasser.de/en/forschung/projekte/dwc html: Date: Wed, 3 Apr 2024 12:30:57 +0200 Subject: [PATCH 24/43] Fix test --- tests/testthat/test-function-get_groundwater_data.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/testthat/test-function-get_groundwater_data.R b/tests/testthat/test-function-get_groundwater_data.R index ff17af42..96fde00e 100644 --- a/tests/testthat/test-function-get_groundwater_data.R +++ b/tests/testthat/test-function-get_groundwater_data.R @@ -14,10 +14,10 @@ test_that("get_groundwater_data() works", { stations <- list( overview_list = list( groundwater.level = data.frame( - Messstellennummer = 1 + Messstellennummer = 3 ), groundwater.quality = data.frame( - Messstellennummer = 1 + Messstellennummer = 3 ) ) ) From 4cac918905c0499828f3dcbc46299dbe6a12f8a3 Mon Sep 17 00:00:00 2001 From: mrustl Date: Thu, 4 Apr 2024 15:41:58 +0200 Subject: [PATCH 25/43] Add surface water quality download function --- DESCRIPTION | 2 +- NAMESPACE | 3 ++ NEWS.md | 4 +- R/get_overview_options.R | 4 +- R/get_surfacewater_qualities.R | 45 ++++++++++++++++ R/get_surfacewater_quality.R | 87 +++++++++++++++++++++++++++++++ man/get_surfacewater_qualities.Rd | 27 ++++++++++ man/get_surfacewater_quality.Rd | 26 +++++++++ 8 files changed, 194 insertions(+), 4 deletions(-) create mode 100644 R/get_surfacewater_qualities.R create mode 100644 R/get_surfacewater_quality.R create mode 100644 man/get_surfacewater_qualities.Rd create mode 100644 man/get_surfacewater_quality.Rd diff --git a/DESCRIPTION b/DESCRIPTION index d5380e23..d9d59bdd 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: wasserportal Title: R Package with Functions for Scraping Data of Wasserportal Berlin -Version: 0.3.1 +Version: 0.4.0 Authors@R: c( person("Hauke", "Sonnenberg", , "hauke.sonnenberg@kompetenz-wasser.de", role = "aut", comment = c(ORCID = "0000-0001-9134-2871")), diff --git a/NAMESPACE b/NAMESPACE index 8cf724a8..26d33fe9 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -9,6 +9,8 @@ export(get_groundwater_options) export(get_overview_options) export(get_station_variables) export(get_stations) +export(get_surfacewater_qualities) +export(get_surfacewater_quality) export(get_surfacewater_variables) export(get_wasserportal_master_data) export(get_wasserportal_masters_data) @@ -41,6 +43,7 @@ importFrom(dplyr,select_if) importFrom(fs,dir_create) importFrom(httr,POST) importFrom(httr,content) +importFrom(httr,http_error) importFrom(kwb.datetime,textToEuropeBerlinPosix) importFrom(kwb.utils,catAndRun) importFrom(kwb.utils,getAttribute) diff --git a/NEWS.md b/NEWS.md index 0e48cf17..8ae781c4 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,7 @@ -# [wasserportal 0.3.1](https://github.com/KWB-R/wasserportal/releases/tag/v0.3.1) 2024-04-04 +# [wasserportal 0.4.0](https://github.com/KWB-R/wasserportal/releases/tag/v0.4.0) 2024-04-05 +* New feature: add support for downloading all available surface water quality +data for one or multiple monitoring stations. For details see `get_surfacewater_qualities()` * Bugfix for groundwater level and quality due to new Wasserportal API * Add project [AD4GD](https://www.kompetenz-wasser.de/de/forschung/projekte/ad4gd) as funder diff --git a/R/get_overview_options.R b/R/get_overview_options.R index d631108d..2187f4be 100644 --- a/R/get_overview_options.R +++ b/R/get_overview_options.R @@ -17,8 +17,8 @@ get_overview_options <- function() conductivity = "olf", ph = "oph", oxygen_concentration = "oog", - oxygen_saturation = "oos" #, - #quality = "opq" + oxygen_saturation = "oos", + quality = "opq" ), groundwater = list( level = "gws", diff --git a/R/get_surfacewater_qualities.R b/R/get_surfacewater_qualities.R new file mode 100644 index 00000000..2adc2ef7 --- /dev/null +++ b/R/get_surfacewater_qualities.R @@ -0,0 +1,45 @@ +#' Get Surface Water Quality for Multiple Monitoring Stations +#' +#' @param station_ids vector with ids of multiple (or one) monitoring stations +#' @param dbg print debug messages (default: TRUE) +#' @return data frame with water quality data for multiple monitoring stations +#' @export +#' @importFrom dplyr bind_rows +#' @examples +#' \dontrun{ +#' stations <- wasserportal::get_stations() +#' station_ids <- stations$overview_list$surface_water.quality$Messstellennummer +#' swq <- wasserportal::get_surfacewater_qualities(station_ids) +#' str(swq) +#' } +get_surfacewater_qualities <- function(station_ids, dbg = TRUE) { + n_stations <- length(station_ids) + kwb.utils::catAndRun( + messageText = "Downloading surface water quality data", + newLine = 3, + expr = { + swq_list <- lapply( + station_ids, + FUN = function (station_id) { + n <- which(station_id == station_ids) + + kwb.utils::catAndRun( + messageText = sprintf( + "%02d/%02d: station_id = '%s'", + n, + n_stations, + station_id + ), + expr = { + get_surfacewater_quality(station_id) + }, + dbg = dbg + ) + } + ) + }, + dbg = dbg) + + dplyr::bind_rows(swq_list) + +} diff --git a/R/get_surfacewater_quality.R b/R/get_surfacewater_quality.R new file mode 100644 index 00000000..743fc642 --- /dev/null +++ b/R/get_surfacewater_quality.R @@ -0,0 +1,87 @@ +#' Get Surface Water Quality for One Monitoring Station +#' +#' @param station_id id of surface water measurement station +#' +#' @return data frame with water quality data for one monitoring station +#' @export +#' @importFrom kwb.utils stopFormatted +#' @importFrom httr content http_error +#' @importFrom stringr str_detect str_remove +#' @examples +#' \dontrun{ +#' stations <- wasserportal::get_stations() +#' station_id <- stations$overview_list$surface_water.quality$Messstellennummer[1] +#' swq <- wasserportal::get_surfacewater_quality(station_id) +#' str(swq) +#' } +#' +get_surfacewater_quality <- function(station_id) { + + sreihe <- "wa" + stype <- "opq" + exportthema <- "pq" + sdatum <- "01.01.1900" + senddatum <- date_string_de(Sys.Date()) + + url <- paste0( + wasserportal_base_url(), + "/station.php?", + "anzeige=d", # download + "&station=", station_id, + "&sreihe=", sreihe, + "&smode=c", # data format (= csv?) + "&thema=", stype, + "&exportthema=", exportthema, + "&sdatum=", sdatum, + "&senddatum=", senddatum + ) + + # Post the request to the web server + response <- httr::POST(url) + + if (httr::http_error(response)) { + message("POST request failed. Returning the response object.") + return(response) + } + + # Read the response of the web server as text + text <- httr::content(response, as = "text", encoding = "Latin1") + + # Split the text into separate lines + textlines <- strsplit(text, "\n")[[1L]] + + + date_pattern <- "Datum" + start_line <- which(stringr::str_detect(textlines, date_pattern)) + + if (length(start_line) == 0L) { + kwb.utils::stopFormatted( + "Could not find the header row (starting with '%s')", + date_pattern + ) + } + + textlines <- textlines[start_line:length(textlines)] + + # Split the header row into fields + header_fields <- as.character(read(textlines[1L])) %>% + stringr::str_remove("/Parameter:$") + + # Return empty list with metadata if no data rows are available + if (length(textlines) == 1L) { + return(add_wasserportal_metadata(list(), header_fields)) + } + + # Read the data rows + data <- read(text, header = FALSE, skip = start_line) + + # Get the numbers of the data columns + if (stype == "opq") { + stopifnot(ncol(data) == 10L) + } + + # Name the data columns as given in the first columns of the header row + names(data) <- header_fields[seq_len(ncol(data))] + + data +} diff --git a/man/get_surfacewater_qualities.Rd b/man/get_surfacewater_qualities.Rd new file mode 100644 index 00000000..52c7c72c --- /dev/null +++ b/man/get_surfacewater_qualities.Rd @@ -0,0 +1,27 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/get_surfacewater_qualities.R +\name{get_surfacewater_qualities} +\alias{get_surfacewater_qualities} +\title{Get Surface Water Quality for Multiple Monitoring Stations} +\usage{ +get_surfacewater_qualities(station_ids, dbg = TRUE) +} +\arguments{ +\item{station_ids}{vector with ids of multiple (or one) monitoring stations} + +\item{dbg}{print debug messages (default: TRUE)} +} +\value{ +data frame with water quality data for multiple monitoring stations +} +\description{ +Get Surface Water Quality for Multiple Monitoring Stations +} +\examples{ +\dontrun{ +stations <- wasserportal::get_stations() +station_ids <- stations$overview_list$surface_water.quality$Messstellennummer +swq <- wasserportal::get_surfacewater_qualities(station_ids) +str(swq) +} +} diff --git a/man/get_surfacewater_quality.Rd b/man/get_surfacewater_quality.Rd new file mode 100644 index 00000000..0514a5f8 --- /dev/null +++ b/man/get_surfacewater_quality.Rd @@ -0,0 +1,26 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/get_surfacewater_quality.R +\name{get_surfacewater_quality} +\alias{get_surfacewater_quality} +\title{Get Surface Water Quality for One Monitoring Station} +\usage{ +get_surfacewater_quality(station_id) +} +\arguments{ +\item{station_id}{id of surface water measurement station} +} +\value{ +data frame with water quality data for one monitoring station +} +\description{ +Get Surface Water Quality for One Monitoring Station +} +\examples{ +\dontrun{ +stations <- wasserportal::get_stations() +station_id <- stations$overview_list$surface_water.quality$Messstellennummer[1] +swq <- wasserportal::get_surfacewater_quality(station_id) +str(swq) +} + +} From c71546d30403c9d6431e0ff6fd31372b943c4bfa Mon Sep 17 00:00:00 2001 From: mrustl Date: Thu, 4 Apr 2024 16:32:22 +0200 Subject: [PATCH 26/43] Fix to get daily SW data working again! --- R/get_daily_surfacewater_data.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/R/get_daily_surfacewater_data.R b/R/get_daily_surfacewater_data.R index f4456491..8f259e33 100644 --- a/R/get_daily_surfacewater_data.R +++ b/R/get_daily_surfacewater_data.R @@ -87,7 +87,8 @@ get_daily_surfacewater_data <- function( get_surfacewater_variables <- function() { variables <- unlist(get_overview_options()) - variables[startsWith(names(variables), "surface")] + variables <- variables[startsWith(names(variables), "surface")] + variables[variables != "opq"] } # get_non_external_station_ids ------------------------------------------------- From 8efabcfa15ead36e19006e7f5c36a219e5d1b73d Mon Sep 17 00:00:00 2001 From: mrustl Date: Fri, 5 Apr 2024 09:06:13 +0200 Subject: [PATCH 27/43] Fix DWC logo --- _pkgdown.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/_pkgdown.yml b/_pkgdown.yml index 22b3b5ab..3397fe9e 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -11,7 +11,7 @@ authors: alt='Project AD4GD' width='72' /> DWC: href: https://www.kompetenz-wasser.de/en/forschung/projekte/dwc - html: Project DWC IMPETUS: href: https://www.kompetenz-wasser.de/en/forschung/projekte/impetus From 614760bf5cc6b6dd3fbefc5ab443cf0dca6b0e10 Mon Sep 17 00:00:00 2001 From: mrustl Date: Fri, 5 Apr 2024 09:56:47 +0200 Subject: [PATCH 28/43] Fix example for new API --- vignettes/tutorial.Rmd | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/vignettes/tutorial.Rmd b/vignettes/tutorial.Rmd index 46adb13f..bbaa63a7 100644 --- a/vignettes/tutorial.Rmd +++ b/vignettes/tutorial.Rmd @@ -322,12 +322,11 @@ debug <- FALSE gw_level_multi <- data.table::rbindlist(lapply( stations_list$groundwater.level$Messstellennummer, - function(id) kwb.utils::catAndRun( + function(id) { kwb.utils::catAndRun( sprintf("Downloading Messstellennummer == '%s'", id), - wasserportal::read_wasserportal_raw_gw(station = id, stype = "gwl"), - dbg = debug - ) -)) + wasserportal::read_wasserportal_raw_gw(station = id, stype = "gws"), + dbg = debug) } + )) readr::write_csv(gw_level_multi, file = "groundwater_level.csv") From b40f9e9bf33d2ef5761ce3bdead5922cb2605e9a Mon Sep 17 00:00:00 2001 From: mrustl Date: Fri, 5 Apr 2024 10:27:14 +0200 Subject: [PATCH 29/43] Set default < 1900 as oldest GW level data range to 1869-10-14 --- R/read_wasserportal_raw_gw.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/read_wasserportal_raw_gw.R b/R/read_wasserportal_raw_gw.R index d1f7e5a2..3281719a 100644 --- a/R/read_wasserportal_raw_gw.R +++ b/R/read_wasserportal_raw_gw.R @@ -162,7 +162,7 @@ get_url_and_body_for_groundwater_data_download <- function( } if (sreihe == "wa") { - sdatum <- "01.01.1900" + sdatum <- "01.01.1850" } # Format the end date (today) From 84479d7b81348f5e4e43ae103fbfedeba0748972 Mon Sep 17 00:00:00 2001 From: mrustl Date: Fri, 5 Apr 2024 11:53:18 +0200 Subject: [PATCH 30/43] Add zip export for SW quality --- vignettes/surface-water.Rmd | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/vignettes/surface-water.Rmd b/vignettes/surface-water.Rmd index 962592a5..1810373c 100644 --- a/vignettes/surface-water.Rmd +++ b/vignettes/surface-water.Rmd @@ -50,7 +50,7 @@ The following surface water master data `.csv` files are available for download: cat_file_enumeration(files) ``` -## Daily Surface Water Data +## Daily Surface Water Data & Overall Surface Water Quality By running the code below all available `daily surface water` data of monitoring stations from Wasserportal Berlin will be downloaded and exported into one `.json` @@ -67,19 +67,31 @@ sw_data_daily_list <- wasserportal::get_daily_surfacewater_data( ) files <- wasserportal::list_timeseries_data_to_zip(sw_data_daily_list) - files # Data availability per parameter sw_data_daily_list %>% dplyr::bind_rows() %>% dplyr::count(Parameter, Einheit) + + +station_ids <- stations$overview_list$surface_water.quality$Messstellennummer + +swq_data <- wasserportal::get_surfacewater_qualities(station_ids) + +files1 <- wasserportal::list_timeseries_data_to_zip( + list("surface-water_quality" = swq_data) + ) + +files2 <- "surface-water_quality.zip" + +file.rename(files1, files2) ``` The following `.zip` files are available for download: ```{r daily_surface_water_data_zip, echo = FALSE, results ='asis'} -cat_file_enumeration(files) +cat_file_enumeration(c(files, files2)) ``` ## Daily Surface Water Levels From 1da08956179ce704b2419eb866847fc018db487b Mon Sep 17 00:00:00 2001 From: hsonne Date: Fri, 5 Apr 2024 16:38:22 +0200 Subject: [PATCH 31/43] Use "shortcuts" to functions from kwb.utils --- R/get_daily_surfacewater_data.R | 6 +++--- R/get_groundwater_data.R | 6 +++--- R/get_wasserportal_stations.R | 25 ++++++++++++------------- R/read_wasserportal.R | 8 +++++--- R/read_wasserportal_raw.R | 13 +++++-------- R/read_wasserportal_raw_gw.R | 16 +++++++++++----- R/utils.R | 14 +++++++++++++- 7 files changed, 52 insertions(+), 36 deletions(-) diff --git a/R/get_daily_surfacewater_data.R b/R/get_daily_surfacewater_data.R index 8f259e33..3636885c 100644 --- a/R/get_daily_surfacewater_data.R +++ b/R/get_daily_surfacewater_data.R @@ -23,8 +23,8 @@ get_daily_surfacewater_data <- function( ) { #kwb.utils::assignPackageObjects("wasserportal") - overviews <- kwb.utils::selectElements(stations, "overview_list") - crosstable <- kwb.utils::selectElements(stations, "crosstable") + overviews <- select_elements(stations, "overview_list") + crosstable <- select_elements(stations, "crosstable") data_frames <- lapply(names(variables), function(variable_name) { @@ -33,7 +33,7 @@ get_daily_surfacewater_data <- function( kwb.utils::catAndRun(sprintf("Importing '%s'", variable_name), expr = { # data frame with stations at which is measured - station_data <- kwb.utils::selectElements(overviews, variable_name) + station_data <- select_elements(overviews, variable_name) # Identifiers of non-external monitoring stations to loop through station_ids <- get_non_external_station_ids(station_data) diff --git a/R/get_groundwater_data.R b/R/get_groundwater_data.R index c3c2c622..dc2e2127 100644 --- a/R/get_groundwater_data.R +++ b/R/get_groundwater_data.R @@ -30,7 +30,7 @@ get_groundwater_data <- function( #kwb.utils::assignPackageObjects("wasserportal") if (is.null(stations_list)) { - stations_list <- kwb.utils::selectElements(stations, "overview_list") + stations_list <- select_elements(stations, "overview_list") } result <- lapply( @@ -46,8 +46,8 @@ get_groundwater_data <- function( dbg = debug, expr = { ids <- stations_list %>% - kwb.utils::selectElements(option_name) %>% - kwb.utils::selectColumns("Messstellennummer") + select_elements(option_name) %>% + select_columns("Messstellennummer") lapply( X = ids, FUN = function(id) { diff --git a/R/get_wasserportal_stations.R b/R/get_wasserportal_stations.R index 476f4aad..b2b6499d 100644 --- a/R/get_wasserportal_stations.R +++ b/R/get_wasserportal_stations.R @@ -1,29 +1,28 @@ # get_wasserportal_stations ---------------------------------------------------- #' Get Names and IDs of the Stations of wasserportal.berlin.de -#' +#' #' @param type one of "quality", "level", "flow" #' @export get_wasserportal_stations <- function(type = "quality") { if (! is.null(type)) { - type <- match.arg(type, c("quality", "level", "flow")) + type <- match.arg(type, c("quality", "level", "flow")) } - + file <- "stations_wasserportal.csv" - + stations <- readPackageFile(file, fileEncoding = "UTF-8") - - get <- kwb.utils::selectColumns - - stations$id <- as.character(get(stations, "id")) - stations$name <- kwb.utils::substSpecialChars(get(stations, "name")) - + + stations$id <- as.character(select_columns(stations, "id")) + stations$name <- subst_special_chars(select_columns(stations, "name")) + is_available <- if (is.null(type)) { seq_len(nrow(stations)) } else { - nzchar(get(stations, type)) + nzchar(select_columns(stations, type)) } - - kwb.utils::toLookupList(data = get(stations, c("name", "id"))[is_available, ]) + + select_columns(stations, c("name", "id"))[is_available, ] %>% + kwb.utils::toLookupList(data = .) } diff --git a/R/read_wasserportal.R b/R/read_wasserportal.R index 6a36ed88..9b2cad46 100644 --- a/R/read_wasserportal.R +++ b/R/read_wasserportal.R @@ -95,7 +95,9 @@ read_wasserportal <- function( #include_raw_time = FALSE #stations_crosstable <- get_stations(type = "crosstable") - station_crosstable <- stations_crosstable[stations_crosstable$Messstellennummer == station, ] + site_ids <- select_columns(stations_crosstable, "Messstellennummer") + + station_crosstable <- stations_crosstable[site_ids == station, ] variable_ids <- get_station_variables(station_crosstable) @@ -177,7 +179,7 @@ read_wasserportal <- function( # merge_raw_results_single ----------------------------------------------------- merge_raw_results_single <- function(dfs, variables, include_raw_time) { - date_vectors <- lapply(dfs, kwb.utils::selectColumns, "LocalDateTime") + date_vectors <- lapply(dfs, select_columns, "LocalDateTime") if (length(variables) > 1 && ! kwb.utils::allAreIdentical(date_vectors)) { message("Not all requests return the same timestamp column:") @@ -189,7 +191,7 @@ merge_raw_results_single <- function(dfs, variables, include_raw_time) "LocalDateTime" ) - backbones <- lapply(dfs, kwb.utils::selectColumns, keys, drop = FALSE) + backbones <- lapply(dfs, select_columns, keys, drop = FALSE) backbone <- unique(do.call(rbind, backbones)) diff --git a/R/read_wasserportal_raw.R b/R/read_wasserportal_raw.R index c6d79404..96c12dfc 100644 --- a/R/read_wasserportal_raw.R +++ b/R/read_wasserportal_raw.R @@ -37,10 +37,7 @@ read_wasserportal_raw <- function( from_date <- assert_date(from_date) - station_ids <- kwb.utils::selectColumns( - stations_crosstable, - "Messstellennummer" - ) + station_ids <- select_columns(stations_crosstable, "Messstellennummer") stopifnot(station %in% station_ids) @@ -60,7 +57,7 @@ read_wasserportal_raw <- function( list(single = "ew", daily = "tw", monthly = "mw") } - sreihe <- kwb.utils::selectElements(sreihe_options, type) + sreihe <- select_elements(sreihe_options, type) # Compose the URL and the body for the request if (api_version == 1L) { @@ -75,7 +72,7 @@ read_wasserportal_raw <- function( oos = "s" ) - variable <- kwb.utils::selectElements(variable_mapping, variable) + variable <- select_elements(variable_mapping, variable) variable_ids <- unlist(variable_mapping) url <- get_wasserportal_url(station, variable) @@ -190,7 +187,7 @@ add_wasserportal_metadata <- function(x, header_fields) # clean_timestamp_columns ------------------------------------------------------ clean_timestamp_columns <- function(data, include_raw_time) { - raw_timestamps <- kwb.utils::selectColumns(data, "Datum") + raw_timestamps <- select_columns(data, "Datum") data <- kwb.utils::renameColumns(data, list(Datum = "timestamp_raw")) @@ -259,7 +256,7 @@ repair_wasserportal_timestamps <- function(timestamps, dbg = FALSE) # remove_remaining_duplicates -------------------------------------------------- remove_remaining_duplicates <- function(data) { - timestamps <- kwb.utils::selectColumns(data, "timestamp_corr") + timestamps <- select_columns(data, "timestamp_corr") is_duplicate <- duplicated(timestamps) diff --git a/R/read_wasserportal_raw_gw.R b/R/read_wasserportal_raw_gw.R index 3281719a..9f36aded 100644 --- a/R/read_wasserportal_raw_gw.R +++ b/R/read_wasserportal_raw_gw.R @@ -117,7 +117,7 @@ read_wasserportal_raw_gw <- function( ) ) %>% dplyr::filter(!is.na(.data$Messwert)) %>% - kwb.utils::selectColumns(c( + select_columns(c( "Messstellennummer", "Datum", "Parameter", @@ -140,12 +140,18 @@ get_url_and_body_for_groundwater_data_download <- function( ) { sreihe <- if (stype == "gwq") { + "wa" + } else { - kwb.utils::selectElements( - list(single = "w", single_all = "wa", daily = "m", monthly = "j"), - type - ) + + select_elements(elements = type, x = list( + single = "w", + single_all = "wa", + daily = "m", + monthly = "j" + )) + } download_shortcuts <- list(gws = "g", gwq = "q") diff --git a/R/utils.R b/R/utils.R index 409aaaaa..809b4ed2 100644 --- a/R/utils.R +++ b/R/utils.R @@ -43,7 +43,7 @@ assert_date <- function(x) columns_to_labels <- function(data, columns, fmt = "%s: %s", sep = ", ") { do.call(paste, c(list(sep = sep), lapply(columns, function(column) sprintf( - fmt, column, kwb.utils::selectColumns(data, column) + fmt, column, select_columns(data, column) )))) } @@ -91,3 +91,15 @@ readPackageFile <- function(file, ...) { kwb.utils::readPackageFile(file, package = "wasserportal", ...) } + +# select_columns --------------------------------------------------------------- +#' @importFrom kwb.utils selectColumns +select_columns <- kwb.utils::selectColumns + +# select_elements -------------------------------------------------------------- +#' @importFrom kwb.utils selectElements +select_elements <- kwb.utils::selectElements + +# subst_special_chars ---------------------------------------------------------- +#' @importFrom package kwb.utils substSpecialChars +subst_special_chars <- kwb.utils::substSpecialChars From 95797be1fb529d1cda1c1c4ef3d8c3f4effb7d3a Mon Sep 17 00:00:00 2001 From: hsonne Date: Fri, 5 Apr 2024 16:44:57 +0200 Subject: [PATCH 32/43] Fix Roxygen import directive --- R/utils.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/utils.R b/R/utils.R index 809b4ed2..23c57e62 100644 --- a/R/utils.R +++ b/R/utils.R @@ -101,5 +101,5 @@ select_columns <- kwb.utils::selectColumns select_elements <- kwb.utils::selectElements # subst_special_chars ---------------------------------------------------------- -#' @importFrom package kwb.utils substSpecialChars +#' @importFrom kwb.utils substSpecialChars subst_special_chars <- kwb.utils::substSpecialChars From a9420a7371616955b6b7f5959b18281da21a7803 Mon Sep 17 00:00:00 2001 From: hsonne Date: Fri, 5 Apr 2024 20:05:45 +0200 Subject: [PATCH 33/43] Improve get_station_variables() - add documentation, improve error message - return named vector --- R/get_station_variables.R | 23 +++++++++++++++---- R/utils.R | 14 +++++++++++ .../test-function-get_station_variables.R | 17 +++++++++----- 3 files changed, 43 insertions(+), 11 deletions(-) diff --git a/R/get_station_variables.R b/R/get_station_variables.R index a855925b..b6602d8a 100644 --- a/R/get_station_variables.R +++ b/R/get_station_variables.R @@ -1,6 +1,11 @@ #' Helper function: get available station variables #' -#' @param station_df station_df +#' @param station_df data frame with one row per station and columns +#' "Messstellennummer", "Messstellenname" and additional columns each of which +#' represents a variable that is measured at that station. If the variable +#' columns contain the value "x" it means that the corresponding variable is +#' measured and the name of the column is contained in the returned vector of +#' variable names. #' #' @return returns names of available variables for station #' @export @@ -9,8 +14,16 @@ #' get_station_variables <- function(station_df) { - station_df %>% - dplyr::select_if(function(x){!all(is.na(x))}) %>% - names() %>% - setdiff(c("Messstellennummer", "Messstellenname")) + stopifnot(is.data.frame(station_df)) + + variables <- station_df %>% + kwb.utils::removeColumns(c("Messstellennummer", "Messstellenname")) %>% + kwb.utils::removeEmptyColumns(dbg = FALSE) %>% + names() + + all_variables <- unlist(get_overview_options()) + + stop_if_not_all_in(variables, all_variables, type = "variable code") + + all_variables[match(variables, all_variables)] } diff --git a/R/utils.R b/R/utils.R index 23c57e62..72f0d5c4 100644 --- a/R/utils.R +++ b/R/utils.R @@ -100,6 +100,20 @@ select_columns <- kwb.utils::selectColumns #' @importFrom kwb.utils selectElements select_elements <- kwb.utils::selectElements +# stop_if_not_all_in ----------------------------------------------------------- +stop_if_not_all_in <- function(x, set, type = "element") +{ + is_missing <- !(x %in% set) + + if (any(is_missing)) { + kwb.utils::stopFormatted(kwb.utils::noSuchElements( + x = x[is_missing], + available = set, + type = type + )) + } +} + # subst_special_chars ---------------------------------------------------------- #' @importFrom kwb.utils substSpecialChars subst_special_chars <- kwb.utils::substSpecialChars diff --git a/tests/testthat/test-function-get_station_variables.R b/tests/testthat/test-function-get_station_variables.R index f231aa25..a4bd0d8e 100644 --- a/tests/testthat/test-function-get_station_variables.R +++ b/tests/testthat/test-function-get_station_variables.R @@ -4,12 +4,17 @@ test_that("get_station_variables() works", { expect_error(f()) - station_df <- data.frame( - Messstellennummer = 1:3, - Messstellenname = LETTERS[1:3], - a = 2:4, - b = 3:5 + df1 <- data.frame( + Messstellennummer = 1:2, + Messstellenname = c("a", "b"), + my_var = c("x", NA) ) - expect_identical(f(station_df), c("a", "b")) + df2 <- kwb.utils::renameColumns(df1, list(my_var = "gwq")) + + expect_error(f(df1), "No such variable code") + + result <- f(df2) + + expect_identical(result, c(groundwater.quality = "gwq")) }) From 0ad8d1cdbfea0e776c3686ad73e3e5f3c16a20f3 Mon Sep 17 00:00:00 2001 From: hsonne Date: Fri, 5 Apr 2024 20:08:08 +0200 Subject: [PATCH 34/43] Improve read_wasserportal_raw() - improve error messages - (hopefully) get rid of NA in text output by setting "variable_ids = variable" --- R/read_wasserportal_raw.R | 34 ++++++++++++++++++++-------------- 1 file changed, 20 insertions(+), 14 deletions(-) diff --git a/R/read_wasserportal_raw.R b/R/read_wasserportal_raw.R index 96c12dfc..5cacda5a 100644 --- a/R/read_wasserportal_raw.R +++ b/R/read_wasserportal_raw.R @@ -39,22 +39,30 @@ read_wasserportal_raw <- function( station_ids <- select_columns(stations_crosstable, "Messstellennummer") - stopifnot(station %in% station_ids) + stop_if_not_all_in(station, station_ids) - station_df <- stations_crosstable[station_ids == station, , drop = FALSE] %>% - dplyr::select_if(function(x){!all(is.na(x))}) - - variable_ids <- get_station_variables(station_df) + variable_ids <- get_station_variables( + stations_crosstable[station_ids == station, , drop = FALSE] + ) - stopifnot(variable %in% variable_ids) + stop_if_not_all_in(variable, variable_ids) sreihe_options <- if (api_version == 1L) { - list(single = "w", single_all = "wa", daily = "m", monthly = "j") + + list( + single = "w", + single_all = "wa", + daily = "m", + monthly = "j" + ) + } else { - # ew = Einzelwerte - # tw = Tageswerte - # mw = Monatswerte - list(single = "ew", daily = "tw", monthly = "mw") + + list( + single = "ew", # ew = Einzelwerte + daily = "tw", # tw = Tageswerte + monthly = "mw" # mw = Monatswerte + ) } sreihe <- select_elements(sreihe_options, type) @@ -86,8 +94,6 @@ read_wasserportal_raw <- function( } else { - variable_ids <- "NOT_REQRUIRED_ISNT_IT" - url <- paste0( "https://wasserportal.berlin.de", "/station.php", @@ -104,7 +110,7 @@ read_wasserportal_raw <- function( # Post the request to the web server response <- kwb.utils::catAndRun( - get_wasserportal_text(station, variable, station_ids, variable_ids), + get_wasserportal_text(station, variable, station_ids, variable_ids = variable), httr::POST(url = url, body = body, handle = handle) ) From 3eb7ea63db057316dcaf0a2ac41a840d6b301981 Mon Sep 17 00:00:00 2001 From: hsonne Date: Fri, 5 Apr 2024 20:09:37 +0200 Subject: [PATCH 35/43] Improve names and errors in read_wasserportal() --- R/read_wasserportal.R | 13 +++-- .../test-function-read_wasserportal.R | 50 +++++++++++++++---- 2 files changed, 46 insertions(+), 17 deletions(-) diff --git a/R/read_wasserportal.R b/R/read_wasserportal.R index 9b2cad46..ae261387 100644 --- a/R/read_wasserportal.R +++ b/R/read_wasserportal.R @@ -95,26 +95,25 @@ read_wasserportal <- function( #include_raw_time = FALSE #stations_crosstable <- get_stations(type = "crosstable") - site_ids <- select_columns(stations_crosstable, "Messstellennummer") + station_ids <- select_columns(stations_crosstable, "Messstellennummer") - station_crosstable <- stations_crosstable[site_ids == station, ] + station_info <- stations_crosstable[station_ids == station, , drop = FALSE] - variable_ids <- get_station_variables(station_crosstable) + variable_ids <- get_station_variables(station_info) if (is.null(variables)) { variables <- variable_ids } - station_ids <- stations_crosstable[["Messstellennummer"]] - - stopifnot(all(station %in% station_ids)) - stopifnot(all(variables %in% variable_ids)) + stop_if_not_all_in(station, station_ids, type = "station id") + stop_if_not_all_in(variables, variable_ids, type = "variable code") names(variables) <- names(variable_ids)[match(variables, variable_ids)] handle <- httr::handle_find(get_wasserportal_url(0, 0)) dfs <- lapply(variables, function(variable) { + #variable <- variables[1L] try(read_wasserportal_raw( variable, station = station, diff --git a/tests/testthat/test-function-read_wasserportal.R b/tests/testthat/test-function-read_wasserportal.R index 746ccbe5..31b93fee 100644 --- a/tests/testthat/test-function-read_wasserportal.R +++ b/tests/testthat/test-function-read_wasserportal.R @@ -1,16 +1,46 @@ -# -# This test file has been generated by kwb.test::create_test_files() -# launched by user hsonne on 2023-09-23 23:10:31.576169. -# Your are strongly encouraged to modify the dummy functions -# so that real cases are tested. You should then delete this comment. -# - +#library(testthat) test_that("read_wasserportal() works", { + f <- wasserportal::read_wasserportal + + expect_error(f()) + expect_error( - wasserportal:::read_wasserportal() - # argument "stations_crosstable" is missing, with no default + f( + station = "my_station", + variables = "my_variable", + stations_crosstable = data.frame(Messstellennummer = "my_station") + ), + "No such variable code" ) -}) + expect_error( + f( + station = "my_station", + variables = "my_variable", + stations_crosstable = data.frame( + Messstellennummer = "my_station", + my_variable = "x" + ) + ), + "No such variable code" + ) + + expect_output(result <- f( + station = c(my_station = "5865900"), + variables = c(surface_water.water_level = "ows"), + stations_crosstable = data.frame( + Messstellennummer = "5865900", + ows = "x" + ) + )) + + expect_s3_class(result, "data.frame") + expect_identical(names(result), c( + "LocalDateTime", + "UTCOffset", + "surface_water.water_level" + )) + +}) From 4073f15a7090c20626c9ec5cbe5bcdfe7baad687 Mon Sep 17 00:00:00 2001 From: hsonne Date: Fri, 5 Apr 2024 20:10:04 +0200 Subject: [PATCH 36/43] Add and improve test functions --- ...est-function-get_daily_surfacewater_data.R | 7 ++--- ...test-function-get_surfacewater_qualities.R | 30 ++++++++++++++++++ .../test-function-get_surfacewater_quality.R | 30 ++++++++++++++++++ .../test-function-get_wasserportal_text.R | 5 +++ .../test-function-merge_raw_results_single.R | 31 +++++++++++++------ 5 files changed, 90 insertions(+), 13 deletions(-) create mode 100644 tests/testthat/test-function-get_surfacewater_qualities.R create mode 100644 tests/testthat/test-function-get_surfacewater_quality.R diff --git a/tests/testthat/test-function-get_daily_surfacewater_data.R b/tests/testthat/test-function-get_daily_surfacewater_data.R index b2fae620..b156e1df 100644 --- a/tests/testthat/test-function-get_daily_surfacewater_data.R +++ b/tests/testthat/test-function-get_daily_surfacewater_data.R @@ -12,10 +12,9 @@ test_that("get_daily_surfacewater_data() works", { tmp <- stations$overview_list$surface_water.water_level[1L, ] stations$overview_list$surface_water.water_level <- tmp - expect_warning(capture.output(result <- f( - stations, - variables = c(surface_water.water_level = "ows") - ))) + variables <- c(surface_water.water_level = "ows") + + expect_warning(capture.output(result <- f(stations, variables = variables))) expect_identical( names(result$surface_water.water_level), diff --git a/tests/testthat/test-function-get_surfacewater_qualities.R b/tests/testthat/test-function-get_surfacewater_qualities.R new file mode 100644 index 00000000..8bce8b7c --- /dev/null +++ b/tests/testthat/test-function-get_surfacewater_qualities.R @@ -0,0 +1,30 @@ +#library(testthat) + +test_that("get_surfacewater_qualities() works", { + + f <- wasserportal:::get_surfacewater_qualities + + expect_error(f()) + + stations <- wasserportal::get_stations(type = "list", debug = FALSE) + + station_ids <- stations$surface_water.quality$Messstellennummer[1:2] + + expect_output(result <- f(station_ids)) + + expect_s3_class(result, "data.frame") + + expect_identical(names(result), c( + "Messstelle", + "Messstellennummer", + "Datum", + "Parameter", + "Entnahmetiefe [m]", + "Messmethode", + "Vorzeichen", + "Wert", + "Einheit", + "Bestimmungsgrenze" + )) + +}) diff --git a/tests/testthat/test-function-get_surfacewater_quality.R b/tests/testthat/test-function-get_surfacewater_quality.R new file mode 100644 index 00000000..2d153409 --- /dev/null +++ b/tests/testthat/test-function-get_surfacewater_quality.R @@ -0,0 +1,30 @@ +library(testthat) + +test_that("get_surfacewater_quality() works", { + + f <- wasserportal:::get_surfacewater_quality + + expect_error(f()) + + stations <- wasserportal::get_stations(type = "list", debug = FALSE) + + station_id <- stations$surface_water.quality$Messstellennummer[1L] + + result <- f(station_id) + + expect_s3_class(result, "data.frame") + + expect_identical(names(result), c( + "Messstelle", + "Messstellennummer", + "Datum", + "Parameter", + "Entnahmetiefe [m]", + "Messmethode", + "Vorzeichen", + "Wert", + "Einheit", + "Bestimmungsgrenze" + )) + +}) diff --git a/tests/testthat/test-function-get_wasserportal_text.R b/tests/testthat/test-function-get_wasserportal_text.R index 3a8fce07..ef479f7e 100644 --- a/tests/testthat/test-function-get_wasserportal_text.R +++ b/tests/testthat/test-function-get_wasserportal_text.R @@ -14,4 +14,9 @@ test_that("get_wasserportal_text() works", { ) ) + expect_identical( + "Reading 'variable_2' for station 1 (station_1)", + f(station = 1, variable = 2, station_ids = 1:2, variable_ids = 1:2) + ) + }) diff --git a/tests/testthat/test-function-merge_raw_results_single.R b/tests/testthat/test-function-merge_raw_results_single.R index 974fe34d..b2688d89 100644 --- a/tests/testthat/test-function-merge_raw_results_single.R +++ b/tests/testthat/test-function-merge_raw_results_single.R @@ -1,16 +1,29 @@ -# -# This test file has been generated by kwb.test::create_test_files() -# launched by user hsonne on 2023-09-23 23:10:31.576169. -# Your are strongly encouraged to modify the dummy functions -# so that real cases are tested. You should then delete this comment. -# +#library(testthat) test_that("merge_raw_results_single() works", { - expect_error( - wasserportal:::merge_raw_results_single() - # argument "dfs" is missing, with no default + f <- wasserportal:::merge_raw_results_single + + expect_error(f()) + + df1 <- data.frame( + LocalDateTime = Sys.time(), + a = 1 + ) + + df2 <- data.frame( + LocalDateTime = Sys.time(), + a = 2 ) + df3 <- data.frame( + LocalDateTime = Sys.time(), + a = 3 + ) + + dfs <- list(df1, df2, df3) + + f(dfs, variables = "a", include_raw_time = FALSE) + }) From e73f713500633e06a85d805581cda46e0739b22f Mon Sep 17 00:00:00 2001 From: hsonne Date: Fri, 5 Apr 2024 20:52:33 +0200 Subject: [PATCH 37/43] Use new function url_parameter_string() and reuse wasserportal_base_url() --- R/get_surfacewater_quality.R | 18 +++++---- R/get_wasserportal_stations_table.R | 6 +-- R/read_wasserportal_raw.R | 18 +++++---- R/read_wasserportal_raw_gw.R | 37 ++++++++++--------- R/utils.R | 10 +++++ ...st-function-get_non_external_station_ids.R | 2 +- .../test-function-url_parameter_string.R | 10 +++++ 7 files changed, 64 insertions(+), 37 deletions(-) create mode 100644 tests/testthat/test-function-url_parameter_string.R diff --git a/R/get_surfacewater_quality.R b/R/get_surfacewater_quality.R index 743fc642..0301bca7 100644 --- a/R/get_surfacewater_quality.R +++ b/R/get_surfacewater_quality.R @@ -26,14 +26,16 @@ get_surfacewater_quality <- function(station_id) { url <- paste0( wasserportal_base_url(), "/station.php?", - "anzeige=d", # download - "&station=", station_id, - "&sreihe=", sreihe, - "&smode=c", # data format (= csv?) - "&thema=", stype, - "&exportthema=", exportthema, - "&sdatum=", sdatum, - "&senddatum=", senddatum + url_parameter_string( + anzeige = "d", # download + station = station_id, + sreihe = sreihe, + smode = "c", # data format (= csv?) + thema = stype, + exportthema = exportthema, + sdatum = sdatum, + senddatum = senddatum + ) ) # Post the request to the web server diff --git a/R/get_wasserportal_stations_table.R b/R/get_wasserportal_stations_table.R index 2f35b916..26c0fe1a 100644 --- a/R/get_wasserportal_stations_table.R +++ b/R/get_wasserportal_stations_table.R @@ -27,10 +27,10 @@ get_wasserportal_stations_table <- function ( type <- match.arg(type, unlist(get_overview_options())) } - overview_url <- sprintf( - "%s/messwerte.php?anzeige=tabelle&thema=%s", + overview_url <- paste0( url_wasserportal, - type + "/messwerte.php?", + url_parameter_string(anzeige = "tabelle", thema = type) ) html <- xml2::read_html(overview_url) diff --git a/R/read_wasserportal_raw.R b/R/read_wasserportal_raw.R index 5cacda5a..0041f03e 100644 --- a/R/read_wasserportal_raw.R +++ b/R/read_wasserportal_raw.R @@ -95,14 +95,16 @@ read_wasserportal_raw <- function( } else { url <- paste0( - "https://wasserportal.berlin.de", - "/station.php", - "?anzeige=d", # = download - "&station=", station, - "&thema=", variable, # type of measurement - "&sreihe=", sreihe, # type of time value - "&smode=c", # output format: csv (?) - "&sdatum=", date_string_de(from_date) # start date + wasserportal_base_url(), + "/station.php?", + url_parameter_string( + anzeige = "d", # = download + station = station, + thema = variable, # type of measurement + sreihe = sreihe, # type of time value + smode = "c", # output format: csv (?) + sdatum = date_string_de(from_date) # start date + ) ) body <- list() diff --git a/R/read_wasserportal_raw_gw.R b/R/read_wasserportal_raw_gw.R index 9f36aded..3ed3fe97 100644 --- a/R/read_wasserportal_raw_gw.R +++ b/R/read_wasserportal_raw_gw.R @@ -176,11 +176,9 @@ get_url_and_body_for_groundwater_data_download <- function( if (api_version == 1L) { - url <- sprintf( - "%s/station.php?anzeige=%sd&sstation=%s", - wasserportal_base_url(), - download_shortcut, - station + url_parameters <- list( + anzeige = download_shortcut, + sstation = station ) # Compose the body of the request @@ -194,21 +192,26 @@ get_url_and_body_for_groundwater_data_download <- function( } else { - url <- paste0( - wasserportal_base_url(), - "/station.php?", - "anzeige=d", # download - "&station=", station, - "&sreihe=", sreihe, - "&smode=c", # data format (= csv?) - "&thema=", stype, - "&exportthema=gw", - "&sdatum=", sdatum, - "&senddatum=", senddatum + url_parameters <- list( + anzeige = "d", # download + station = station, + sreihe = sreihe, + smode = "c", # data format (= csv?) + thema = stype, + exportthema = "gw", + sdatum = sdatum, + senddatum = senddatum ) body <- list() } - list(url = url, body = body) + list( + url = paste0( + wasserportal_base_url(), + "/station.php?", + do.call(url_parameter_string, url_parameters) + ), + body = body + ) } diff --git a/R/utils.R b/R/utils.R index 72f0d5c4..91b1b3f4 100644 --- a/R/utils.R +++ b/R/utils.R @@ -117,3 +117,13 @@ stop_if_not_all_in <- function(x, set, type = "element") # subst_special_chars ---------------------------------------------------------- #' @importFrom kwb.utils substSpecialChars subst_special_chars <- kwb.utils::substSpecialChars + +# url_parameter_string --------------------------------------------------------- +url_parameter_string <- function(...) +{ + arguments <- list(...) + + stopifnot(!any(kwb.utils::is.unnamed(arguments))) + + paste(names(arguments), arguments, sep = "=", collapse = "&") +} diff --git a/tests/testthat/test-function-get_non_external_station_ids.R b/tests/testthat/test-function-get_non_external_station_ids.R index 80293c29..77ff63cb 100644 --- a/tests/testthat/test-function-get_non_external_station_ids.R +++ b/tests/testthat/test-function-get_non_external_station_ids.R @@ -6,7 +6,7 @@ test_that("get_non_external_station_ids() works", { expect_error(f()) - portal_url <- "https://wasserportal.berlin.de" + portal_url <- wasserportal::wasserportal_base_url() station_data <- read.table(sep = ",", header = TRUE, text = " Messstellennummer,Betreiber,stammdaten_link diff --git a/tests/testthat/test-function-url_parameter_string.R b/tests/testthat/test-function-url_parameter_string.R new file mode 100644 index 00000000..aa18cd9a --- /dev/null +++ b/tests/testthat/test-function-url_parameter_string.R @@ -0,0 +1,10 @@ +#library(testthat) +test_that("url_parameter_string() works", { + + f <- wasserportal:::url_parameter_string + + expect_identical(f(), "") + + expect_identical(f(a = 1, b = 2), "a=1&b=2") + expect_identical(f(a = 1, b = "abc"), "a=1&b=abc") +}) From 29b62689877bff51bfc127ba8f295cb00f803082 Mon Sep 17 00:00:00 2001 From: hsonne Date: Fri, 5 Apr 2024 20:54:01 +0200 Subject: [PATCH 38/43] Add test for stop_if_not_all_in() --- tests/testthat/test-function-stop_if_not_all_in.R | 8 ++++++++ 1 file changed, 8 insertions(+) create mode 100644 tests/testthat/test-function-stop_if_not_all_in.R diff --git a/tests/testthat/test-function-stop_if_not_all_in.R b/tests/testthat/test-function-stop_if_not_all_in.R new file mode 100644 index 00000000..8a7d04fd --- /dev/null +++ b/tests/testthat/test-function-stop_if_not_all_in.R @@ -0,0 +1,8 @@ +test_that("stop_if_not_all_in() works", { + + f <- wasserportal:::stop_if_not_all_in + + expect_error(f()) + + expect_error(f("a", c("b", "c"), type = "animal"), "No such animal") +}) From 999bca2a48b565592d4194a396553994cbe80470 Mon Sep 17 00:00:00 2001 From: hsonne Date: Fri, 5 Apr 2024 20:59:01 +0200 Subject: [PATCH 39/43] Extract and reuse split_into_lines() --- R/get_surfacewater_quality.R | 3 +-- R/read_wasserportal_raw.R | 2 +- R/read_wasserportal_raw_gw.R | 2 +- R/utils.R | 8 ++++++++ 4 files changed, 11 insertions(+), 4 deletions(-) diff --git a/R/get_surfacewater_quality.R b/R/get_surfacewater_quality.R index 0301bca7..1b73ddfc 100644 --- a/R/get_surfacewater_quality.R +++ b/R/get_surfacewater_quality.R @@ -50,8 +50,7 @@ get_surfacewater_quality <- function(station_id) { text <- httr::content(response, as = "text", encoding = "Latin1") # Split the text into separate lines - textlines <- strsplit(text, "\n")[[1L]] - + textlines <- split_into_lines(text) date_pattern <- "Datum" start_line <- which(stringr::str_detect(textlines, date_pattern)) diff --git a/R/read_wasserportal_raw.R b/R/read_wasserportal_raw.R index 0041f03e..4be794a0 100644 --- a/R/read_wasserportal_raw.R +++ b/R/read_wasserportal_raw.R @@ -130,7 +130,7 @@ read_wasserportal_raw <- function( } # Split the text into separate lines - textlines <- strsplit(text, "\n")[[1L]] + textlines <- split_into_lines(text) # Split the header row into fields header_fields <- as.character(read(textlines[1L])) diff --git a/R/read_wasserportal_raw_gw.R b/R/read_wasserportal_raw_gw.R index 3ed3fe97..61a71623 100644 --- a/R/read_wasserportal_raw_gw.R +++ b/R/read_wasserportal_raw_gw.R @@ -45,7 +45,7 @@ read_wasserportal_raw_gw <- function( text <- httr::content(response, as = "text", encoding = "Latin1") # Split the text into separate lines - textlines <- strsplit(text, "\n")[[1L]] + textlines <- split_into_lines(text) date_pattern <- "Datum" start_line <- which(startsWith(textlines, date_pattern)) diff --git a/R/utils.R b/R/utils.R index 91b1b3f4..ada7a636 100644 --- a/R/utils.R +++ b/R/utils.R @@ -100,6 +100,14 @@ select_columns <- kwb.utils::selectColumns #' @importFrom kwb.utils selectElements select_elements <- kwb.utils::selectElements +# split_into_lines ------------------------------------------------------------- +split_into_lines <- function(text) +{ + stopifnot(is.character(x), length(x) == 1L) + + strsplit(text, "\n")[[1L]] +} + # stop_if_not_all_in ----------------------------------------------------------- stop_if_not_all_in <- function(x, set, type = "element") { From 0c3f94d08ac2adcd31d153600392f93113f5c69c Mon Sep 17 00:00:00 2001 From: hsonne Date: Fri, 5 Apr 2024 21:06:47 +0200 Subject: [PATCH 40/43] Fix and test split_into_lines() --- R/utils.R | 4 ++-- tests/testthat/test-function-split_into_lines.R | 10 ++++++++++ 2 files changed, 12 insertions(+), 2 deletions(-) create mode 100644 tests/testthat/test-function-split_into_lines.R diff --git a/R/utils.R b/R/utils.R index ada7a636..57ac6cc9 100644 --- a/R/utils.R +++ b/R/utils.R @@ -101,11 +101,11 @@ select_columns <- kwb.utils::selectColumns select_elements <- kwb.utils::selectElements # split_into_lines ------------------------------------------------------------- -split_into_lines <- function(text) +split_into_lines <- function(x) { stopifnot(is.character(x), length(x) == 1L) - strsplit(text, "\n")[[1L]] + strsplit(x, "\n")[[1L]] } # stop_if_not_all_in ----------------------------------------------------------- diff --git a/tests/testthat/test-function-split_into_lines.R b/tests/testthat/test-function-split_into_lines.R new file mode 100644 index 00000000..ca464405 --- /dev/null +++ b/tests/testthat/test-function-split_into_lines.R @@ -0,0 +1,10 @@ +#library(testthat) +test_that("split_into_lines() works", { + + f <- wasserportal:::split_into_lines + + expect_error(f()) + expect_error(f(1)) + expect_error(f(c("a", "b"))) + expect_identical(f("a\nb"), c("a", "b")) +}) From e40f0aad669bfa8415848c5f221254230708f69d Mon Sep 17 00:00:00 2001 From: hsonne Date: Fri, 5 Apr 2024 21:31:51 +0200 Subject: [PATCH 41/43] Extract get_text_response_of_httr_post_request() --- R/get_surfacewater_quality.R | 12 +----------- R/helpers.R | 33 +++++++++++++++++++++++++++++++++ R/read_wasserportal.R | 2 +- R/read_wasserportal_raw.R | 25 ++++++++++++------------- R/read_wasserportal_raw_gw.R | 15 +++++---------- 5 files changed, 52 insertions(+), 35 deletions(-) diff --git a/R/get_surfacewater_quality.R b/R/get_surfacewater_quality.R index 1b73ddfc..80489e4a 100644 --- a/R/get_surfacewater_quality.R +++ b/R/get_surfacewater_quality.R @@ -5,7 +5,6 @@ #' @return data frame with water quality data for one monitoring station #' @export #' @importFrom kwb.utils stopFormatted -#' @importFrom httr content http_error #' @importFrom stringr str_detect str_remove #' @examples #' \dontrun{ @@ -38,16 +37,7 @@ get_surfacewater_quality <- function(station_id) { ) ) - # Post the request to the web server - response <- httr::POST(url) - - if (httr::http_error(response)) { - message("POST request failed. Returning the response object.") - return(response) - } - - # Read the response of the web server as text - text <- httr::content(response, as = "text", encoding = "Latin1") + text <- get_text_response_of_httr_post_request(url) # Split the text into separate lines textlines <- split_into_lines(text) diff --git a/R/helpers.R b/R/helpers.R index 62425731..7d58ac45 100644 --- a/R/helpers.R +++ b/R/helpers.R @@ -1,3 +1,36 @@ +# get_text_response_of_httr_post_request --------------------------------------- +#' @importFrom httr content http_error POST +get_text_response_of_httr_post_request <- function( + url, + body = NULL, + handle = NULL, + text = paste("Sending POST request to", url), + dbg = FALSE, + encoding = "Latin1" +) +{ + kwb.utils::catAndRun( + text, + dbg = dbg, + expr = { + + # Post the request to the web server + response <- httr::POST(url, body = body, handle = handle) + + if (httr::http_error(response)) { + + message("POST request failed. Returning the response object.") + response + + } else { + + # Read the response of the web server as text + httr::content(response, as = "text", encoding = encoding) + } + } + ) +} + # is_external_link ------------------------------------------------------------- is_external_link <- function(url) { diff --git a/R/read_wasserportal.R b/R/read_wasserportal.R index ae261387..6b809683 100644 --- a/R/read_wasserportal.R +++ b/R/read_wasserportal.R @@ -27,7 +27,7 @@ #' \code{\link{get_stations}(type = "crosstable")} #' @return data frame read from the CSV file that the download provides. #' IMPORTANT: It is not yet clear how to interpret the timestamp, see example -#' @importFrom httr POST content +#' @importFrom httr handle_find #' @importFrom utils read.table #' @export #' @examples diff --git a/R/read_wasserportal_raw.R b/R/read_wasserportal_raw.R index 4be794a0..764d33d3 100644 --- a/R/read_wasserportal_raw.R +++ b/R/read_wasserportal_raw.R @@ -16,7 +16,6 @@ #' @export #' @importFrom kwb.utils catAndRun selectColumns selectElements #' @importFrom kwb.datetime textToEuropeBerlinPosix -#' @importFrom httr content POST read_wasserportal_raw <- function( variable, station, @@ -110,20 +109,20 @@ read_wasserportal_raw <- function( body <- list() } - # Post the request to the web server - response <- kwb.utils::catAndRun( - get_wasserportal_text(station, variable, station_ids, variable_ids = variable), - httr::POST(url = url, body = body, handle = handle) + text <- kwb.utils::catAndRun( + get_wasserportal_text( + station, + variable, + station_ids, + variable_ids = variable + ), + expr = get_text_response_of_httr_post_request( + url, + body = body, + handle = handle + ) ) - if (httr::http_error(response)) { - message("POST request failed. Returning the response object.") - return(response) - } - - # Read the response of the web server as text - text <- httr::content(response, as = "text", encoding = "Latin1") - if (text == "") { message("Wasserportal returned an empty string. Returning NULL.") return(NULL) diff --git a/R/read_wasserportal_raw_gw.R b/R/read_wasserportal_raw_gw.R index 61a71623..a5494469 100644 --- a/R/read_wasserportal_raw_gw.R +++ b/R/read_wasserportal_raw_gw.R @@ -33,16 +33,11 @@ read_wasserportal_raw_gw <- function( stype, type, station, from_date ) - # Post the request to the web server - response <- httr::POST(info$url, body = info$body, handle = handle) - - if (httr::http_error(response)) { - message("POST request failed. Returning the response object.") - return(response) - } - - # Read the response of the web server as text - text <- httr::content(response, as = "text", encoding = "Latin1") + text <- get_text_response_of_httr_post_request( + url = info$url, + body = info$body, + handle = handle + ) # Split the text into separate lines textlines <- split_into_lines(text) From f2b02aeaf59382505a5e69b8fec88595ab0c9cdd Mon Sep 17 00:00:00 2001 From: hsonne Date: Fri, 5 Apr 2024 21:40:11 +0200 Subject: [PATCH 42/43] Use "shortcut" to kwb.utils::catAndRun() --- R/.test-surface-water_download.R | 2 +- R/get_daily_surfacewater_data.R | 3 +-- R/get_groundwater_data.R | 5 ++--- R/get_stations.R | 3 +-- R/get_surfacewater_qualities.R | 5 ++--- R/get_wasserportal_masters_data.R | 2 +- R/helpers.R | 2 +- R/list_data_to_csv_or_zip.R | 2 +- R/read_wasserportal.R | 2 +- R/read_wasserportal_raw.R | 3 +-- R/utils.R | 3 +++ 11 files changed, 15 insertions(+), 17 deletions(-) diff --git a/R/.test-surface-water_download.R b/R/.test-surface-water_download.R index 816e4544..6b42ebcb 100644 --- a/R/.test-surface-water_download.R +++ b/R/.test-surface-water_download.R @@ -35,7 +35,7 @@ if (FALSE) sw_stations_berlin_daily <- stations_crosstable_berlin %>% kwb.utils::selectColumns("Messstellennummer") %>% - lapply(function(station) kwb.utils::catAndRun( + lapply(function(station) cat_and_run( sprintf("Fetching data for station '%s'", station), expr = wasserportal::read_wasserportal( station = station, diff --git a/R/get_daily_surfacewater_data.R b/R/get_daily_surfacewater_data.R index 3636885c..2e29cfbb 100644 --- a/R/get_daily_surfacewater_data.R +++ b/R/get_daily_surfacewater_data.R @@ -13,7 +13,6 @@ #' variables #' sw_data_daily <- wasserportal::get_daily_surfacewater_data(stations, variables) #' } -#' @importFrom kwb.utils catAndRun #' @importFrom dplyr bind_rows filter pull #' @importFrom stats setNames get_daily_surfacewater_data <- function( @@ -30,7 +29,7 @@ get_daily_surfacewater_data <- function( #variable_name <- names(variables)[1L] - kwb.utils::catAndRun(sprintf("Importing '%s'", variable_name), expr = { + cat_and_run(sprintf("Importing '%s'", variable_name), expr = { # data frame with stations at which is measured station_data <- select_elements(overviews, variable_name) diff --git a/R/get_groundwater_data.R b/R/get_groundwater_data.R index dc2e2127..ba86bc71 100644 --- a/R/get_groundwater_data.R +++ b/R/get_groundwater_data.R @@ -12,7 +12,6 @@ #' frames #' @export #' @importFrom stats setNames -#' @importFrom kwb.utils catAndRun #' @importFrom data.table rbindlist #' @examples #' \dontrun{ @@ -38,7 +37,7 @@ get_groundwater_data <- function( FUN = function(i) { option_key <- groundwater_options[i] option_name <- names(option_key) - kwb.utils::catAndRun( + cat_and_run( messageText = sprintf( "Importing '%s' data (%d/%d)", option_name, i, length(groundwater_options) @@ -51,7 +50,7 @@ get_groundwater_data <- function( lapply( X = ids, FUN = function(id) { - kwb.utils::catAndRun( + cat_and_run( sprintf( "Downloading Messstellennummer '%s' (%d/%d)", id, which(id == ids), length(ids) diff --git a/R/get_stations.R b/R/get_stations.R index 73e4e4d1..99744af1 100644 --- a/R/get_stations.R +++ b/R/get_stations.R @@ -15,7 +15,6 @@ #' @export #' @importFrom data.table rbindlist #' @importFrom dplyr left_join mutate select -#' @importFrom kwb.utils catAndRun #' @importFrom parallel makeCluster parLapply stopCluster #' @importFrom rlang .data #' @importFrom tidyr pivot_wider separate @@ -50,7 +49,7 @@ get_stations <- function( } # Loop through overview_options, either in parallel or sequentially - overview_list <- kwb.utils::catAndRun( + overview_list <- cat_and_run( sprintf( "Importing %d station overviews from Wasserportal Berlin", length(overview_options) diff --git a/R/get_surfacewater_qualities.R b/R/get_surfacewater_qualities.R index 2adc2ef7..f5240078 100644 --- a/R/get_surfacewater_qualities.R +++ b/R/get_surfacewater_qualities.R @@ -14,7 +14,7 @@ #' } get_surfacewater_qualities <- function(station_ids, dbg = TRUE) { n_stations <- length(station_ids) - kwb.utils::catAndRun( + cat_and_run( messageText = "Downloading surface water quality data", newLine = 3, expr = { @@ -22,8 +22,7 @@ get_surfacewater_qualities <- function(station_ids, dbg = TRUE) { station_ids, FUN = function (station_id) { n <- which(station_id == station_ids) - - kwb.utils::catAndRun( + cat_and_run( messageText = sprintf( "%02d/%02d: station_id = '%s'", n, diff --git a/R/get_wasserportal_masters_data.R b/R/get_wasserportal_masters_data.R index 5db9501b..6273de1b 100644 --- a/R/get_wasserportal_masters_data.R +++ b/R/get_wasserportal_masters_data.R @@ -44,7 +44,7 @@ get_wasserportal_masters_data <- function( try(get_wasserportal_master_data(master_url)) } - master_list <- kwb.utils::catAndRun( + master_list <- cat_and_run( messageText = sprintf( "Importing %d station metadata from Wasserportal Berlin", length(master_urls) diff --git a/R/helpers.R b/R/helpers.R index 7d58ac45..14c7e097 100644 --- a/R/helpers.R +++ b/R/helpers.R @@ -9,7 +9,7 @@ get_text_response_of_httr_post_request <- function( encoding = "Latin1" ) { - kwb.utils::catAndRun( + cat_and_run( text, dbg = dbg, expr = { diff --git a/R/list_data_to_csv_or_zip.R b/R/list_data_to_csv_or_zip.R index d9455ae6..8273108e 100644 --- a/R/list_data_to_csv_or_zip.R +++ b/R/list_data_to_csv_or_zip.R @@ -23,7 +23,7 @@ list_data_to_csv_or_zip <- function(data_list, file_prefix, to_zip) filename <- ifelse(to_zip, filename_zip, filename_csv) - kwb.utils::catAndRun( + cat_and_run( messageText = sprintf("Writing '%s'", filename), expr = { diff --git a/R/read_wasserportal.R b/R/read_wasserportal.R index 6b809683..b40f6372 100644 --- a/R/read_wasserportal.R +++ b/R/read_wasserportal.R @@ -131,7 +131,7 @@ read_wasserportal <- function( }) if (any(failed)) { - kwb.utils::catAndRun( + cat_and_run( sprintf( "Removing %d elements that are empty or failed (variables: %s)", sum(failed), diff --git a/R/read_wasserportal_raw.R b/R/read_wasserportal_raw.R index 764d33d3..1ca03b3b 100644 --- a/R/read_wasserportal_raw.R +++ b/R/read_wasserportal_raw.R @@ -14,7 +14,6 @@ #' wasserportal's API. 1L: before 2023, 2L: since 2023. Default: 2L #' @return ???? #' @export -#' @importFrom kwb.utils catAndRun selectColumns selectElements #' @importFrom kwb.datetime textToEuropeBerlinPosix read_wasserportal_raw <- function( variable, @@ -109,7 +108,7 @@ read_wasserportal_raw <- function( body <- list() } - text <- kwb.utils::catAndRun( + text <- cat_and_run( get_wasserportal_text( station, variable, diff --git a/R/utils.R b/R/utils.R index 57ac6cc9..a353231c 100644 --- a/R/utils.R +++ b/R/utils.R @@ -25,6 +25,9 @@ assert_date <- function(x) result } +# cat_and_run ------------------------------------------------------------------ +#' @importFrom kwb.utils catAndRun +cat_and_run <- kwb.utils::catAndRun # columns_to_labels ------------------------------------------------------------ #' Create Text Labels from Data Frame Columns #' From e2632a31ce31bd91dad970d5dded68a772744b41 Mon Sep 17 00:00:00 2001 From: hsonne Date: Fri, 5 Apr 2024 21:52:18 +0200 Subject: [PATCH 43/43] Use more "shortcuts" to kwb.utils' functions --- R/get_daily_surfacewater_data.R | 7 +-- R/get_station_variables.R | 4 +- R/get_surfacewater_quality.R | 3 +- R/get_wasserportal_masters_data.R | 9 ++- R/get_wasserportal_stations.R | 2 +- R/get_wasserportal_stations_table.R | 9 ++- R/read_wasserportal.R | 20 +++---- R/read_wasserportal_raw.R | 14 ++--- R/read_wasserportal_raw_gw.R | 2 +- R/utils.R | 89 +++++++++++++++++++++++++++-- inst/extdata/test_wasserportal.R | 2 +- 11 files changed, 116 insertions(+), 45 deletions(-) diff --git a/R/get_daily_surfacewater_data.R b/R/get_daily_surfacewater_data.R index 2e29cfbb..e28db259 100644 --- a/R/get_daily_surfacewater_data.R +++ b/R/get_daily_surfacewater_data.R @@ -94,10 +94,10 @@ get_surfacewater_variables <- function() get_non_external_station_ids <- function(station_data) { # Function to safely select columns from station_data - pull <- kwb.utils::createAccessor(station_data) + pull <- create_accessor(station_data) is_external <- is_external_link(pull("stammdaten_link")) - is_berlin <- kwb.utils::defaultIfNA(pull("Betreiber"), "") == "Land Berlin" + is_berlin <- default_if_na(pull("Betreiber"), "") == "Land Berlin" # Identifiers of monitoring stations to loop through as.character(pull("Messstellennummer")[is_berlin & !is_external]) @@ -115,7 +115,6 @@ get_non_external_station_ids <- function(station_data) #' @importFrom stringr str_detect str_split_fixed #' @importFrom tibble tibble #' @importFrom dplyr bind_cols bind_rows -#' @importFrom kwb.utils getAttribute sw_data_list_to_df <- function (sw_data_list) { # Helper function to split parameter string into parameter and unit @@ -136,7 +135,7 @@ sw_data_list_to_df <- function (sw_data_list) # Get its metadata metadata <- if (!is.null(data)) { - kwb.utils::getAttribute(data, "metadata") + get_attribute(data, "metadata") } else { message(sprintf( "Empty data frame when looping through '%s' in %s", diff --git a/R/get_station_variables.R b/R/get_station_variables.R index b6602d8a..52fbb7d1 100644 --- a/R/get_station_variables.R +++ b/R/get_station_variables.R @@ -17,8 +17,8 @@ get_station_variables <- function(station_df) stopifnot(is.data.frame(station_df)) variables <- station_df %>% - kwb.utils::removeColumns(c("Messstellennummer", "Messstellenname")) %>% - kwb.utils::removeEmptyColumns(dbg = FALSE) %>% + remove_columns(c("Messstellennummer", "Messstellenname")) %>% + remove_empty_columns(dbg = FALSE) %>% names() all_variables <- unlist(get_overview_options()) diff --git a/R/get_surfacewater_quality.R b/R/get_surfacewater_quality.R index 80489e4a..4caa3aa2 100644 --- a/R/get_surfacewater_quality.R +++ b/R/get_surfacewater_quality.R @@ -4,7 +4,6 @@ #' #' @return data frame with water quality data for one monitoring station #' @export -#' @importFrom kwb.utils stopFormatted #' @importFrom stringr str_detect str_remove #' @examples #' \dontrun{ @@ -46,7 +45,7 @@ get_surfacewater_quality <- function(station_id) { start_line <- which(stringr::str_detect(textlines, date_pattern)) if (length(start_line) == 0L) { - kwb.utils::stopFormatted( + stop_formatted( "Could not find the header row (starting with '%s')", date_pattern ) diff --git a/R/get_wasserportal_masters_data.R b/R/get_wasserportal_masters_data.R index 6273de1b..9a77c9c6 100644 --- a/R/get_wasserportal_masters_data.R +++ b/R/get_wasserportal_masters_data.R @@ -56,7 +56,7 @@ get_wasserportal_masters_data <- function( } ) - failed <- sapply(master_list, kwb.utils::isTryError) + failed <- sapply(master_list, is_try_error) if (any(failed)) { message("Failed fetching data from the following URLs:") @@ -72,7 +72,6 @@ get_wasserportal_masters_data <- function( #' \code{\link{get_wasserportal_stations_table}} #' @return data frame with metadata for selected station #' @importFrom dplyr mutate rename -#' @importFrom kwb.utils stopFormatted #' @importFrom rlang .data #' @importFrom tidyr pivot_wider #' @export @@ -108,13 +107,13 @@ get_wasserportal_master_data <- function(master_url) rvest::html_table() if (nrow(master_table) == 0L) { - kwb.utils::stopFormatted("No master table available at '%s'", master_url) + stop_formatted("No master table available at '%s'", master_url) } master_table %>% dplyr::rename("key" = "X1", "value" = "X2") %>% dplyr::mutate(key = stringr::str_remove_all(.data$key, "-")) %>% - dplyr::mutate(key = kwb.utils::substSpecialChars(.data$key)) %>% + dplyr::mutate(key = subst_special_chars(.data$key)) %>% tidyr::pivot_wider(names_from = "key", values_from = "value") } @@ -123,7 +122,7 @@ stop_on_external_data_provider <- function(url) { if (is_external_link(url)) { - kwb.utils::stopFormatted( + stop_formatted( paste0( "The master_url '%s' you provided refers to an external ", "data provider. Currently only master data within '%s' can be ", diff --git a/R/get_wasserportal_stations.R b/R/get_wasserportal_stations.R index b2b6499d..44bccde1 100644 --- a/R/get_wasserportal_stations.R +++ b/R/get_wasserportal_stations.R @@ -24,5 +24,5 @@ get_wasserportal_stations <- function(type = "quality") } select_columns(stations, c("name", "id"))[is_available, ] %>% - kwb.utils::toLookupList(data = .) + to_lookup_list(data = .) } diff --git a/R/get_wasserportal_stations_table.R b/R/get_wasserportal_stations_table.R index 26c0fe1a..57e86d78 100644 --- a/R/get_wasserportal_stations_table.R +++ b/R/get_wasserportal_stations_table.R @@ -6,7 +6,6 @@ #' \code{\link{wasserportal_base_url}} #' @return data frame with master data of selected monitoring stations #' @export -#' @importFrom kwb.utils substSpecialChars #' @importFrom rvest html_node html_table html_nodes html_attr #' @importFrom stringr str_remove_all #' @importFrom xml2 read_html @@ -80,14 +79,14 @@ get_wasserportal_stations_table <- function ( # # different from those in column "Messstellennummer". Adapt the links in # # column "Ganglinie" before "merging" them with the links in column # # "Messstellennummer". - # hrefs_graph <- kwb.utils::multiSubstitute(hrefs_graph, list( + # hrefs_graph <- multi_substitute(hrefs_graph, list( # "anzeige=[^&]+" = "anzeige=i", # "stable=gwq" = "stable=gws" # )) # # # "Merge" hrefs_id with hrefs_graph: Use hrefs_id if not NA else hrefs_graph # # and warn if both are given but different - # hrefs <- kwb.utils::parallelNonNA(hrefs_id, hrefs_graph) + # hrefs <- parallel_non_na(hrefs_id, hrefs_graph) # # # Report about differing hrefs in the two columns # #print_invalid_hrefs(hrefs) @@ -95,7 +94,7 @@ get_wasserportal_stations_table <- function ( # Prefix the wasserportal-related hyperlinks with the wasserportal base URL add_baseurl <- function(hrefs) { - is_not_na <- ! kwb.utils::isNaOrEmpty(hrefs) + is_not_na <- !is_na_or_empty(hrefs) if(sum(is_not_na) > 0) { is_wasserportal <- startsWith(hrefs, "station.php") & is_not_na @@ -116,7 +115,7 @@ get_wasserportal_stations_table <- function ( names(overview_table) <- names(overview_table) %>% stringr::str_remove_all("-") %>% - kwb.utils::substSpecialChars() + subst_special_chars() dplyr::bind_cols( diff --git a/R/read_wasserportal.R b/R/read_wasserportal.R index b40f6372..aa4c2b5f 100644 --- a/R/read_wasserportal.R +++ b/R/read_wasserportal.R @@ -127,7 +127,7 @@ read_wasserportal <- function( # Remove elements of class "response" that are returned in case of an error failed <- sapply(dfs, function(df) { - kwb.utils::isTryError(df) || inherits(df, "response") || length(df) == 0 + is_try_error(df) || inherits(df, "response") || length(df) == 0 }) if (any(failed)) { @@ -135,7 +135,7 @@ read_wasserportal <- function( sprintf( "Removing %d elements that are empty or failed (variables: %s)", sum(failed), - kwb.utils::stringList(variables[failed]) + string_list(variables[failed]) ), expr = { failures <- dfs[failed] @@ -166,7 +166,7 @@ read_wasserportal <- function( stop("type must be one of 'single', 'daily', 'monthly'") } - metadata <- lapply(dfs, kwb.utils::getAttribute, "metadata") + metadata <- lapply(dfs, get_attribute, "metadata") structure( result, @@ -180,9 +180,9 @@ merge_raw_results_single <- function(dfs, variables, include_raw_time) { date_vectors <- lapply(dfs, select_columns, "LocalDateTime") - if (length(variables) > 1 && ! kwb.utils::allAreIdentical(date_vectors)) { + if (length(variables) > 1 && ! all_are_identical(date_vectors)) { message("Not all requests return the same timestamp column:") - kwb.utils::printIf(TRUE, lengths(date_vectors)) + print_if(TRUE, lengths(date_vectors)) } keys <- c( @@ -200,11 +200,9 @@ merge_raw_results_single <- function(dfs, variables, include_raw_time) data_frames <- c(list(base = backbone), dfs) - result <- kwb.utils::mergeAll( - data_frames, by = keys, all.x = TRUE, dbg = FALSE - ) + result <- merge_all(data_frames, by = keys, all.x = TRUE, dbg = FALSE) - result <- kwb.utils::removeColumns(result[order(result$row), ], "row.base") + result <- remove_columns(result[order(result$row), ], "row.base") names(result) <- gsub("Einzelwert\\.", "", names(result)) @@ -213,9 +211,7 @@ merge_raw_results_single <- function(dfs, variables, include_raw_time) DateTimeUTC = format(result$LocalDateTime, tz = "UTC") ) - kwb.utils::insertColumns( - result, after = "LocalDateTime", UTCOffset = utc_offset - ) + insert_columns(result, after = "LocalDateTime", UTCOffset = utc_offset) } # merge_raw_results_daily ------------------------------------------------------ diff --git a/R/read_wasserportal_raw.R b/R/read_wasserportal_raw.R index 1ca03b3b..7f3fca86 100644 --- a/R/read_wasserportal_raw.R +++ b/R/read_wasserportal_raw.R @@ -170,7 +170,7 @@ get_wasserportal_url <- function(station, variable) get_wasserportal_text <- function(station, variable, station_ids, variable_ids) { default_names <- function(ids, prefix) { - kwb.utils::defaultIfNULL(names(ids), paste0(prefix, ids)) + default_if_null(names(ids), paste0(prefix, ids)) } variable_names <- default_names(variable_ids, "variable_") @@ -195,7 +195,7 @@ clean_timestamp_columns <- function(data, include_raw_time) { raw_timestamps <- select_columns(data, "Datum") - data <- kwb.utils::renameColumns(data, list(Datum = "timestamp_raw")) + data <- rename_columns(data, list(Datum = "timestamp_raw")) data$timestamp_corr <- repair_wasserportal_timestamps(raw_timestamps) @@ -212,10 +212,10 @@ clean_timestamp_columns <- function(data, include_raw_time) keys <- c("timestamp_raw", "timestamp_corr", "LocalDateTime") - data <- kwb.utils::moveColumnsToFront(data, keys) + data <- move_columns_to_front(data, keys) if (! include_raw_time) { - data <- kwb.utils::removeColumns(data, keys[1:2]) + data <- remove_columns(data, keys[1:2]) } remove_timestep_outliers(data, data$LocalDateTime, 60 * 15) @@ -234,13 +234,13 @@ repair_wasserportal_timestamps <- function(timestamps, dbg = FALSE) stopifnot(all(lengths(index_pairs) == 2L)) - first_indices <- sapply(index_pairs, kwb.utils::firstElement) + first_indices <- sapply(index_pairs, first_element) if (dbg && ! all(is_expected <- grepl(" 03", timestamps[first_indices]))) { message( "There are unexpected duplicated timestamps: ", - kwb.utils::stringList(timestamps[first_indices][! is_expected]) + string_list(timestamps[first_indices][! is_expected]) ) } @@ -250,7 +250,7 @@ repair_wasserportal_timestamps <- function(timestamps, dbg = FALSE) indices <- sort(unlist(index_pairs)) - kwb.utils::printIf(dbg, caption = "After timestamp repair", data.frame( + print_if(dbg, caption = "After timestamp repair", data.frame( row = indices, old = timestamps_old[indices], new = timestamps[indices] diff --git a/R/read_wasserportal_raw_gw.R b/R/read_wasserportal_raw_gw.R index a5494469..07e700b4 100644 --- a/R/read_wasserportal_raw_gw.R +++ b/R/read_wasserportal_raw_gw.R @@ -46,7 +46,7 @@ read_wasserportal_raw_gw <- function( start_line <- which(startsWith(textlines, date_pattern)) if (length(start_line) == 0L) { - kwb.utils::stopFormatted( + stop_formatted( "Could not find the header row (starting with '%s')", date_pattern ) diff --git a/R/utils.R b/R/utils.R index a353231c..9461c068 100644 --- a/R/utils.R +++ b/R/utils.R @@ -1,3 +1,7 @@ +# all_are_identical ------------------------------------------------------------ +#' @importFrom kwb.utils allAreIdentical +all_are_identical <- kwb.utils::allAreIdentical + # as_date_de ------------------------------------------------------------------- as_date_de <- function(x) { @@ -5,7 +9,6 @@ as_date_de <- function(x) } # assert_date ------------------------------------------------------------------ -#' @importFrom kwb.utils isTryError assert_date <- function(x) { if (inherits(x, "Date")) { @@ -14,7 +17,7 @@ assert_date <- function(x) result <- try(as.Date(x, origin = "1970-01-01"), silent = TRUE) - if (kwb.utils::isTryError(result)) { + if (is_try_error(result)) { stop(call. = FALSE, sprintf( "%s cannot be converted to a Date object: %s", deparse(substitute(x)), @@ -50,6 +53,10 @@ columns_to_labels <- function(data, columns, fmt = "%s: %s", sep = ", ") )))) } +# create_accessor -------------------------------------------------------------- +#' @importFrom kwb.utils createAccessor +create_accessor <- kwb.utils::createAccessor + # date_string_de --------------------------------------------------------------- date_string_de <- function(x) { @@ -63,7 +70,6 @@ date_string_de <- function(x) #' #' @return data frame with values #' @export -#' @importFrom kwb.utils isTryError #' @importFrom utils read.table #' read <- function(text, ...) { @@ -72,13 +78,61 @@ read <- function(text, ...) { text = text, sep = ";", dec = ",", stringsAsFactors = FALSE, ... )) - if (kwb.utils::isTryError(result)) { + if (is_try_error(result)) { return(NULL) } result } +# default_if_na ---------------------------------------------------------------- +#' @importFrom kwb.utils defaultIfNA +default_if_na <- kwb.utils::defaultIfNA + +# default_if_null -------------------------------------------------------------- +#' @importFrom kwb.utils defaultIfNULL +default_if_null <- kwb.utils::defaultIfNULL + +# first_element ---------------------------------------------------------------- +#' @importFrom kwb.utils firstElement +first_element <- kwb.utils::firstElement + +# get_attribute ---------------------------------------------------------------- +#' @importFrom kwb.utils getAttribute +get_attribute <- kwb.utils::getAttribute + +# insert_columns --------------------------------------------------------------- +#' @importFrom kwb.utils::insertColumns +insert_columns <- kwb.utils::insertColumns + +# is_na_or_empty --------------------------------------------------------------- +#' @importFrom kwb.utils::isNaOrEmpty(hrefs) +is_na_or_empty <- kwb.utils::isNaOrEmpty + +# is_try_error ----------------------------------------------------------------- +#' @importFrom kwb.utils isTryError +is_try_error <- kwb.utils::isTryError + +# merge_all -------------------------------------------------------------------- +#' @importFrom kwb.utils mergeAll +merge_all <- kwb.utils::mergeAll + +# move_columns_to_front -------------------------------------------------------- +#' @importFrom kwb.utils moveColumnsToFront +move_columns_to_front <- kwb.utils::moveColumnsToFront + +# multi_substitute ------------------------------------------------------------- +#' @importFrom kwb.utils::multiSubstitute +multi_substitute <- kwb.utils::multiSubstitute + +# parallel_non_na -------------------------------------------------------------- +#' @importFrom kwb.utils parallelNonNA +parallel_non_na <- kwb.utils::parallelNonNA + +# print_if --------------------------------------------------------------------- +#' @importFrom kwb.utils printIf +print_if <- kwb.utils::printIf + # readPackageFile -------------------------------------------------------------- #' Read CSV File from Package's "extdata" Folder @@ -95,6 +149,18 @@ readPackageFile <- function(file, ...) kwb.utils::readPackageFile(file, package = "wasserportal", ...) } +# remove_columns --------------------------------------------------------------- +#' @importFrom kwb.utils removeColumns +remove_columns <- kwb.utils::removeColumns + +# remove_empty_columns --------------------------------------------------------- +#' @importFrom kwb.utils removeEmptyColumns +remove_empty_columns <- kwb.utils::removeEmptyColumns + +# rename_columns --------------------------------------------------------------- +#' @importFrom kwb.utils renameColumns +rename_columns <- kwb.utils::renameColumns + # select_columns --------------------------------------------------------------- #' @importFrom kwb.utils selectColumns select_columns <- kwb.utils::selectColumns @@ -111,13 +177,17 @@ split_into_lines <- function(x) strsplit(x, "\n")[[1L]] } +# stop_formatted --------------------------------------------------------------- +#' @importFrom kwb.utils stopFormatted +stop_formatted <- kwb.utils::stopFormatted + # stop_if_not_all_in ----------------------------------------------------------- stop_if_not_all_in <- function(x, set, type = "element") { is_missing <- !(x %in% set) if (any(is_missing)) { - kwb.utils::stopFormatted(kwb.utils::noSuchElements( + stop_formatted(kwb.utils::noSuchElements( x = x[is_missing], available = set, type = type @@ -125,10 +195,18 @@ stop_if_not_all_in <- function(x, set, type = "element") } } +# string_list ------------------------------------------------------------------ +#' @importFrom kwb.utils stringList +string_list <- kwb.utils::stringList + # subst_special_chars ---------------------------------------------------------- #' @importFrom kwb.utils substSpecialChars subst_special_chars <- kwb.utils::substSpecialChars +# to_lookup_list --------------------------------------------------------------- +#' @importFrom kwb.utils toLookupList +to_lookup_list <- kwb.utils::toLookupList + # url_parameter_string --------------------------------------------------------- url_parameter_string <- function(...) { @@ -138,3 +216,4 @@ url_parameter_string <- function(...) paste(names(arguments), arguments, sep = "=", collapse = "&") } + diff --git a/inst/extdata/test_wasserportal.R b/inst/extdata/test_wasserportal.R index 68642d08..5f2d9ecb 100644 --- a/inst/extdata/test_wasserportal.R +++ b/inst/extdata/test_wasserportal.R @@ -24,7 +24,7 @@ if (FALSE) # Show data sections where the 15 minute timestep is broken lapply(dfs, function(df) { diffs <- diff(df$LocalDateTime) - kwb.utils::printIf(TRUE, table(diffs)) + print_if(TRUE, table(diffs)) indices <- which(diffs != 15) df[sort(unique(c(indices - 1, indices, indices + 1))), ] })