From 554e59e843241df51af00de543bb8754264c0985 Mon Sep 17 00:00:00 2001 From: wlandau Date: Tue, 24 Sep 2024 11:18:35 -0400 Subject: [PATCH 1/5] record one advisory per package version --- R/issues_descriptions.R | 71 ++++++++++++++++++++--- R/utils_issues.R | 4 +- man/issues_descriptions.Rd | 11 +++- tests/testthat/test-issues_descriptions.R | 20 +++++++ tests/testthat/test-utils_issues.R | 25 ++++++++ 5 files changed, 120 insertions(+), 11 deletions(-) create mode 100644 tests/testthat/test-utils_issues.R diff --git a/R/issues_descriptions.R b/R/issues_descriptions.R index ef86047..e20b9b1 100644 --- a/R/issues_descriptions.R +++ b/R/issues_descriptions.R @@ -3,9 +3,12 @@ #' @family issues #' @description Report issues with the `DESCRIPTION` files of packages. #' @details [issues_descriptions()] scans downloaded metadata from the -#' `PACKAGES.json` file of an R universe and reports issues with a -#' package's description file, such as the presence of a -#' `"Remotes"` field. +#' `PACKAGES.json` file of an R universe and scans for specific issues in a +#' package's description file: +#' 1. The presence of a `"Remotes"` field. +#' 2. There is a security advisory at +#' +#' for the given package version. #' @inheritSection record_issues Package issues #' @return A named list of information about packages which do not comply #' with `DESCRPTION` checks. Each name is a package name, @@ -18,14 +21,68 @@ #' issues <- issues_descriptions(meta = meta) #' str(issues) issues_descriptions <- function(meta = meta_packages()) { + meta$issue <- FALSE + meta <- issues_descriptions_advisories(meta) meta <- issues_descriptions_remotes(meta) - fields <- "remotes" - meta <- meta[, c("package", fields)] - issues_list(meta) + meta <- meta[meta$issue,, drop = FALSE] # nolint + issues_list(meta[, c("package", "advisory", "remotes")]) +} + +issues_descriptions_advisories <- function(meta) { + advisories <- read_advisories() + meta <- merge( + x = meta, + y = advisories, + by = c("package", "version"), + all.x = TRUE, + all.y = FALSE + ) + meta$issue <- meta$issue | !is.na(meta$advisory) + meta } issues_descriptions_remotes <- function(meta) { meta[["remotes"]] <- meta[["remotes"]] %||% replicate(nrow(meta), NULL) meta$remotes <- lapply(meta$remotes, function(x) x[nzchar(x)]) - meta[vapply(meta$remotes, length, integer(1L)) > 0L, ] + meta$issue <- meta$issue | vapply(meta$remotes, length, integer(1L)) > 0L + meta +} + +read_advisories <- function() { + path <- tempfile() + on.exit(unlink(path, recursive = TRUE, force = TRUE)) + gert::git_clone( + url = "https://github.com/RConsortium/r-advisory-database", + path = path, + verbose = FALSE + ) + advisories <- list.files( + file.path(path, "vulns"), + recursive = TRUE, + full.names = TRUE + ) + out <- do.call(vctrs::vec_rbind, lapply(advisories, read_advisory)) + keep <- !duplicated(out[, c("package", "version")]) + out[keep,, drop = FALSE] # nolint +} + +read_advisory <- function(path) { + out <- lapply( + yaml::read_yaml(file = path)$affected, + advisory_entry, + path = path + ) + do.call(vctrs::vec_rbind, out) +} + +advisory_entry <- function(entry, path) { + data.frame( + package = entry$package$name, + version = entry$versions, + advisory = file.path( + "https://github.com/RConsortium/r-advisory-database/blob/main/vulns", + entry$package$name, + basename(path) + ) + ) } diff --git a/R/utils_issues.R b/R/utils_issues.R index 4b7bf97..593e5a4 100644 --- a/R/utils_issues.R +++ b/R/utils_issues.R @@ -3,7 +3,9 @@ issues_list <- function(x) { out <- list() for (index in seq_len(nrow(x))) { for (field in setdiff(colnames(x), "package")) { - out[[x$package[index]]][[field]] <- x[[field]][[index]] + if (!all(is.na(x[[field]][[index]]))) { + out[[x$package[index]]][[field]] <- x[[field]][[index]] + } } } out[order(as.character(names(out)))] diff --git a/man/issues_descriptions.Rd b/man/issues_descriptions.Rd index 7233edd..0af48ac 100644 --- a/man/issues_descriptions.Rd +++ b/man/issues_descriptions.Rd @@ -21,9 +21,14 @@ Report issues with the \code{DESCRIPTION} files of packages. } \details{ \code{\link[=issues_descriptions]{issues_descriptions()}} scans downloaded metadata from the -\code{PACKAGES.json} file of an R universe and reports issues with a -package's description file, such as the presence of a -\code{"Remotes"} field. +\code{PACKAGES.json} file of an R universe and scans for specific issues in a +package's description file: +\enumerate{ +\item The presence of a \code{"Remotes"} field. +\item There is a security advisory at +\url{https://github.com/RConsortium/r-advisory-database} +for the given package version. +} } \section{Package issues}{ diff --git a/tests/testthat/test-issues_descriptions.R b/tests/testthat/test-issues_descriptions.R index 1ccd4a2..8f7cf05 100644 --- a/tests/testthat/test-issues_descriptions.R +++ b/tests/testthat/test-issues_descriptions.R @@ -15,3 +15,23 @@ test_that("issues_descriptions() on a small repo", { issues <- issues_descriptions(meta = meta) expect_true(is.list(issues)) }) + +test_that("issues_descriptions() with security advisories", { + example <- mock_meta_packages$package == "nanonext" + readxl <- mock_meta_packages[example,, drop = FALSE] # nolint + readxl$package <- "readxl" + readxl$version <- "1.4.1" + meta <- rbind(mock_meta_packages, readxl) + out <- issues_descriptions(meta) + url <- file.path( + "https://github.com/RConsortium/r-advisory-database", + "blob/main/vulns/readxl/RSEC-2023-2.yaml" + ) + exp <- list( + audio.whisper = list(remotes = "bnosac/audio.vadwebrtc"), + readxl = list(advisory = url), + stantargets = list(remotes = c("hyunjimoon/SBC", "stan-dev/cmdstanr")), + tidypolars = list(remotes = "markvanderloo/tinytest/pkg") + ) + expect_equal(out, exp) +}) diff --git a/tests/testthat/test-utils_issues.R b/tests/testthat/test-utils_issues.R new file mode 100644 index 0000000..1276678 --- /dev/null +++ b/tests/testthat/test-utils_issues.R @@ -0,0 +1,25 @@ +test_that("issues_list() handles missing and empty values correctly", { + meta <- data.frame( + package = c("audio.whisper", "readxl", "stantargets", "tidypolars"), + advisory = c( + NA_character_, + "url", + NA_character_, + NA_character_ + ) + ) + meta$remotes <- list( + "bnosac/audio.vadwebrtc", + NULL, + c("hyunjimoon/SBC", "stan-dev/cmdstanr"), + "markvanderloo/tinytest/pkg" + ) + out <- issues_list(meta) + exp <- list( + audio.whisper = list(remotes = "bnosac/audio.vadwebrtc"), + readxl = list(advisory = "url"), + stantargets = list(remotes = c("hyunjimoon/SBC", "stan-dev/cmdstanr")), + tidypolars = list(remotes = "markvanderloo/tinytest/pkg") + ) + expect_equal(out, exp) +}) From 8b9efa36063a1c38af1c6d18776d93a879ad9024 Mon Sep 17 00:00:00 2001 From: wlandau Date: Tue, 24 Sep 2024 11:41:43 -0400 Subject: [PATCH 2/5] detect all reported vulnerabilities in r-advisory-database --- DESCRIPTION | 5 +++-- NAMESPACE | 2 ++ NEWS.md | 4 ++++ R/issues_descriptions.R | 9 ++++---- R/package.R | 2 ++ tests/testthat/test-issues_descriptions.R | 25 ++++++++++++++++------- 6 files changed, 33 insertions(+), 14 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index f54f4f5..1ab3173 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -2,7 +2,7 @@ Package: multiverse.internals Title: Internal Infrastructure for R-multiverse Description: R-multiverse requires this internal infrastructure package to automate contribution reviews and populate universes. -Version: 0.2.12 +Version: 0.2.13 License: MIT + file LICENSE URL: https://github.com/r-multiverse/multiverse.internals BugReports: https://github.com/r-multiverse/multiverse.internals/issues @@ -28,15 +28,16 @@ Authors@R: c( Depends: R (>= 3.6) Imports: + gert, gh, igraph, jsonlite, nanonext, pkgsearch, + stats, utils, vctrs Suggests: - gert, testthat (>= 3.0.0) Encoding: UTF-8 Language: en-US diff --git a/NAMESPACE b/NAMESPACE index 361447d..b80544c 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -19,6 +19,7 @@ export(review_pull_requests) export(staging_is_active) export(try_message) export(update_staging) +importFrom(gert,git_clone) importFrom(gh,gh) importFrom(igraph,V) importFrom(igraph,graph) @@ -32,6 +33,7 @@ importFrom(nanonext,ncurl) importFrom(nanonext,parse_url) importFrom(nanonext,status_code) importFrom(pkgsearch,cran_package) +importFrom(stats,aggregate) importFrom(utils,available.packages) importFrom(utils,compareVersion) importFrom(vctrs,vec_rbind) diff --git a/NEWS.md b/NEWS.md index 2f93b7d..2c7ad40 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,7 @@ +# multiverse.internals 0.2.13 + +* Record issues for vulnerabilities in . + # multiverse.internals 0.2.12 * Amend argument defaults in `propose_snapshot()` to include source files. diff --git a/R/issues_descriptions.R b/R/issues_descriptions.R index e20b9b1..2b2de78 100644 --- a/R/issues_descriptions.R +++ b/R/issues_descriptions.R @@ -25,7 +25,7 @@ issues_descriptions <- function(meta = meta_packages()) { meta <- issues_descriptions_advisories(meta) meta <- issues_descriptions_remotes(meta) meta <- meta[meta$issue,, drop = FALSE] # nolint - issues_list(meta[, c("package", "advisory", "remotes")]) + issues_list(meta[, c("package", "advisories", "remotes")]) } issues_descriptions_advisories <- function(meta) { @@ -37,7 +37,7 @@ issues_descriptions_advisories <- function(meta) { all.x = TRUE, all.y = FALSE ) - meta$issue <- meta$issue | !is.na(meta$advisory) + meta$issue <- meta$issue | !vapply(meta$advisories, anyNA, logical(1L)) meta } @@ -62,8 +62,7 @@ read_advisories <- function() { full.names = TRUE ) out <- do.call(vctrs::vec_rbind, lapply(advisories, read_advisory)) - keep <- !duplicated(out[, c("package", "version")]) - out[keep,, drop = FALSE] # nolint + stats::aggregate(x = advisories ~ package + version, data = out, FUN = list) } read_advisory <- function(path) { @@ -79,7 +78,7 @@ advisory_entry <- function(entry, path) { data.frame( package = entry$package$name, version = entry$versions, - advisory = file.path( + advisories = file.path( "https://github.com/RConsortium/r-advisory-database/blob/main/vulns", entry$package$name, basename(path) diff --git a/R/package.R b/R/package.R index a2bb795..528b8f0 100644 --- a/R/package.R +++ b/R/package.R @@ -1,8 +1,10 @@ +#' @importFrom gert git_clone #' @importFrom gh gh #' @importFrom igraph graph neighbors subcomponent V #' @importFrom jsonlite parse_json read_json stream_in write_json #' @importFrom nanonext ncurl parse_url status_code #' @importFrom pkgsearch cran_package +#' @importFrom stats aggregate #' @importFrom utils available.packages compareVersion #' @importFrom vctrs vec_rbind vec_slice NULL diff --git a/tests/testthat/test-issues_descriptions.R b/tests/testthat/test-issues_descriptions.R index 8f7cf05..0fac6f3 100644 --- a/tests/testthat/test-issues_descriptions.R +++ b/tests/testthat/test-issues_descriptions.R @@ -18,18 +18,29 @@ test_that("issues_descriptions() on a small repo", { test_that("issues_descriptions() with security advisories", { example <- mock_meta_packages$package == "nanonext" + commonmark <- mock_meta_packages[example,, drop = FALSE] # nolint + commonmark$package <- "commonmark" + commonmark$version <- "0.2" readxl <- mock_meta_packages[example,, drop = FALSE] # nolint readxl$package <- "readxl" readxl$version <- "1.4.1" - meta <- rbind(mock_meta_packages, readxl) + meta <- rbind(mock_meta_packages, commonmark, readxl) out <- issues_descriptions(meta) - url <- file.path( - "https://github.com/RConsortium/r-advisory-database", - "blob/main/vulns/readxl/RSEC-2023-2.yaml" - ) exp <- list( - audio.whisper = list(remotes = "bnosac/audio.vadwebrtc"), - readxl = list(advisory = url), + audio.whisper = list(remotes = "bnosac/audio.vadwebrtc"), + commonmark = list( + advisories = file.path( + "https://github.com/RConsortium/r-advisory-database", + "blob/main/vulns/commonmark", + c("RSEC-2023-6.yaml", "RSEC-2023-7.yaml", "RSEC-2023-8.yaml") + ) + ), + readxl = list( + advisories = file.path( + "https://github.com/RConsortium/r-advisory-database", + "blob/main/vulns/readxl/RSEC-2023-2.yaml" + ) + ), stantargets = list(remotes = c("hyunjimoon/SBC", "stan-dev/cmdstanr")), tidypolars = list(remotes = "markvanderloo/tinytest/pkg") ) From 46f0eab9943f8da6ba538c42d5211ed1ab11c8ee Mon Sep 17 00:00:00 2001 From: wlandau Date: Tue, 24 Sep 2024 12:36:08 -0400 Subject: [PATCH 3/5] yaml --- DESCRIPTION | 3 ++- NAMESPACE | 1 + R/package.R | 1 + 3 files changed, 4 insertions(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 1ab3173..0da0763 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -36,7 +36,8 @@ Imports: pkgsearch, stats, utils, - vctrs + vctrs, + yaml Suggests: testthat (>= 3.0.0) Encoding: UTF-8 diff --git a/NAMESPACE b/NAMESPACE index b80544c..6a10a10 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -38,3 +38,4 @@ importFrom(utils,available.packages) importFrom(utils,compareVersion) importFrom(vctrs,vec_rbind) importFrom(vctrs,vec_slice) +importFrom(yaml,read_yaml) diff --git a/R/package.R b/R/package.R index 528b8f0..c17fbfa 100644 --- a/R/package.R +++ b/R/package.R @@ -7,4 +7,5 @@ #' @importFrom stats aggregate #' @importFrom utils available.packages compareVersion #' @importFrom vctrs vec_rbind vec_slice +#' @importFrom yaml read_yaml NULL From e37dd6ea4da60ff818383f6fc909f51ca0cc4030 Mon Sep 17 00:00:00 2001 From: shikokuchuo <53399081+shikokuchuo@users.noreply.github.com> Date: Sun, 29 Sep 2024 21:51:48 +0100 Subject: [PATCH 4/5] alternative to cloning repo --- DESCRIPTION | 2 +- NAMESPACE | 2 +- R/issues_descriptions.R | 33 +++++++++++++++++++++------------ R/package.R | 3 +-- 4 files changed, 24 insertions(+), 16 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 0da0763..256f94b 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -28,7 +28,6 @@ Authors@R: c( Depends: R (>= 3.6) Imports: - gert, gh, igraph, jsonlite, @@ -39,6 +38,7 @@ Imports: vctrs, yaml Suggests: + gert, testthat (>= 3.0.0) Encoding: UTF-8 Language: en-US diff --git a/NAMESPACE b/NAMESPACE index 6a10a10..7be3fcd 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -19,7 +19,6 @@ export(review_pull_requests) export(staging_is_active) export(try_message) export(update_staging) -importFrom(gert,git_clone) importFrom(gh,gh) importFrom(igraph,V) importFrom(igraph,graph) @@ -36,6 +35,7 @@ importFrom(pkgsearch,cran_package) importFrom(stats,aggregate) importFrom(utils,available.packages) importFrom(utils,compareVersion) +importFrom(utils,unzip) importFrom(vctrs,vec_rbind) importFrom(vctrs,vec_slice) importFrom(yaml,read_yaml) diff --git a/R/issues_descriptions.R b/R/issues_descriptions.R index 2b2de78..e00641a 100644 --- a/R/issues_descriptions.R +++ b/R/issues_descriptions.R @@ -29,7 +29,7 @@ issues_descriptions <- function(meta = meta_packages()) { } issues_descriptions_advisories <- function(meta) { - advisories <- read_advisories() + advisories <- read_advisories(timeout = 60000L, retries = 3L) meta <- merge( x = meta, y = advisories, @@ -48,19 +48,28 @@ issues_descriptions_remotes <- function(meta) { meta } -read_advisories <- function() { +read_advisories <- function(timeout, retries) { path <- tempfile() + dir.create(path) on.exit(unlink(path, recursive = TRUE, force = TRUE)) - gert::git_clone( - url = "https://github.com/RConsortium/r-advisory-database", - path = path, - verbose = FALSE - ) - advisories <- list.files( - file.path(path, "vulns"), - recursive = TRUE, - full.names = TRUE - ) + zipfile <- file.path(path, "file.zip") + for (i in seq_len(retries)) { + res <- nanonext::ncurl( + "https://github.com/RConsortium/r-advisory-database/zipball/main", + convert = FALSE, + follow = TRUE, + timeout = timeout + ) + res[["status"]] == 200L && break + i == retries && stop( + "Obtaining advisories from R Consortium database failed with status: ", + status_code(res[["status"]]), + call. = FALSE + ) + } + writeBin(res[["data"]], zipfile) + unzip(zipfile, exdir = path, junkpaths = TRUE) + advisories <- Sys.glob(file.path(path, "RSEC*.yaml")) out <- do.call(vctrs::vec_rbind, lapply(advisories, read_advisory)) stats::aggregate(x = advisories ~ package + version, data = out, FUN = list) } diff --git a/R/package.R b/R/package.R index c17fbfa..a100aea 100644 --- a/R/package.R +++ b/R/package.R @@ -1,11 +1,10 @@ -#' @importFrom gert git_clone #' @importFrom gh gh #' @importFrom igraph graph neighbors subcomponent V #' @importFrom jsonlite parse_json read_json stream_in write_json #' @importFrom nanonext ncurl parse_url status_code #' @importFrom pkgsearch cran_package #' @importFrom stats aggregate -#' @importFrom utils available.packages compareVersion +#' @importFrom utils available.packages compareVersion unzip #' @importFrom vctrs vec_rbind vec_slice #' @importFrom yaml read_yaml NULL From 1f62d816b4d47f9f54101d9ad5597450a1676fc0 Mon Sep 17 00:00:00 2001 From: wlandau Date: Wed, 2 Oct 2024 13:48:36 -0400 Subject: [PATCH 5/5] Readability of conditional logic --- R/issues_descriptions.R | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/R/issues_descriptions.R b/R/issues_descriptions.R index e00641a..a25e5fc 100644 --- a/R/issues_descriptions.R +++ b/R/issues_descriptions.R @@ -53,21 +53,25 @@ read_advisories <- function(timeout, retries) { dir.create(path) on.exit(unlink(path, recursive = TRUE, force = TRUE)) zipfile <- file.path(path, "file.zip") - for (i in seq_len(retries)) { - res <- nanonext::ncurl( + for (try in seq_len(retries)) { + response <- nanonext::ncurl( "https://github.com/RConsortium/r-advisory-database/zipball/main", convert = FALSE, follow = TRUE, timeout = timeout ) - res[["status"]] == 200L && break - i == retries && stop( - "Obtaining advisories from R Consortium database failed with status: ", - status_code(res[["status"]]), - call. = FALSE - ) + if (all(response[["status"]] == 200L)) { + break + } + if (all(try == retries)) { + stop( + "Failed to download R Consortium advisories database. Status: ", + status_code(response[["status"]]), + call. = FALSE + ) + } } - writeBin(res[["data"]], zipfile) + writeBin(response[["data"]], zipfile) unzip(zipfile, exdir = path, junkpaths = TRUE) advisories <- Sys.glob(file.path(path, "RSEC*.yaml")) out <- do.call(vctrs::vec_rbind, lapply(advisories, read_advisory))