diff --git a/DESCRIPTION b/DESCRIPTION index 0fb2f50..d7f673c 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -47,7 +47,8 @@ Suggests: knitr, rmarkdown, rosv, - testthat (>= 3.0.0) + testthat (>= 3.0.0), + withr Config/Needs/vignettes: knitr, rmarkdown, @@ -84,6 +85,7 @@ Collate: 'data_desc.R' 'data_downloads_total.R' 'data_r_cmd_check.R' + 'data_recognized_source.R' 'data_vignettes.R' 'data_web_html.R' 'generic_metric_coerce.R' @@ -96,7 +98,7 @@ Collate: 'zzz.R' Encoding: UTF-8 Roxygen: list(markdown = TRUE) -RoxygenNote: 7.3.2 +RoxygenNote: 7.3.3 Depends: R (>= 3.5) LazyData: true diff --git a/R/data_recognized_source.R b/R/data_recognized_source.R new file mode 100644 index 0000000..31ca79a --- /dev/null +++ b/R/data_recognized_source.R @@ -0,0 +1,81 @@ +#' @include impl_data.R + +# Source control URL extraction and inference +impl_data( + "recognized_source_url", + class = class_character, + title = "Recognized Source Control URL", + description = paste( + "The \\acronym{URL} of the package's source control repository on a", + "recognized hosting platform, determined by matching against an", + "allow-list of known domains. Extracted from the \\acronym{URL} and", + "\\code{BugReports} fields in the \\code{DESCRIPTION} file. The", + "allow-list can be customized; see \\code{?options} for details." + ), + function(pkg, resource, field, ...) { + # Get URLs from DESCRIPTION file + desc_urls <- tryCatch( + pkg$desc$get_urls(), + error = function(e) character(0) + ) + + # Get BugReports field if available + bug_reports <- tryCatch( + pkg$desc$get_field("BugReports"), + error = function(e) character(0) + ) + + # Combine all URLs to check, filtering out empty strings and NAs + all_urls <- c(desc_urls, bug_reports) + all_urls <- all_urls[!is.na(all_urls) & nzchar(all_urls)] + + if (length(all_urls) == 0) { + return(character(0)) + } + + # Get recognized source control domains from options + source_control_domains <- opt("source_control_domains") + + # Find all URLs matching known source control domains + # We use case-insensitive matching by converting to lowercase + # and fixed = TRUE to avoid regex special character issues (e.g., '.') + is_src_url <- vapply( + tolower(source_control_domains), + grepl, + logical(length(all_urls)), + x = tolower(all_urls), + fixed = TRUE + ) + + all_urls[rowSums(is_src_url) > 0] + } +) + +impl_data( + "has_recognized_source", + class = class_logical, + metric = TRUE, + tags = c("best practice"), + permissions = c(), + title = "Has Recognized Source Repository", + description = paste( + "Indicates whether the package has a source code repository on a", + "recognized hosting platform from an allow-list of known domains.", + "Inferred from the \\acronym{URL} and \\code{BugReports} fields in the", + "\\code{DESCRIPTION} file. See \\code{?options} for customizing the", + "allow-list." + ), + function(pkg, resource, field, ...) { + length(pkg$recognized_source_url) > 0L + } +) + +# Mock implementation for random packages +impl_data( + "has_recognized_source", + for_resource = mock_resource, + function(pkg, resource, field, ...) { + # Simulate realistic distribution - most packages have recognized source + runif(1) > 0.2 + } +) diff --git a/R/options.R b/R/options.R index b38c869..c59bbd4 100644 --- a/R/options.R +++ b/R/options.R @@ -26,5 +26,20 @@ define_options( "Silences console output during evaluation. This applies when pulling package resources (such as download and installation output) and executing code (for example, running `R CMD check`)", - quiet = TRUE + quiet = TRUE, + + fmt("Recognized source control hosting domains used when inferring whether a + package has a source code repository on a recognized hosting platform. + Customize this to add additional git hosting services (e.g., self-hosted + GitLab instances or other federated git providers)."), + source_control_domains = c( + "github.com", + "gitlab.com", + "bitbucket.org", + "r-forge.r-project.org", + "codeberg.org", + "sr.ht", # sourcehut + "gitea.com", + "git.sr.ht" + ) ) diff --git a/man/metrics.Rd b/man/metrics.Rd index 0541b5d..95cb6eb 100644 --- a/man/metrics.Rd +++ b/man/metrics.Rd @@ -61,6 +61,13 @@ For access to \emph{all} the internally calculated data, pass \code{all = TRUE}. \Sexpr[stage=render,results=rd]{if (!is.na(match("network", getOption("val.meter.permissions")))) "\\\\ifelse{html}{\\\\figure{badge-req-network-x-flat-square-green.svg}{options: alt = \\"[network]\\"}}{\\\\strong{[req::network]}}" else "\\\\ifelse{html}{\\\\figure{badge-req-network-x-flat-square-red.svg}{options: alt = \\"[network]\\"}}{\\\\strong{[req::network]}}"} \Sexpr[stage=install,results=rd]{if (numeric_version(paste0(R.version$major, ".", R.version$minor)) < "4.5.0") { "\\\\ifelse{html}{\\\\figure{badge-adoption-x-flat-square-blue.svg}{options: alt = \\"[adoption]\\"}}{\\\\strong{[adoption]}}\\n\\\\ifelse{html}{\\\\figure{badge-transient-x-flat-square-blue.svg}{options: alt = \\"[transient]\\"}}{\\\\strong{[transient]}}\\n\\\\ifelse{html}{\\\\figure{badge-version--independent-x-flat-square-blue.svg}{options: alt = \\"[version-independent]\\"}}{\\\\strong{[version-independent]}}" } else { "\\\\link[val.meter:tags]{\\\\ifelse{html}{\\\\figure{badge-adoption-x-flat-square-blue.svg}{options: alt = \\"[adoption]\\"}}{\\\\strong{[adoption]}}}\\n\\\\link[val.meter:tags]{\\\\ifelse{html}{\\\\figure{badge-transient-x-flat-square-blue.svg}{options: alt = \\"[transient]\\"}}{\\\\strong{[transient]}}}\\n\\\\link[val.meter:tags]{\\\\ifelse{html}{\\\\figure{badge-version--independent-x-flat-square-blue.svg}{options: alt = \\"[version-independent]\\"}}{\\\\strong{[version-independent]}}}" }} +} + \subsection{Has Recognized Source Repository}{ +\code{} Indicates whether the package has a source code repository on a recognized hosting platform from an allow-list of known domains. Inferred from the \acronym{URL} and \code{BugReports} fields in the \code{DESCRIPTION} file. See \code{?options} for customizing the allow-list. + + + +\Sexpr[stage=install,results=rd]{if (numeric_version(paste0(R.version$major, ".", R.version$minor)) < "4.5.0") { "\\\\ifelse{html}{\\\\figure{badge-best_practice-x-flat-square-blue.svg}{options: alt = \\"[best practice]\\"}}{\\\\strong{[best practice]}}" } else { "\\\\link[val.meter:tags]{\\\\ifelse{html}{\\\\figure{badge-best_practice-x-flat-square-blue.svg}{options: alt = \\"[best practice]\\"}}{\\\\strong{[best practice]}}}" }} } \subsection{Dependency Count}{ \code{} the number of required dependencies diff --git a/man/options.Rd b/man/options.Rd index 9436013..b861199 100644 --- a/man/options.Rd +++ b/man/options.Rd @@ -51,6 +51,16 @@ resources (such as download and installation output) and executing code \item{envvar: }{R_VAL_METER_QUIET (evaluated if possible, raw string otherwise)} }} +\item{source_control_domains}{\describe{ +Recognized source control hosting domains used when inferring whether a +package has a source code repository on a recognized hosting platform. +Customize this to add additional git hosting services (e.g., self-hosted +GitLab instances or other federated git providers).\item{default: }{\preformatted{c("github.com", "gitlab.com", "bitbucket.org", "r-forge.r-project.org", + "codeberg.org", "sr.ht", "gitea.com", "git.sr.ht")}} +\item{option: }{val.meter.source_control_domains} +\item{envvar: }{R_VAL_METER_SOURCE_CONTROL_DOMAINS (evaluated if possible, raw string otherwise)} +}} + } } diff --git a/man/options_params.Rd b/man/options_params.Rd index e3a9d6f..5e588b2 100644 --- a/man/options_params.Rd +++ b/man/options_params.Rd @@ -18,6 +18,11 @@ calculating metrics. (Defaults to \code{policy()}, overwritable using option 'va \item{logs}{Logging directory where artifacts will be stored. Defaults to a temporary directory. (Defaults to \code{ns_tmp_root()}, overwritable using option 'val.meter.logs' or environment variable 'R_VAL_METER_LOGS')} + +\item{source_control_domains}{Recognized source control hosting domains used when inferring whether a +package has a source code repository on a recognized hosting platform. +Customize this to add additional git hosting services (e.g., self-hosted +GitLab instances or other federated git providers). (Defaults to \verb{c("github.com", "gitlab.com", "bitbucket.org", "r-forge.r-project.org", ; "codeberg.org", "sr.ht", "gitea.com", "git.sr.ht")}, overwritable using option 'val.meter.source_control_domains' or environment variable 'R_VAL_METER_SOURCE_CONTROL_DOMAINS')} } \description{ Options As Parameters diff --git a/tests/testthat/test-data_recognized_source.R b/tests/testthat/test-data_recognized_source.R new file mode 100644 index 0000000..a03c31c --- /dev/null +++ b/tests/testthat/test-data_recognized_source.R @@ -0,0 +1,224 @@ +describe("source control metrics implementation details", { + # Test the actual implementation logic with mocked desc objects + it("extracts URLs from DESCRIPTION and matches against domains", { + # Create a temporary DESCRIPTION file + tmp_dir <- withr::local_tempdir() + desc_file <- file.path(tmp_dir, "DESCRIPTION") + + writeLines(c( + "Package: testpkg", + "Version: 1.0.0", + "Title: Test Package", + "Description: A test package.", + "URL: https://github.com/user/testpkg", + "BugReports: https://github.com/user/testpkg/issues" + ), desc_file) + + d <- desc::desc(file = desc_file) + + # Get URLs and check matching logic + urls <- d$get_urls() + domains <- opt("source_control_domains") + + expect_true(length(urls) > 0) + expect_true(any(sapply(urls, function(url) { + any(sapply(domains, function(domain) { + grepl(domain, url, ignore.case = TRUE) + })) + }))) + }) + + it("returns empty vector when no URLs match recognized domains", { + tmp_dir <- withr::local_tempdir() + desc_file <- file.path(tmp_dir, "DESCRIPTION") + + writeLines(c( + "Package: testpkg", + "Version: 1.0.0", + "Title: Test Package", + "Description: A test package.", + "URL: https://example.com/testpkg" + ), desc_file) + + d <- desc::desc(file = desc_file) + urls <- d$get_urls() + domains <- opt("source_control_domains") + + # None should match + matches <- any(sapply(urls, function(url) { + any(sapply(domains, function(domain) { + grepl(domain, url, ignore.case = TRUE) + })) + })) + + expect_false(matches) + }) + + it("is case-insensitive when matching domains", { + tmp_dir <- withr::local_tempdir() + desc_file <- file.path(tmp_dir, "DESCRIPTION") + + writeLines(c( + "Package: testpkg", + "Version: 1.0.0", + "Title: Test Package", + "Description: A test package.", + "URL: https://GitHub.COM/user/testpkg" + ), desc_file) + + d <- desc::desc(file = desc_file) + urls <- d$get_urls() + + # Should match despite different case + matched <- any(sapply(urls, function(url) { + grepl("github.com", url, ignore.case = TRUE) + })) + + expect_true(matched) + }) + + it("handles BugReports field", { + tmp_dir <- withr::local_tempdir() + desc_file <- file.path(tmp_dir, "DESCRIPTION") + + writeLines(c( + "Package: testpkg", + "Version: 1.0.0", + "Title: Test Package", + "Description: A test package.", + "BugReports: https://github.com/user/testpkg/issues" + ), desc_file) + + d <- desc::desc(file = desc_file) + + # BugReports should be available + bug_reports <- tryCatch( + d$get_field("BugReports"), + error = function(e) character(0) + ) + + expect_true(length(bug_reports) > 0) + expect_true(grepl("github.com", bug_reports, ignore.case = TRUE)) + }) +}) + +describe("source control option customization", { + it("respects custom source_control_domains option", { + skip_if_offline() + skip_on_cran() + + # Save original and set custom domains + old_domains <- opt_set("source_control_domains", c("custom.org")) + on.exit(opt_set("source_control_domains", old_domains)) + + # Create test DESCRIPTION with custom domain + tmp_dir <- withr::local_tempdir() + desc_file <- file.path(tmp_dir, "DESCRIPTION") + + writeLines(c( + "Package: testpkg", + "Version: 1.0.0", + "Title: Test Package", + "Description: A test package.", + "URL: https://custom.org/testpkg" + ), desc_file) + + d <- desc::desc(file = desc_file) + urls <- d$get_urls() + domains <- opt("source_control_domains") + + # Should match custom domain + expect_equal(domains, "custom.org") + expect_true(any(sapply(urls, function(url) { + grepl("custom.org", url, ignore.case = TRUE) + }))) + }) +}) + +describe("source control metrics metadata", { + it("has_recognized_source is registered as a metric", { + info <- pkg_data_info("has_recognized_source") + expect_true(info@metric) + }) + + it("has_recognized_source has 'best practice' tag", { + info <- pkg_data_info("has_recognized_source") + expect_true("best practice" %in% info@tags) + }) + + it("recognized_source_url is not a metric", { + info <- pkg_data_info("recognized_source_url") + expect_false(info@metric) + }) + + it("metrics have non-empty titles", { + info_metric <- pkg_data_info("has_recognized_source") + info_data <- pkg_data_info("recognized_source_url") + + expect_true(length(info_metric@title) > 0) + expect_true(nchar(info_metric@title) > 0) + expect_true(length(info_data@title) > 0) + expect_true(nchar(info_data@title) > 0) + }) + + it("metrics have non-empty descriptions", { + info_metric <- pkg_data_info("has_recognized_source") + info_data <- pkg_data_info("recognized_source_url") + + expect_true(length(info_metric@description) > 0) + expect_true(length(info_data@description) > 0) + }) +}) + +describe("source control option integration", { + it("default domains include major platforms", { + domains <- opt("source_control_domains") + + expect_true("github.com" %in% domains) + expect_true("gitlab.com" %in% domains) + expect_true("bitbucket.org" %in% domains) + expect_true(length(domains) >= 5) + }) + + it("can extend default domains", { + defaults <- opt("source_control_domains") + old_domains <- opt_set("source_control_domains", c( + defaults, + "git.company.com" + )) + on.exit(opt_set("source_control_domains", old_domains)) + + extended <- opt("source_control_domains") + expect_true("git.company.com" %in% extended) + expect_true("github.com" %in% extended) + expect_equal(length(extended), length(defaults) + 1) + }) + + it("can replace domains entirely", { + old_domains <- opt_set("source_control_domains", c("custom.org")) + on.exit(opt_set("source_control_domains", old_domains)) + + domains <- opt("source_control_domains") + expect_equal(domains, "custom.org") + expect_equal(length(domains), 1) + }) +}) + +describe("source control mock implementation", { + it("random packages have realistic source control distribution", { + # Generate multiple random packages and check distribution + n <- 100 + results <- replicate(n, { + p <- random_pkg() + p$has_recognized_source + }) + + # Should be logical + expect_type(results, "logical") + + # Most packages should have recognized source (around 80%) + prop_with_source <- mean(results, na.rm = TRUE) + expect_true(prop_with_source > 0.5) + expect_true(prop_with_source < 1.0) + }) +})