Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,8 @@ Suggests:
knitr,
rmarkdown,
rosv,
testthat (>= 3.0.0)
testthat (>= 3.0.0),
withr
Config/Needs/vignettes:
knitr,
rmarkdown,
Expand Down Expand Up @@ -84,6 +85,7 @@ Collate:
'data_desc.R'
'data_downloads_total.R'
'data_r_cmd_check.R'
'data_recognized_source.R'
'data_vignettes.R'
'data_web_html.R'
'generic_metric_coerce.R'
Expand All @@ -96,7 +98,7 @@ Collate:
'zzz.R'
Encoding: UTF-8
Roxygen: list(markdown = TRUE)
RoxygenNote: 7.3.2
RoxygenNote: 7.3.3
Depends:
R (>= 3.5)
LazyData: true
Expand Down
81 changes: 81 additions & 0 deletions R/data_recognized_source.R
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we rename this to R/data_recognized_source.R to match the new name?

Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
#' @include impl_data.R

# Source control URL extraction and inference
impl_data(
"recognized_source_url",
class = class_character,
title = "Recognized Source Control URL",
description = paste(
"The \\acronym{URL} of the package's source control repository on a",
"recognized hosting platform, determined by matching against an",
"allow-list of known domains. Extracted from the \\acronym{URL} and",
"\\code{BugReports} fields in the \\code{DESCRIPTION} file. The",
"allow-list can be customized; see \\code{?options} for details."
),
function(pkg, resource, field, ...) {
# Get URLs from DESCRIPTION file
desc_urls <- tryCatch(
pkg$desc$get_urls(),
error = function(e) character(0)
)

# Get BugReports field if available
bug_reports <- tryCatch(
pkg$desc$get_field("BugReports"),
error = function(e) character(0)
)

# Combine all URLs to check, filtering out empty strings and NAs
all_urls <- c(desc_urls, bug_reports)
all_urls <- all_urls[!is.na(all_urls) & nzchar(all_urls)]

if (length(all_urls) == 0) {
return(character(0))
}

# Get recognized source control domains from options
source_control_domains <- opt("source_control_domains")

# Find all URLs matching known source control domains
# We use case-insensitive matching by converting to lowercase
# and fixed = TRUE to avoid regex special character issues (e.g., '.')
is_src_url <- vapply(
tolower(source_control_domains),
grepl,
logical(length(all_urls)),
x = tolower(all_urls),
fixed = TRUE
)

all_urls[rowSums(is_src_url) > 0]
}
)

impl_data(
"has_recognized_source",
class = class_logical,
metric = TRUE,
tags = c("best practice"),
permissions = c(),
title = "Has Recognized Source Repository",
description = paste(
"Indicates whether the package has a source code repository on a",
"recognized hosting platform from an allow-list of known domains.",
"Inferred from the \\acronym{URL} and \\code{BugReports} fields in the",
"\\code{DESCRIPTION} file. See \\code{?options} for customizing the",
"allow-list."
),
function(pkg, resource, field, ...) {
length(pkg$recognized_source_url) > 0L
}
)

# Mock implementation for random packages
impl_data(
"has_recognized_source",
for_resource = mock_resource,
function(pkg, resource, field, ...) {
# Simulate realistic distribution - most packages have recognized source
runif(1) > 0.2
}
)
17 changes: 16 additions & 1 deletion R/options.R
Original file line number Diff line number Diff line change
Expand Up @@ -26,5 +26,20 @@ define_options(
"Silences console output during evaluation. This applies when pulling package
resources (such as download and installation output) and executing code
(for example, running `R CMD check`)",
quiet = TRUE
quiet = TRUE,

fmt("Recognized source control hosting domains used when inferring whether a
package has a source code repository on a recognized hosting platform.
Customize this to add additional git hosting services (e.g., self-hosted
GitLab instances or other federated git providers)."),
source_control_domains = c(
"github.com",
"gitlab.com",
"bitbucket.org",
"r-forge.r-project.org",
"codeberg.org",
"sr.ht", # sourcehut
"gitea.com",
"git.sr.ht"
)
)
7 changes: 7 additions & 0 deletions man/metrics.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

10 changes: 10 additions & 0 deletions man/options.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 5 additions & 0 deletions man/options_params.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

224 changes: 224 additions & 0 deletions tests/testthat/test-data_recognized_source.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,224 @@
describe("source control metrics implementation details", {
# Test the actual implementation logic with mocked desc objects
it("extracts URLs from DESCRIPTION and matches against domains", {
# Create a temporary DESCRIPTION file
tmp_dir <- withr::local_tempdir()
desc_file <- file.path(tmp_dir, "DESCRIPTION")

writeLines(c(
"Package: testpkg",
"Version: 1.0.0",
"Title: Test Package",
"Description: A test package.",
"URL: https://github.com/user/testpkg",
"BugReports: https://github.com/user/testpkg/issues"
), desc_file)

d <- desc::desc(file = desc_file)

# Get URLs and check matching logic
urls <- d$get_urls()
domains <- opt("source_control_domains")

expect_true(length(urls) > 0)
expect_true(any(sapply(urls, function(url) {
any(sapply(domains, function(domain) {
grepl(domain, url, ignore.case = TRUE)
}))
})))
})

it("returns empty vector when no URLs match recognized domains", {
tmp_dir <- withr::local_tempdir()
desc_file <- file.path(tmp_dir, "DESCRIPTION")

writeLines(c(
"Package: testpkg",
"Version: 1.0.0",
"Title: Test Package",
"Description: A test package.",
"URL: https://example.com/testpkg"
), desc_file)

d <- desc::desc(file = desc_file)
urls <- d$get_urls()
domains <- opt("source_control_domains")

# None should match
matches <- any(sapply(urls, function(url) {
any(sapply(domains, function(domain) {
grepl(domain, url, ignore.case = TRUE)
}))
}))

expect_false(matches)
})

it("is case-insensitive when matching domains", {
tmp_dir <- withr::local_tempdir()
desc_file <- file.path(tmp_dir, "DESCRIPTION")

writeLines(c(
"Package: testpkg",
"Version: 1.0.0",
"Title: Test Package",
"Description: A test package.",
"URL: https://GitHub.COM/user/testpkg"
), desc_file)

d <- desc::desc(file = desc_file)
urls <- d$get_urls()

# Should match despite different case
matched <- any(sapply(urls, function(url) {
grepl("github.com", url, ignore.case = TRUE)
}))

expect_true(matched)
})

it("handles BugReports field", {
tmp_dir <- withr::local_tempdir()
desc_file <- file.path(tmp_dir, "DESCRIPTION")

writeLines(c(
"Package: testpkg",
"Version: 1.0.0",
"Title: Test Package",
"Description: A test package.",
"BugReports: https://github.com/user/testpkg/issues"
), desc_file)

d <- desc::desc(file = desc_file)

# BugReports should be available
bug_reports <- tryCatch(
d$get_field("BugReports"),
error = function(e) character(0)
)

expect_true(length(bug_reports) > 0)
expect_true(grepl("github.com", bug_reports, ignore.case = TRUE))
})
})

describe("source control option customization", {
it("respects custom source_control_domains option", {
skip_if_offline()
skip_on_cran()

# Save original and set custom domains
old_domains <- opt_set("source_control_domains", c("custom.org"))
on.exit(opt_set("source_control_domains", old_domains))

# Create test DESCRIPTION with custom domain
tmp_dir <- withr::local_tempdir()
desc_file <- file.path(tmp_dir, "DESCRIPTION")

writeLines(c(
"Package: testpkg",
"Version: 1.0.0",
"Title: Test Package",
"Description: A test package.",
"URL: https://custom.org/testpkg"
), desc_file)

d <- desc::desc(file = desc_file)
urls <- d$get_urls()
domains <- opt("source_control_domains")

# Should match custom domain
expect_equal(domains, "custom.org")
expect_true(any(sapply(urls, function(url) {
grepl("custom.org", url, ignore.case = TRUE)
})))
})
})

describe("source control metrics metadata", {
it("has_recognized_source is registered as a metric", {
info <- pkg_data_info("has_recognized_source")
expect_true(info@metric)
})

it("has_recognized_source has 'best practice' tag", {
info <- pkg_data_info("has_recognized_source")
expect_true("best practice" %in% info@tags)
})

it("recognized_source_url is not a metric", {
info <- pkg_data_info("recognized_source_url")
expect_false(info@metric)
})

it("metrics have non-empty titles", {
info_metric <- pkg_data_info("has_recognized_source")
info_data <- pkg_data_info("recognized_source_url")

expect_true(length(info_metric@title) > 0)
expect_true(nchar(info_metric@title) > 0)
expect_true(length(info_data@title) > 0)
expect_true(nchar(info_data@title) > 0)
})

it("metrics have non-empty descriptions", {
info_metric <- pkg_data_info("has_recognized_source")
info_data <- pkg_data_info("recognized_source_url")

expect_true(length(info_metric@description) > 0)
expect_true(length(info_data@description) > 0)
})
})

describe("source control option integration", {
it("default domains include major platforms", {
domains <- opt("source_control_domains")

expect_true("github.com" %in% domains)
expect_true("gitlab.com" %in% domains)
expect_true("bitbucket.org" %in% domains)
expect_true(length(domains) >= 5)
})

it("can extend default domains", {
defaults <- opt("source_control_domains")
old_domains <- opt_set("source_control_domains", c(
defaults,
"git.company.com"
))
on.exit(opt_set("source_control_domains", old_domains))

extended <- opt("source_control_domains")
expect_true("git.company.com" %in% extended)
expect_true("github.com" %in% extended)
expect_equal(length(extended), length(defaults) + 1)
})

it("can replace domains entirely", {
old_domains <- opt_set("source_control_domains", c("custom.org"))
on.exit(opt_set("source_control_domains", old_domains))

domains <- opt("source_control_domains")
expect_equal(domains, "custom.org")
expect_equal(length(domains), 1)
})
})

describe("source control mock implementation", {
it("random packages have realistic source control distribution", {
# Generate multiple random packages and check distribution
n <- 100
results <- replicate(n, {
p <- random_pkg()
p$has_recognized_source
})

# Should be logical
expect_type(results, "logical")

# Most packages should have recognized source (around 80%)
prop_with_source <- mean(results, na.rm = TRUE)
expect_true(prop_with_source > 0.5)
expect_true(prop_with_source < 1.0)
})
})