diff --git a/.ci/.lintr.R b/.ci/.lintr.R new file mode 100644 index 0000000000..09a0db3dca --- /dev/null +++ b/.ci/.lintr.R @@ -0,0 +1,105 @@ +for (f in list.files('ci/linters', full.names=TRUE)) source(f) +rm(f) + +linters = all_linters( + packages = "lintr", # TODO(lintr->3.2.0): Remove this. + # eq_assignment_linter(), + brace_linter(allow_single_line = TRUE), + # TODO(michaelchirico): Activate these incrementally. These are the + # parameterizations that match our style guide. + # implicit_assignment_linter(allow_lazy = TRUE, allow_scoped = TRUE), + # implicit_integer_linter(allow_colon = TRUE), + # system_time_linter = undesirable_function_linter(c( + # system.time = "Only run timings in benchmark.Rraw" + # )), + # undesirable_function_linter(modify_defaults( + # default_undesirable_functions, + # ifelse = "Use fifelse instead.", + # Sys.setenv = NULL, + # library = NULL, + # options = NULL, + # par = NULL, + # setwd = NULL + # )), + undesirable_operator_linter(modify_defaults( + default_undesirable_operators, + `<<-` = NULL + )), + # TODO(lintr#2441): Use upstream implementation. + assignment_linter = NULL, + # TODO(lintr#2442): Use this once x[ , j, by] is supported. + commas_linter = NULL, + commented_code_linter = NULL, + # TODO(linter->3.2.0): Activate this. + consecutive_assertion_linter = NULL, + cyclocomp_linter = NULL, + function_argument_linter = NULL, + indentation_linter = NULL, + infix_spaces_linter = NULL, + # TODO(R>3.2.0): Activate this, extending to recognize vapply_1i(x, length). + lengths_linter = NULL, + line_length_linter = NULL, + missing_package_linter = NULL, + namespace_linter = NULL, + nonportable_path_linter = NULL, + object_name_linter = NULL, + object_usage_linter = NULL, + quotes_linter = NULL, + semicolon_linter = NULL, + spaces_inside_linter = NULL, + spaces_left_parentheses_linter = NULL, + # TODO(michaelchirico): Only exclude from vignettes, not sure what's wrong. + strings_as_factors_linter = NULL, + # TODO(lintr->3.2.0): Fix on a valid TODO style, enforce it, and re-activate. + todo_comment_linter = NULL, + # TODO(michaelchirico): Enforce these and re-activate them one-by-one. Also stop using '<<-'. + brace_linter = NULL, + condition_call_linter = NULL, + conjunct_test_linter = NULL, + fixed_regex_linter = NULL, + function_left_parentheses_linter = NULL, + if_not_else_linter = NULL, + implicit_assignment_linter = NULL, + implicit_integer_linter = NULL, + keyword_quote_linter = NULL, + length_levels_linter = NULL, + matrix_apply_linter = NULL, + missing_argument_linter = NULL, + nzchar_linter = NULL, + object_overwrite_linter = NULL, + paren_body_linter = NULL, + redundant_equals_linter = NULL, + rep_len_linter = NULL, + repeat_linter = NULL, + return_linter = NULL, + sample_int_linter = NULL, + scalar_in_linter = NULL, + seq_linter = NULL, + undesirable_function_linter = NULL, + unnecessary_concatenation_linter = NULL, + unnecessary_lambda_linter = NULL, + unnecessary_nesting_linter = NULL, + unreachable_code_linter = NULL, + unused_import_linter = NULL +) +# TODO(lintr#2172): Glob with lintr itself. +exclusions = local({ + exclusion_for_dir <- function(dir, exclusions) { + files = list.files(dir, pattern = "\\.(R|Rmd)$") + stats::setNames(rep(list(exclusions), length(files)), files) + } + c( + exclusion_for_dir("tests", list( + quotes_linter = Inf, + # TODO(michaelchirico): Enforce these and re-activate them one-by-one. + implicit_integer_linter = Inf, + infix_spaces_linter = Inf, + undesirable_function_linter = Inf + )), + exclusion_for_dir("vignettes", list( + quotes_linter = Inf + # strings_as_factors_linter = Inf + # system_time_linter = Inf + )) + ) +}) diff --git a/.github/workflows/lint.yaml b/.github/workflows/lint.yaml new file mode 100644 index 0000000000..7170016b1a --- /dev/null +++ b/.github/workflows/lint.yaml @@ -0,0 +1,35 @@ +on: + push: + branches: + - master + pull_request: + branches: + - master + +name: lint + +jobs: + lint: + runs-on: ubuntu-latest + env: + GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} + steps: + - uses: actions/checkout@v4 + + - uses: r-lib/actions/setup-r@v2 + with: + use-public-rspm: true + + - uses: r-lib/actions/setup-r-dependencies@v2 + with: + extra-packages: | + r-lib/lintr + local::. + needs: lint + + - name: Lint + run: lintr::lint_package() + shell: Rscript {0} + env: + LINTR_ERROR_ON_LINT: true + R_LINTR_LINTER_FILE: .ci/.lintr diff --git a/R/bmerge.R b/R/bmerge.R index ddaedc1b3d..ff40fddb4f 100644 --- a/R/bmerge.R +++ b/R/bmerge.R @@ -187,4 +187,3 @@ bmerge = function(i, x, icols, xcols, roll, rollends, nomatch, mult, ops, verbos ans$xo = xo # for further use by [.data.table return(ans) } - diff --git a/R/data.table.R b/R/data.table.R index e0cddb38f3..5513fb276f 100644 --- a/R/data.table.R +++ b/R/data.table.R @@ -2300,7 +2300,7 @@ transform.data.table = function (`_data`, ...) { if (!cedta()) return(NextMethod()) # nocov `_data` = copy(`_data`) - e = eval(substitute(list(...)), `_data`, parent.frame()) + e = eval(substitute(list(...)), `_data`, parent.frame()) set(`_data`, ,names(e), e) `_data` } diff --git a/R/duplicated.R b/R/duplicated.R index 27b9038122..0aad2ebdd5 100644 --- a/R/duplicated.R +++ b/R/duplicated.R @@ -118,4 +118,3 @@ uniqueN = function(x, by = if (is.list(x)) seq_along(x) else NULL, na.rm=FALSE) length(starts) } } - diff --git a/R/foverlaps.R b/R/foverlaps.R index 9a0cd55808..54dc61f938 100644 --- a/R/foverlaps.R +++ b/R/foverlaps.R @@ -247,4 +247,3 @@ foverlaps = function(x, y, by.x=if (!is.null(key(x))) key(x) else key(y), by.y=k # Tests are added to ensure we cover these aspects (to my knowledge) to ensure that any undesirable changes in the future breaks those tests. # Conclusion: floating point manipulations are hell! - diff --git a/R/fwrite.R b/R/fwrite.R index b13b0afb77..37968d4eab 100644 --- a/R/fwrite.R +++ b/R/fwrite.R @@ -64,7 +64,7 @@ fwrite = function(x, file="", append=FALSE, quote="auto", length(nThread)==1L && !is.na(nThread) && nThread>=1L ) - is_gzip = compress == "gzip" || (compress == "auto" && grepl("\\.gz$", file)) + is_gzip = compress == "gzip" || (compress == "auto" && endsWithAny(file, ".gz")) file = path.expand(file) # "~/foo/bar" if (append && (file=="" || file.exists(file))) { @@ -122,4 +122,3 @@ fwrite = function(x, file="", append=FALSE, quote="auto", } haszlib = function() .Call(Cdt_has_zlib) - diff --git a/R/onAttach.R b/R/onAttach.R index 6ff17972b3..7a4a9be3ed 100644 --- a/R/onAttach.R +++ b/R/onAttach.R @@ -21,7 +21,7 @@ nth = getDTthreads(verbose=FALSE) if (dev) packageStartupMessagef("data.table %s IN DEVELOPMENT built %s%s using %d threads (see ?getDTthreads). ", v, d, g, nth, appendLF=FALSE) - else + else packageStartupMessagef("data.table %s using %d threads (see ?getDTthreads). ", v, nth, appendLF=FALSE) packageStartupMessagef("Latest news: r-datatable.com") if (gettext("TRANSLATION CHECK") != "TRANSLATION CHECK") diff --git a/R/openmp-utils.R b/R/openmp-utils.R index f19120724b..85f6b32570 100644 --- a/R/openmp-utils.R +++ b/R/openmp-utils.R @@ -13,4 +13,3 @@ setDTthreads = function(threads=NULL, restore_after_fork=NULL, percent=NULL, thr getDTthreads = function(verbose=getOption("datatable.verbose")) { .Call(CgetDTthreads, verbose) } - diff --git a/R/print.data.table.R b/R/print.data.table.R index 9e33e0c4da..f80a5833cd 100644 --- a/R/print.data.table.R +++ b/R/print.data.table.R @@ -230,7 +230,7 @@ format_list_item.default = function(x, ...) { char.trunc = function(x, trunc.char = getOption("datatable.prettyprint.char")) { trunc.char = max(0L, suppressWarnings(as.integer(trunc.char[1L])), na.rm=TRUE) if (!is.character(x) || trunc.char <= 0L) return(x) - nchar_width = nchar(x, 'width') # Check whether string is full-width or half-width, #5096 + nchar_width = nchar(x, 'width') # Check whether string is full-width or half-width, #5096 nchar_chars = nchar(x, 'char') is_full_width = nchar_width > nchar_chars idx = pmin(nchar_width, nchar_chars) > trunc.char @@ -272,4 +272,3 @@ trunc_cols_message = function(not_printed, abbs, class, col.names){ n, brackify(paste0(not_printed, classes)) ) } - diff --git a/R/setkey.R b/R/setkey.R index 84488a8034..62da9ebe88 100644 --- a/R/setkey.R +++ b/R/setkey.R @@ -353,4 +353,3 @@ CJ = function(..., sorted = TRUE, unique = FALSE) } l } - diff --git a/R/setops.R b/R/setops.R index 1034b0f0fa..23dd6ec8f6 100644 --- a/R/setops.R +++ b/R/setops.R @@ -290,4 +290,3 @@ all.equal.data.table = function(target, current, trim.levels=TRUE, check.attribu } TRUE } - diff --git a/R/tables.R b/R/tables.R index e47a1a42e8..6a0209c868 100644 --- a/R/tables.R +++ b/R/tables.R @@ -60,4 +60,3 @@ tables = function(mb=type_size, order.col="NAME", width=80, } invisible(info) } - diff --git a/R/test.data.table.R b/R/test.data.table.R index 748e095128..43486e2784 100644 --- a/R/test.data.table.R +++ b/R/test.data.table.R @@ -7,7 +7,7 @@ test.data.table = function(script="tests.Rraw", verbose=FALSE, pkg=".", silent=F if (length(memtest.id)) { if (length(memtest.id)==1L) memtest.id = rep(memtest.id, 2L) # for convenience of supplying one id rather than always a range stopifnot(length(memtest.id)<=2L, # conditions quoted to user when false so "<=2L" even though following conditions rely on ==2L - !anyNA(memtest.id), memtest.id[1L]<=memtest.id[2L]) + !anyNA(memtest.id), memtest.id[1L]<=memtest.id[2L]) if (memtest==0L) memtest=1L # using memtest.id implies memtest } if (exists("test.data.table", .GlobalEnv, inherits=FALSE)) { @@ -134,7 +134,7 @@ test.data.table = function(script="tests.Rraw", verbose=FALSE, pkg=".", silent=F owd = setwd(tempdir()) # ensure writeable directory; e.g. tests that plot may write .pdf here depending on device option and/or batch mode; #5190 on.exit(setwd(owd)) - + if (memtest) { catf("\n***\n*** memtest=%d. This should be the first call in a fresh R_GC_MEM_GROW=0 R session for best results. Ctrl-C now if not.\n***\n\n", memtest) if (is.na(rss())) stopf("memtest intended for Linux. Step through data.table:::rss() to see what went wrong.") diff --git a/R/timetaken.R b/R/timetaken.R index daa52c9f1f..ae4b384fcf 100644 --- a/R/timetaken.R +++ b/R/timetaken.R @@ -12,4 +12,3 @@ timetaken = function(started.at) tt = proc.time()-started.at # diff all 3 times paste0(format(tt[3L])," elapsed (", format(tt[1L]), " cpu)") } - diff --git a/R/transpose.R b/R/transpose.R index 684b135d47..6d6da67799 100644 --- a/R/transpose.R +++ b/R/transpose.R @@ -56,7 +56,7 @@ tstrsplit = function(x, ..., fill=NA, type.convert=FALSE, keep, names=FALSE) { if (!(sum(!is_named) == 1L && !is_named[n] && is.function(type.convert[[n]]))) stopf("When the argument 'type.convert' contains an unnamed element, it is expected to be the last element and should be a function. More than one unnamed element is not allowed unless all elements are functions with length equal to %d (the length of the transpose list or 'keep' argument if it is specified).", length(keep)) else { - fothers = type.convert[[n]] + fothers = type.convert[[n]] type.convert = type.convert[-n] } } @@ -90,4 +90,3 @@ tstrsplit = function(x, ..., fill=NA, type.convert=FALSE, keep, names=FALSE) { setattr(ans, 'names', names) ans } - diff --git a/R/uniqlist.R b/R/uniqlist.R index 2a610ab1a7..4f3600f832 100644 --- a/R/uniqlist.R +++ b/R/uniqlist.R @@ -21,4 +21,3 @@ uniqlengths = function(x, len) { ans = .Call(Cuniqlengths, as.integer(x), as.integer(len)) ans } - diff --git a/R/utils.R b/R/utils.R index a78e5450f7..feacd2b004 100644 --- a/R/utils.R +++ b/R/utils.R @@ -165,4 +165,3 @@ rss = function() { #5515 #5517 round(ans / 1024, 1L) # return MB # nocov end } - diff --git a/inst/atime/tests.R b/inst/atime/tests.R index a0635d063f..19c1b27a8b 100644 --- a/inst/atime/tests.R +++ b/inst/atime/tests.R @@ -1,7 +1,7 @@ # A function to customize R package metadata and source files to facilitate version-specific installation and testing. # -# This is specifically tailored for handling data.table which requires specific changes in non-standard files (such as the object file name in Makevars and version checking code in onLoad.R) -# to support testing across different versions (base and HEAD for PRs, commits associated with historical regressions, etc.) of the package. +# This is specifically tailored for handling data.table which requires specific changes in non-standard files (such as the object file name in Makevars and version checking code in onLoad.R) +# to support testing across different versions (base and HEAD for PRs, commits associated with historical regressions, etc.) of the package. # It appends a SHA1 hash to the package name (PKG.SHA), ensuring each version can be installed and tested separately. # # @param old.Package Current name of the package. @@ -29,7 +29,7 @@ pkg.edit.fun = function(old.Package, new.Package, sha, new.pkg.path) { Package_ <- gsub(".", "_", old.Package, fixed = TRUE) new.Package_ <- paste0(Package_, "_", sha) pkg_find_replace( - "DESCRIPTION", + "DESCRIPTION", paste0("Package:\\s+", old.Package), paste("Package:", new.Package)) pkg_find_replace( @@ -55,13 +55,13 @@ pkg.edit.fun = function(old.Package, new.Package, sha, new.pkg.path) { } # A list of performance tests. -# +# # Each entry in this list corresponds to a performance test and contains a sublist with three mandatory arguments: # - N: A numeric sequence of data sizes to vary. # - setup: An expression evaluated for every data size before measuring time/memory. -# - expr: An expression that will be evaluated for benchmarking performance across different git commit versions. +# - expr: An expression that will be evaluated for benchmarking performance across different git commit versions. # This must call a function from data.table using a syntax with double or triple colon prefix. -# The package name before the colons will be replaced by a new package name that uses the commit SHA hash. +# The package name before the colons will be replaced by a new package name that uses the commit SHA hash. # (For instance, data.table:::[.data.table will become data.table.some_40_digit_SHA1_hash:::[.data.table) # # Optional parameters that may be useful to configure tests: @@ -70,8 +70,9 @@ pkg.edit.fun = function(old.Package, new.Package, sha, new.pkg.path) { # - sha.vec: Named character vector or a list of vectors that specify data.table-specific commit SHAs for testing across those different git commit versions. # For historical regressions, use 'Before', 'Regression', and 'Fixed' (otherwise something like 'Slow' or 'Fast' ideally). # @note Please check https://github.com/tdhock/atime/blob/main/vignettes/data.table.Rmd for more information. +# nolint start: undesirable_operator_linter. ':::' needed+appropriate here. test.list <- list( - # Performance regression discussed in: https://github.com/Rdatatable/data.table/issues/4311 + # Performance regression discussed in: https://github.com/Rdatatable/data.table/issues/4311 # Fixed in: https://github.com/Rdatatable/data.table/pull/4440 "Test regression fixed in #4440" = list( pkg.edit.fun = pkg.edit.fun, @@ -88,7 +89,7 @@ test.list <- list( # Test based on: https://github.com/Rdatatable/data.table/issues/5424 # Performance regression introduced from a commit in: https://github.com/Rdatatable/data.table/pull/4491 - # Fixed in: https://github.com/Rdatatable/data.table/pull/5463 + # Fixed in: https://github.com/Rdatatable/data.table/pull/5463 "Test regression fixed in #5463" = list( pkg.edit.fun = pkg.edit.fun, N = 10^seq(3, 8), @@ -101,8 +102,9 @@ test.list <- list( key = "g") dt_mod <- copy(dt) }), - expr = quote(data.table:::`[.data.table`(dt_mod, , N := .N, by = g)), + expr = quote(data.table:::`[.data.table`(dt_mod, , N := .N, by = g)), Before = "be2f72e6f5c90622fe72e1c315ca05769a9dc854", # Parent of the regression causing commit (https://github.com/Rdatatable/data.table/commit/e793f53466d99f86e70fc2611b708ae8c601a451) in the PR that introduced the issue (https://github.com/Rdatatable/data.table/pull/4491/commits) Regression = "e793f53466d99f86e70fc2611b708ae8c601a451", # Commit responsible for regression in the PR that introduced the issue (https://github.com/Rdatatable/data.table/pull/4491/commits) - Fixed = "58409197426ced4714af842650b0cc3b9e2cb842") # Last commit in the PR that fixed the regression (https://github.com/Rdatatable/data.table/pull/5463/commits) + Fixed = "58409197426ced4714af842650b0cc3b9e2cb842") # Last commit in the PR that fixed the regression (https://github.com/Rdatatable/data.table/pull/5463/commits) ) +# nolint end: undesirable_operator_linter. diff --git a/vignettes/datatable-faq.Rmd b/vignettes/datatable-faq.Rmd index 97c11aeba6..1501497bc8 100644 --- a/vignettes/datatable-faq.Rmd +++ b/vignettes/datatable-faq.Rmd @@ -373,7 +373,7 @@ Yes: The general form is: -```{r, eval = FALSE} +```r DT[where, select|update, group by][order by][...] ... [...] ``` @@ -619,4 +619,4 @@ Please see [this answer](https://stackoverflow.com/a/10529888/403310). ```{r, echo=FALSE} setDTthreads(.old.th) -``` \ No newline at end of file +``` diff --git a/vignettes/datatable-intro.Rmd b/vignettes/datatable-intro.Rmd index c783c3fa7a..e63caee5d7 100644 --- a/vignettes/datatable-intro.Rmd +++ b/vignettes/datatable-intro.Rmd @@ -652,4 +652,4 @@ We will see how to *add/update/delete* columns *by reference* and how to combine ```{r, echo=FALSE} setDTthreads(.old.th) -``` \ No newline at end of file +``` diff --git a/vignettes/datatable-keys-fast-subset.Rmd b/vignettes/datatable-keys-fast-subset.Rmd index 3ec50640ca..d85f69ad86 100644 --- a/vignettes/datatable-keys-fast-subset.Rmd +++ b/vignettes/datatable-keys-fast-subset.Rmd @@ -157,7 +157,7 @@ Once you *key* a *data.table* by certain columns, you can subset by querying tho flights[.("JFK")] ## alternatively -# flights[J("JFK")] (or) +# flights[J("JFK")] (or) # flights[list("JFK")] ``` @@ -464,7 +464,7 @@ Now let us look at binary search approach (method 2). Recall from [Properties of Here's a very simple illustration. Let's consider the (sorted) numbers shown below: -```{r eval = FALSE} +``` 1, 5, 10, 19, 22, 23, 30 ``` @@ -499,4 +499,4 @@ Key based subsets are **incredibly fast** and are particularly useful when the t ```{r, echo=FALSE} setDTthreads(.old.th) -``` \ No newline at end of file +``` diff --git a/vignettes/datatable-programming.Rmd b/vignettes/datatable-programming.Rmd index 0536f11d69..3ec1f57d5b 100644 --- a/vignettes/datatable-programming.Rmd +++ b/vignettes/datatable-programming.Rmd @@ -420,4 +420,4 @@ DT[, cl, env = list(cl = cl)] ```{r cleanup, echo=FALSE} options(.opts) registerS3method("print", "data.frame", base::print.data.frame) -``` \ No newline at end of file +``` diff --git a/vignettes/datatable-sd-usage.Rmd b/vignettes/datatable-sd-usage.Rmd index 09243c820f..bd2618d53c 100644 --- a/vignettes/datatable-sd-usage.Rmd +++ b/vignettes/datatable-sd-usage.Rmd @@ -169,7 +169,7 @@ lm_coef = sapply(models, function(rhs) { }) barplot(lm_coef, names.arg = sapply(models, paste, collapse = '/'), main = 'Wins Coefficient\nWith Various Covariates', - col = col16, las = 2L, cex.names = .8) + col = col16, las = 2L, cex.names = 0.8) ``` The coefficient always has the expected sign (better pitchers tend to have more wins and fewer runs allowed), but the magnitude can vary substantially depending on what else we control for. @@ -254,4 +254,4 @@ The above is just a short introduction of the power of `.SD` in facilitating bea ```{r, echo=FALSE} setDTthreads(.old.th) -``` \ No newline at end of file +```