@@ -397,11 +397,17 @@ validate_epi_archive <- function(x) {
397
397
# ' @importFrom dplyr filter
398
398
apply_compactify <- function (df , keys , abs_tol = 0 ) {
399
399
df %> %
400
- arrange(!!! keys ) %> %
401
- filter(if_any(
402
- c(everything(), - version ), # all non-version columns
403
- ~ ! is_locf(. , abs_tol )
404
- ))
400
+ arrange(!!! keys ) %> % # in case not archive DT & key
401
+ filter(
402
+ if_any(
403
+ all_of(keys ) & ! " version" , # epikeytimes
404
+ ~ ! is_locf(.x , abs_tol , TRUE )
405
+ ) |
406
+ if_any(
407
+ ! all_of(keys ), # measurement columns
408
+ ~ ! is_locf(.x , abs_tol , FALSE )
409
+ )
410
+ )
405
411
}
406
412
407
413
# ' get the entries that `compactify` would remove
@@ -410,23 +416,47 @@ apply_compactify <- function(df, keys, abs_tol = 0) {
410
416
removed_by_compactify <- function (df , keys , abs_tol ) {
411
417
df %> %
412
418
arrange(!!! keys ) %> %
413
- filter(if_all(
414
- c(everything(), - version ),
415
- ~ is_locf(. , abs_tol )
416
- )) # nolint: object_usage_linter
419
+ filter(
420
+ if_all(
421
+ all_of(keys ) & ! " version" , # epikeytimes
422
+ ~ is_locf(.x , abs_tol , TRUE )
423
+ ) &
424
+ if_all(
425
+ ! all_of(keys ), # measurement columns
426
+ ~ is_locf(.x , abs_tol , FALSE )
427
+ )
428
+ )
417
429
}
418
430
419
431
# ' Checks to see if a value in a vector is LOCF
420
- # ' @description
421
- # ' LOCF meaning last observation carried forward. lags the vector by 1, then
422
- # ' compares with itself. For doubles it compares whether the absolute
423
- # ' difference is `<= abs_tol`; otherwise it uses equality. `NA`'s and `NaN`'s
424
- # ' are considered equal to themselves and each other.
432
+ # ' @description LOCF meaning last observation carried forward (to later
433
+ # ' versions). Lags the vector by 1, then compares with itself. If `is_key` is
434
+ # ' `TRUE`, only values that are exactly the same between the lagged and
435
+ # ' original are considered LOCF. If `is_key` is `FALSE` and `vec` is a vector
436
+ # ' of numbers ([`base::is.numeric`]), then approximate equality will be used,
437
+ # ' checking whether the absolute difference between each pair of entries is
438
+ # ' `<= abs_tol`; if `vec` is something else, then exact equality is used
439
+ # ' instead.
440
+ # '
441
+ # ' @details
442
+ # '
443
+ # ' We include epikey-time columns in LOCF comparisons as part of an optimization
444
+ # ' to avoid slower grouped operations while still ensuring that the first
445
+ # ' observation for each time series will not be marked as LOCF. We test these
446
+ # ' key columns for exact equality to prevent chopping off consecutive
447
+ # ' time_values during flat periods when `abs_tol` is high.
448
+ # '
449
+ # ' We use exact equality for non-`is.numeric` double/integer columns such as
450
+ # ' dates, datetimes, difftimes, `tsibble::yearmonth`s, etc., as these may be
451
+ # ' used as part of re-indexing or grouping procedures, and we don't want to
452
+ # ' change the number of groups for those operations when we remove LOCF data
453
+ # ' during compactification.
454
+ # '
425
455
# ' @importFrom dplyr lag if_else
426
456
# ' @keywords internal
427
- is_locf <- function (vec , abs_tol ) { # nolint: object_usage_linter
457
+ is_locf <- function (vec , abs_tol , is_key ) { # nolint: object_usage_linter
428
458
lag_vec <- dplyr :: lag(vec )
429
- if (typeof (vec ) == " double " ) {
459
+ if (is.numeric (vec ) && ! is_key ) {
430
460
res <- if_else(
431
461
! is.na(vec ) & ! is.na(lag_vec ),
432
462
abs(vec - lag_vec ) < = abs_tol ,
0 commit comments