From bfc985b6622c489012c847e21217bbd163221d06 Mon Sep 17 00:00:00 2001 From: Jairo H Migueles Date: Wed, 9 Jul 2025 10:12:52 +0200 Subject: [PATCH 01/10] fixes denominator specification for unsigned bytes and documentation of the header structure in readParmayMatrix --- R/readParmayMatrix.R | 10 ++++++---- man/readParmayMatrix.Rd | 4 ++-- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/R/readParmayMatrix.R b/R/readParmayMatrix.R index 0a38d27..6478be8 100644 --- a/R/readParmayMatrix.R +++ b/R/readParmayMatrix.R @@ -11,8 +11,8 @@ readParmayMatrix = function(filename, output = c("all", "sf", "dynrange")[1], # The header information contains: # - remarks (empty in all files I have tested): bytes 1:512 - # - Count of the total number of packets in file (bytes 513:516) - # - header string = "MDTC" (bytes 517:520, if not there -> file corrupt) + # - header string = "MDTC" (bytes 513:516, if not there -> file corrupt) + # - Count of the total number of packets in file (bytes 517:520) # - range of the acc (bytes 521:522) and the gyro sensors (bytes 523:524) # Each packet contains the following information: @@ -202,7 +202,8 @@ readParmayMatrix = function(filename, output = c("all", "sf", "dynrange")[1], acc_readings = lapply(seq_along(acc_starts), function(i) { seek(con, acc_starts[i] - 1, "start") readings = readBin(con, what = "integer", size = 2, n = 3 * acc_count[i], endian = "little") - matrix(readings * (acc_dynrange / 32767), ncol = 3, byrow = T) + denominator = ifelse(readings >= 0, 32767, 32768) + matrix(readings * (acc_dynrange / denominator), ncol = 3, byrow = T) }) } @@ -214,7 +215,8 @@ readParmayMatrix = function(filename, output = c("all", "sf", "dynrange")[1], gyro_readings = lapply(seq_along(gyro_starts), function(i) { seek(con, gyro_starts[i] - 1, "start") readings = readBin(con, what = "integer", size = 2, n = 3 * gyro_count[i], endian = "little") - matrix(readings * (gyro_range / 32767), ncol = 3, byrow = T) + denominator = ifelse(readings >= 0, 32767, 32768) + matrix(readings * (gyro_range / denominator), ncol = 3, byrow = T) }) } prev_stops = gyro_stops diff --git a/man/readParmayMatrix.Rd b/man/readParmayMatrix.Rd index 15ea6fe..42c7192 100644 --- a/man/readParmayMatrix.Rd +++ b/man/readParmayMatrix.Rd @@ -44,8 +44,8 @@ \strong{Header Information:} \itemize{ \item Remarks: Bytes 1-512. - \item Total packets: Bytes 513-516. - \item Header string: Bytes 517-520. If not "MDTC", the file is considered corrupt. + \item Header string: Bytes 513-516. If not "MDTC", the file is considered corrupt. + \item Total packets: Bytes 517-520. \item Accelerometer dynamic range: Bytes 521-522. \item Gyroscope range: Bytes 523-524. } From 32fc553cae3943a2b8906903c06d7899387d5620 Mon Sep 17 00:00:00 2001 From: Jairo H Migueles Date: Wed, 9 Jul 2025 10:16:15 +0200 Subject: [PATCH 02/10] update NEWS --- NEWS.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/NEWS.md b/NEWS.md index 812de5e..e2fbd5a 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,9 @@ +# Changes in version 1.0.6 (release date:?-?-2025) + +- Parmay Matrix: + - Fixed minor bug in the conversion of accelerometer and gyroscope readings #82 + - Correct documentation for header structure of bin files in readParmayMatrix #82 + # Changes in version 1.0.5 (release date:09-05-2025) - Fitbit: Now also loads heart rate data #78 From 26e14910ec4a5c3e1ca4ca8e1c05aa2f381e0b80 Mon Sep 17 00:00:00 2001 From: Jairo H Migueles Date: Thu, 4 Sep 2025 12:08:06 +0200 Subject: [PATCH 03/10] Update Parmay Matrix website in documentation --- README.md | 2 +- man/readParmayMatrix.Rd | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index d95d2f6..f8a2dc0 100644 --- a/README.md +++ b/README.md @@ -10,7 +10,7 @@ Brand | Device name | File extension | Data type | GGIRread function Axivity Ltd https://axivity.com/ | AX3 and AX6 | .cwa | raw gravitational units |readAxivity ActivInsights Ltd https://activinsights.com/ | GENEActiv Original and Sleep | .bin | raw gravitational units | readGENEActiv Unilever Discover Ltd | Genea (no longer manufactured) | .bin | raw gravitational units | readGenea -Parmay Tech https://www.parmaytech.com/ | Matrix | .bin, .BIN | raw gravitational units | readParmayMatrix +Parmay Tech https://www.matrixwearable.net/ | Matrix | .bin, .BIN | raw gravitational units | readParmayMatrix ActiGraph | ??? | .csv | count data | readActigraphCount Actiwatch | ??? | .csv and .awd | count data | readActiwatchCount Actical | ??? | .csv | count data | readActicalCount diff --git a/man/readParmayMatrix.Rd b/man/readParmayMatrix.Rd index 42c7192..0346544 100644 --- a/man/readParmayMatrix.Rd +++ b/man/readParmayMatrix.Rd @@ -112,9 +112,9 @@ } \references{ For more details on Matrix devices', see: - \url{https://www.parmaytech.com/devices/en-matrix} + \url{https://www.matrixwearable.net/} For additional details on Matrix bin/BIN files structure, please contact manufacturer: - \url{https://www.parmaytech.com/contact} + \url{https://www.matrixwearable.net/#!/contact} } \seealso{ \code{\link{resample}} for resampling sensor data. From c66e078eb69c9776aad5e96ef36edbc6ac806ae9 Mon Sep 17 00:00:00 2001 From: Jairo H Migueles Date: Thu, 4 Sep 2025 12:10:02 +0200 Subject: [PATCH 04/10] update NEWS --- NEWS.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/NEWS.md b/NEWS.md index e2fbd5a..6c7226e 100644 --- a/NEWS.md +++ b/NEWS.md @@ -2,7 +2,8 @@ - Parmay Matrix: - Fixed minor bug in the conversion of accelerometer and gyroscope readings #82 - - Correct documentation for header structure of bin files in readParmayMatrix #82 + - Correct documentation for header structure of bin files in readParmayMatrix #83 + - Update Parmay matrix website in README and readParmayMatrix function documentation #87 # Changes in version 1.0.5 (release date:09-05-2025) From a697927ee7fccf074f472d28826b5af3a5d77946 Mon Sep 17 00:00:00 2001 From: Jairo H Migueles Date: Wed, 15 Oct 2025 11:10:18 +0200 Subject: [PATCH 05/10] revised handling of number of declared packets when it does not match with number of observed packets --- R/readParmayMatrix.R | 59 +++++++++++++++++++++++++++----------------- 1 file changed, 36 insertions(+), 23 deletions(-) diff --git a/R/readParmayMatrix.R b/R/readParmayMatrix.R index 6478be8..a635b05 100644 --- a/R/readParmayMatrix.R +++ b/R/readParmayMatrix.R @@ -57,11 +57,9 @@ readParmayMatrix = function(filename, output = c("all", "sf", "dynrange")[1], total_packets = readBin(tPackets_bytes, "integer", size = 4, endian = "little") # Validate start and end packets - lastchunk = FALSE if (is.null(end)) end = total_packets if (end >= total_packets) { end = total_packets - lastchunk = TRUE } if (start < 1) start = 1 if (start > total_packets) return(NULL) @@ -83,10 +81,27 @@ readParmayMatrix = function(filename, output = c("all", "sf", "dynrange")[1], packet_starti = find_matrix_packet_start(filename, packet_header_bytes) packet_endi = c(packet_starti[-1] - 1, file.info(filename)$size) + # save number of packets declared and observed + n_declared = total_packets + n_observed_file = length(packet_starti) + # validate packet headers (i.e., nr. of appearances of MDTCPACK in files) - if (length(packet_starti) != total_packets) { - stop(paste0(basename(filename), ": Probably corrupted file")) - } + # in case that empty packets have been added at the end, only use valid packets + n_use = min(total_packets, length(packet_starti)) + packet_starti = packet_starti[seq_len(n_use)] + packet_endi = packet_endi[seq_len(n_use)] + + # Apply chunking AFTER truncation + i_from = max(1, start) + i_to = if (is.null(end)) n_use else min(end, n_use) + idx = seq.int(i_from, i_to) + n_rows = length(idx) + + packet_starti = packet_starti[idx] + packet_endi = packet_endi[idx] + + # lastchunk should reflect the truncated space + lastchunk = (i_to == n_use) # start time # starttime_indices = (packet_starti[1] + 12):(packet_starti[1] + 15) @@ -99,10 +114,6 @@ readParmayMatrix = function(filename, output = c("all", "sf", "dynrange")[1], starttime_posix = as.POSIXct(starttime, origin = "1970-1-1", tz = configtz) } - # Select chunk of interest - packet_starti = packet_starti[start:end] - packet_endi = packet_endi[start:end] - # ------------------------------------------------------------------------- # 3 - Check packets integrity with CRC32 checksum @@ -114,15 +125,15 @@ readParmayMatrix = function(filename, output = c("all", "sf", "dynrange")[1], crc32_stored = ifelse(crc32_stored_signed < 0, crc32_stored_signed + 2^32, crc32_stored_signed) # observed CRC32 - crc32_observed <- vapply(seq_along(packet_starti), function(i) { + crc32_observed = vapply(seq_along(packet_starti), function(i) { seek(con, packet_starti[i] + 11, "start") # Move to correct position - size <- packet_endi[i] - packet_starti[i] - 11 # Calculate the actual size + size = packet_endi[i] - packet_starti[i] - 11 # Calculate the actual size if (size <= 0) return(NA_real_) # Handle edge cases where size is invalid - crc32_observed_raw <- readBin(con, what = "raw", n = size, endian = "little") + crc32_observed_raw = readBin(con, what = "raw", n = size, endian = "little") - crc32_hex <- digest::digest(crc32_observed_raw, algo = "crc32", serialize = FALSE) + crc32_hex = digest::digest(crc32_observed_raw, algo = "crc32", serialize = FALSE) as.numeric(paste0("0x", crc32_hex)) }, numeric(1)) @@ -131,16 +142,18 @@ readParmayMatrix = function(filename, output = c("all", "sf", "dynrange")[1], # Initialize Quality-Check (QC) data frame QClog = data.frame( - checksum_pass = rep(TRUE, length(start:end)), - blockID = start:end, - start = integer(length(start:end)), - end = integer(length(start:end)), - blockLengthSeconds = numeric(length(start:end)), - frequency_set = numeric(length(start:end)), - frequency_observed = numeric(length(start:end)), - imputed = logical(length(start:end)), - gap_with_previous_block_secs = numeric(length(start:end)), - start_time_adjustment_secs = numeric(length(start:end)) + checksum_pass = rep(TRUE, n_rows), + blockID = idx, + start = integer(n_rows), + end = integer(n_rows), + blockLengthSeconds = numeric(n_rows), + frequency_set = numeric(n_rows), + frequency_observed = numeric(n_rows), + imputed = logical(n_rows), + gap_with_previous_block_secs = numeric(n_rows), + start_time_adjustment_secs = numeric(n_rows), + declared_packets = rep(n_declared, n_rows), + observed_packets = rep(n_observed_file, n_rows) ) # log corrupt packets From 97f038bb309a52618e406951a80fc74d926a41af Mon Sep 17 00:00:00 2001 From: Jairo H Migueles Date: Wed, 15 Oct 2025 11:46:05 +0200 Subject: [PATCH 06/10] workflows: replace test-coverage and R-CMD-check standard for latest version in r-lib/actions, and small adjustment to r_ubuntuOnly.yml --- .github/workflows/r_codecov.yml | 23 ++++++++++--- .github/workflows/r_multiOS.yml | 10 +++--- .github/workflows/r_ubuntuOnly.yml | 53 +++++++----------------------- 3 files changed, 35 insertions(+), 51 deletions(-) diff --git a/.github/workflows/r_codecov.yml b/.github/workflows/r_codecov.yml index 4ff0e90..eb448e9 100644 --- a/.github/workflows/r_codecov.yml +++ b/.github/workflows/r_codecov.yml @@ -6,7 +6,9 @@ on: pull_request: branches: [main, master] -name: test-coverage +name: test-coverage.yaml + +permissions: read-all jobs: test-coverage: @@ -23,23 +25,34 @@ jobs: - uses: r-lib/actions/setup-r-dependencies@v2 with: - extra-packages: any::covr + extra-packages: any::covr, any::xml2 needs: coverage - name: Test coverage run: | - covr::codecov( + cov <- covr::package_coverage( quiet = FALSE, clean = FALSE, install_path = file.path(normalizePath(Sys.getenv("RUNNER_TEMP"), winslash = "/"), "package") ) + print(cov) + covr::to_cobertura(cov) shell: Rscript {0} + - uses: codecov/codecov-action@v5 + with: + # Fail if error if not on PR, or if on PR and token is given + fail_ci_if_error: ${{ github.event_name != 'pull_request' || secrets.CODECOV_TOKEN }} + files: ./cobertura.xml + plugins: noop + disable_search: true + token: ${{ secrets.CODECOV_TOKEN }} + - name: Show testthat output if: always() run: | ## -------------------------------------------------------------------- - find ${{ runner.temp }}/package -name 'testthat.Rout*' -exec cat '{}' \; || true + find '${{ runner.temp }}/package' -name 'testthat.Rout*' -exec cat '{}' \; || true shell: bash - name: Upload test results @@ -47,4 +60,4 @@ jobs: uses: actions/upload-artifact@v4 with: name: coverage-test-failures - path: ${{ runner.temp }}/package \ No newline at end of file + path: ${{ runner.temp }}/package diff --git a/.github/workflows/r_multiOS.yml b/.github/workflows/r_multiOS.yml index de55301..55219db 100644 --- a/.github/workflows/r_multiOS.yml +++ b/.github/workflows/r_multiOS.yml @@ -4,9 +4,10 @@ on: push: branches: [main, master] pull_request: - branches: [main, master] -name: R-CMD-check +name: R-CMD-check.yaml + +permissions: read-all jobs: R-CMD-check: @@ -18,7 +19,7 @@ jobs: fail-fast: false matrix: config: - - {os: macOS-latest, r: 'release'} + - {os: macos-latest, r: 'release'} - {os: windows-latest, r: 'release'} - {os: ubuntu-latest, r: 'devel', http-user-agent: 'release'} - {os: ubuntu-latest, r: 'release'} @@ -46,4 +47,5 @@ jobs: - uses: r-lib/actions/check-r-package@v2 with: - upload-snapshots: true \ No newline at end of file + upload-snapshots: true + build_args: 'c("--no-manual","--compact-vignettes=gs+qpdf")' \ No newline at end of file diff --git a/.github/workflows/r_ubuntuOnly.yml b/.github/workflows/r_ubuntuOnly.yml index d9e3f98..e49d56c 100644 --- a/.github/workflows/r_ubuntuOnly.yml +++ b/.github/workflows/r_ubuntuOnly.yml @@ -23,8 +23,6 @@ jobs: - {os: ubuntu-latest, r: 'release', rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest"} env: - R_REMOTES_NO_ERRORS_FROM_WARNINGS: true - RSPM: ${{ matrix.config.rspm }} GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} steps: @@ -33,47 +31,18 @@ jobs: - run: tlmgr --version - uses: r-lib/actions/setup-r@v2 with: - r-version: ${{ matrix.config.r }} + r-version: 'release' + use-public-rspm: true - uses: r-lib/actions/setup-pandoc@v2 - - - name: Query dependencies - run: | - install.packages('remotes') - saveRDS(remotes::dev_package_deps(dependencies = TRUE), ".github/depends.Rds", version = 2) - writeLines(sprintf("R-%i.%i", getRversion()$major, getRversion()$minor), ".github/R-version") - shell: Rscript {0} - - - name: Cache R packages - if: runner.os != 'Windows' - uses: actions/cache@v4 + + - uses: r-lib/actions/setup-r-dependencies@v2 with: - path: ${{ env.R_LIBS_USER }} - key: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1-${{ hashFiles('.github/depends.Rds') }} - - - name: Install system dependencies - if: runner.os == 'Linux' - run: | - while read -r cmd - do - eval sudo $cmd - done < <(Rscript -e 'writeLines(remotes::system_requirements("ubuntu", "20.04"))') - - - name: Install dependencies - run: | - remotes::install_deps(dependencies = TRUE) - remotes::install_cran("rcmdcheck") - shell: Rscript {0} - - - name: Check - env: - _R_CHECK_CRAN_INCOMING_REMOTE_: false - run: rcmdcheck::rcmdcheck(args = c("--no-manual"), check_dir = "check") - shell: Rscript {0} - - - name: Upload check results - if: failure() - uses: actions/upload-artifact@main + extra-packages: any::rcmdcheck + needs: check + cache-version: 2 + use-public-rspm: true + + - uses: r-lib/actions/check-r-package@v2 with: - name: ${{ runner.os }}-r${{ matrix.config.r }}-results - path: check \ No newline at end of file + upload-snapshots: true \ No newline at end of file From 19db966f869f35a1e9a0ae78a55bfee2a498f8f0 Mon Sep 17 00:00:00 2001 From: Jairo H Migueles Date: Wed, 15 Oct 2025 11:59:10 +0200 Subject: [PATCH 07/10] update NEWS.md --- NEWS.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/NEWS.md b/NEWS.md index f4d8445..40663cd 100644 --- a/NEWS.md +++ b/NEWS.md @@ -4,6 +4,9 @@ - Fixed minor bug in the conversion of accelerometer and gyroscope readings #82 - Correct documentation for header structure of bin files in readParmayMatrix #83 - Update Parmay matrix website in README and readParmayMatrix function documentation #87 + - Enhanced robustness when reading partially corrupted files, valid data packets are now processed, while only corrupted sections are skipped #88 + +- Workflows: Updated to the latest r-lib/actions examples and fixed r-cmd-check-basic to pass # Changes in version 1.0.6 (GitHub-only-release date:16-07-2025) From f14bdea770e10de04e2bb5660b7748b557798cc6 Mon Sep 17 00:00:00 2001 From: jhmigueles Date: Thu, 30 Oct 2025 13:19:34 +0100 Subject: [PATCH 08/10] minor fixes for imputing temperature when no enough data available to calculate the mean --- R/readParmayMatrix.R | 29 ++++++++++++++++++++++++++--- 1 file changed, 26 insertions(+), 3 deletions(-) diff --git a/R/readParmayMatrix.R b/R/readParmayMatrix.R index a635b05..1ac2eeb 100644 --- a/R/readParmayMatrix.R +++ b/R/readParmayMatrix.R @@ -279,11 +279,33 @@ readParmayMatrix = function(filename, output = c("all", "sf", "dynrange")[1], x[corrupt_packets] = NA prev_index = ifelse(any(!is.na(x[1:(i - 1)])), max(x[1:(i - 1)], na.rm = T), NA) - prev_temp = temp_readings[[prev_index]][nrow(temp_readings[[prev_index]]), ] + if (temp_count[prev_index] > 0) { + prev_temp = temp_readings[[prev_index]][nrow(temp_readings[[prev_index]]), ] + } else { + prev_temp = NA + } next_index = ifelse(any(!is.na(x[(i + 1):length(x)])), min(x[(i + 1):length(x)], na.rm = T), NA) - next_temp = temp_readings[[next_index]][1, ] - mean_temp = (prev_temp + next_temp) / 2 + if (!is.na(next_index)) { + if (temp_count[next_index] > 0) { + next_temp = temp_readings[[next_index]][1, ] + } else { + next_temp = NA + } + if (!is.na(prev_temp[1]) & !is.na(next_temp[1])) { + mean_temp = (prev_temp + next_temp) / 2 + } else { + mean_temp = NA + } + } else { + # if the last packet is corrupted, then impute by last temperature observed + if (!is.na(prev_temp[1])) { + mean_temp = prev_temp + } else { + mean_temp = NA + } + } + temp_readings[[i]] = matrix(mean_temp, nrow = nrow(temp_readings[[i]]), ncol = 2, byrow = T) } @@ -404,6 +426,7 @@ readParmayMatrix = function(filename, output = c("all", "sf", "dynrange")[1], data = data.frame(time = required_timepoints) # add sensors if available + browser() if (any(acc_count > 0) & read_acc) data[, c("acc_x", "acc_y", "acc_z")] = acc_resampled if (any(gyro_count > 0) & read_gyro) data[, c("gyro_x", "gyro_y", "gyro_z")] = gyro_resampled if (any(temp_count > 0) & read_temp) data[, c("bodySurface_temp", "ambient_temp")] = temp_resampled From b3be00cd12157fcf718fe1b1eb0738fc8100ce83 Mon Sep 17 00:00:00 2001 From: jhmigueles Date: Wed, 5 Nov 2025 20:08:50 +0100 Subject: [PATCH 09/10] Improved resampling of sensors in readParmayMatrix using acc sensor as referent for timestamps AND improved handling of first-last corrupted packets --- R/readParmayMatrix.R | 219 ++++++++++++++++++++++++++++++++----------- 1 file changed, 166 insertions(+), 53 deletions(-) diff --git a/R/readParmayMatrix.R b/R/readParmayMatrix.R index 1ac2eeb..f44b671 100644 --- a/R/readParmayMatrix.R +++ b/R/readParmayMatrix.R @@ -35,8 +35,112 @@ readParmayMatrix = function(filename, output = c("all", "sf", "dynrange")[1], # - checksum is used to verify integrity of data in each packet # - resampling is used with resample function to make the frequency stable # - Gaps in data are not expected - # ------------------------------------------------------------------------- + # Helper function to resample matrix sensor signals onto a common time grid, + # usually the accelerometer's reference timeline. + # data = matrix with the sensor data + # timestamps = numeric vector of timestamps corresponding to rows fo `data` + # grid = numeric vector of target timestamps (usually, accelerometer resampled timestamps) + # interpolationType = + resample_to_grid = function(data, timestamps, grid, interpolationType = 1) { + + ## ---- 0. Coerce shapes early ---------------------------------------------- + if (is.matrix(timestamps) || is.data.frame(timestamps)) { + timestamps = timestamps[, 1] + } + timestamps = as.vector(timestamps) + data = as.matrix(data) + + common_len = min(NROW(data), length(timestamps)) + if (common_len == 0L) { + out = matrix(NA_real_, nrow = length(grid), ncol = NCOL(data)) + colnames(out) = colnames(data) + return(out) + } + data = data[seq_len(common_len), , drop = FALSE] + timestamps = timestamps[seq_len(common_len)] + + ## ---- 1. Normalize input types ------------------------------------------ + # Convert POSIXct timestamps to numeric (seconds since epoch) if necessary + is_posix = inherits(timestamps, "POSIXct") + tz = if (is_posix) attr(timestamps, "tzone") else NULL + ts_num = if (is_posix) as.numeric(timestamps) else as.numeric(timestamps) + grid_num = if (inherits(grid, "POSIXct")) as.numeric(grid) else as.numeric(grid) + + data = as.matrix(data) + + ## ---- 2. Clean and sort input data -------------------------------------- + # Remove rows with NA timestamps or NA values in any data column + ok = is.finite(ts_num) & rowSums(!is.finite(data)) == 0 + ts_num = ts_num[ok] + data = data[ok, , drop = FALSE] + + # Ensure strictly increasing timestamps + ord = order(ts_num) + ts_num = ts_num[ord] + data = data[ord, , drop = FALSE] + + ## ---- 3. Remove duplicate timestamps ------------------------------------ + # Duplicate timestamps cause division-by-zero errors in interpolation. + # Combine duplicates by taking the column-wise mean. + if (any(duplicated(ts_num))) { + split_ix = split(seq_along(ts_num), ts_num) + ts_unique = as.numeric(names(split_ix)) + data_unique = do.call(rbind, lapply(split_ix, function(ix) { + colMeans(data[ix, , drop = FALSE]) + })) + ord2 = order(ts_unique) + ts_num = ts_unique[ord2] + data = data_unique[ord2, , drop = FALSE] + } + + ## ---- 4. Ensure matching length and valid range ------------------------- + # The C++ function assumes length(rawTime) == nrow(raw) + n_raw = min(length(ts_num), nrow(data)) + if (n_raw < 2L) { + # Not enough data to interpolate → return all NA + out = matrix(NA_real_, nrow = length(grid_num), ncol = ncol(data)) + colnames(out) = colnames(data) + return(out) + } + + ts_num = ts_num[seq_len(n_raw)] + data = data[seq_len(n_raw), , drop = FALSE] + t0 = ts_num[1] + t1 = ts_num[n_raw] + + ## ---- 5. Clip the grid to the sensor’s valid range ---------------------- + # Only pass grid points within [t0, t1] to C++ for interpolation. + # The rest will be filled with NA later. + inside = (grid_num >= t0) & (grid_num <= t1) + grid_in = grid_num[inside] + + ## ---- 6. Interpolate within valid range --------------------------------- + res_in = matrix(NA_real_, nrow = sum(inside), ncol = ncol(data)) + if (length(grid_in) > 0L) { + res_in = resample( + raw = data, + rawTime = ts_num, + time = grid_in, + stop = nrow(data), + type = interpolationType + ) + } + + ## ---- 7. Pad output to full grid length --------------------------------- + # Build output with same number of rows as the target grid. + # Fill with NA outside sensor coverage, and insert resampled data inside. + out = matrix(NA_real_, nrow = length(grid_num), ncol = ncol(data)) + if (length(grid_in) > 0L) { + out[inside, ] = res_in + } + + # Preserve column names for clarity in downstream use + colnames(out) = colnames(data) + out + } + # ------------------------------------------------------------------------- + # Main code # 1 - read file and extract header information # open connection to read the binary file @@ -215,6 +319,10 @@ readParmayMatrix = function(filename, output = c("all", "sf", "dynrange")[1], acc_readings = lapply(seq_along(acc_starts), function(i) { seek(con, acc_starts[i] - 1, "start") readings = readBin(con, what = "integer", size = 2, n = 3 * acc_count[i], endian = "little") + # make sure number of readings is multiple of 3 (might not be in corrupted packets) + n_triplets = length(readings) %/% 3 + readings_ok = 3 * n_triplets + readings = readings[1:readings_ok] denominator = ifelse(readings >= 0, 32767, 32768) matrix(readings * (acc_dynrange / denominator), ncol = 3, byrow = T) }) @@ -228,6 +336,10 @@ readParmayMatrix = function(filename, output = c("all", "sf", "dynrange")[1], gyro_readings = lapply(seq_along(gyro_starts), function(i) { seek(con, gyro_starts[i] - 1, "start") readings = readBin(con, what = "integer", size = 2, n = 3 * gyro_count[i], endian = "little") + # make sure number of readings is multiple of 3 (might not be in corrupted packets) + n_triplets = length(readings) %/% 3 + readings_ok = 3 * n_triplets + readings = readings[1:readings_ok] denominator = ifelse(readings >= 0, 32767, 32768) matrix(readings * (gyro_range / denominator), ncol = 3, byrow = T) }) @@ -241,6 +353,10 @@ readParmayMatrix = function(filename, output = c("all", "sf", "dynrange")[1], temp_readings = lapply(seq_along(temp_starts), function(i) { seek(con, temp_starts[i] - 1, "start") readings = readBin(con, what = "integer", size = 2, n = 2 * temp_count[i], endian = "little") + # make sure number of readings is multiple of 2 (might not be in corrupted packets) + n_triplets = length(readings) %/% 2 + readings_ok = 2 * n_triplets + readings = readings[1:readings_ok] matrix(readings / 10, ncol = 2, byrow = T) }) } @@ -253,6 +369,10 @@ readParmayMatrix = function(filename, output = c("all", "sf", "dynrange")[1], heart_readings = lapply(seq_along(heart_starts), function(i) { seek(con, heart_starts[i] - 1, "start") readings = readBin(con, what = "integer", size = 2, n = 2 * heart_count[i], endian = "little") + # make sure number of readings is multiple of 3 (might not be in corrupted packets) + n_triplets = length(readings) %/% 2 + readings_ok = 2 * n_triplets + readings = readings[1:readings_ok] matrix(readings, ncol = 2, byrow = T) }) } @@ -279,11 +399,18 @@ readParmayMatrix = function(filename, output = c("all", "sf", "dynrange")[1], x[corrupt_packets] = NA prev_index = ifelse(any(!is.na(x[1:(i - 1)])), max(x[1:(i - 1)], na.rm = T), NA) - if (temp_count[prev_index] > 0) { - prev_temp = temp_readings[[prev_index]][nrow(temp_readings[[prev_index]]), ] + if (!is.na(prev_index)) { + if (temp_count[prev_index] > 0) { + prev_temp = temp_readings[[prev_index]][nrow(temp_readings[[prev_index]]), ] + } else { + prev_temp = NA + } } else { - prev_temp = NA + # if the first packet is corrupted, then impute by first non-corrupted temperature observed + first_valid_temp = which(!1:length(temp_count) %in% corrupt_packets)[1] + prev_temp = temp_readings[[first_valid_temp]][1,] } + # next index next_index = ifelse(any(!is.na(x[(i + 1):length(x)])), min(x[(i + 1):length(x)], na.rm = T), NA) if (!is.na(next_index)) { @@ -292,19 +419,18 @@ readParmayMatrix = function(filename, output = c("all", "sf", "dynrange")[1], } else { next_temp = NA } - if (!is.na(prev_temp[1]) & !is.na(next_temp[1])) { - mean_temp = (prev_temp + next_temp) / 2 - } else { - mean_temp = NA - } } else { - # if the last packet is corrupted, then impute by last temperature observed - if (!is.na(prev_temp[1])) { - mean_temp = prev_temp - } else { - mean_temp = NA - } + next_temp = NA } + + if (!is.na(prev_temp[1]) & !is.na(next_temp[1])) { + mean_temp = (prev_temp + next_temp) / 2 + } else if (is.na(prev_temp[1])) { + mean_temp = next_temp + } else if (is.na(next_temp[1])) { + # if the last packet is corrupted, then impute by last temperature observed + mean_temp = prev_temp + } temp_readings[[i]] = matrix(mean_temp, nrow = nrow(temp_readings[[i]]), ncol = 2, byrow = T) @@ -376,57 +502,44 @@ readParmayMatrix = function(filename, output = c("all", "sf", "dynrange")[1], # Resample data to defined sampling frequency if (any(acc_count > 0) & read_acc) { # accelerometer has been activated - required_timepoints = seq(from = acc_timestamps[1], to = acc_timestamps[length(acc_timestamps)], - by = 1/sf) + t_start = acc_timestamps[1] + t_end = acc_timestamps[nrow(acc_data)] + n_required_timepoints = floor((t_end - t_start) * sf) + 1L + required_timepoints = seq(from = acc_timestamps[1], by = 1/sf, + length.out = n_required_timepoints) acc_resampled = resample(raw = acc_data, rawTime = acc_timestamps, time = required_timepoints, stop = nrow(acc_data), type = interpolationType) } if (any(gyro_count > 0) & read_gyro) { # gyroscope has been activated - # make sure measurement covers full recording length: - if (min(gyro_timestamps) > min(required_timepoints)) { - gyro_timestamps = rbind(c(NA, NA), gyro_data) - } - if (max(gyro_timestamps) < max(required_timepoints)) { - gyro_timestamps = c(gyro_timestamps, max(required_timepoints)) - gyro_data = rbind(gyro_data, c(NA, NA)) - } - gyro_resampled = resample(raw = gyro_data, rawTime = gyro_timestamps, - time = required_timepoints, - stop = nrow(gyro_data), type = interpolationType) + gyro_resampled = resample_to_grid( + data = gyro_data, + timestamps = gyro_timestamps, + grid = required_timepoints, + interpolationType = interpolationType + ) } - if (any(temp_count > 0) & read_temp) { # temperature has been activated - # make sure measurement covers full recording length: - if (min(temp_timestamps) > min(required_timepoints)) { - temp_timestamps = rbind(c(NA, NA), temp_data) - } - if (max(temp_timestamps) < max(required_timepoints)) { - temp_timestamps = c(temp_timestamps, max(required_timepoints)) - temp_data = rbind(temp_data, c(NA, NA)) - } - temp_resampled = resample(raw = temp_data, rawTime = temp_timestamps, - time = required_timepoints, - stop = nrow(temp_data), type = interpolationType) + if (any(temp_count > 0) & read_temp) { + temp_resampled = resample_to_grid( + data = temp_data, + timestamps = temp_timestamps, + grid = required_timepoints, + interpolationType = interpolationType + ) } - if (any(heart_count > 0) & read_heart) { # heart rate has been activated - # make sure measurement covers full recording length: - if (min(heart_timestamps) > min(required_timepoints)) { - heart_timestamps = rbind(c(NA, NA), heart_data) - } - if (max(heart_timestamps) < max(required_timepoints)) { - heart_timestamps = c(heart_timestamps, max(required_timepoints)) - heart_data = rbind(heart_data, c(NA, NA)) - } - heart_resampled = resample(raw = heart_data, rawTime = heart_timestamps, - time = required_timepoints, - stop = nrow(heart_data), type = interpolationType) + if (any(heart_count > 0) & read_heart) { + heart_resampled = resample_to_grid( + data = heart_data, + timestamps = heart_timestamps, + grid = required_timepoints, + interpolationType = interpolationType + ) } # build output data ----------- data = data.frame(time = required_timepoints) # add sensors if available - browser() if (any(acc_count > 0) & read_acc) data[, c("acc_x", "acc_y", "acc_z")] = acc_resampled if (any(gyro_count > 0) & read_gyro) data[, c("gyro_x", "gyro_y", "gyro_z")] = gyro_resampled if (any(temp_count > 0) & read_temp) data[, c("bodySurface_temp", "ambient_temp")] = temp_resampled From f50bb3df73835c1f76cd4ff07a63a04b9e58393e Mon Sep 17 00:00:00 2001 From: jhmigueles Date: Wed, 5 Nov 2025 22:34:07 +0100 Subject: [PATCH 10/10] implemented less conservative strategy by which all packets in file are read, then GGIR will do usual data quality checks --- R/readParmayMatrix.R | 41 +++++++++++++++++++------- tests/testthat/test_readParmayMatrix.R | 4 +-- 2 files changed, 32 insertions(+), 13 deletions(-) diff --git a/R/readParmayMatrix.R b/R/readParmayMatrix.R index f44b671..b3cad8d 100644 --- a/R/readParmayMatrix.R +++ b/R/readParmayMatrix.R @@ -189,15 +189,14 @@ readParmayMatrix = function(filename, output = c("all", "sf", "dynrange")[1], n_declared = total_packets n_observed_file = length(packet_starti) - # validate packet headers (i.e., nr. of appearances of MDTCPACK in files) - # in case that empty packets have been added at the end, only use valid packets - n_use = min(total_packets, length(packet_starti)) + # Use all packets available in the file (even if empty packets have been added at the end) + n_use = n_observed_file packet_starti = packet_starti[seq_len(n_use)] packet_endi = packet_endi[seq_len(n_use)] # Apply chunking AFTER truncation i_from = max(1, start) - i_to = if (is.null(end)) n_use else min(end, n_use) + i_to = n_use idx = seq.int(i_from, i_to) n_rows = length(idx) @@ -257,7 +256,8 @@ readParmayMatrix = function(filename, output = c("all", "sf", "dynrange")[1], gap_with_previous_block_secs = numeric(n_rows), start_time_adjustment_secs = numeric(n_rows), declared_packets = rep(n_declared, n_rows), - observed_packets = rep(n_observed_file, n_rows) + observed_packets = rep(n_observed_file, n_rows), + acc_recorded = TRUE ) # log corrupt packets @@ -299,7 +299,6 @@ readParmayMatrix = function(filename, output = c("all", "sf", "dynrange")[1], seek(con, packet_starti[i] + 31, "start") # Move to correct position readBin(con, what = "integer", size = 4, endian = "little") }, numeric(1)) - # Process timestamps and calculate sampling frequency packets_dur_s = packets_t1 - packets_t0 sf_acc_observed = acc_count / packets_dur_s @@ -312,6 +311,20 @@ readParmayMatrix = function(filename, output = c("all", "sf", "dynrange")[1], QClog[, "frequency_set"] = sf QClog[, "frequency_observed"] = sf_acc_observed + # At the moment the function requires that accelerometer data has been + # recorded to work, as accelerometer timestamps are used as reference time + if (sum(acc_count) == 0) { + QClog[, "acc_recording"] = FALSE + return(list( + QClog = QClog, + data = matrix(nrow = 0, ncol = 3), + header = list(sf = sf, + acc_dynrange = acc_dynrange, + starttime = starttime_posix), + lastchunk = TRUE + )) + } + # build data structure lists # acc acc_starts = packet_starti + 36; acc_stops = acc_starts + 6*acc_count - 1 @@ -408,7 +421,11 @@ readParmayMatrix = function(filename, output = c("all", "sf", "dynrange")[1], } else { # if the first packet is corrupted, then impute by first non-corrupted temperature observed first_valid_temp = which(!1:length(temp_count) %in% corrupt_packets)[1] - prev_temp = temp_readings[[first_valid_temp]][1,] + if (is.na(first_valid_temp)) { + prev_temp = NA + } else { + prev_temp = temp_readings[[first_valid_temp]][1,] + } } # next index next_index = ifelse(any(!is.na(x[(i + 1):length(x)])), @@ -422,15 +439,17 @@ readParmayMatrix = function(filename, output = c("all", "sf", "dynrange")[1], } else { next_temp = NA } - if (!is.na(prev_temp[1]) & !is.na(next_temp[1])) { mean_temp = (prev_temp + next_temp) / 2 - } else if (is.na(prev_temp[1])) { + } else if (is.na(prev_temp[1]) & !is.na(next_temp[1])) { mean_temp = next_temp - } else if (is.na(next_temp[1])) { + } else if (!is.na(prev_temp[1]) & is.na(next_temp[1])) { # if the last packet is corrupted, then impute by last temperature observed mean_temp = prev_temp - } + } else { + # safe conduct, if both are NA, then leave it without imputation + mean_temp = temp_readings[[i]] + } temp_readings[[i]] = matrix(mean_temp, nrow = nrow(temp_readings[[i]]), ncol = 2, byrow = T) diff --git a/tests/testthat/test_readParmayMatrix.R b/tests/testthat/test_readParmayMatrix.R index 0afbcb2..76b3142 100644 --- a/tests/testthat/test_readParmayMatrix.R +++ b/tests/testthat/test_readParmayMatrix.R @@ -72,8 +72,8 @@ test_that("Identification of corrupt files and foreseen errors", { expect_equal(BIN$data$acc_x[1], 0) expect_equal(BIN$data$acc_y[1], 0) expect_equal(BIN$data$acc_z[1], 1) - expect_true(is.na(BIN$data$bodySurface_temp[1])) - expect_true(is.na(BIN$data$ambient_temp[1])) + expect_false(is.na(BIN$data$bodySurface_temp[1])) + expect_false(is.na(BIN$data$ambient_temp[1])) # also, identification of full corrupted file binfile = system.file("testfiles/mtx_corrupted_full.bin", package = "GGIRread")[1] expect_error(readParmayMatrix(filename = binfile, desiredtz = "Europe/Berlin",