Skip to content

Commit

Permalink
Merge pull request #233 from grunwaldlab/fix-231
Browse files Browse the repository at this point in the history
Fix genind2genalex() error with SNP data
  • Loading branch information
zkamvar authored Jan 31, 2021
2 parents b39bad9 + 3b8e325 commit 0ab416f
Show file tree
Hide file tree
Showing 10 changed files with 129 additions and 90 deletions.
2 changes: 2 additions & 0 deletions .Rbuildignore
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@
^vignettes/*.bbl$
^vignettes/*.log$
^vignettes/*.toc$
vignettes/mlg.Rmd
vignettes/poppr_manual.Rmd
^tools/*$
^\.travis\.yml$
^vignettes/figure
Expand Down
54 changes: 18 additions & 36 deletions .github/workflows/R-CMD-check.yaml
Original file line number Diff line number Diff line change
@@ -1,15 +1,13 @@
# NOTE: This workflow is overkill for most R packages
# check-standard.yaml is likely a better choice
# usethis::use_github_action("check-standard") will install it.
#
# For help debugging build failures open an issue on the RStudio community with the 'github-actions' tag.
# https://community.rstudio.com/new-topic?category=Package%20development&tags=github-actions
on:
push:
branches:
- main
- master
pull_request:
branches:
- main
- master

name: R-CMD-check
Expand All @@ -24,15 +22,10 @@ jobs:
fail-fast: false
matrix:
config:
- {os: macOS-latest, cov: 'true', r: 'release'}
- {os: windows-latest,cov: 'false', r: 'release'}
- {os: windows-latest,cov: 'false', r: '3.6'}
- {os: ubuntu-16.04, cov: 'false', r: 'devel', rspm: "https://packagemanager.rstudio.com/cran/__linux__/xenial/latest", http-user-agent: "R/4.0.0 (ubuntu-16.04) R (4.0.0 x86_64-pc-linux-gnu x86_64 linux-gnu) on GitHub Actions" }
- {os: ubuntu-16.04, cov: 'false', r: 'release', rspm: "https://packagemanager.rstudio.com/cran/__linux__/xenial/latest"}
- {os: ubuntu-16.04, cov: 'false', r: 'oldrel', rspm: "https://packagemanager.rstudio.com/cran/__linux__/xenial/latest"}
- {os: ubuntu-16.04, cov: 'false', r: '3.5', rspm: "https://packagemanager.rstudio.com/cran/__linux__/xenial/latest"}
- {os: ubuntu-16.04, cov: 'false', r: '3.4', rspm: "https://packagemanager.rstudio.com/cran/__linux__/xenial/latest"}
- {os: ubuntu-16.04, cov: 'false', r: '3.3', rspm: "https://packagemanager.rstudio.com/cran/__linux__/xenial/latest"}
- {os: windows-latest, r: 'release'}
- {os: macOS-latest, r: 'release'}
- {os: ubuntu-20.04, r: 'release', rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest"}
- {os: ubuntu-20.04, r: 'devel', rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest"}

env:
R_REMOTES_NO_ERRORS_FROM_WARNINGS: true
Expand All @@ -42,12 +35,18 @@ jobs:
steps:
- uses: actions/checkout@v2

- uses: r-lib/actions/setup-r@master
- uses: r-lib/actions/setup-r@v1
with:
r-version: ${{ matrix.config.r }}
http-user-agent: ${{ matrix.config.http-user-agent }}

- uses: r-lib/actions/setup-pandoc@master
- uses: r-lib/actions/setup-pandoc@v1
- uses: r-lib/actions/setup-tinytex@v1
- name: Install LaTeX packages
run: |
tlmgr install colortbl
tlmgr install mathtools
tlmgr install preprint
tlmgr install natbib
- name: Query dependencies
run: |
Expand All @@ -58,7 +57,7 @@ jobs:

- name: Cache R packages
if: runner.os != 'Windows'
uses: actions/cache@v1
uses: actions/cache@v2
with:
path: ${{ env.R_LIBS_USER }}
key: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1-${{ hashFiles('.github/depends.Rds') }}
Expand All @@ -70,37 +69,20 @@ jobs:
while read -r cmd
do
eval sudo $cmd
done < <(Rscript -e 'writeLines(remotes::system_requirements("ubuntu", "16.04"))')
done < <(Rscript -e 'writeLines(remotes::system_requirements("ubuntu", "20.04"))')
- name: Install dependencies
run: |
remotes::install_deps(dependencies = TRUE)
remotes::install_cran("rcmdcheck")
shell: Rscript {0}

- name: Session info
run: |
options(width = 100)
pkgs <- installed.packages()[, "Package"]
sessioninfo::session_info(pkgs, include_base = TRUE)
shell: Rscript {0}

- name: Check
env:
_R_CHECK_CRAN_INCOMING_: false
_R_CHECK_CRAN_INCOMING_REMOTE_: false
run: rcmdcheck::rcmdcheck(args = c("--no-manual", "--as-cran"), error_on = "warning", check_dir = "check")
shell: Rscript {0}

- name: Show testthat output
if: always()
run: find check -name 'testthat.Rout*' -exec cat '{}' \; || true
shell: bash

- name: Cover
if: matrix.config.cov == 'true'
run: covr::codecov()
shell: Rscript {0}

- name: Upload check results
if: failure()
uses: actions/upload-artifact@main
Expand Down
6 changes: 6 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,12 @@ GITHUB

* The default branch for the repository is now "main" (@zkamvar, #218)

BUG FIX
-------

* `genind2genalex()` no longer converts diploid sequence data to zeros on export
This fixes #231 (@zkamvar, #233).

poppr 2.8.7
===========

Expand Down
6 changes: 3 additions & 3 deletions R/file_handling.r
Original file line number Diff line number Diff line change
Expand Up @@ -634,17 +634,17 @@ genind2genalex <- function(gid, filename = "", overwrite = FALSE, quiet = FALSE,
the_gid <- as.character(pop(gid))
df <- genind2df(gid, sep = "/", usepop = FALSE)
if (any(ploid > 1)){
df <- generate_bruvo_mat(df, maxploid = max(ploid), sep = "/", mat = TRUE)
df <- generate_bruvo_mat(df, maxploid = max(ploid), sep = "/", mat_type = "character")
}
df[is.na(df)] <- 0

# making sure that the individual names are included.
if(all(indNames(gid) == "") | is.null(indNames(gid))){
indNames(gid) <- paste("ind", 1:nInd(gid), sep="")
indNames(gid) <- paste("ind", seq(nInd(gid)), sep="")
}
df <- cbind(indNames(gid), the_gid, df)
# setting the NA replacement. This doesn't work too well.
replacement <- ifelse(gid@type == "PA", "-1", "0")
replacement <- if(gid@type == "PA") "-1" else "0"
if(!quiet) cat("Writing the table to", filename, "... ")

if(geo == TRUE & !is.null(gid$other[[geodf]])){
Expand Down
98 changes: 57 additions & 41 deletions R/internal.r
Original file line number Diff line number Diff line change
Expand Up @@ -1614,52 +1614,55 @@ make_poppr_plot_title <- function(samp, file = NULL, N = NULL, pop = NULL){
return(plot_title)
}

#==============================================================================#
# fill a single genotype with zeroes if the number of alleles is maxploid.
#
# Public functions utilizing this function:
# # none
#
# Private functions utilizing this function:
# # fill_zero_locus
#==============================================================================#
fill_zero <- function(x, maxploid, mat = FALSE){
#' Pad a single locus genotype with zeroes according the maximum ploidy.
#'
#' @param x a vector of alleles for a single individual at a single locus
#' @param maxploid the maximum ploidy to pad
#' @param mat_type if the final output is to be a matrix with one column per
#' allele, what type of matrix should it be. Acceptable are: numeric and character.
#' @noRd
#' @return a vector of length 1 (default) or of length maxploid.
#' @seealso used by: [fill_zero_locus()]
fill_zero <- function(x, maxploid, mat_type = character(0)){
if (length(x) < maxploid){
if (!mat){
# If the genotype is less than the max ploidy, fill it with a zero
if (length(mat_type)) {
fill <- as(0L, mat_type)
pad <- rep(fill, maxploid - length(x))
res <- c(pad, as(x, mat_type))
} else {
zeroes <- paste(rep(0, maxploid - length(x)), collapse = "/")
res <- paste(x, collapse = "/")
res <- paste(zeroes, res, sep = "/")
} else {
res <- c(rep(0.0, maxploid - length(x)), as.numeric(x))
}

} else {
if (!mat){
res <- paste(x, collapse = "/")
# If the genotype is the right format_type, either collapse it or return it
if (length(mat_type)){
res <- as(x, mat_type)
} else {
res <- as.numeric(x)
res <- paste(x, collapse = "/")
}
}
return(res)
}

#==============================================================================#
# Fill short genotypes in a character vector with zeroes.
#
# Public functions utilizing this function:
# # none
#
# Private functions utilizing this function:
# # generate_bruvo_mat
#==============================================================================#
fill_zero_locus <- function(x, sep = "/", maxploid, mat = FALSE){
#' Fill short genotypes in a character vector with zeroes
#'
#' @param x a character vector of genotypes at a single locus, separated by "/"
#' @param maxploid the maximum ploidy to pad
#' @param mat_type if the final output is to be a matrix with one column per
#' allele, what type of matrix should it be. Acceptable are: numeric and character.
#' @noRd
#' @return a vector of length 1 (default) or of length maxploid.
#' @seealso uses: [fill_zero_locus()], used by: [create_bruvo_mat()]
fill_zero_locus <- function(x, sep = "/", maxploid, mat_type = character(0)){
x <- strsplit(x, sep)
if (mat){
result <- numeric(maxploid)
if (length(mat_type)) {
result <- vector(mode = mat_type, length = maxploid)
} else {
result <- character(1)
}
return(t(vapply(x, fill_zero, result, maxploid, mat)))
return(t(vapply(x, fill_zero, result, maxploid, mat_type)))
}

#==============================================================================#
Expand Down Expand Up @@ -1708,19 +1711,32 @@ fill_zero_locus <- function(x, sep = "/", maxploid, mat = FALSE){
# sample_10 0 0 41 31 0 17 30 57
#
#
# Public functions utilizing this function:
# # none
#
# Private functions utilizing this function:
# # none
#==============================================================================#
generate_bruvo_mat <- function(x, maxploid, sep = "/", mat = FALSE){
if (mat){
result <- matrix(numeric(nrow(x)*maxploid), ncol = maxploid, nrow = nrow(x))

#' Fill short genotypes in a data frame with zeroes
#'
#' @param x a data frame of character vectors representing genotypes with alleles separated by "/"
#' @param maxploid the maximum ploidy to pad
#' @param mat if the final output is to be a matrix with one column per
#' allele, what type of matrix should it be. Acceptable are: numeric and character.
#' Default is an empty character vector, indicating that alleles should be concatenated.
#' @noRd
#' @return a vector of length 1 (default) or of length maxploid.
#' @seealso uses: [fill_zero_locus()], used by: [genind2genalex()]
generate_bruvo_mat <- function(x, maxploid, sep = "/", mat_type = character(0)){
# --- 2021-01-30 ---
# mat has been renamed to mat_type and recast as a character vector. For
# details, see https://github.com/grunwaldlab/poppr/issues/108
# ------------------
# Create a template for vapply to fill in with the result.
if (length(mat_type)) {
# Each locus will be a matrix with one allele per column
fill <- vector(mode = mat_type, length = nrow(x) * maxploid)
result <- matrix(fill, ncol = maxploid, nrow = nrow(x))
} else {
# Each locus will be a character vector with all the alleles
result <- character(nrow(x))
}
res <- vapply(x, fill_zero_locus, result, sep, maxploid, mat)
res <- vapply(x, fill_zero_locus, result, sep, maxploid, mat_type)
if (length(dim(res)) > 2){
redim <- dim(res)
dim(res) <- c(redim[1], redim[2]*redim[3])
Expand All @@ -1731,7 +1747,7 @@ generate_bruvo_mat <- function(x, maxploid, sep = "/", mat = FALSE){
} else {
colnames(res) <- colnames(x)
}
if (!mat){
if (length(mat_type) == 0) {
res[grep("NA", res)] <- NA_character_
}
rownames(res) <- rownames(x)
Expand Down
2 changes: 1 addition & 1 deletion R/methods.r
Original file line number Diff line number Diff line change
Expand Up @@ -222,7 +222,7 @@ setMethod(
replen <- match_replen_to_loci(locNames(gen), replen)
ploid <- max(ploidy(gen))
popdf <- genind2df(gen, sep = "/", usepop = FALSE)
mat <- generate_bruvo_mat(popdf, maxploid = ploid, sep = "/", mat = TRUE)
mat <- generate_bruvo_mat(popdf, maxploid = ploid, sep = "/", mat_type = "numeric")
mat[is.na(mat)] <- 0
slot(.Object, "mat") <- mat
slot(.Object, "replen") <- replen
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# Poppr version 2 <img src="man/figures/small_logo.png" align="right" height="98"/>

<!-- badges: start -->
[![R build status](https://github.com/grunwaldlab/poppr/workflows/R-CMD-check/badge.svg)](https://github.com/grunwaldlab/poppr/actions)
[![R-CMD-check](https://github.com/grunwaldlab/poppr/workflows/R-CMD-check/badge.svg)](https://github.com/grunwaldlab/poppr/actions)
<!-- badges: end -->

## What is *poppr*?
Expand Down
41 changes: 41 additions & 0 deletions tests/testthat/test-import.R
Original file line number Diff line number Diff line change
Expand Up @@ -221,6 +221,47 @@ test_that("not specifying a file for genind2genalex will generate a tempfile", {
expect_is(read.genalex(f), "genclone")
})

test_that("genind2genalex() handles snp data appropriately", {
# context: https://github.com/grunwaldlab/poppr/issues/231
tmp <- tempfile(fileext = ".csv")
on.exit(unlink(tmp), add = TRUE)
x <- new("genind", tab = structure(c(NA, 2L, 2L, 2L, 2L, NA, 0L, 0L,
0L, 0L, NA, 2L, 2L, 2L, 2L, NA, 0L, 0L, 0L, 0L, 1L, 1L, 2L, 2L,
1L, 1L, 1L, 0L, 0L, 1L), .Dim = 5:6, .Dimnames = list(c("TT056001.trim",
"TT060001.trim", "TT062001.trim", "TT063001.trim", "TT064001.trim"
), c("loc87_pos30.A", "loc87_pos30.G", "loc106_pos31.G", "loc106_pos31.T",
"loc345_pos27.G", "loc345_pos27.T"))), loc.fac = structure(c(1L,
1L, 2L, 2L, 3L, 3L), .Label = c("loc87_pos30", "loc106_pos31",
"loc345_pos27"), class = "factor"), loc.n.all = c(loc87_pos30 = 2L,
loc106_pos31 = 2L, loc345_pos27 = 2L), all.names = list(loc87_pos30 = c("A",
"G"), loc106_pos31 = c("G", "T"), loc345_pos27 = c("G", "T")),
ploidy = c(2L, 2L, 2L, 2L, 2L), type = "codom", other = list(),
call = .local(x = x, i = i, j = j, loc = ..1, drop = drop),
pop = NULL, strata = NULL, hierarchy = NULL)
expect_output(genind2genalex(x, tmp), "Extracting the table ...")
y <- read.genalex(tmp)
expect_equal(genind2df(x, pop = FALSE), genind2df(y, pop = FALSE))
})

test_that("fill_zero() works with character and numeric data", {
char <- "A"
num <- "13"

# Default
expect_equal(fill_zero(char, 2), "0/A")
expect_equal(fill_zero(num, 2), "0/13")
expect_equal(fill_zero(char, 3, character(0)), "0/0/A")
expect_equal(fill_zero(num, 3, character(0)), "0/0/13")

# As character vector
expect_equal(fill_zero(char, 3, "character"), c("0", "0", "A"))
expect_equal(fill_zero(num, 3, "character"), c("0", "0", "13"))

# As numeric vector
expect_equal(expect_warning(fill_zero(char, 3, "numeric")), c(0, 0, NA_real_))
expect_equal(fill_zero(num, 3, "numeric"), c(0.0, 0.0, 13.0))
})

test_that("genind2genalex will prevent a file from being overwritten", {
skip_on_cran()
f <- tempfile()
Expand Down
4 changes: 0 additions & 4 deletions vignettes/mlg.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,6 @@ output:
toc_depth: 2
fig_width: 5
fig_height: 5
vignette: >
%\VignetteIndexEntry{Multilocus Genotype Analysis}
%\VignetteEngine{knitr::rmarkdown}
%\VignetteEncoding{UTF-8}
---

```{r, echo = FALSE, message = FALSE, warning = FALSE}
Expand Down
4 changes: 0 additions & 4 deletions vignettes/poppr_manual.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,6 @@ output:
fig_height: 5
toc: true
toc_depth: 1
vignette: >
%\VignetteIndexEntry{Data Import and Manipulation}
%\VignetteEngine{knitr::rmarkdown}
%\VignetteEncoding{UTF-8}
---

```{r, echo = FALSE, message = FALSE, warning = FALSE}
Expand Down

0 comments on commit 0ab416f

Please sign in to comment.