From 3950d48b86338070be427a2a438be5489293049b Mon Sep 17 00:00:00 2001 From: Pavel Dimens Date: Wed, 13 Oct 2021 12:42:34 -0400 Subject: [PATCH] add PopDataInfo! constructor --- src/PopData.jl | 28 ++++++++++++++++++++++++---- test/popdata.jl | 5 +++-- 2 files changed, 27 insertions(+), 6 deletions(-) diff --git a/src/PopData.jl b/src/PopData.jl index 19692f2..a6e8ce0 100755 --- a/src/PopData.jl +++ b/src/PopData.jl @@ -35,7 +35,7 @@ end # constructor FORMAT just the genodata dataframe function PopDataInfo(genodf::DataFrame) - sampleinfo = unique(genodf, :name) + sampleinfo = unique(dropmissing(genodf), :name) sampleinfo.ploidy = [ismissing(geno) ? Int8(0) : Int8(length(geno)) for geno in sampleinfo.genotype] select!(sampleinfo, :name => collect => :name, :population, :ploidy) ploidy = unique(sampleinfo.ploidy) @@ -109,6 +109,7 @@ end PopData(data::DataFrame) = PopData(PopDataInfo(data), data) +# method to update PopDataInfo from PopData, all in one swoop function PopDataInfo!(data::PopData) data.metadata.samples = length(data.genodata.name.pool) data.metadata.loci = length(data.genodata.locus.pool) @@ -126,6 +127,25 @@ function PopDataInfo!(data::PopData) return end +# method to update preexisting PopDataInfo with new genodata +# useful for getindex and creating new PopData from that +function PopDataInfo!(popdatainfo::PopDataInfo, genodata::DataFrame) + popdatainfo.samples = length(genodata.name.pool) + popdatainfo.loci = length(genodata.locus.pool) + popdatainfo.populations = length(genodata.population.pool) + filter!(:name => x -> x ∈ genodata.name.pool, popdatainfo.sampleinfo) + filter!(:locus => x -> x ∈ genodata.locus.pool, popdatainfo.locusinfo) + if "ploidy" ∈ names(popdatainfo.sampleinfo) + ploidy = unique(popdatainfo.sampleinfo.ploidy) + ploidy = length(ploidy) == 1 ? Int8(ploidy[1]) : Int8.(ploidy) + else + ploidy = Int8(0) + end + popdatainfo.ploidy = ploidy + popdatainfo.biallelic = popdatainfo.biallelic ? true : isbiallelic(genodata) + return popdatainfo +end + """ Genotype::DataType @@ -255,9 +275,9 @@ function Base.getindex(data::PopData, args...) 3 => (i -> PooledArray(i, compress = true)) => :locus, 4 ) - out = PopData(data.metadata, geno) - PopDataInfo!(out) - return out + pdinfo = deepcopy(data.info) + out = PopDataInfo!(pdinfo, geno) + PopData(out, geno) end # shortcut methods for convenience and less verbose typing diff --git a/test/popdata.jl b/test/popdata.jl index edb44ec..7ae13a9 100644 --- a/test/popdata.jl +++ b/test/popdata.jl @@ -1,11 +1,11 @@ module TestPopData using PopGenCore +using DataFrames using Test x = @nancycats - @testset "PopData" begin @testset "Struct types" begin @test x isa PopData @@ -30,7 +30,8 @@ x = @nancycats @testset "Indexing" begin @test x[x.genodata.locus .== "fca8", :] isa PopData @test x[x.genodata.name .== "N100", :] isa PopData - @test x[x.genodata.locus .∈ ["fca8", "fca37"], :] isa PopData + @test x[x.genodata.name .∈ Ref(["N100", "N217"]), :] isa PopData + @test x[x.genodata.locus .∈ Ref(["fca8", "fca37"]), :] isa PopData end end