diff --git a/.gitignore b/.gitignore index 23a8f09..6a01bce 100644 --- a/.gitignore +++ b/.gitignore @@ -8,3 +8,4 @@ Manifest.toml .ipynb_checkpoints */.ipynb_checkpoints/* +.vscode/* \ No newline at end of file diff --git a/Project.toml b/Project.toml index e37ae02..d672556 100644 --- a/Project.toml +++ b/Project.toml @@ -1,28 +1,24 @@ name = "FlxQTL" uuid = "ede81e08-c6d8-4fe3-94c2-f928d9d678ed" authors = ["Hyeonju Kim "] -version = "0.3.1" +version = "0.3.2" [deps] -Conda = "8f4d0f93-b110-5947-807f-2305c1781a2d" DelimitedFiles = "8bb1440f-4735-579b-a4ab-409b98df4dab" Distributed = "8ba89e20-285c-5b6f-9357-94700520ee1b" Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" LossFunctions = "30fc2ffe-d236-52d8-8643-a9d8f7c094a7" -PyPlot = "d330b81b-6aea-500a-939a-2ce795aea3ee" Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" StaticArrays = "90137ffa-7385-5640-81b9-e52037218182" Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" [compat] -Conda = "1.10" DelimitedFiles = "1.6" Distributed = "1" Distributions = "^0.23, 0.24, 0.25" LossFunctions = "~0.11" -PyPlot = "2.11" StaticArrays = "1.9" Statistics = "1" StatsBase = "0.33, 0.34" diff --git a/README.md b/README.md index e04664a..2109297 100644 --- a/README.md +++ b/README.md @@ -5,7 +5,6 @@ [![Stable](https://img.shields.io/badge/docs-stable-blue.svg)](https://senresearch.github.io/FlxQTL.jl/stable) [![CI](https://github.com/senresearch/FlxQTL.jl/actions/workflows/ci.yml/badge.svg)](https://github.com/senresearch/FlxQTL.jl/actions/workflows/ci.yml) [![codecov](https://codecov.io/gh/senresearch/FlxQTL.jl/branch/master/graph/badge.svg)](https://codecov.io/gh/senresearch/FlxQTL.jl) - *FlxQTL.jl* is a package for a multivariate linear mixed model based QTL analysis tool that supports incorporating information from trait @@ -15,9 +14,9 @@ multivariate genome scans, visualization of genome scans, support for LOCO (leave-one-chromosome-out), computation of kinship matrices, and support for distributed computing. -![1D Genome Scan](image/ex1.png) +![1D Genome Scan](images/ex1.png) -![2D Genome Scan](image/ex2.jpg) +![2D Genome Scan](images/ex2.jpg) The package is written in [Julia](https://www.julialang.org) and includes extensive diff --git a/docs/src/guide/analysis.md b/docs/src/guide/analysis.md index 3eb6c5d..2838faa 100644 --- a/docs/src/guide/analysis.md +++ b/docs/src/guide/analysis.md @@ -13,6 +13,7 @@ Use any Julia package able to read data files (`.txt`, `.csv`, etc.). Julia's b Let's try using an example dataset in `FlxQTL`. It is plant data: Arabidopsis thaliana in the `data` folder. Detailed description on the data can be referred to `README` in the folder. + ```julia using DelimitedFiles @@ -21,13 +22,13 @@ pheno = readdlm("data/Arabidopsis_fitness.csv",',';skipstart=1); # skip to read geno = readdlm("data/Arabidopsis_genotypes.csv",',';skipstart=1); markerinfo = readdlm("data/Arabidopsis_markerinfo_1d.csv",',';skipstart=1); - ``` For efficient computation, the normalization of matrices is necessary. The phenotype matrix labelled as `pheno` here composes of wide range of values from 1.774 to 34.133, so that it is better to narow the range of values in [0,1], [-1,1], or any narrower interval for easy computation. Note that the dimension of a phenotype matrix should be `the number of traits x the number of individuals`. + ```julia using Statistics, StatsBase Y=convert(Array{Float64,2},pheno'); #convert from transposed one to a Float64 matrix @@ -39,28 +40,31 @@ Ystd=(Y.-mean(Y,dims=2))./std(Y,dims=2); # sitewise normalization In the genotype data, `1`, `2` indicate Italian, Swedish parents, respectively. You can rescale the genotypes for efficiency. + ```julia geno[geno.==1.0].=0.0;geno[geno.==2.0].=1.0; # or can do geno[geno.==1.0].=-1.0 for only genome scan - ``` + For genome scan, we need restructure the standardized genotype matrix combined with marker information. Note that the genome scan in `FlxQTL` is implemented by CPU parallelization, so we need to add workers (or processes) before the genome scan. Depending on the computer CPU, one can add as many processes as possible. If your computer has 16 cores, then you can add 15 or little more. Note that you need to type `@everywhere` followed by `using PackageName` for parallel computing. The dimension of a genotype (probability) matrix should be `the number of markers x the number of individuals`. + ```julia using Distributed addprocs(4) @everywhere using FlxQTL XX=Markers(markerinfo[:,1],markerinfo[:,2],markerinfo[:,3],geno') # marker names, chromosomes, marker positions, genotypes - ``` + - **Julia tip**: Whenever you reload a package, i.e. `using FlxQTL`, you should re-enter `XX=FlxQTL.Markers(markerinfo[:,1],markerinfo[:,2],markerinfo[:,3],geno')` to fresh the struct of array. If not, your genome scan throws an error. You should also do with another struct of array in a submodule `QTLplot`, `FlxQTL.layers`. Optionally, one can generate a trait covariate matrix (Z). The first column indicates overall mean between the two regions, and the second implies site difference: `-1` for Italy, and `1` for Sweden. -```@repl + +```julia Z=hcat(ones(6),vcat(-ones(3),ones(3))) m,q = size(Z) # check the dimension ``` @@ -75,20 +79,23 @@ Note that the shrinkage option is only used for `kinshipMan`, `kinship4way`. For the Arabidopsis genotype data, we will use a genetic relatedness matrix using manhattan distance measure, `kinshipMan` with a shrinkage with the LOCO option. + ```julia Kg = shrinkgLoco(kinshipMan,10,XX) ``` + For no LOCO option with shrinkage, + ```julia K = shrinkg(kinshipMan,10,XX.X) ``` - If you have climatic information on your trait data, you can compute the relatedness matrix using one of the above functions, but it is recommended using `kinshipGs`,`kinshipLin`,`kinshipCtr` after normalization. Since the climatic information is not available, we use an identity matrix. -```@repl + +```julia using LinearAlgebra Kc = Matrix(1.0I,6,6) # 3 years x 2 sites ``` @@ -99,6 +106,7 @@ Once all input matrices are ready, we need to proceed the eigen-decomposition to For a non-identity climatic relatedness, and a kinship with LOCO, you can do eigen-decomposition simultaneously. Since we use the identity climatic relatedness, you can use `Matrix(1.0I,6,6)` for a matrix of eigenvectors and `ones(6)` for a vector of eigenvalues. + ```julia Tg,Λg,Tc,λc = K2Eig(Kg,Kc,true); # the last argument: LOCO::Bool = false (default) @@ -107,26 +115,34 @@ Tg,λg = K2eig(Kg, true) # for eigen decomposition to one kinship with LOCO For eigen decomposition to one kinship with no LOCO option, + ```julia T,λ = K2eig(K) ``` + Now start with 1D genome scan with (or without) LOCO including `Z` or not. For the genome scan with LOCO including `Z`, + ```julia LODs,B,est0 = geneScan(1,Tg,Tc,Λg,λc,Ystd,XX,Z,true); # FlxQTL for including Z (trait covariates) or Z=I ``` + For the genome scan with LOCO excluding `Z`, i.e. an identity matrix, we have two options: a FlxQTL model and a conventional MLMM + + ```julia LODs,B,est0 = geneScan(1,Tg,Tc,Λg,λc,Ystd,XX,true); # FlxQTL for Z=I LODs,B,est0 = geneScan(1,Tg,Λg,Ystd,XX,true); # MLMM ``` + Note that the first argument in `geneScan` is `cross::Int64`, which indicates a type of genotype or genotype probability. For instance, if you use a genotype matrix whose entry is one of 0,1,2, type `1`. If you use genotype probability matrices, depending on the number of alleles or genotypes in a marker, one can type the corresponding number. i.e. `4-way cross: 4`, `HS DO mouse: 8 for alleles, 32 for genotypes`, etc. For no LOCO option, + ```julia LODs,B,est0 = geneScan(1,T,Tc,λ,λc,Ystd,XX,Z); @@ -134,36 +150,25 @@ LODs,B,est0 = geneScan(1,T,Tc,λ,λc,Ystd,XX); LODs,B,est0 = geneScan(1,T,λ,Ystd,XX); # MLMM ``` + The function `geneScan` has three arguments: `LOD scores (LODs)`, `effects matrix under H1 (B)`, and `parameter estimates under H0 (est0)`, which is an `Array{Any,1}`. If you want to see null parameter esitmate in chromosome 1 for LOCO option, type `est0[1].B`, `est0[1].loglik`, `est0[1].τ2`, `est0[1].Σ`. In particular, you can extract values from each matrix in `B` (3D array of matrices) to generate an effects plot. To print an effect size matrix for the third marker, type `B[:,:,3]`. - ## Generating plots -The `QTLplot` module is currently unavailable but will replaced with [BigRiverQTLPlots.jl](https://github.com/senresearch/BigRiverQTLPlots.jl) soon. - - +![arabidopsis](images/arab-lod.png) ## Performing a permutation test Since the statistical inference for `FlxQTL` relies on LOD scores and LOD scores, the function `permTest` finds thresholds for a type I error. The first argument is `nperm::Int64` to set the number of permutations for the test. For `Z = I`, type `Matrix(1.0I,6,6)` for the Arabidopsis thaliana data. In the keyword argument, `pval=[0.05 0.01]` is default to get thresholds of `type I error rates (α)`. Note that permutation test is implemented by no LOCO option. + ```julia -julia> maxLODs, H1par_perm, cutoff = permTest(1000,1,K,Kc,Ystd,XX,Z;pval=[0.05]) # cutoff at 5 % +maxLODs, H1par_perm, cutoff = permTest(1000,1,K,Kc,Ystd,XX,Z;pval=[0.05]) # cutoff at 5 % ``` diff --git a/docs/src/guide/arab-lod.png b/docs/src/guide/images/arab-lod.png similarity index 100% rename from docs/src/guide/arab-lod.png rename to docs/src/guide/images/arab-lod.png diff --git a/docs/src/guide/tutorial.md b/docs/src/guide/tutorial.md index ec6ec48..fda6c01 100644 --- a/docs/src/guide/tutorial.md +++ b/docs/src/guide/tutorial.md @@ -15,8 +15,6 @@ Or, equivalently, ```julia julia> using Pkg; Pkg.add("FlxQTL") ``` - - To remove the package from the Julia REPL, diff --git a/docs/src/index.md b/docs/src/index.md index 8c023a8..c78ac8f 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -9,7 +9,6 @@ structured multivariate traits. - LOCO (Leave One Chromosome Out) support for genome scan - Computation for Genetic (or Climatic) Relatedness matrix (or kinship) - CPU parallelization - ## Guide diff --git a/image/ex1.png b/images/ex1.png similarity index 100% rename from image/ex1.png rename to images/ex1.png diff --git a/image/ex2.jpg b/images/ex2.jpg similarity index 100% rename from image/ex2.jpg rename to images/ex2.jpg diff --git a/src/FlxQTL.jl b/src/FlxQTL.jl index 2ffa87d..9240b8d 100644 --- a/src/FlxQTL.jl +++ b/src/FlxQTL.jl @@ -12,7 +12,6 @@ module FlxQTL include("MLM.jl") -# include("QTLplot.jl") include("Miscellanea.jl") include("GRM.jl") include("EcmNestrv.jl") @@ -25,9 +24,6 @@ using .GRM:kinshipMan,kinship4way,kinshipGs,kinshipLin,kinshipCtr,kinshipStd,shr export kinshipMan,kinship4way,kinshipGs,kinshipLin,kinshipCtr,kinshipStd export shrinkg,shrinkgLoco,kinshipLoco -# using .QTLplot:layers, plot1d, plot2d, subplot2d -# export layers, plot1d, plot2d, subplot2d - using .flxMLMM: geneScan,gene2Scan,envScan,permTest,K2eig, K2Eig, obtainKc,gene1Scan,updateKc #selectQTL export geneScan,gene2Scan,envScan,permTest,K2eig, K2Eig, obtainKc,gene1Scan,updateKc diff --git a/src/QTLplot.jl b/src/QTLplot.jl deleted file mode 100644 index 12fcd25..0000000 --- a/src/QTLplot.jl +++ /dev/null @@ -1,263 +0,0 @@ -""" - - QTLplot - - -A module for generating PyPlot-based 1D-, 2D-plots for LOD scores (or effects). - -""" -module QTLplot - -using PyPlot, LinearAlgebra -import Statistics:median - - -""" - - layers(chr::Array{Any,1},pos::Array{Float64,1},lod::Array{Float64,2}) - -Creates a struct of arrays for plotting LOD scores, (or main, interaction) effects - -# Argument - -- `chr` : A vector of strings or numbers indicating Chromosome names, ex. 1,2,3,... or 1K,1N,2K,2N,... -- `pos` : A vector of marker positions. -- `lod` : A matrix of LOD scores obtained from 1d- or 2d-genome scan. Can be a matrix of effects (main, or interaction). - -""" -struct layers - chr::Array{Any,1} - pos::Array{Float64,1} - lod::Array{Float64,2} -end - - -""" - - function plot1d(xx::layers;title= " ",title_font=25,ylabel="LOD",yint=[],yint_color=["red"],Legend=[],fontsize=20,loc="upper right") - - -Generates one or more graphs of LOD scores (or effects) obtained from 1d-genome scan on a single plot. - -# Arguments - -- `xx` : A type of [`layers`](@ref) that consists of chromosome names, marker positions, and a matrix of LODs(or effects). - -## Keyword arguements - -- `title` : A string of title. Default is blank. -- `title_font` : A string or number to set `title` fontsize (default= 25). i.e. "small", "medium", "large", or any integer. -- `ylabel` : A string of a y-axis label (default = LOD) -- `yint` : A vector of y-intercept(s). -- `yint_color` : A vector of colors (strings) of y-intercepts in yint. -- `Legend` : A vector of graph names in `layers`. -- `fontsize` : A string or number to set fontsizes of `Legend`, `xlabel` and `ylabel` (default= 20). - i.e. "small", "medium", "large", or any integer. -- `loc` : A string of Legend's position. Default is "upper right". - - -""" -function plot1d(xx::layers;title= " ",title_font=25,ylabel="LOD",yint=[], - yint_color=["red"],sub_dim=111,Legend=[],fontsize=20, - loc="upper right") - - -Chr=unique(xx.chr); nchr=length(Chr);np=size(xx.lod,2) - - -# #sanity check -# for j = axes(xx.lod) -# if (sum(xx.lod[:,j].<0.0) -# println("Error! LOD or -log₁₀P can't be negative.") -# else -# idx = findall(xx.lod[:,j].==-0.0) -# xx.lod[idx,j].=0.0 -# end -# end - #generating a set of line segments - -idx=findall(xx.chr.==Chr[1]) -line_seg=Any[collect(zip(idx,xx.lod[idx,j])) for j=1:size(xx.lod,2)] -#generating major ticks (Mticks), minor ticks(mticks) - Mticks=[0;idx[end]]; mticks=[median(idx)] - -for i=2:nchr - idx=findall(xx.chr.==Chr[i]) - append!(line_seg,[collect(zip(idx,xx.lod[idx,j])) for j=1:size(xx.lod,2)]) - Mticks=[Mticks; idx[end]] - mticks=[mticks; median(idx)] -end - -#Mticks=collect(Iterators.take(Mticks,nchr-1)) -############ -## Plot ## -############ -fig=figure(figsize=[20,10]) -ax=subplot(sub_dim) # creates a subplot with just one graphic - #major labels -Mnames=["" for j=1:nchr]; -ax.xaxis.set_major_locator(matplotlib.ticker.FixedLocator(Mticks)) # Set interval of major ticks -ax.xaxis.set_major_formatter(matplotlib.ticker.FixedFormatter(Mnames)) - -ax.xaxis.set_minor_locator(matplotlib.ticker.FixedLocator(mticks)) # Set interval of major ticks -ax.xaxis.set_minor_formatter(matplotlib.ticker.FixedFormatter(Chr)) - -ax.xaxis.set_tick_params(which="major",length=10,width=2,labelsize=20) -ax.xaxis.set_tick_params(which="minor",length=5,width=2, labelsize=20) - - - -## colors -#c = Vector{Int}[[0,1,0],[0,1,0],[0,1,0],[1, 1, 0]] -c= repeat([rand(3) for j=1:np],nchr) -# Assemble everything into a LineCollection -line_segments = matplotlib.collections.LineCollection(line_seg, colors=c) - -# mx = matplotlib.ticker.MultipleLocator(1) # Define interval of minor ticks -# ax.xaxis.set_minor_locator(mx) # Set interval of minor ticks - -My = matplotlib.ticker.MultipleLocator(5) # Define interval of major ticks -ax.yaxis.set_major_locator(My) # Set interval of major ticks - -my = matplotlib.ticker.MultipleLocator(1) # Define interval of minor ticks -ax.yaxis.set_minor_locator(my) # Set interval of minor ticks - -#horizontal line -#yint=[6.3869; 7.7035] - if (length(yint)!=0) - if(length(yint_color)>1) - for j=1:length(yint) - ax.axhline(y=yint[j],linewidth=0.8,linestyle="dashed",color=yint_color[j]) - end - else - for j=1:length(yint) - ax.axhline(y=yint[j],linewidth=0.8,linestyle="dashed",color=yint_color[1]) - end - end - end -# #legend - if (length(Legend)!=0) - lbox=[]; - #plegend=["Scan 1"] - for l=1:np - lbox0= matplotlib.lines.Line2D([], [], color=c[l],label=Legend[l]) - lbox=vcat(lbox,lbox0) - end - legend(handles=lbox, loc=loc, fontsize=fontsize) - end - -ax.add_collection(line_segments) -ax.axis("image") -ax.axis("tight") -# ax.set(xlabel="Chromosome", ylabel=ylabel, title=title) -# ax.set(xlabel="Chromosome", ylabel=ylabel) -ax.set_xlabel("Chromosome", fontsize=fontsize) -ax.set_ylabel(ylabel, fontsize=fontsize) -ax.set_title(label=title,fontsize=title_font, loc="center") # - -setp(ax.get_yticklabels(), fontsize=15) # Y Axis font formatting - - -ax.grid("on") -# gcf(); -PyPlot.display_figs() -end - -## 2d-plot -##plotting 2D-scan PyPlot -""" - - plot2d(S::layers;title_font=20,fontsize=15) - -Generates 2-d heatmap plots of LOD scores from 2d-genome scan - -# Argument - -- `S`: a type of 'layers' that consists of chromosome names, marker positions, and a matrix of LODs - -## Keyword arguements - -- `title_font` : A string or number to set `title` fontsize (default= 20). i.e. "small", "medium", "large", or any integer. -- `fontsize` : A string or number to set fontsizes of x-, y-ticks, and colorbar (default= 15). - i.e. "small", "medium", "large", or any integer. - -""" -function plot2d(S::layers;title_font=20,fontsize=15) -Chr=unique(S.chr); -for i=1:length(Chr) - cidx=findall(S.chr.==Chr[i]) - Chrom=Array(Symmetric(S.lod[cidx,cidx],:L)) - x,y=S.pos[cidx],S.pos[cidx] - figure();imshow(Chrom,cmap="jet",interpolation="bicubic",extent=[minimum(x),maximum(x),maximum(y),minimum(y)], - vmin=0.0,vmax=maximum(S.lod)); - -# eval(Meta.parse(string("""title(string("Chromsome ", """,Chr[i],"""),fontsize=20) """))) - title(string("Chromosome ",Chr[i]),fontsize=title_font) - xticks(fontsize=fontsize);yticks(fontsize=fontsize) - cbar=colorbar() - cbar.ax.tick_params(labelsize=fontsize) - end -end - - -#sub_dim is a three digit integer, -#where the first digit is the number of rows, the second the number of columns, and the third the index of the subplot. -""" - - subplot2d(S::layers,sub_dim::Int64;title_font=20,fontsize=15) - - -Generates a matrix of 2-d heatmap subplots for LOD scores obtained from 2d-genome scan - -# Arguments - -- `sub_dim`: A two digit integer, where the first digit (m) is the number of rows, the second (n) the number of columns. - It returns a m x n matrix of subplots. - -## Keyword Arguments - -- `title_font` : A string or number to set `title` fontsize (default= 20). i.e. "small", "medium", "large", or any integer. -- `fontsize` : A string or number to set fontsizes of x-, y-ticks, and colorbar (default= 15). - i.e. "small", "medium", "large", or any integer. -""" -function subplot2d(S::layers,sub_dim::Int64;title_font=20,fontsize=15) -Chr=unique(S.chr); - d=digits(sub_dim,base=10) - inner_num=d[1]*d[2] - outer_num=Int(ceil(length(Chr)/inner_num)) - for j=1:outer_num - figure(); - for i=1:inner_num - cidx=findall(S.chr.==Chr[i+inner_num*(j-1)]) - Chrom=Array(Symmetric(S.lod[cidx,cidx],:L)) - x,y=S.pos[cidx],S.pos[cidx] - position=sub_dim*10+i - #position=parse(Int,string(nrow,ncol))*10+i - # position=320+i - subplot(position) - imshow(Chrom,cmap="jet",interpolation="bicubic",extent=[minimum(x),maximum(x),maximum(y),minimum(y)],vmin=0.0,vmax=maximum(S.lod)); -# eval(Meta.parse(string("""title(string("Chromsome ", """,Chr[i+inner_num*(j-1)],""")) """))) - title(string("Chromosome ",Chr[i+inner_num*(j-1)]),fontsize=title_font) - xticks(fontsize=fontsize);yticks(fontsize=fontsize) - cbar=colorbar() - cbar.ax.tick_params(labelsize=fontsize) - end - end -end - - - -# for j=1:3 -# figure() -# for i=1:6 -# Chrom=Array(Symmetric([find(markers.chr.==Chr[i+6*(j-1)]),find(markers.chr.==Chr[i+6*(j-1)])],:L)) -# position=230+i -# subplot(position) -# imshow(Chrom,cmap="jet",interpolation="bicubic",vmin=0.0,vmax=maximum(lod2_h)); -# eval(parse(string(""" title("HEIGHT """, Chr[i+6*(j-1)],""" "); colorbar()"""))) -# end -# end - - -# export layers, plot1d, plot2d, subplot2d -end \ No newline at end of file