forked from Blue-Lab/assoc_pipeline
-
Notifications
You must be signed in to change notification settings - Fork 0
/
pcrelate.R
executable file
·61 lines (52 loc) · 2.2 KB
/
pcrelate.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
#! /usr/bin/env Rscript
library(argparser)
library(magrittr)
argp <- arg_parser("Run PC-Relate") %>%
add_argument("gds_file", help = "GDS file") %>%
add_argument("pcair_file", help = "File with pcair object") %>%
add_argument("--out_prefix", help = "Prefix for output files",
default = "") %>%
add_argument("--n_pcs", default = 10,
"Number of PCs to pass to pcrelate") %>%
add_argument("--variant_id", help = "File with vector of variant IDs") %>%
add_argument("--sample_id", help = "File with vector of sample IDs") %>%
add_argument("--scale_kin", help = "Scaling factor for GRM output",
default = 1) %>%
add_argument("--sparse_thresh", default = 0, "Threshold for sparsifying GRM (will be multiplied by scale_kin)") %>%
add_argument("--sample_block_size", help = "pcrelate sample block size",
default = 5000) %>%
add_argument("--small_samp_correct",
"Flag to implement small-sample correction",
flag = TRUE) %>%
add_argument("--variant_block", help = "SeqVarBlaockIterator block size",
default = 1024)
argv <- parse_args(argp)
sessionInfo()
print(argv)
library(SeqArray)
library(SeqVarTools)
library(GENESIS)
if (!is.na(argv$variant_id)) {
variant_id <- readRDS(argv$variant_id)
} else {
variant_id <- NULL
}
if (!is.na(argv$sample_id)) {
sample_id <- readRDS(argv$sample_id)
} else {
sample_id <- NULL
}
gds <- seqOpen(argv$gds_file)
mypcair <- readRDS(argv$pcair_file)
seqSetFilter(gds, variant.id = variant_id, sample.id = sample_id)
seqData <- SeqVarData(gds)
iterator <- SeqVarBlockIterator(seqData, verbose=FALSE,
variantBlock = argv$variant_block)
mypcrel <- pcrelate(iterator, pcs = mypcair$vectors[, seq(argv$n_pcs)],
training.set = mypcair$unrels, sample.include = sample_id,
sample.block.size = argv$sample_block_size,
small.samp.correct = argv$small_samp_correct)
saveRDS(mypcrel, paste0(argv$out_prefix, "pcrelate.rds"))
pcr_mat <- pcrelateToMatrix(mypcrel, thresh = argv$sparse_thresh,
scaleKin = argv$scale_kin)
saveRDS(pcr_mat, paste0(argv$out_prefix, "pcr_mat.rds"))