-
Notifications
You must be signed in to change notification settings - Fork 0
/
makeLAD.py
86 lines (63 loc) · 2.31 KB
/
makeLAD.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
import argparse
import logging
import sys
import numpy as np
import pandas as pd
logging.basicConfig(level=logging.INFO)
def parse_args(argv):
parser = argparse.ArgumentParser(
description="Compute local ancestry linkage disequilibrium matrix"
)
parser.add_argument("--cm-map", help="recombination map")
parser.add_argument("--extract", help="list of coordinates to include")
parser.add_argument(
"--generations", help="number of generations since admixture", type=float
)
parser.add_argument("--parama", help="parama", type=float, default=0)
parser.add_argument("--paramb", help="paramb", type=float, default=1)
parser.add_argument("--out", help="output prefix")
return parser.parse_args(argv)
def make_LADmat(args):
cm_map = pd.read_csv(args.cm_map, sep="\t", header=None)
# generate interpolated map
extract_coord_sorted = np.sort(np.loadtxt(args.extract))
extract_cm = np.interp(extract_coord_sorted, cm_map.iloc[:, 1], cm_map.iloc[:, 2])
interp_map = pd.DataFrame(
{
0: np.repeat(cm_map.iloc[0, 0], extract_coord_sorted.shape[0]),
1: extract_coord_sorted,
2: extract_cm,
}
)
# compute LADmat
lambda_mat = np.abs(extract_cm[:, None] - extract_cm[None, :])
LADmat = args.parama + args.paramb * np.exp(-0.01 * args.generations * lambda_mat)
# Normalize the diagonal to 1.
# D = np.diag(LADmat)
# LADmat_ = np.diag(D**-0.5) @ LADmat @ np.diag(D**-0.5)
LADmat_ = LADmat
return interp_map, LADmat_
def main(argv):
args = parse_args(argv)
logging.info(
f"""
==============Start==============
Read genetic map: {args.cm_map}
Read coordinates of target markers: {args.extract}
Assumed number of generations since admixture: {args.generations}
Interpolated genetic map will be output to : {args.out}.map
NumPy matrix of LAD matrix be output to : {args.out}.npy
"""
)
logging.info("Interp")
interp_map, LADmat = make_LADmat(args)
logging.info("Writing Output files")
np.savetxt(f"{args.out}.map", interp_map, fmt=("%d", "%d", "%f"), delimiter="\t")
np.save(f"{args.out}.npy", LADmat)
logging.info(
"""
===============END================
"""
)
if __name__ == "__main__":
main(sys.argv[1:])