-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.jl
99 lines (82 loc) · 2.45 KB
/
main.jl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
using JSON
using Printf
using LightGraphs, SimpleWeightedGraphs
using ArgParse
using FASTX
using XLSX
using BioSequences
include("mut-sig-probs.jl")
include("translation-tables.jl")
using .MutSigProbs, .TranslationTables
"""
    parse_command_line(args=ARGS)

Parse the command-line arguments in `args` (the process arguments by default)
with ArgParse and return the value produced by `parse_args`, which callers index
by option name without the leading dashes (e.g. `parsed["fasta"]`).

Required options: `--mut-prob`, `--fasta`, `--identifier`.
Optional options: `--excel-file` and `--sheets` (one or more sheet names).
"""
function parse_command_line(args=ARGS)
    settings = ArgParseSettings(description="Appends codons and mutational signature probabilities to excel sheets in columns N and O")
    @add_arg_table! settings begin
        "--mut-prob"
            help = "the path to the JSON file with three-mer mutational probabilities"
            required = true
        "--fasta"
            help = "the path to the fasta file with the cDNA sequence"
            required = true
        "--identifier"
            help = "the identifier of the sequence in the fasta to extract"
            required = true
        "--excel-file"
            help = "the path to an excel file on which to append probabilities"
        "--sheets"
            help = "the sheet names within the excel file to append the codon and probability columns"
            nargs = '+'
    end
    return parse_args(args, settings)
end
"""
    parseJsonFile(f)

Open the file at path `f` for reading, hand the stream to `JSON.parse`, and
return whatever `JSON.parse` produces. The stream is closed by `open` once
parsing finishes.
"""
function parseJsonFile(f)
    return open(JSON.parse, f, "r")
end
"""
    readSequence(f, identifier)

Scan the FASTA file at path `f` and return `FASTA.sequence(record)` for the
first record whose `FASTA.identifier(record)` equals `identifier`.

Returns `nothing` when no record matches.
"""
function readSequence(f, identifier)
    return open(FASTA.Reader, f) do fastaReader
        for rec in fastaReader
            # Skip every record until the requested identifier shows up.
            FASTA.identifier(rec) == identifier || continue
            return FASTA.sequence(rec)
        end
        # Reader exhausted without a match.
        return nothing
    end
end
"""
    toAa(s)

Title-case `s`, look the result up in `AA3_1`, and pass the looked-up value to
the `AminoAcid` constructor.

NOTE(review): `AA3_1` is defined outside this block; presumably it maps
three-letter amino-acid names to one-letter codes -- confirm against its
definition.
"""
function toAa(s)
    lookupKey = titlecase(s)
    code = AA3_1[lookupKey]
    return AminoAcid(code)
end
"""
    appendColumnsToExcelFile(workbook, sheets, mutation_probs_dict, fivemer_dict)

Open `workbook` in read-write mode and fill columns N and O of every sheet named
in `sheets`.

Row 1 receives the header strings "sig_prob" (column N) and "codon" (column O).
For every other row, the lookup key is the tuple `(residue, wt, mut)`, where
`residue` is the data of the cell in column 1 and `wt` / `mut` are the data of
the cells in columns 2 and 4 passed through `toAa`. Column N is set to
`mutation_probs_dict[key]` and column O to `fivemer_dict[key]` converted to
`String`.
"""
function appendColumnsToExcelFile(workbook, sheets, mutation_probs_dict, fivemer_dict)
    return XLSX.openxlsx(workbook, mode="rw") do xf
        for sheetName in sheets
            ws = xf[sheetName]
            for r in XLSX.eachrow(ws)
                n = XLSX.row_number(r)
                probAddr = @sprintf("N%d", n)
                codonAddr = @sprintf("O%s", n)
                if n == 1
                    # First row holds the column headers.
                    ws[probAddr] = "sig_prob"
                    ws[codonAddr] = "codon"
                else
                    residue = XLSX.getdata(ws, XLSX.getcell(r, 1))
                    wt = toAa(XLSX.getdata(ws, XLSX.getcell(r, 2)))
                    mut = toAa(XLSX.getdata(ws, XLSX.getcell(r, 4)))
                    key = (residue, wt, mut)
                    # Probability is looked up and written before the codon lookup.
                    ws[probAddr] = mutation_probs_dict[key]
                    ws[codonAddr] = convert(String, fivemer_dict[key])
                end
            end
        end
    end
end
"""
    main()

Script entry point.

Parses the command line, loads the mutational probabilities from the JSON file,
reads the requested sequence from the FASTA file, initialises the probability
module via `init`, and computes the mutation-probability and five-mer
dictionaries via `calculateMutationalProbabilities`. When both an Excel file and
at least one sheet name were supplied, the results are appended to that workbook
through `appendColumnsToExcelFile`; otherwise the function returns without
touching any workbook.

Throws an `ArgumentError` when the FASTA file contains no record with the
requested identifier.
"""
function main()
    args = parse_command_line()
    probs = parseJsonFile(args["mut-prob"])

    fastaPath = args["fasta"]
    identifier = args["identifier"]
    sequence = readSequence(fastaPath, identifier)
    # readSequence yields `nothing` when the identifier is absent; fail here with
    # a clear message instead of passing `nothing` further down.
    if sequence === nothing
        throw(ArgumentError("no sequence with identifier '$identifier' found in '$fastaPath'"))
    end

    init(probs)
    mutation_prob_dict, fivemer_dict = calculateMutationalProbabilities(sequence)

    # ArgParse stores every declared option in the parsed result even when it was
    # not given on the command line, so `haskey` cannot tell whether the option
    # was supplied. Inspect the stored values instead: an unset option is
    # `nothing`, and an unset multi-value option may be an empty collection.
    excelFile = get(args, "excel-file", nothing)
    sheets = get(args, "sheets", nothing)
    if excelFile === nothing || sheets === nothing || isempty(sheets)
        return nothing
    end

    appendColumnsToExcelFile(excelFile, sheets, mutation_prob_dict, fivemer_dict)
    return nothing
end
main()