-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.py
108 lines (96 loc) · 3.19 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
from code.variants.MFA2CSV import MFA2CSV
### ### ### ### ### ### ### ### ### ### ### ####
# virus reference + target genomes -> variants #
### ### ### ### ### ### ### ### ### ### ### ####
###
# install virulign
###
###
# run the provided bash script for your specified (target) sequences
###
###
# convert MFA --> variants tabular file per target
###
### ###
### SARS-Cov2 ###
### ###
# ===========#
# Test #
# ===========#
# Use the ORF7b of SARS-CoV-2 from virulign as the test ORF and create the 4 sequences below (Seq1-Seq4) with the following variants:
# >Ref
# CCCATTAGCCCTATTGAGACTGTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAA
# >Seq1
# ------------ATTGACACTGTACCAGTAACATTAAAGCCAGGAATGGATGGACCAAAG
# >Seq2
# CCTATG---------GAAACTGTGCCAGTAAAATTAAAGCCAGGAATGGAT--------- (not displayed)
# >Seq3
# CTCATTAGTCCTATTAGT---------GTAAAATTAAAACCAGGAATGGATGGCCCAAGG (not displayed)
# >Seq4
# ------AGTCCCATTGAAACTGTACCAGTAAAA---------GGA---GATGGCCCAAAG (not displayed)
# Disabled test configuration: the triple-quoted string below is a bare
# expression statement, so none of the assignments inside it are executed.
# To use it, remove the surrounding triple quotes (and disable the other
# configurations in this file, which assign the same variable names).
"""
data_path = "/home/damian/Documents/L3S/projects/sars_cov2/data"
alignments_folder = "test_alignments"
xmls_folder = "test_orf_xml"
#ORF input:
mfa_file = "test_mfa_nt.fasta"
orf_xml_file = "test_orf.xml"
ncbi_ref_id = "test_ref"
#output
out_path = "/home/damian/Documents/L3S/projects/sars_cov2/test_variants"
"""
# Disabled example: the triple-quoted string below is a bare expression
# statement, so the code inside it is not executed. When enabled, it builds an
# MFA2CSV instance and calls run() with a single ORF XML file and a single MFA
# file.
"""
#======================================================#
# Example for S (Spike) protein and 2 target sequences #
#======================================================#
# inputs:
data_path = "/home/damian/Documents/L3S/projects/sars_cov2/data"
alignments_folder = "alignments"
xmls_folder = "xmls"
# ORF input:
mfa_file = "S_mfa_nt.fasta"
orf_xml_file = "S.xml"
ncbi_ref_id = "NC_045512.2"
# output:
out_path = "/home/damian/Documents/L3S/projects/sars_cov2/variants"
print("==== ====")
MFA2CSV_S_protein_test = MFA2CSV(data_path, alignments_folder, xmls_folder, ncbi_ref_id, out_path)
MFA2CSV_S_protein_test.run(orf_xml_file, mfa_file)
print("=== * ===")
print("== *** ==")
"""
# Disabled example: the triple-quoted string below is a bare expression
# statement, so the code inside it is not executed. When enabled, it builds an
# MFA2CSV instance and calls run_multiple_orfs() without arguments.
"""
# =================================== #
# Convert MFA to variants csv file #
# for all ORFS of 10 target sequences #
# =================================== #
# input #
data_path = "/home/damian/Documents/L3S/projects/sars_cov2/data"
alignments_folder = "alignments"
xmls_folder = "xmls"
ncbi_ref_id = "NC_045512.2"
# output #
out_path = "/home/damian/Documents/L3S/projects/sars_cov2/variants"
print("==== ====")
MFA2CSV_all_orf_10targets = MFA2CSV(data_path, alignments_folder, xmls_folder, ncbi_ref_id, out_path)
MFA2CSV_all_orf_10targets.run_multiple_orfs()
print("=== * ===")
print("== *** ==")
"""
# ------------------------------------------------------------------ #
# Active run: convert the MFA alignments of all ORFs of the NCBI     #
# sequences into variants CSV files.                                 #
# ------------------------------------------------------------------ #

# Reference identifier handed to the MFA2CSV constructor.
ncbi_ref_id = "NC_045512.2"

# Input side: data root plus the alignment and ORF-XML folder names.
# NOTE(review): the folders are presumably resolved relative to data_path
# inside MFA2CSV -- confirm against the class.
xmls_folder = "xmls"
alignments_folder = "ncbi_alignments"
data_path = "/home/damian/Documents/L3S/projects/sars_cov2/data"

# Output side: destination passed to the MFA2CSV constructor.
out_path = "/home/damian/Documents/L3S/projects/sars_cov2/ncbi_variants"

# Flag forwarded to run_multiple_orfs().
# NOTE(review): presumably selects amino-acid rather than nucleotide
# processing -- confirm in MFA2CSV.
is_amino_acid = True

print("==== ====")
ncbi_aa_converter = MFA2CSV(
    data_path,
    alignments_folder,
    xmls_folder,
    ncbi_ref_id,
    out_path,
)
ncbi_aa_converter.run_multiple_orfs(is_amino_acid)
print("=== * ===")
print("== *** ==")