-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
8 changed files
with
1,649 additions
and
20 deletions.
There are no files selected for viewing
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
{"CoV": "cid-da630018ba06m/EUEURPVKOYLHBNJMPLPGOIQQLTHINKRQGSTOFTHYEGNWRJGXKETAWDWPWCBARFVPCXMOCZYQOBIMSPAFLBJVJQORZQKLIUIXSZDSFHVTHIBVOFUMBVIRNJEAWJYBZPHA"} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,11 @@ | ||
B.1.1.7+E484K | ||
B.1.351 | ||
P.1 | ||
B.1.617 | ||
B.1.617.1 | ||
B.1.617.2 | ||
B.1.617.3 | ||
B.1.617.3 | ||
B.1.620 | ||
B.1.621 | ||
B.1.622 | ||
B.1.623 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,76 @@ | ||
#remove deletion/insertion caused by homopolymers and NGS errors | ||
from Bio import SeqIO | ||
import subprocess | ||
import csv | ||
import sys | ||
import argparse | ||
|
||
def _read_frameshift_rows(path):
    """Return the frame-shift rows of a tab-separated variant table.

    The first row is treated as a header and skipped.  A row is kept when its
    sixth column contains the text ``FRAME_SHIFT``.  Rows with fewer than six
    columns (for example blank lines) are ignored instead of raising.
    """
    with open(path, newline="") as handle:
        rows = list(csv.reader(handle, delimiter="\t"))
    return [row for row in rows[1:] if len(row) > 5 and "FRAME_SHIFT" in row[5]]


def _isolated_gap_columns(aligned):
    """Return the interior column indexes holding a single, isolated ``-``.

    A column qualifies when it is a gap and neither neighbour is a gap.  The
    first and last columns are never reported: they lack one neighbour, and
    indexing ``aligned[-1]`` for column 0 would wrap around to the far end.
    """
    return [
        i
        for i in range(1, len(aligned) - 1)
        if aligned[i] == "-" and aligned[i - 1] != "-" and aligned[i + 1] != "-"
    ]


def _repair_consensus(reference, sequence, frameshift_rows):
    """Undo single-base indels in ``sequence`` that match a frame-shift row.

    ``reference`` and ``sequence`` are the two aligned strings (same length,
    ``-`` for gaps).  Each row supplies an integer in column 2 that is
    compared directly with the alignment column index, and a string in
    column 3 whose second character is used as the replacement base
    (presumably the REF allele of a deletion -- confirm against the table
    producer).

    Returns the repaired sequence with all gaps removed, upper-cased.
    """
    # Only the first row seen for a position is used, so several rows for
    # the same position cannot apply the same repair more than once.
    first_row_at = {}
    for row in frameshift_rows:
        first_row_at.setdefault(int(row[1]), row)

    # Work column by column so every index stays in alignment coordinates
    # until the very end; gaps that are not repaired are stripped later.
    columns = list(sequence)

    # Isolated gap in the consensus: restore the base named by the table.
    for i in _isolated_gap_columns(sequence):
        row = first_row_at.get(i)
        if row is not None and len(row[2]) > 1:
            columns[i] = row[2][1]

    # Isolated gap in the reference: the consensus has an extra base there;
    # drop it once when the table reports a frame shift at that column.
    for i in _isolated_gap_columns(reference):
        if i in first_row_at and i < len(columns):
            columns[i] = ""

    return "".join(columns).replace("-", "").upper()


def __main__():
    """Remove deletions/insertions caused by homopolymers and NGS errors.

    Aligns the first consensus against the reference with ``mafft``, repairs
    isolated single-column gaps that coincide with ``FRAME_SHIFT`` rows of
    the majority/minority variant tables, and writes the result to
    ``consensus.fasta`` in the current directory.  ``sequences.fasta`` and
    ``all.fasta`` are written there as intermediate files.

    Raises:
        subprocess.CalledProcessError: if ``mafft`` exits with an error.
        ValueError: if the alignment holds fewer than two records.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--first_consensus', dest='first_consensus', required=True, help='first_consensus file')
    parser.add_argument('--reference_fasta', dest='reference_fasta', required=True, help='reference_fasta file')
    parser.add_argument('--minority_variants', dest='minority_variants', required=True, help='minority_variants file')
    parser.add_argument('--majority_variants', dest='majority_variants', required=True, help='majority_variants file')
    args = parser.parse_args()

    # Byte-for-byte concatenation (reference first), done without a shell so
    # paths containing spaces or metacharacters are safe.
    with open("sequences.fasta", "wb") as combined:
        for path in (args.reference_fasta, args.first_consensus):
            with open(path, "rb") as source:
                combined.write(source.read())

    # Argument list + check=True: no shell parsing, and a failing aligner
    # stops the run instead of leaving a stale or empty all.fasta behind.
    with open("all.fasta", "w") as aligned:
        subprocess.run(
            ["mafft", "--quiet", "--auto", "sequences.fasta"],
            stdout=aligned,
            check=True,
        )

    records = list(SeqIO.parse("all.fasta", "fasta"))
    if len(records) < 2:
        raise ValueError("all.fasta must contain the reference and the consensus")
    reference = str(records[0].seq)
    sequence = str(records[1].seq)
    name_sequence = records[1].id

    frameshift_rows = (
        _read_frameshift_rows(args.majority_variants)
        + _read_frameshift_rows(args.minority_variants)
    )

    new_sequence = _repair_consensus(reference, sequence, frameshift_rows)

    # The context manager guarantees the file is flushed and closed.
    with open("consensus.fasta", "w") as fasta:
        fasta.write(">" + name_sequence + "\n")
        fasta.write(new_sequence)


if __name__ == "__main__":
    __main__()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
<tool id="remove_nucleotide_deletions" name="remove nucleotide deletions" version="3.3">
    <description>Removes deletions/insertions caused by homopolymers and NGS errors</description>
    <!-- Packages the wrapped script needs: Biopython for FASTA parsing, mafft for the alignment. -->
    <requirements>
        <requirement type="package" version="1.79">biopython</requirement>
        <requirement type="package" version="7.480">mafft</requirement>
    </requirements>
    <!-- Every substituted path is single-quoted so that dataset paths containing
         spaces or shell metacharacters reach the script as one argument. -->
    <command detect_errors="exit_code">
<![CDATA[
    python '$__tool_directory__/remove_nucleotide_deletions.py'
    --first_consensus '$first_consensus'
    --reference_fasta '$reference_fasta'
    --minority_variants '$minority_variants'
    --majority_variants '$majority_variants'
]]>
    </command>
    <inputs>
        <param name="first_consensus" type="data" format="fasta" label="first consensus file" />
        <param name="reference_fasta" type="data" format="fasta" label="Reference fasta file" />
        <param name="minority_variants" type="data" format="tabular" label="minority variants" />
        <param name="majority_variants" type="data" format="tabular" label="majority variants" />
    </inputs>
    <!-- The script writes consensus.fasta into the job working directory. -->
    <outputs>
        <data name="consensus" format="fasta" label="Final consensus" from_work_dir="consensus.fasta" />
    </outputs>
</tool>
|
||
|
||
|