Skip to content

Commit

Permalink
RECoVERY 3.3
Browse files Browse the repository at this point in the history
  • Loading branch information
aknijn committed Jul 3, 2021
1 parent c8cac89 commit 09604d7
Show file tree
Hide file tree
Showing 8 changed files with 1,649 additions and 20 deletions.
1,508 changes: 1,508 additions & 0 deletions Galaxy-Workflow-RECoVERY_3.3.ga

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions RECoVGISAID/zzz_gisaid_uploader.authtoken
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"CoV": "cid-da630018ba06m/EUEURPVKOYLHBNJMPLPGOIQQLTHINKRQGSTOFTHYEGNWRJGXKETAWDWPWCBARFVPCXMOCZYQOBIMSPAFLBJVJQORZQKLIUIXSZDSFHVTHIBVOFUMBVIRNJEAWJYBZPHA"}
7 changes: 6 additions & 1 deletion RECoVJ/VOCLineages
Original file line number Diff line number Diff line change
@@ -1,6 +1,11 @@
B.1.1.7+E484K
B.1.351
P.1
B.1.617
B.1.617.1
B.1.617.2
B.1.617.3
B.1.617.3
B.1.620
B.1.621
B.1.622
B.1.623
3 changes: 1 addition & 2 deletions RECoVL/recovl.xml
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,7 @@
#import subprocess
#set $library = $subprocess.getoutput('cat ' + str($librarytype))
#if $library == "sang":
python
$__tool_directory__/RECoVL.py --variants $variants --strain $strain --lineage $lineage
pangolin --min-length 1000 $consensus --outfile $lineage
#else:
pangolin --min-length 10000 $consensus --outfile $lineage
#end if
Expand Down
4 changes: 2 additions & 2 deletions tools/ivar_covid_consensus.xml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
<tool id="ivar_covid_consensus" name="ivar covid consensus" version="3.2+galaxy0">
<tool id="ivar_covid_consensus" name="ivar covid consensus" version="3.3+galaxy0">
<description>Call consensus from aligned BAM file</description>
<requirements>
<requirement type="package" version="1.2">ivar</requirement>
Expand Down Expand Up @@ -29,7 +29,7 @@
<param name="uploaded_fasta" type="data" format="fasta" label="Uploaded fasta file" />
</inputs>
<outputs>
<data name="consensus" format="fasta" label="${tool.name} on ${on_string} Consensus" from_work_dir="consensus.fa"/>
<data name="first_consensus" format="fasta" label="${tool.name} on ${on_string} first_consensus" from_work_dir="consensus.fa"/>
</outputs>
<citations>
<citation type="doi">10.1186/s13059-018-1618-7</citation>
Expand Down
41 changes: 26 additions & 15 deletions tools/remove_aa_artifact.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,20 @@
'TAC':'Y', 'TAT':'Y', 'TAA':'_', 'TAG':'_',
'TGC':'C', 'TGT':'C', 'TGA':'_', 'TGG':'W'}

def finding_errors(positions):
    """Tell whether any variant field carries a blocking marker.

    A field is flagged when it contains one of the effect labels listed in
    ``errors`` as a substring (this includes the ``?`` and ``*`` symbols), or
    when it is exactly one of the single-letter codes in ``degeneration``.

    Args:
        positions: an iterable of field strings, or a single field string.
            A bare string is treated as one field; iterating over it would
            compare its individual characters and, for example, flag the
            ``S`` in ``SYNONYMOUS_CODING`` as a degenerate code.

    Returns:
        ``'SUCCESS'`` if at least one field is flagged, ``'NOT_SUCCESS'``
        otherwise (including for an empty input).
    """
    errors = (
        'FRAME_SHIFT',
        'CHROMOSOME_LARGE_DELETION',
        'CODON_CHANGE',
        'CODON_INSERTION',
        'CODON_CHANGE_PLUS_CODON_INSERTION',
        'CODON_DELETION',
        'CODON_CHANGE_PLUS_CODON_DELETION',
        'STOP_GAINED',
        '?',
        '*',
    )
    degeneration = frozenset(
        ("R", "D", "M", "N", "S", "K", "W", "H", "B", "V", "Y")
    )

    if isinstance(positions, str):
        positions = [positions]

    for field in positions:
        # Degenerate codes must match the whole field; effect labels may
        # appear anywhere inside it.
        if field in degeneration or any(marker in field for marker in errors):
            return 'SUCCESS'
    return 'NOT_SUCCESS'

def getAABase(AA):
Expand All @@ -49,7 +57,7 @@ def __main__():
read_csv2=[]
read_csv2.append(['Gene', 'Position', 'Reference', 'Alternative', 'Mutation type', 'Codon change', 'Amino Acid Effect'])
for line in read_csv[1:]:
if len(line)<=8:
if len(line)<=8 and line[3]!='N':
del line[4]
read_csv2.append(line)

Expand All @@ -62,25 +70,26 @@ def __main__():
riga=''
non_trovato=0
if read_csv2[index][1]!=read_csv2[index+1][1]:
position = [read_csv2[index][3], read_csv2[index][4], read_csv2[index][6], read_csv2[index + 1][3], read_csv2[index + 1][4], read_csv2[index + 1][6]]
if finding_errors(read_csv2[index][4])=='NOT_SUCCESS' and finding_errors(read_csv2[index+1][4])=='NOT_SUCCESS':
if finding_errors(read_csv2[index][6])=='NOT_SUCCESS' and finding_errors(read_csv2[index+1][6])=='NOT_SUCCESS':
if finding_errors(position)=='NOT_SUCCESS':
if aa==getAABase(read_csv2[index+1][6]) and aa!='' and read_csv2[index][0]==read_csv2[index+1][0]:
codone.append(read_csv2[index])
codone.append(read_csv2[index+1])
index += 1
non_trovato+=1
if read_csv2[index][1]!=read_csv2[index+1][1]:
if finding_errors(read_csv2[index][4]) == 'NOT_SUCCESS' and finding_errors(read_csv2[index + 1][4]) == 'NOT_SUCCESS':
if finding_errors(read_csv2[index][6]) == 'NOT_SUCCESS' and finding_errors(read_csv2[index + 1][6]) == 'NOT_SUCCESS':
if aa==getAABase(read_csv2[index+1][6]) and aa!='' and read_csv2[index][0]==read_csv2[index+1][0]:
codone.append(read_csv2[index+1])
index += 1
non_trovato+=1
position = [read_csv2[index][3], read_csv2[index][4], read_csv2[index][6], read_csv2[index + 1][3], read_csv2[index + 1][4], read_csv2[index + 1][6]]
if finding_errors(position) == 'NOT_SUCCESS':
if aa==getAABase(read_csv2[index+1][6]) and aa!='' and read_csv2[index][0]==read_csv2[index+1][0]:
codone.append(read_csv2[index+1])
index += 1
non_trovato+=1
if non_trovato==0:
l=len(read_csv2[index][2])-1
if args.minmax == 'max':
out_file.write('\t'.join(read_csv2[index])+'\n')
elif read_csv2[index][4].find('FRAME_SHIFT')!=0:
elif read_csv2[index][4].find('FRAME_SHIFT')==-1:
out_file.write('\t'.join(read_csv2[index]) + '\n')
elif l%3==0 and l>=3:
out_file.write('\t'.join(read_csv2[index]) + '\n')
Expand Down Expand Up @@ -116,9 +125,10 @@ def __main__():
mutations=''
if gencode.get(codon[0:-4].upper())==gencode.get(codon[4:].upper()):
mutations+='SYNONYMOUS_CODING'
riga += cod1 + '\t' + cod2 + '\t' + mutations + '\t' + codon + '\t' + gencode.get(codon[0:-4].upper()) + str(aa) + '\n'
else:
mutations+='NON_SYNONYMOUS_CODING'
riga+=cod1+'\t'+cod2+'\t'+mutations+'\t'+codon+'\t'+gencode.get(codon[0:-4].upper())+aa+gencode.get(codon[4:].upper())+'\n'
riga += cod1 + '\t' + cod2 + '\t' + mutations + '\t' + codon + '\t' + gencode.get(codon[0:-4].upper()) + str(aa) + gencode.get(codon[4:].upper()) + '\n'
out_file.write(riga)
if non_trovato==2:
mutations=''
Expand All @@ -130,7 +140,8 @@ def __main__():
riga+=codone[0][0]+'\t'+codone[0][1]+'\t'+read_csv2[index][5][0:3].upper()+'\t'+newcodon+'\t'+mutations+'\t'+read_csv2[index][5][0:3].upper()+'/'+newcodon+'\t'+gencode.get(read_csv2[index][5][0:3].upper())+aa+gencode.get(newcodon)+'\n'
out_file.write(riga)
index+=1
out_file.write('\t'.join(read_csv2[len(read_csv2)-1])+'\n')
if read_csv2[len(read_csv2)-1][4].find('FRAME_SHIFT')==-1:
out_file.write('\t'.join(read_csv2[len(read_csv2)-1])+'\n')
else:
out_file.write('error: no variants detected\n')
out_file.close
Expand Down
76 changes: 76 additions & 0 deletions tools/remove_nucleotide_deletions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
#remove deletion/insertion caused by homopolymers and NGS errors
from Bio import SeqIO
import subprocess
import csv
import sys
import argparse

def _frameshift_rows(path):
    """Return the data rows of a tab-separated variants table whose column 5
    contains the text ``FRAME_SHIFT``.

    The first row is skipped as a header. Rows too short to have a column 5
    are ignored instead of raising ``IndexError``.
    """
    with open(path, newline="") as handle:
        rows = list(csv.reader(handle, delimiter="\t"))
    return [row for row in rows[1:] if len(row) > 5 and "FRAME_SHIFT" in row[5]]


def _isolated_gaps(aligned):
    """Return the indices of single-column gaps ('-') in an aligned sequence,
    i.e. gap columns whose existing neighbours are both non-gap."""
    last = len(aligned) - 1
    return [
        i
        for i, char in enumerate(aligned)
        if char == "-"
        and (i == 0 or aligned[i - 1] != "-")
        and (i == last or aligned[i + 1] != "-")
    ]


def _rebuild_consensus(reference, sequence, frameshift_rows):
    """Build the corrected consensus from the two aligned sequences.

    Args:
        reference: aligned reference sequence (first alignment record).
        sequence: aligned first-consensus sequence (second alignment record).
        frameshift_rows: variant rows; column 1 is compared with alignment
            column indices and column 2 supplies the replacement base.

    Returns:
        The upper-cased consensus with all remaining gap characters removed.
    """
    reference = str(reference)
    sequence = str(sequence)

    # One entry per position, first row wins. A one-column gap can take only
    # one base, so repeated positions (e.g. the same variant listed in both
    # tables) must not be applied more than once.
    ref_allele_at = {}
    for row in frameshift_rows:
        ref_allele_at.setdefault(int(row[1]), row[2])

    # Work on a list of alignment columns so indices stay aligned with the
    # reference until the very end; unfilled gaps are kept as '-' for now.
    columns = list(sequence)

    # Isolated gap in the consensus + frame-shift row at that column:
    # put back the second character of column 2, lower-cased.
    for i in _isolated_gaps(sequence):
        ref_allele = ref_allele_at.get(i)
        if ref_allele is not None and len(ref_allele) > 1:
            columns[i] = ref_allele[1].lower()

    # Isolated gap in the reference + frame-shift row at that column:
    # drop the consensus column. Highest index first so that earlier
    # deletions do not shift the columns still to be processed.
    for i in sorted(_isolated_gaps(reference), reverse=True):
        if i in ref_allele_at and i < len(columns):
            del columns[i]

    return "".join(columns).replace("-", "").upper()


def __main__():
    """Command-line entry point.

    Concatenates the reference and the first consensus into
    ``sequences.fasta``, aligns them with ``mafft`` into ``all.fasta``,
    corrects isolated single-column gaps that coincide with ``FRAME_SHIFT``
    rows of the majority/minority variant tables, and writes the result to
    ``consensus.fasta`` under the identifier of the second alignment record.

    Raises:
        subprocess.CalledProcessError: if ``mafft`` exits with a non-zero code.
        ValueError: if the alignment does not contain two records.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--first_consensus', dest='first_consensus', help='first_consensus file')
    parser.add_argument('--reference_fasta', dest='reference_fasta', help='reference_fasta file')
    parser.add_argument('--minority_variants', dest='minority_variants', help='minority_variants file')
    parser.add_argument('--majority_variants', dest='majority_variants', help='majority_variants file')
    args = parser.parse_args()

    # Concatenate in Python rather than through `cat` in a shell, so paths
    # containing spaces or shell metacharacters are handled safely.
    with open("sequences.fasta", "wb") as combined:
        for path in (args.reference_fasta, args.first_consensus):
            with open(path, "rb") as part:
                data = part.read()
            combined.write(data)
            # Keep the next FASTA header on its own line.
            if data and not data.endswith(b"\n"):
                combined.write(b"\n")

    with open("all.fasta", "w") as aligned:
        subprocess.run(
            ["mafft", "--quiet", "--auto", "sequences.fasta"],
            stdout=aligned,
            check=True,
        )

    records = list(SeqIO.parse("all.fasta", "fasta"))
    if len(records) < 2:
        raise ValueError("all.fasta must contain the reference and the consensus")
    reference = records[0].seq
    sequence = records[1].seq
    name_sequence = records[1].id

    # Majority rows first, then minority rows, as in the original ordering.
    frameshift_rows = (
        _frameshift_rows(args.majority_variants)
        + _frameshift_rows(args.minority_variants)
    )

    new_sequence = _rebuild_consensus(reference, sequence, frameshift_rows)

    with open("consensus.fasta", "w") as fasta:
        fasta.write(">" + name_sequence + "\n")
        fasta.write(new_sequence)


if __name__ == "__main__":
    __main__()
29 changes: 29 additions & 0 deletions tools/remove_nucleotide_deletions.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
<tool id="remove_nucleotide_deletions" name="remove nucleotide deletions" version="3.3">
    <description>Removes deletions/insertions caused by homopolymers and NGS errors</description>
    <!-- No <yield /> here: it is a macro placeholder and has no meaning
         directly inside a tool's <requirements>. -->
    <requirements>
        <requirement type="package" version="1.79">biopython</requirement>
        <requirement type="package" version="7.480">mafft</requirement>
    </requirements>
    <!-- Parameters are single-quoted so dataset paths containing spaces or
         shell metacharacters reach the script as one argument each. -->
    <command detect_errors="exit_code">
<![CDATA[
python '$__tool_directory__/remove_nucleotide_deletions.py'
--first_consensus '$first_consensus'
--reference_fasta '$reference_fasta'
--minority_variants '$minority_variants'
--majority_variants '$majority_variants'
]]>
    </command>
    <inputs>
        <param name="first_consensus" type="data" format="fasta" label="first consensus file" />
        <param name="reference_fasta" type="data" format="fasta" label="Reference fasta file" />
        <param name="minority_variants" type="data" format="tabular" label="minority variants" />
        <param name="majority_variants" type="data" format="tabular" label="majority variants" />
    </inputs>
    <outputs>
        <!-- The script writes its result to consensus.fasta in the working directory. -->
        <data name="consensus" format="fasta" label="Final consensus" from_work_dir="consensus.fasta" />
    </outputs>
    <help>
<![CDATA[
Aligns the first consensus to the reference with MAFFT, then uses the
FRAME_SHIFT entries of the majority and minority variant tables to correct
isolated single-base gaps in the alignment. The corrected sequence is
written as the final consensus.
]]>
    </help>
</tool>



0 comments on commit 09604d7

Please sign in to comment.