Skip to content

Commit

Permalink
Allow for consensus files as input
Browse files Browse the repository at this point in the history
  • Loading branch information
aknijn committed Apr 12, 2021
1 parent df77f5e commit 1e2cc15
Show file tree
Hide file tree
Showing 9 changed files with 87 additions and 53 deletions.
13 changes: 6 additions & 7 deletions RECoVG/RECoVG.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,16 +56,15 @@ def __main__():
# Sanger
elif args.library=='sang':
# ALIGN SARS-COV-2 GENOME
if os.stat(args.input1).st_size < 5120:
# SPIKE
subprocess.call("bowtie2 -p ${GALAXY_SLOTS:-4} -x '" + TOOL_DIR + "/data/genome_Spike' -f -U '" + args.input1 + "' --very-sensitive | samtools sort -@${GALAXY_SLOTS:-2} -O bam -o " + args.covidref_aligned, shell=True)
else:
# CONSENSUS
subprocess.call("minimap2 -t ${GALAXY_SLOTS:-4} " + TOOL_DIR + "/data/genome.fa '" + args.input1 + "' -a | samtools sort -@${GALAXY_SLOTS:-2} -O bam -o " + args.covidref_aligned, shell=True)
subprocess.call("bowtie2 -p ${GALAXY_SLOTS:-4} -x '" + TOOL_DIR + "/data/genome_Spike' -f -U '" + args.input1 + "' --very-sensitive | samtools sort -@${GALAXY_SLOTS:-2} -O bam -o " + args.covidref_aligned, shell=True)
# Consensus
elif args.library=='cons':
# ALIGN SARS-COV-2 GENOME
subprocess.call("minimap2 -t ${GALAXY_SLOTS:-4} " + TOOL_DIR + "/data/genome.fa '" + args.input1 + "' -a | samtools sort -@${GALAXY_SLOTS:-2} -O bam -o " + args.covidref_aligned, shell=True)


# COPY CORRESPONDING REFERENCE
if args.library=='sang' and os.stat(args.input1).st_size < 5120:
if args.library=='sang':
shutil.copy(TOOL_DIR + "/data/CovidREF_Spike.gbk", args.reference_genbank)
shutil.copy(TOOL_DIR + "/data/genome_Spike.fa", args.reference_fasta)
else:
Expand Down
4 changes: 3 additions & 1 deletion RECoVG/data/CovidREF.gbk
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ LOCUS NC_045512 29903 bp ss-RNA linear VRL 13-MAR-2020
DEFINITION Severe acute respiratory syndrome coronavirus 2 isolate Wuhan-Hu-1,
complete genome.
ACCESSION NC_045512
VERSION NC_045512.2
VERSION NC_045512
DBLINK BioProject: PRJNA485481
KEYWORDS RefSeq.
SOURCE Severe acute respiratory syndrome coronavirus 2 (SARS-CoV-2)
Expand Down Expand Up @@ -205,6 +205,8 @@ FEATURES Location/Qualifiers
/gene="orf1ab nsp12"
CDS join(13442..13468,13468..16236)
/gene="orf1ab nsp12"
/ribosomal_slippage
/codon_start=1
/product="orf1ab nsp12"
/translation="SADAQSFLNRVCGVSAARLTPCGTGTSTDVVYRAFDIYNDKVAG
FAKFLKTNCCRFQEKDEDDNLIDSYFVVKRHTFSNYQHEETIYNLLKDCPAVAKHDFF
Expand Down
20 changes: 10 additions & 10 deletions RECoVG/data/ProteineNt_Covid19.fasta

Large diffs are not rendered by default.

16 changes: 11 additions & 5 deletions RECoVG/recovg.xml
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,15 @@
</stdio>
<command>
<![CDATA[
#import os
#if $token == "D85SteptkQZ9MobOk9pa":
#if str( $input_pc.forward.name ) == "dummy.fastq" or str( $input_pc.reverse.name ) == "dummy.fastq":
#if str( $input_pc.forward.name ).endswith(".fasta") or str( $input_pc.reverse.name ).endswith(".fasta"):
#set $library = "sang"
#if os.stat( str($input_pc.forward) ).st_size < 5120 and os.stat( str($input_pc.reverse) ).st_size < 5120:
#set $library = "sang"
#else:
#set $library = "cons"
#end if
#else:
#if str( $input_pc.forward.name ).endswith(".fast5") or str( $input_pc.reverse.name ).endswith(".fast5"):
#set $library = "nano"
Expand All @@ -28,10 +33,10 @@
echo $library > $librarytype &&
echo $strain > $strainname &&
touch $uploaded_fasta &&
#if str( $input_pc.forward.name ) == "dummy.fastq" and $library == "sang":
#if str( $input_pc.forward.name ) == "dummy.fastq" and str( $input_pc.reverse.name ).endswith(".fasta"):
cp '${input_pc.reverse}' $uploaded_fasta &&
#else:
#if str( $input_pc.reverse.name ) == "dummy.fastq" and $library == "sang":
#if str( $input_pc.reverse.name ) == "dummy.fastq" and str( $input_pc.forward.name ).endswith(".fasta"):
cp '${input_pc.forward}' $uploaded_fasta &&
#end if
#end if
Expand All @@ -52,10 +57,11 @@
<param name="token" type="text" label="execution token" />
<param name="strain" type="text" label="Set output FASTA ID with Strain name" />
<param name="library" type="select" label="What kind of sequences are you using?">
<option value="iont">Ion torrent</option>
<option value="iont">Ion Torrent</option>
<option value="illu">Illumina</option>
<option value="nano">Nanopore</option>
<option value="nano">Sanger</option>
<option value="sang">Sanger</option>
<option value="cons">Consensus</option>
</param>
<param name="input_pc" type="data_collection" format="fastqsanger" collection_type="paired" label="Paired-end FASTQ collection" help="Must be of datatype &quot;fastqsanger&quot;" optional="false" />
</inputs>
Expand Down
13 changes: 12 additions & 1 deletion RECoVJ/RECoVJ.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,17 @@ def main():
report_data["information_name"] = args.strain
report_data["region"] = args.region
report_data["year"] = args.year
library = open(args.librarytype).readline().rstrip()
if library == 'iont':
report_data["sequence"] = "Ion Torrent"
elif library == 'illu':
report_data["sequence"] = "Illumina"
elif library == 'nano':
report_data["sequence"] = "Nanopore"
elif library == 'sang':
report_data["sequence"] = "Sanger"
elif library == 'cons':
report_data["sequence"] = "Consensus"
with open(args.lineage) as table_in:
tab_lineage = [[str(col).rstrip() for col in row.split(',')] for row in table_in]
report_data["lineage"] = tab_lineage[1][1] + " (" + tab_lineage[1][2] + ")"
Expand All @@ -136,7 +147,7 @@ def main():
report_data["qc_status"] = 'Passed'
with open(args.variants) as table_in:
tab_variants = [[str(col).rstrip() for col in row.split('\t')] for row in table_in]
if open(args.librarytype).readline().rstrip() == 'sang':
if library == 'sang':
strDefault = "ND"
else:
strDefault = "="
Expand Down
4 changes: 2 additions & 2 deletions tools/ivar_covid_consensus.xml
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,8 @@
#set $idname = $subprocess.getoutput('cat ' + str($strainname))
#set $library = $subprocess.getoutput('cat ' + str($librarytype))
#set $clean_name = re.sub('[^\w\-]', '_', str($idname))
#if str( $library ) == "sang":
cp $uploaded_fasta consensus.fa
#if str( $library ) == "sang" or str( $library ) == "cons":
cat $uploaded_fasta > consensus.fa &&
#else:
ln -s '$input_bam' sorted.bam &&
samtools mpileup -A -d 0 -Q 0 sorted.bam | ivar consensus -p consensus -t 0.2 -n N -q 20 -m 30 &&
Expand Down
2 changes: 1 addition & 1 deletion tools/ivar_covid_variants.xml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
-p variants
-q 20
-t 0.5 &&
#if str( $library ) == "sang":
#if str( $library ) == "sang" or str( $library ) == "cons":
mv variants.tsv $output_variants
#else:
awk -F '\t' '$12>9 { print }' variants.tsv > $output_variants
Expand Down
66 changes: 41 additions & 25 deletions tools/remove_aa_artifact.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,13 +21,11 @@
'TGC':'C', 'TGT':'C', 'TGA':'_', 'TGG':'W'}

def getAABase(AA):
if len(AA) <= 1:
AABase = '-'
elif AA[-1].isnumeric():
AABase = AA
else:
AABase = AA[:-1]
return AABase
AABase=''
for x in AA:
if x.isdigit():
AABase+=x
return int(AABase)

def __main__():
parser = argparse.ArgumentParser()
Expand All @@ -40,26 +38,47 @@ def __main__():
out_file = open(args.outtab,'w')
out_file.write('\t'.join(read_csv[0])+'\n')

read_csv2=[]
read_csv2.append(read_csv[0])
ff=1
while ff<len(read_csv[:-1]):
if read_csv[ff][7]=='':
read_csv2.append(read_csv[ff])
ff += 1
if read_csv[ff][7] != '' and read_csv[ff-1][7]=='':
read_csv2.append(read_csv[ff])
ff += 1
if read_csv[ff-1][5].find('DELETION')!=-1:
n1 = getAABase(read_csv[ff][7])
n2 = getAABase(read_csv[ff - 1][7])
if n1==n2:
ff+=1
else:
read_csv2.append(read_csv[ff])
ff+=1
else:
read_csv2.append(read_csv[ff])
ff+=1
read_csv2.append(read_csv[ff])

index=1
while index<len(read_csv[:-1]):
aa=getAABase(read_csv[index][7])
while index<len(read_csv2[:-1]):
aa=read_csv2[index][7][:-1]
codone=[]
riga=''
non_trovato=0
if aa != '-':
if aa==getAABase(read_csv[index+1][7]):
codone.append(read_csv[index])
codone.append(read_csv[index+1])
index += 1
non_trovato+=1

if aa==getAABase(read_csv[index+1][7]):
codone.append(read_csv[index+1])
index += 1
non_trovato+=1

if aa==read_csv2[index+1][7][:-1]:
codone.append(read_csv2[index])
codone.append(read_csv2[index+1])
index += 1
non_trovato+=1
if aa==read_csv2[index+1][7][:-1]:
codone.append(read_csv2[index+1])
index += 1
non_trovato+=1
if non_trovato==0:
out_file.write('\t'.join(read_csv[index])+'\n')
out_file.write('\t'.join(read_csv2[index])+'\n')
if non_trovato==1:
riga += codone[0][0] + '\t' + codone[0][1]+ '\t'
cod=[]
Expand All @@ -68,7 +87,6 @@ def __main__():
codon=''
for x in range(len(codone)):
cod.append(codone[x][6])

for x in range(0, 7):
if x<3:
if cod[0][x].isupper() and cod[1][x].islower():
Expand All @@ -95,15 +113,13 @@ def __main__():
mutations+='SYNONYMOUS_CODING'
else:
mutations+='NON_SYNONYMOUS_CODING'

riga+=cod1+'\t'+cod2+'\t'+codone[0][0]+'\t'+mutations+'\t'+codon+'\t'+gencode.get(codon[0:-4].upper())+codone[0][7][1:-1]+gencode.get(codon[4:].upper())+'\n'
out_file.write(riga)
if non_trovato==2:
newcodon=codone[0][6][4]+codone[1][6][5]+codone[2][6][6]
riga+=codone[0][0]+'\t'+codone[0][1]+'\t'+read_csv[index][6][0:3].upper()+'\t'+newcodon+'\t'+codone[0][0]+'\t'+codone[0][5]+'\t'+read_csv[index][6][0:3].upper()+'/'+newcodon+'\t'+codone[0][7][:-1]+gencode.get(newcodon)+'\n'
riga+=codone[0][0]+'\t'+codone[0][1]+'\t'+read_csv2[index][6][0:3].upper()+'\t'+newcodon+'\t'+codone[0][0]+'\t'+codone[0][5]+'\t'+read_csv2[index][6][0:3].upper()+'/'+newcodon+'\t'+codone[0][7][:-1]+gencode.get(newcodon)+'\n'
out_file.write(riga)
index+=1

if len(read_csv) > 0:
out_file.write('\t'.join(read_csv[len(read_csv)-1])+'\n')
out_file.close
Expand Down
2 changes: 1 addition & 1 deletion tools/remove_aa_artifact.xml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
<description>Transforms SNVs on the same codon in MNVs</description>
<command detect_errors="exit_code">
<![CDATA[
awk -F '\t' '$6!="N" && $4!="N" { print }' $input_tab > clean_input_tab &&
awk -F '\t' '$6!="N" && $4!="N" && $1!="" { print }' $input_tab > clean_input_tab &&
python $__tool_directory__/remove_aa_artifact.py -i clean_input_tab -o $output_tab
]]>
</command>
Expand Down

0 comments on commit 1e2cc15

Please sign in to comment.