Skip to content

Commit

Permalink
version 1.2 with new features
Browse files Browse the repository at this point in the history
  • Loading branch information
alipirani88 committed Jun 5, 2018
1 parent 15231ba commit 2b27cdc
Show file tree
Hide file tree
Showing 30 changed files with 1,345 additions and 169 deletions.
4 changes: 2 additions & 2 deletions config_ali
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,8 @@ binbase: /nfs/esnitkin/bin_group/variant_calling_bin/
resources: nodes=1:ppn=4,pmem=4000mb,walltime=250:00:00
large_resources: nodes=1:ppn=12,mem=47gb,walltime=250:00:00
email: apirani@med.umich.edu
queue: fluxod
flux_account: esnitkin_fluxod
queue: flux
flux_account: esnitkin_flux
notification: ae

# Set Parameters for individual tools. Set the binbase of each tool: This should be the folder name of respective tools where the executables for each resp. tool resides.
Expand Down
Binary file modified config_settings.pyc
Binary file not shown.
Binary file modified modules/__init__.pyc
Binary file not shown.
98 changes: 98 additions & 0 deletions modules/core_prep_sanity_checks.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
from __future__ import division
import sys
import os
import errno
from config_settings import ConfigSectionMap
from logging_subprocess import *
from log_modules import *

""" Sanity Check Methods"""
def make_sure_path_exists(out_path):
"""
Fuction to make sure output folder exists. If not, create it.
:param: out_path
:return: null/exception
"""
try:
os.makedirs(out_path)
except OSError as exception:
if exception.errno != errno.EEXIST:
keep_logging('\nErrors in output folder path! please change the output path or analysis name\n',
'\nErrors in output folder path! please change the output path or analysis name\n', logger,
'info')
exit()

def make_sure_files_exists(vcf_file_array):
"""
Function to make sure the variant call output files exists and are not empty.
:param: vcf_file_array
:return: null or exception
"""
not_found_files = []
for files in vcf_file_array:
ori_unmapped_file = files.replace("filter2_final.vcf_no_proximate_snp.vcf",
"unmapped.bed_positions")
ori_proximate_file = files.replace("filter2_final.vcf_no_proximate_snp.vcf",
"filter2_final.vcf_no_proximate_snp.vcf_positions_array")
ori_variant_position_file = files.replace("filter2_final.vcf_no_proximate_snp.vcf",
"filter2_final.vcf_no_proximate_snp.vcf")
ori_5bp_mpileup_file = files.replace("filter2_final.vcf_no_proximate_snp.vcf",
"aln_mpileup_raw.vcf_5bp_indel_removed.vcf")
ori_mpileup_file = files.replace("filter2_final.vcf_no_proximate_snp.vcf",
"aln_mpileup_raw.vcf")
ori_filter_file = files.replace("filter2_final.vcf_no_proximate_snp.vcf",
"filter2_final.vcf")
ori_indel_file = files.replace("filter2_final.vcf_no_proximate_snp.vcf",
"filter2_indel_final.vcf")

if os.path.isfile(ori_unmapped_file) and os.path.isfile(ori_proximate_file) and os.path.isfile(
ori_variant_position_file) and os.path.isfile(ori_5bp_mpileup_file) and os.path.isfile(
ori_mpileup_file) and os.path.isfile(ori_filter_file) and os.path.isfile(ori_indel_file):
continue
else:
not_found_files.append(files)
if len(not_found_files) > 0:
for i in not_found_files:
keep_logging('Error finding variant calling output files for: %s' % os.path.basename(i.replace('_filter2_final.vcf_no_proximate_snp.vcf', '')),
'Error finding variant calling output files for: %s' % os.path.basename(i.replace('_filter2_final.vcf_no_proximate_snp.vcf', '')), logger, 'exception')
exit()

def make_sure_label_files_exists(vcf_file_array, uniq_snp_positions, uniq_indel_positions):
"""
Function to make sure the variant call output files exists and are not empty.
:param: vcf_file_array
:return: null or exception
"""
not_found_files = []
found_incomplete = []
for files in vcf_file_array:

snps_label_file = files.replace("filter2_final.vcf_no_proximate_snp.vcf",
"filter2_final.vcf_no_proximate_snp.vcf_positions_label")

indel_label_file = files.replace("filter2_final.vcf_no_proximate_snp.vcf",
"filter2_indel_final.vcf_indel_positions_label")

num_snps_label_lines = sum(1 for line in open('%s' % snps_label_file))
num_indel_label_lines = sum(1 for line in open('%s' % indel_label_file))

if os.path.isfile(snps_label_file) and os.path.isfile(indel_label_file):
if num_snps_label_lines == uniq_snp_positions and num_indel_label_lines == uniq_indel_positions:
continue
else:
found_incomplete.append(files)
else:
not_found_files.append(files)
# keep_logging('Error finding variant calling output files: %s' % files, 'Error finding variant calling output files: %s' % files, logger, 'exception')
if len(not_found_files) > 0:
for i in not_found_files:
keep_logging('Error finding core_prep output files for: %s' % os.path.basename(i.replace('_filter2_final.vcf_no_proximate_snp.vcf', '')),
'Error finding core_prep output files for: %s' % os.path.basename(i.replace('_filter2_final.vcf_no_proximate_snp.vcf', '')), logger, 'exception')
exit()

if len(found_incomplete) > 0:
for i in found_incomplete:
tmp_file = os.path.basename(i.replace('_filter2_final.vcf_no_proximate_snp.vcf', ''))
keep_logging('core_prep step failed for: %s. Rerun %s.pbs' % (tmp_file, i),
'core_prep step failed for: %s. Rerun %s.pbs' % (tmp_file, i), logger, 'exception')
exit()
Binary file added modules/core_prep_sanity_checks.pyc
Binary file not shown.
24 changes: 24 additions & 0 deletions modules/fasttree.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
__author__ = 'alipirani'
import os
from config_settings import ConfigSectionMap
from logging_subprocess import *
from log_modules import *

def fasttree(tree_dir, input_fasta, cluster, logger, Config):
keep_logging('Running Fasttree on input: %s' % input_fasta, 'Running Fasttree on input: %s' % input_fasta, logger, 'info')
fasttree_cmd = "%s/%s/%s -nt %s > %s/%s_FastTree.tree" % (ConfigSectionMap("bin_path", Config)['binbase'], ConfigSectionMap("fasttree", Config)['fasttree_bin'], ConfigSectionMap("fasttree", Config)['base_cmd'], input_fasta, tree_dir, (os.path.basename(input_fasta)).replace('.fa', ''))
keep_logging('%s' % fasttree_cmd, '%s' % fasttree_cmd, logger, 'info')
if cluster == "parallel-local" or cluster == "local":
call("cd %s" % tree_dir, logger)
call(fasttree_cmd, logger)
elif cluster == "cluster":
call("cd %s" % tree_dir, logger)
call(fasttree_cmd, logger)
elif cluster == "parallel-cluster":
job_file_name = "%s/fasttree_%s.pbs" % (tree_dir, os.path.basename(input_fasta))
job_name = os.path.basename(job_file_name)
job_print_string = "#PBS -N %s\n#PBS -M %s\n#PBS -m %s\n#PBS -V\n#PBS -l nodes=1:ppn=4,mem=47000mb,walltime=76:00:00\n#PBS -q %s\n#PBS -A %s\n#PBS -l qos=flux\ncd %s\n%s" % (job_name, ConfigSectionMap("scheduler", Config)['email'], ConfigSectionMap("scheduler", Config)['notification'], ConfigSectionMap("scheduler", Config)['queue'], ConfigSectionMap("scheduler", Config)['flux_account'], tree_dir, fasttree_cmd)
f1=open(job_file_name, 'w+')
f1.write(job_print_string)
f1.close()
call("qsub %s" % job_file_name, logger)
Binary file added modules/fasttree.pyc
Binary file not shown.
15 changes: 15 additions & 0 deletions modules/gubbins.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
__author__ = 'alipirani'
import os
from config_settings import ConfigSectionMap
from logging_subprocess import *
from log_modules import *

def gubbins(gubbins_dir, input_fasta, logger, Config):
keep_logging('\nRunning Gubbins on input: %s\n' % input_fasta, '\nRunning Gubbins on input: %s\n' % input_fasta, logger,
'info')
call("cd %s" % ConfigSectionMap("gubbins", Config)['gubbins_bin'], logger)
gubbins_cmd = "%s/%s --prefix %s/%s %s" % (ConfigSectionMap("gubbins", Config)['gubbins_bin'], ConfigSectionMap("gubbins", Config)['base_cmd'], gubbins_dir, (os.path.basename(input_fasta)).replace('.fa', ''), input_fasta)
#call(gubbins_cmd, logger)
keep_logging('\nRunning Gubbins: %s' % input_fasta, '\nRunning Gubbins: %s\n' % input_fasta,
logger,
'info')
Binary file added modules/gubbins.pyc
Binary file not shown.
Binary file modified modules/log_modules.pyc
Binary file not shown.
Binary file modified modules/logging_subprocess.pyc
Binary file not shown.
66 changes: 54 additions & 12 deletions modules/phage_detection.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,10 @@ def run_phaster(reference_genome, outdir, logger, Config):
keep_logging('Running: %s' % phaster_post_cmd, 'Running: %s' % phaster_post_cmd, logger, 'debug')
call(phaster_post_cmd, logger)
else:
keep_logging("Phaster Post Json file %s/%s_phaster_post.json exists" % (outdir, str(out_name[0])), "Phaster Post Json: %s/%s_phaster_post.json exists" % (outdir, str(out_name[0])), logger,
keep_logging("Phaster Post Json file %s/%s_phaster_post.json already exists. Skipping Phaster Job submission. To get latest Phaster results remove this file and run core_prep step again." % (outdir, str(out_name[0])), "Phaster Post Json: %s/%s_phaster_post.json exists. Skipping Phaster Job submission. To get latest Phaster results remove this file and run core_prep step again." % (outdir, str(out_name[0])), logger,
'info')


def parse_phaster(reference_genome, outdir, logger, Config):
out_name = (os.path.basename(reference_genome)).split('.')
if os.path.isfile("%s/%s_phaster_post.json" % (outdir, str(out_name[0]))) and os.stat("%s/%s_phaster_post.json" % (outdir, str(out_name[0]))).st_size != 0:
Expand All @@ -28,19 +30,59 @@ def parse_phaster(reference_genome, outdir, logger, Config):
phaster_get_cmd = "wget \"http://phaster.ca/phaster_api?acc=%s\" -O %s/%s" % (
data["job_id"], outdir, str(out_name[0]) + "_phaster_get.json")
if not os.path.isfile("%s/%s_phaster_get.json" % (outdir, str(out_name[0]))):
keep_logging('Running: %s' % phaster_get_cmd, 'Running: %s' % phaster_get_cmd, logger, 'debug')
call(phaster_get_cmd, logger)
else:
keep_logging('Running: %s to get results from Phaster server' % phaster_get_cmd, 'Running: %s to get results from Phaster server' % phaster_get_cmd, logger, 'debug')
call(phaster_get_cmd, logger)
with open('%s/%s' % (outdir, str(out_name[0]) + "_phaster_get.json")) as json_get_data:
get_data = json.load(json_get_data)
phaster_get_zip_cmd = "wget \"http://%s\" -O %s/%s_phaster_get.zip" % (str(get_data["zip"]), outdir, str(out_name[0]))
phaster_unzip_cmd = "unzip -o %s/%s_phaster_get.zip" % (outdir, str(out_name[0]))
if get_data["status"] == "Complete":
keep_logging("Phaster Get Json file exists... The status of Phaster job id %s is %s" % (
get_data["job_id"], get_data["status"]),
"Phaster Get Json file exists... The status of Phaster job id %s is %s" % (
get_data["job_id"], get_data["status"]), logger,
'info')
phaster_get_zip_cmd = "wget \"http://%s\" -O %s/%s_phaster_get.zip" % (str(get_data["zip"]), outdir, str(out_name[0]))
phaster_unzip_cmd = "unzip -o %s/%s_phaster_get.zip" % (outdir, str(out_name[0]))
keep_logging('Running: %s' % phaster_get_zip_cmd, 'Running: %s' % phaster_get_zip_cmd, logger,
'debug')
keep_logging('Running: %s' % phaster_unzip_cmd, 'Running: %s' % phaster_get_cmd, logger, 'debug')
call(phaster_get_zip_cmd, logger)
call(phaster_unzip_cmd, logger)
else:
keep_logging("Phaster Get Json file exists... The status of Phaster job id %s is %s" % (
get_data["job_id"], get_data["status"]),
"Phaster Get Json file exists... The status of Phaster job id %s is %s" % (
get_data["job_id"], get_data["status"]), logger,
'info')
json_get_data.close()

elif os.path.isfile("%s/%s_phaster_get.json" % (outdir, str(out_name[0]))) and os.stat("%s/%s_phaster_get.json" % (outdir, str(out_name[0]))).st_size != 0:
#call(phaster_get_cmd, logger)
with open('%s/%s' % (outdir, str(out_name[0]) + "_phaster_get.json")) as json_get_data:
get_data = json.load(json_get_data)
if get_data["status"] == "Complete":
keep_logging("Phaster Get Json file exists... The status of Phaster job id %s is %s" % (
get_data["job_id"], get_data["status"]),
"Phaster Get Json file exists... The status of Phaster job id %s is %s" % (
get_data["job_id"], get_data["status"]), logger,
'info')
phaster_get_zip_cmd = "wget \"http://%s\" -O %s/%s_phaster_get.zip" % (str(get_data["zip"]), outdir, str(out_name[0]))
phaster_unzip_cmd = "unzip -o %s/%s_phaster_get.zip" % (outdir, str(out_name[0]))
keep_logging('Running: %s' % phaster_get_zip_cmd, 'Running: %s' % phaster_get_zip_cmd, logger,
'debug')
keep_logging('Running: %s' % phaster_unzip_cmd, 'Running: %s' % phaster_get_cmd, logger, 'debug')
call(phaster_get_zip_cmd, logger)
call(phaster_unzip_cmd, logger)
else:
keep_logging("Phaster Get Json file exists... The status of Phaster job id %s is %s. Phaster job is still running or Phaster get json results were empty depending on the Phaster Job Status." % (
get_data["job_id"], get_data["status"]),
"Phaster Get Json file exists... The status of Phaster job id %s is %s. Phaster job is still running or Phaster get json results were empty depending on the Phaster Job Status." % (
get_data["job_id"], get_data["status"]), logger,
'info')
json_get_data.close()
keep_logging('Running: %s' % phaster_get_zip_cmd, 'Running: %s' % phaster_get_zip_cmd, logger, 'debug')
keep_logging('Running: %s' % phaster_unzip_cmd, 'Running: %s' % phaster_get_cmd, logger, 'debug')
call(phaster_get_zip_cmd, logger)
call(phaster_unzip_cmd, logger)
# keep_logging('Running: %s' % phaster_get_zip_cmd, 'Running: %s' % phaster_get_zip_cmd, logger, 'debug')
# keep_logging('Running: %s' % phaster_unzip_cmd, 'Running: %s' % phaster_get_cmd, logger, 'debug')
# call(phaster_get_zip_cmd, logger)
# call(phaster_unzip_cmd, logger)
if os.path.isfile("%s/summary.txt" % outdir):
keep_logging('Extracting Phage region information from %s/summary.txt' % outdir, 'Extracting Phage region information from %s/summary.txt' % outdir, logger, 'info')
get_phage_regions = "sed -n -e '/REGION/,$p' %s/summary.txt | awk 'NR>2' | awk -F' ' '{print $5}'" % outdir
Expand All @@ -58,8 +100,8 @@ def parse_phaster(reference_genome, outdir, logger, Config):
for pos in phage_positions:
f_open.write(str(pos) + "\n")
else:
keep_logging('Phaster output file %s/summary.txt not found' % outdir,
'Phaster output file %s/summary.txt not found' % outdir, logger, 'exception')
keep_logging('Phaster output file %s/summary.txt not found. Phaster job is still running or Phaster get json results were empty.' % outdir,
'Phaster output file %s/summary.txt not found. Phaster job is still running or Phaster get json results were empty.' % outdir, logger, 'exception')
exit()

keep_logging('Number of phage Positions: %s' % len(phage_positions),
Expand Down
Binary file modified modules/phage_detection.pyc
Binary file not shown.
26 changes: 26 additions & 0 deletions modules/raxml.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
__author__ = 'alipirani'
import os
from config_settings import ConfigSectionMap
from logging_subprocess import *
from log_modules import *


def raxml(tree_dir, input_fasta, jobrun, logger, Config):
keep_logging('Running RAXML on input: %s' % input_fasta, 'Running RAXML on input: %s' % input_fasta, logger, 'info')
raxml_cmd = "%s/%s/%s %s -s %s -n %s_raxML" % (ConfigSectionMap("bin_path", Config)['binbase'], ConfigSectionMap("raxml", Config)['raxml_bin'], ConfigSectionMap("raxml", Config)['base_cmd'], ConfigSectionMap("raxml", Config)['parameters'], input_fasta, (os.path.basename(input_fasta)).replace('.fa', ''))
keep_logging('%s' % raxml_cmd, '%s' % raxml_cmd, logger, 'info')
if jobrun == "parallel-local" or jobrun == "local":
call("cd %s" % tree_dir, logger)
call(raxml_cmd, logger)
elif jobrun == "cluster":
call("cd %s" % tree_dir, logger)
call(raxml_cmd, logger)
elif jobrun == "parallel-cluster":
job_file_name = "%s/raxml_%s.pbs" % (tree_dir, os.path.basename(input_fasta))
job_name = os.path.basename(job_file_name)
job_print_string = "#PBS -N %s\n#PBS -M %s\n#PBS -m %s\n#PBS -V\n#PBS -l nodes=1:ppn=4,mem=47000mb,walltime=76:00:00\n#PBS -q %s\n#PBS -A %s\n#PBS -l qos=flux\ncd %s\n%s" % (job_name, ConfigSectionMap("scheduler", Config)['email'], ConfigSectionMap("scheduler", Config)['notification'], ConfigSectionMap("scheduler", Config)['queue'], ConfigSectionMap("scheduler", Config)['flux_account'], tree_dir, raxml_cmd)
f1=open(job_file_name, 'w+')
f1.write(job_print_string)
f1.close()
#os.system("qsub %s" % job_file_name)
call("qsub %s" % job_file_name, logger)
Binary file added modules/raxml.pyc
Binary file not shown.
Binary file modified modules/tabix.pyc
Binary file not shown.
Binary file modified modules/variant_diagnostics/config_settings.pyc
Binary file not shown.
Loading

0 comments on commit 2b27cdc

Please sign in to comment.