Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Warwick #18

Open
wants to merge 7 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file removed bin/hapog
Binary file not shown.
15 changes: 13 additions & 2 deletions modules/assemble.py
Original file line number Diff line number Diff line change
Expand Up @@ -385,7 +385,9 @@ def do_spades(self, reads) :
if os.path.isdir(outdir) :
shutil.rmtree(outdir)
read_len = min(self.__get_read_len(reads), 140)
kmer = ','.join([str(max(min(int(read_len*float(x)/200)*2+1, 127),17)) for x in parameters['kmers'].split(',')])
kmer = [str(max(min(int(read_len * float(x) / 200) * 2 + 1, 127), 17)) for x in parameters['kmers'].split(',')]
# Ensure there are no duplicate kmers
kmer = ','.join(list(dict.fromkeys(kmer)))
read_input = []
for lib_id, lib in enumerate(reads) :
if len(lib) == 1 :
Expand Down Expand Up @@ -711,9 +713,18 @@ def __readAssembly(self, assembly) :
return seq, fasfile

def do_kraken(self, assembly, seq) :
window_size = 10000
with open(assembly+'.filter', 'w') as fout :
# Kraken calculates species percentages based on the number of contigs,
# so large contigs, such as those found in long-read assemblies, are not
# appropriately represented; split them into roughly window_size pieces
for n, s in sorted(seq.items()) :
if s[0] > 1000 :
if s[0] > (2 * window_size):
N = s[0]//window_size
size = s[0]//N
for i in range(N):
fout.write('>{0}_{1}\n{2}\n'.format(n, i, s[2][i*size:((i+1)*size)-1]))
elif s[0] > 1000 :
fout.write('>{0}\n{1}\n'.format(n, s[2]))
cmd = '{kraken2} -db {kraken_database} --threads {n_cpu} --output - --report {assembly}.kraken {assembly}.filter'.format(
assembly=assembly, **parameters
Expand Down
54 changes: 21 additions & 33 deletions modules/configure.py
Original file line number Diff line number Diff line change
Expand Up @@ -216,7 +216,7 @@ def download_krakenDB() :
os.makedirs('minikraken2')
os.chdir(os.path.join(moveTo, 'minikraken2'))
if not os.path.exists('hash.k2d') or not os.path.getsize('hash.k2d') == 8000000032:
minikraken_url = 'ftp://ftp.ccb.jhu.edu/pub/data/kraken2_dbs/old/minikraken2_v2_8GB_201904.tgz'
minikraken_url = 'https://genome-idx.s3.amazonaws.com/kraken/minikraken2_v2_8GB_201904.tgz'
logger('Downloading minikraken2 from {0}. This might take a long time.'.format(minikraken_url))
subprocess.Popen('curl -Lo minikraken2_v2_8GB.tgz {0}'.format(minikraken_url).split(), stderr=subprocess.PIPE).communicate()
logger('Unpackaging minikraken2.')
Expand Down Expand Up @@ -261,7 +261,7 @@ def install_externals() :
if not getExecutable([externals['hapog']]) :

# HAPO-G depends on an installed htslib, which may not be available on the host, so do a local install
htslib_ver = '1.3.2'
htslib_ver = '1.15.1'
url = 'https://github.com/samtools/htslib/releases/download/{0}/htslib-{0}.tar.bz2'.format(htslib_ver)
logger('Downloading htslib from {0}'.format(url))
subprocess.Popen('curl -Lo htslib.tar.bz2 {0}'.format(url).split(), stderr=subprocess.PIPE).communicate()
Expand All @@ -276,7 +276,7 @@ def install_externals() :
shutil.rmtree('htslib-{0}'.format(htslib_ver))
logger('Done\n')

hapog_ver = '1.2'
hapog_ver = '1.3.4'
hapog_name = 'HAPO-G-'+hapog_ver
url = 'https://github.com/institut-de-genomique/HAPO-G/archive/refs/tags/{0}.tar.gz'.format(hapog_ver)
logger('Downloading Hapo-G package from {0}'.format(url))
Expand All @@ -285,35 +285,23 @@ def install_externals() :
subprocess.Popen('tar -xzf hapog.tar.gz'.split()).communicate()

os.unlink('hapog.tar.gz')
gcc_ver = int(subprocess.Popen(['gcc', '-dumpversion'], stdout=subprocess.PIPE).communicate()[0].
strip().split(b'.')[0])
if gcc_ver < 9:
# Compiling hapog with earlier versions of gcc creates an executable that runs
# but fails with a segmentation fault when processing data.
# Use the precompiled binary, which works with a locally compiled htslib
os.makedirs('HAPO-G-{0}/build'.format(hapog_ver), exist_ok=True)
shutil.copy('../bin/hapog','HAPO-G-1.2/build')
os.makedirs('HAPO-G-{0}/bin'.format(hapog_ver), exist_ok=True)
subprocess.Popen('ln -fs ../build/hapog HAPO-G-{0}/bin/hapog'.format(hapog_ver).
split(), stderr=subprocess.PIPE).communicate()
else:
my_env = os.environ.copy()

if not getExecutable('cmake') or int(os.popen('cmake --version').read().split()[2].split('.')[0]) < 3:
url = 'https://github.com/Kitware/CMake/releases/download/v3.25.1/cmake-3.25.1-linux-x86_64.tar.gz'
logger('Installing cmake from {0} for hapog installation'.format(url))
subprocess.Popen('curl -Lo cmake.tar.gz {0}'.format(url).split(), stderr=subprocess.PIPE).communicate()
logger('Unpackaging cmake package')
subprocess.Popen('tar -xzf cmake.tar.gz'.split()).communicate()
subprocess.Popen('ln -fs cmake-3.25.1-linux-x86_64/bin/cmake .'.split(),
stderr=subprocess.PIPE).communicate()
os.unlink('cmake.tar.gz')
my_env["PATH"] = externals_dir + ':' + my_env["PATH"]

subprocess.Popen('bash build.sh -l {0}'.format(htslib_dir), shell=True, stdout=subprocess.PIPE,
stderr=subprocess.PIPE, cwd='HAPO-G-{0}'.format(hapog_ver), env=my_env).communicate()

subprocess.Popen('ln -fs HAPO-G-{0}/hapog.py ./hapog.py'.format(hapog_ver).split(),
my_env = os.environ.copy()

if not getExecutable('cmake') or int(os.popen('cmake --version').read().split()[2].split('.')[0]) < 3:
url = 'https://github.com/Kitware/CMake/releases/download/v3.25.1/cmake-3.25.1-linux-x86_64.tar.gz'
logger('Installing cmake from {0} for hapog installation'.format(url))
subprocess.Popen('curl -Lo cmake.tar.gz {0}'.format(url).split(), stderr=subprocess.PIPE).communicate()
logger('Unpackaging cmake package')
subprocess.Popen('tar -xzf cmake.tar.gz'.split()).communicate()
subprocess.Popen('ln -fs cmake-3.25.1-linux-x86_64/bin/cmake .'.split(),
stderr=subprocess.PIPE).communicate()
os.unlink('cmake.tar.gz')
my_env["PATH"] = externals_dir + ':' + my_env["PATH"]

subprocess.Popen('bash build.sh -l {0}'.format(htslib_dir), shell=True, stdout=subprocess.PIPE,
stderr=subprocess.PIPE, cwd='HAPO-G-{0}'.format(hapog_ver), env=my_env).communicate()

subprocess.Popen('ln -s HAPO-G-{0}/hapog.py ./hapog.py'.format(hapog_ver).split(),
stderr=subprocess.PIPE).communicate()
subprocess.Popen('chmod 755 ./hapog.py'.split(), stderr=subprocess.PIPE).communicate()

Expand Down Expand Up @@ -475,7 +463,7 @@ def install_externals() :
logger('Done\n')

if not getExecutable([externals['blastn']]) or not getExecutable([externals['makeblastdb']]) :
blast_url = 'ftp://ftp.ncbi.nlm.nih.gov/blast/executables/blast+/2.8.1/ncbi-blast-2.8.1+-x64-linux.tar.gz'
blast_url = 'https://ftp.ncbi.nlm.nih.gov/blast/executables/blast+/2.8.1/ncbi-blast-2.8.1+-x64-linux.tar.gz'
logger('Downloading ncbi-blast package from {0}'.format(blast_url))
subprocess.Popen('curl -Lo ncbi-blast-2.8.1+-x64-linux.tar.gz {0}'.format(blast_url).split(), stderr=subprocess.PIPE).communicate()
logger('Unpackaging ncbi-blast package'.format(blast_url))
Expand Down