Skip to content

Commit

Permalink
Mask repeats with ska2
Browse files Browse the repository at this point in the history
  • Loading branch information
nickjcroucher committed Jun 22, 2023
1 parent 97f24cd commit 1da11b9
Show file tree
Hide file tree
Showing 2 changed files with 3 additions and 15 deletions.
3 changes: 1 addition & 2 deletions environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -34,5 +34,4 @@ dependencies:
  - raxml-ng=1.0.1
  - fasttree=2.1.10
  # Scripts
- - ska2
- - jellyfish>2.2
+ - ska2>=0.3.0
15 changes: 2 additions & 13 deletions python/scripts/generate_ska_alignment.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,25 +99,14 @@ def ska_map_sequences(seq, k = None, ref = None):
  shell = True)

  # Remove repetitive sequences
- if os.path.exists(args.reference):
- # Count k-mers
- subprocess.check_output('jellyfish count -L 2 -C -m ' + str(args.k + 1) + ' -o ' + args.out + \
- '_mer_counts.jf -c 3 -s 10000000 ' + args.reference,
- shell = True)
- # Extract repetitive k-mers
- subprocess.check_output('jellyfish dump -o ' + args.out + '.toweed ' + args.out + '_mer_counts.jf',
- shell = True)
- # Weed k-mers
- subprocess.check_output('ska weed ' + args.out + '.skf ' + args.out + '.toweed',
- shell = True)
- else:
+ if not os.path.exists(args.reference):
  sys.stderr.write('Reference file missing: ' + args.reference + '\n')
  sys.exit(1)

  # Run ska mapping
  if os.path.exists(args.out + '.skf'):
  tmp_aln = os.path.join(os.path.dirname(args.out), 'tmp.' + os.path.basename(args.out))
- subprocess.check_output('ska map -o ' + tmp_aln + ' --threads ' + str(args.threads) + ' ' + \
+ subprocess.check_output('ska map -o ' + tmp_aln + ' --threads ' + str(args.threads) + ' --repeat-mask ' + \
  args.reference + ' ' + args.out + '.skf',
  shell = True)
  else:
Expand Down

0 comments on commit 1da11b9

Please sign in to comment.