Skip to content

Commit e789327

Browse files
authored
Merge pull request #61 from jlanga/devel
1.5.0
2 parents 580bd9c + c6b993c commit e789327

28 files changed

+1625
-1120
lines changed

.travis.yml

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -59,13 +59,14 @@ script:
5959
--kmer 27
6060
--max-fp-bases 5
6161
--output-gfa test_exons_raw.gfa
62-
- gfa1_to_exons
62+
- gfa1_to_fasta
6363
--input-gfa test_exons_raw.gfa
6464
--output-fasta exons.fa
6565
--soft-mask-overlaps
66-
- gfa1_to_gapped_transcripts
66+
- gfa1_to_fasta
6767
--input-gfa test_exons_raw.gfa
6868
--output-fasta gapped_transcripts.fa
69+
--gapped-transcript
6970
--number-of-ns 100
7071
--hard-mask-overlaps
7172
- cat exons.fa

bin/build_baited_bloom_filter

Lines changed: 29 additions & 134 deletions
Original file line numberDiff line numberDiff line change
@@ -1,159 +1,54 @@
11
#!/usr/bin/env python3
22

3-
import argparse
3+
"""build_baited_bloom_filter: script to build a bloom filter from a set of
4+
(genomic) reads that are related to a fasta file (a transcriptome)"""
5+
46
import logging
5-
import sys
6-
from os.path import isfile, exists, dirname, abspath
7-
from os import \
8-
makedirs, \
9-
remove
7+
from os.path import exists, dirname, abspath
8+
from os import makedirs
109
from shutil import which
1110

12-
from exfi import __version__
13-
from exfi.build_baited_bloom_filter import build_baited_bloom_filter
14-
15-
parser = argparse.ArgumentParser(
16-
usage='build_baited_bloom_filter '
17-
'-i transcriptome.fa '
18-
'-o bloom_filter.bf '
19-
'-k 30 '
20-
'reads1.fq ... readsn.fq',
21-
description='Build a Bloom filter with reads that have at least one kmer '
22-
'in the transcriptome.',
23-
epilog='Jorge Langa. Send issues and pull requests to github.com/jlanga/'
24-
'exfi'
25-
)
26-
27-
parser.add_argument(
28-
'--version',
29-
action='version',
30-
version='%(prog)s {version}'.format(
31-
version=__version__
32-
)
33-
)
34-
35-
parser.add_argument(
36-
'--input-fasta', '-f',
37-
type=str,
38-
required=True,
39-
help='Input transcriptome in FASTA format',
40-
dest='fasta',
41-
metavar='FILE'
42-
)
43-
44-
parser.add_argument(
45-
'--kmer-size', '-k',
46-
type=int,
47-
required=False,
48-
help='The size of the k-mer [31]',
49-
dest='kmer',
50-
metavar='INT',
51-
default=31
52-
)
53-
54-
parser.add_argument(
55-
'--bloom-size', '-b',
56-
type=str,
57-
required=False,
58-
help="Size of the Bloom filter [500M]. This is the total size. The final Bloom filter will be"
59-
"size / levels.",
60-
dest="bloom_size",
61-
metavar='STR',
62-
default='500M'
63-
)
64-
65-
parser.add_argument(
66-
'--levels', '-l',
67-
type=int,
68-
required=False,
69-
help='Build a cascading bloom filter with N levels and '
70-
'output the last level [1]',
71-
dest='levels',
72-
metavar='INT',
73-
default=1
74-
)
11+
from exfi.arguments import build_baited_bloom_filter_args
12+
from exfi.logger import set_up_logger
7513

76-
parser.add_argument(
77-
'--threads', '-t',
78-
type=int,
79-
required=False,
80-
help='Number of threads to build Bloom filters and bait reads',
81-
dest='threads',
82-
metavar='INT',
83-
default=1
84-
)
85-
86-
parser.add_argument(
87-
'--output-bloom', '-o',
88-
type=str,
89-
required=True,
90-
help='Path to write the resulting Bloom filter',
91-
dest="bloom",
92-
metavar="FILE"
93-
)
94-
95-
parser.add_argument(
96-
metavar='reads',
97-
type=str,
98-
nargs='+',
99-
help='FASTA/Q files (gz or not)',
100-
dest='reads'
101-
)
102-
103-
parser.add_argument(
104-
"-v", "--verbose",
105-
action="store_true",
106-
dest="verbose",
107-
help="Increase output verbosity"
108-
)
109-
110-
parser.add_argument(
111-
"-d", "--debug",
112-
action="store_true",
113-
dest="debug",
114-
help="Log everything!"
115-
)
14+
from exfi.build_baited_bloom_filter import build_baited_bloom_filter
11615

11716
if __name__ == '__main__':
11817

119-
# Store arguments
120-
args = vars(parser.parse_args())
121-
args["fasta"] = abspath(args["fasta"])
122-
args["bloom"] = abspath(args["bloom"])
123-
args["threads"] = int(args["threads"]) # I don't know why it is parsed as tuple
18+
PARSER = build_baited_bloom_filter_args()
12419

125-
# Set up logger
126-
logger = logging.getLogger()
127-
logging.basicConfig(
128-
format='%(asctime)s\t%(module)s\t%(message)s',
129-
level=logging.ERROR
130-
)
131-
if args["verbose"]:
132-
logger.setLevel(logging.INFO)
133-
if args["debug"]:
134-
logger.setLevel(logging.DEBUG)
20+
# Store arguments
21+
ARGS = vars(PARSER.parse_args())
22+
ARGS["fasta"] = abspath(ARGS["fasta"])
23+
ARGS["bloom"] = abspath(ARGS["bloom"])
24+
ARGS["threads"] = int(ARGS["threads"]) # It is parsed as tuple
13525

26+
# Set up the logger
27+
LOGGER = set_up_logger(ARGS)
13628

13729
# Check inputs
13830
logging.info('Checking input parameters')
139-
assert args["kmer"] >= 1, 'ERROR: incorrect kmer size'
31+
assert ARGS["kmer"] >= 1, 'ERROR: incorrect kmer size'
14032
# assert bloom_size
141-
assert args["levels"] >= 1, 'ERROR: incorrect number of levels'
142-
assert args["threads"] >= 1, 'ERROR: incorrect number of threads'
33+
assert ARGS["levels"] >= 1, 'ERROR: incorrect number of levels'
34+
assert ARGS["threads"] >= 1, 'ERROR: incorrect number of threads'
14335

14436
# Check if programs are in path
14537
logging.info('Checking if biobloom* and abyss-bloom are in $PATH')
146-
assert which('biobloommaker') is not None, 'ERROR: biobloommaker not in PATH'
147-
assert which('biobloomcategorizer') is not None, 'ERROR: biobloomcategorizer not in PATH'
148-
assert which('abyss-bloom') is not None, 'ERROR: abyss-bloom not in PATH'
38+
assert which('biobloommaker') is not None, \
39+
'ERROR: biobloommaker not in PATH'
40+
assert which('biobloomcategorizer') is not None, \
41+
'ERROR: biobloomcategorizer not in PATH'
42+
assert which('abyss-bloom') is not None, \
43+
'ERROR: abyss-bloom not in PATH'
14944

15045
# Create output folder if it doesn't exist
151-
output_dir = dirname(args["bloom"])
152-
if output_dir != "" and not exists(output_dir):
153-
makedirs(output_dir)
46+
OUTPUT_DIR = dirname(ARGS["bloom"])
47+
if OUTPUT_DIR != "" and not exists(OUTPUT_DIR):
48+
makedirs(OUTPUT_DIR)
15449

15550
# Run the program
15651
logging.info('Running build_baited_bloom_filter')
157-
build_baited_bloom_filter(args)
52+
build_baited_bloom_filter(ARGS)
15853

15954
logging.info("Done!")

0 commit comments

Comments
 (0)