Skip to content

Commit e50ca2d

Browse files
authored
Merge pull request #162 from HadrienG/dev
1.4.6
2 parents 6a9bca7 + 5c91fb8 commit e50ca2d

File tree

7 files changed

+211
-166
lines changed

7 files changed

+211
-166
lines changed

Pipfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ numpy = "*"
99
scipy = "*"
1010
biopython = "*"
1111
joblib = "*"
12-
pysam = "==0.15.3"
12+
pysam = "==0.15.4"
1313
requests = "*"
1414

1515
[dev-packages]

Pipfile.lock

Lines changed: 157 additions & 139 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

iss/download.py

Lines changed: 25 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -5,14 +5,23 @@
55
from Bio import Entrez
66

77
import io
8+
import sys
9+
import time
810
import zlib
911
import random
1012
import logging
1113

12-
1314
import requests
1415

1516

17+
class BadRequestError(Exception):
18+
"""Exception to raise when a http request does not return 200
19+
"""
20+
21+
def __init__(self, url, status_code):
22+
super().__init__('%s returned %d' % (url, status_code))
23+
24+
1625
def ncbi(kingdom, n_genomes, output):
1726
"""download random genomes sequences from ncbi genomes with entrez eutils
1827
and requests.
@@ -32,7 +41,6 @@ def ncbi(kingdom, n_genomes, output):
3241
'assembly',
3342
term='%s[Organism] AND "latest refseq"[filter] AND "complete genome"[filter]'
3443
% kingdom, retmax=100000))['IdList']
35-
genomes = []
3644
n = 0
3745
logger.info('Searching for %s to download' % kingdom)
3846
while n < n_genomes:
@@ -48,8 +56,16 @@ def ncbi(kingdom, n_genomes, output):
4856
genome_info['AssemblyAccession'],
4957
genome_info['AssemblyName'])
5058
logger.info('Downloading %s' % genome_info['AssemblyAccession'])
51-
assembly_to_fasta(url, output)
52-
n += 1
59+
try:
60+
assembly_to_fasta(url, output)
61+
except BadRequestError as e:
62+
logger.debug('Could not download %s' %
63+
genome_info['AssemblyAccession'])
64+
logger.debug('Skipping and waiting two seconds')
65+
time.sleep(2)
66+
else:
67+
n += 1
68+
full_id_list.remove(ident)
5369
return output
5470

5571

@@ -71,8 +87,11 @@ def assembly_to_fasta(url, output, chunk_size=1024):
7187
url = url.replace("ftp://", "https://")
7288
if url:
7389
request = requests.get(url)
74-
request = zlib.decompress(
75-
request.content, zlib.MAX_WBITS | 32).decode()
90+
if request.status_code == 200:
91+
request = zlib.decompress(
92+
request.content, zlib.MAX_WBITS | 32).decode()
93+
else:
94+
raise BadRequestError(url, request.status_code)
7695

7796
with io.StringIO(request) as fasta_io:
7897
seq_handle = SeqIO.parse(fasta_io, 'fasta')

iss/error_models/kde.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ class KDErrorModel(ErrorModel):
2929
- the substitution for each nucleotide at each position (for R1 and R2)
3030
- the insertion and deletion rates for each position (for R1 and R2)
3131
"""
32+
3233
def __init__(self, npz_path):
3334
super().__init__()
3435
self.npz_path = npz_path
@@ -74,7 +75,8 @@ def gen_phred_scores(self, cdfs, orientation):
7475
mean = self.mean_reverse
7576

7677
norm_mean = mean / sum(mean)
77-
quality_bin = np.searchsorted(norm_mean, np.random.rand())
78+
# quality_bin = np.searchsorted(norm_mean, np.random.rand())
79+
quality_bin = np.random.choice(range(len(norm_mean)), p=norm_mean)
7880
# downgrades index out of bound (ex rand is 1, last bin in searchsorted
7981
# is 0.95) to best quality bin
8082
if quality_bin == 4:

iss/generator.py

Lines changed: 23 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -62,21 +62,30 @@ def reads(record, ErrorModel, n_pairs, cpu_number, output, seed,
6262
# except ValueError as e:
6363
# logger.error('Skipping this record: %s' % record.id)
6464
# return
65-
forward, reverse = simulate_read(record, ErrorModel, i, cpu_number)
66-
if gc_bias:
67-
stiched_seq = forward.seq + reverse.seq
68-
gc_content = GC(stiched_seq)
69-
if 40 < gc_content < 60:
70-
read_tuple_list.append((forward, reverse))
71-
i += 1
72-
elif np.random.rand() < 0.90:
65+
try:
66+
forward, reverse = simulate_read(record, ErrorModel, i, cpu_number)
67+
except AssertionError as e:
68+
logger.warning(
69+
'%s shorter than read length for this ErrorModel' % record.id)
70+
logger.warning(
71+
'Skipping %s. You will have less reads than specified'
72+
% record.id)
73+
break
74+
else:
75+
if gc_bias:
76+
stiched_seq = forward.seq + reverse.seq
77+
gc_content = GC(stiched_seq)
78+
if 40 < gc_content < 60:
79+
read_tuple_list.append((forward, reverse))
80+
i += 1
81+
elif np.random.rand() < 0.90:
82+
read_tuple_list.append((forward, reverse))
83+
i += 1
84+
else:
85+
continue
86+
else:
7387
read_tuple_list.append((forward, reverse))
7488
i += 1
75-
else:
76-
continue
77-
else:
78-
read_tuple_list.append((forward, reverse))
79-
i += 1
8089

8190
temp_file_name = output + '.iss.tmp.%s.%s' % (record.id, cpu_number)
8291
to_fastq(read_tuple_list, temp_file_name)
@@ -112,10 +121,7 @@ def simulate_read(record, ErrorModel, i, cpu_number):
112121
forward_start = random.randrange(
113122
0, len(record.seq) - (2 * read_length + insert_size))
114123
except AssertionError as e:
115-
logger.error(
116-
'%s shorter than read length for this ErrorModel:%s'
117-
% (e, record.id))
118-
sys.exit(1)
124+
raise
119125
except ValueError as e:
120126
logger.debug(
121127
'%s shorter than template length for this ErrorModel:%s'

iss/test/test_generator.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,7 @@ def test_simulate_and_save_short():
6161
generator.reads(ref_genome, err_mod, 1000, 0, 'data/.test', 0, True)
6262

6363

64-
@raises(SystemExit)
64+
@raises(AssertionError)
6565
def test_small_input():
6666
err_mod = kde.KDErrorModel('data/ecoli.npz')
6767
ref_genome = SeqRecord(

iss/version.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
__version__ = '1.4.5'
1+
__version__ = '1.4.6'

0 commit comments

Comments
 (0)