@@ -85,7 +85,7 @@ def generate_reads(args):
85
85
if args .draft :
86
86
logger .warning ('--draft is in early experimental stage.' )
87
87
logger .warning (
88
- '--draft disables --abundance_file and --coverage ' )
88
+ 'disabling --abundance_file, --coverage and --n_genomes ' )
89
89
logger .warning ('Defaulting to --abundance.' )
90
90
genome_files .extend (args .draft )
91
91
if args .ncbi and args .n_genomes_ncbi :
@@ -124,13 +124,22 @@ def generate_reads(args):
124
124
genome_files ,
125
125
output = genome_file )
126
126
127
+ # for n_genomes we use reservoir sampling to draw random genomes
128
+ # from the concatenated genome file. We then override the file.
129
+ if args .n_genomes and not args .draft and not args .ncbi :
130
+ genome_count = util .count_records (genome_file )
131
+ genome_files = [genome for genome in util .reservoir (
132
+ SeqIO .parse (genome_file , 'fasta' ),
133
+ genome_count ,
134
+ args .n_genomes )]
135
+ SeqIO .write (genome_files , genome_file , 'fasta' )
136
+
127
137
assert os .stat (genome_file ).st_size != 0
128
138
f = open (genome_file , 'r' )
129
139
with f : # count the number of records
130
140
genome_list = util .count_records (f )
131
141
except IOError as e :
132
142
logger .error ('Failed to open genome(s) file:%s' % e )
133
- raise
134
143
sys .exit (1 )
135
144
except AssertionError as e :
136
145
logger .error ('Genome(s) file seems empty: %s' % genome_file )
@@ -182,11 +191,8 @@ def generate_reads(args):
182
191
f = open (genome_file , 'r' ) # re-opens the file
183
192
with f :
184
193
fasta_file = SeqIO .parse (f , 'fasta' )
185
- if args .n_genomes and not args .ncbi :
186
- n = args .n_genomes [0 ][0 ]
187
- else :
188
- n = None
189
- for record in util .reservoir (fasta_file , genome_list , n ):
194
+
195
+ for record in fasta_file :
190
196
# generate reads for records
191
197
try :
192
198
species_abundance = abundance_dic [record .id ]
@@ -391,7 +397,6 @@ def main():
391
397
'--n_genomes' ,
392
398
'-u' ,
393
399
type = int ,
394
- action = 'append' ,
395
400
metavar = '<int>' ,
396
401
help = 'How many genomes will be used for the simulation. is set with \
397
402
--genomes/-g or/and --draft to take random genomes from the \
@@ -545,4 +550,4 @@ def main():
545
550
logger = logging .getLogger (__name__ )
546
551
logger .debug (e )
547
552
parser .print_help ()
548
- # raise # extra traceback to uncomment if all hell breaks lose
553
+ raise # extra traceback to uncomment if all hell breaks lose
0 commit comments