Skip to content

Commit

Permalink
add failed_reads
Browse files (browse the repository at this point in the history)
  • Loading branch information
antgonza committed Oct 3, 2024
1 parent edc2eb7 commit 2db838f
Show file tree
Hide file tree
Showing 2 changed files with 19 additions and 11 deletions.
10 changes: 5 additions & 5 deletions qp_woltka/tests/test_woltka.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -496,11 +496,11 @@ def test_woltka_syndna_to_array(self):
'sjobs=`ls sams/*.sam | wc -l`\n',
'if [[ $sruns -eq $sjobs ]]; then\n',
' mkdir -p sams/final\n',
' while read -r fwd rev; do \n'
' echo "fastq_pair -t 50000000 reads/uneven/${fwd} '
'reads/uneven/${rev}; mv reads/uneven/${fwd}.paired.fq '
'reads/${fwd}; mv reads/uneven/${rev}.paired.fq reads/${rev}; '
'gzip reads/${fwd} reads/${rev}";\n done < ',
' while read -r fwd rev; do echo "fastq_pair -t 50000000 '
'reads/uneven/${fwd} reads/uneven/${rev}; mv '
'reads/uneven/${fwd}.paired.fq reads/${fwd}; '
'mv reads/uneven/${rev}.paired.fq reads/${rev}; '
'gzip reads/${fwd} reads/${rev}"; done < '
'finish_sample_details.txt | parallel -j 8\n',
' for f in `ls sams/fwd_*`;\n',
' do\n',
Expand Down
20 changes: 14 additions & 6 deletions qp_woltka/woltka.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -110,15 +110,24 @@ def woltka_to_array(files, output, database_bowtie2, prep, url, name):
'to qiita.help@gmail.com')

lines = ['filename_1\trecord_count']
failed_reads = []
if rev:
lines = ['filename_1\tfilename_2\trecord_count']
for k, (fn, reads) in fwd.items():
line = f'{dname}/{fn}\t'
if k in rev:
rfn = rev.pop(k)[0]
rfn, rreads = rev.pop(k)
if int(rreads) != int(reads):
failed_reads.append(f'{basename(fn)} {basename(rfn)}')
line += f'{dname}/{rfn}\t'
line += f'{reads}'
lines.append(line)
if failed_reads:
failed_reads = '\n'.join(failed_reads)
raise ValueError(
'Some of the fwd/rev do not have the same number of reads; are '
'you using an artifact created with a newer command?\n\n'
f'Failed files:\n {failed_reads}')
files_list_fp = f'{output}/files_list.tsv'
with open(files_list_fp, 'w') as fp:
fp.write('\n'.join(lines))
Expand Down Expand Up @@ -482,9 +491,8 @@ def woltka_syndna_to_array(files, output, database_bowtie2, prep, url, name):
'sjobs=`ls sams/*.sam | wc -l`',
'if [[ $sruns -eq $sjobs ]]; then',
' mkdir -p sams/final',
' while read -r fwd rev; do '
' echo "fastq_pair -t 50000000 reads/uneven/${fwd} '
'reads/uneven/${rev}; '
' while read -r fwd rev; do echo "fastq_pair -t 50000000 '
'reads/uneven/${fwd} reads/uneven/${rev}; '
'mv reads/uneven/${fwd}.paired.fq reads/${fwd}; '
'mv reads/uneven/${rev}.paired.fq reads/${rev}; '
'gzip reads/${fwd} reads/${rev}"; done < '
Expand Down Expand Up @@ -581,9 +589,9 @@ def woltka_syndna(qclient, job_id, parameters, out_dir):
# resetting ainfo
ainfo = []
elif fwd is not None:
reads.append((f'{fp_seqs}/{f}', 'raw_forward_seqs'))
reads.append((f, 'raw_forward_seqs'))
else:
reads.append((f'{fp_seqs}/{f}', 'raw_reverse_seqs'))
reads.append((f, 'raw_reverse_seqs'))

if not errors:
ainfo.append(
Expand Down

0 comments on commit 2db838f

Please sign in to comment.