From a8a8aba3ec1ff5e1066962d1a1a3a7315836d0ac Mon Sep 17 00:00:00 2001
From: Antonio Gonzalez
Date: Wed, 2 Oct 2024 14:13:42 -0600
Subject: [PATCH] adding fastq_pair

---
Review notes (text between the "---" line and the diffstat is not part of
the commit message):

* What the patch does: bowtie2's `--un` output is redirected from reads/ to
  reads/uneven/ (now created by `mkdir -p reads/uneven sams`), and the merge
  step gains a `while read -r fwd rev` loop over finish_sample_details.txt
  whose output is piped to `parallel`; the loop echoes a
  `fastq_pair -t 50000000` command per fwd/rev pair and is followed by
  mv/gzip of the resulting *.paired.fq files back into reads/.
* Fix applied in review: in qp_woltka/woltka.py the added element
  f'parallel -j {PPN}' had no trailing comma, so Python's implicit
  string-literal concatenation merged it with the next list element
  (' for f in `ls sams/fwd_*`;'), putting the pipe to `parallel` and the
  start of the `for` loop in one string. The comma is now present, which
  agrees with the test hunk, where '... | parallel -j 8\n' ends before the
  'for f in ...' element. Hunk line counts and the diffstat are unchanged.
* NOTE(review): only the `echo fastq_pair ...` text up to the first `;` is
  written to the pipe; the `mv` and `gzip` commands after it are separate
  statements of the loop body, so they look like they run immediately in
  the loop rather than under `parallel` after fastq_pair - confirm whether
  the whole command line was meant to be quoted.
* NOTE(review): runs of whitespace appear to have been collapsed in this
  copy (every diff line carries a single space after its marker);
  regenerate the patch from the commit before applying - TODO confirm.

 qp_woltka/tests/test_woltka.py | 11 +++++++++--
 qp_woltka/woltka.py            | 12 ++++++++++--
 2 files changed, 19 insertions(+), 4 deletions(-)

diff --git a/qp_woltka/tests/test_woltka.py b/qp_woltka/tests/test_woltka.py
index 65df7ad..8949a08 100644
--- a/qp_woltka/tests/test_woltka.py
+++ b/qp_woltka/tests/test_woltka.py
@@ -456,7 +456,7 @@ def test_woltka_syndna_to_array(self):
 f'#SBATCH --error {out_dir}/{job_id}_%a.err\n',
 '#SBATCH --array 1-1%8\n',
 f'cd {out_dir}\n',
- 'mkdir -p reads sams\n',
+ 'mkdir -p reads/uneven sams\n',
 f'{self.environment}\n',
 'date\n',
 'hostname\n',
@@ -470,7 +470,8 @@ def test_woltka_syndna_to_array(self):
 f' bowtie2 -p 8 -x {database} -q '
 '${f} -S $PWD/sams/${sn}.sam --seed 42 --very-sensitive -k 16 '
 '--np 1 --mp "1,1" --rdg "0,1" --rfg "0,1" --score-min '
- '"L,0,-0.05" --no-head --no-unal --un $PWD/reads/${fn/.gz/}\n',
+ '"L,0,-0.05" --no-head --no-unal --un '
+ '$PWD/reads/uneven/${fn/.gz/}\n',
 ' done < sample_details_${SLURM_ARRAY_TASK_ID}.txt\n',
 'date']
 self.assertEqual(main, exp_main)
@@ -495,6 +496,12 @@ def test_woltka_syndna_to_array(self):
 'sjobs=`ls sams/*.sam | wc -l`\n',
 'if [[ $sruns -eq $sjobs ]]; then\n',
 ' mkdir -p sams/final\n',
+ ' while read -r fwd rev; do \n'
+ ' echo fastq_pair -t 50000000 reads/uneven/${fwd} ',
+ 'reads/uneven/${rev}; mv reads/uneven/${fwd}.paired.fq ',
+ 'reads/${fwd}; mv reads/uneven/${rev}.paired.fq reads/${rev};',
+ 'gzip reads/${fwd} reads/${rev}\n done < ',
+ 'finish_sample_details.txt | parallel -j 8\n',
 ' for f in `ls sams/fwd_*`;\n',
 ' do\n',
 ' fn=`basename $f`;\n',
diff --git a/qp_woltka/woltka.py b/qp_woltka/woltka.py
index 14203f5..e9b206f 100644
--- a/qp_woltka/woltka.py
+++ b/qp_woltka/woltka.py
@@ -425,7 +425,7 @@ def woltka_syndna_to_array(files, output, database_bowtie2, prep, url, name):
 '-q ${f} -S $PWD/sams/${sn}.sam ' +\
 '--seed 42 --very-sensitive -k 16 --np 1 --mp "1,1" ' + \
 '--rdg "0,1" --rfg "0,1" --score-min "L,0,-0.05" ' + \
- '--no-head --no-unal --un $PWD/reads/${fn/.gz/}'
+ '--no-head --no-unal --un $PWD/reads/uneven/${fn/.gz/}'
 
 # all the setup pieces
 lines = ['#!/bin/bash',
@@ -440,7 +440,7 @@ def woltka_syndna_to_array(files, output, database_bowtie2, prep, url, name):
 f'#SBATCH --error {output}/{name}_%a.err',
 f'#SBATCH --array 1-{n_files}%{MAX_RUNNING}',
 f'cd {output}',
- 'mkdir -p reads sams',
+ 'mkdir -p reads/uneven sams',
 f'{environment}',
 'date', # start time
 'hostname', # executing system
@@ -482,6 +482,14 @@ def woltka_syndna_to_array(files, output, database_bowtie2, prep, url, name):
 'sjobs=`ls sams/*.sam | wc -l`',
 'if [[ $sruns -eq $sjobs ]]; then',
 ' mkdir -p sams/final',
+ ' while read -r fwd rev; do ',
+ ' echo fastq_pair -t 50000000 reads/uneven/${fwd} '
+ 'reads/uneven/${rev}; '
+ 'mv reads/uneven/${fwd}.paired.fq reads/${fwd};'
+ 'mv reads/uneven/${rev}.paired.fq reads/${rev};'
+ 'gzip reads/${fwd} reads/${rev}',
+ ' done < finish_sample_details.txt | '
+ f'parallel -j {PPN}',
 ' for f in `ls sams/fwd_*`;',
 ' do',
 ' fn=`basename $f`;',