-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathrun_hc.sh
executable file
·67 lines (48 loc) · 2.18 KB
/
run_hc.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
#!/bin/bash
# Thread budget that is divided among the haplocart jobs running at once.
total_threads=60
# Subsampling rates to process; each one names a subdirectory of BAM files.
declare -a rates=(0.25x 0.5x 1x 2x)
#######################################
# Run haplocart from the "corrected" vgan build on a single BAM file.
#
# The BAM is converted to FASTQ by samtools and streamed into haplocart on
# stdin. haplocart's stdout and stderr are appended to
# hc_results/<BAM basename without .bam>.corrected.log, relative to the
# current working directory (the vgan binary path is relative as well).
#
# Arguments:
#   $1 - path to the input .bam file
#   $2 - number of threads handed to haplocart via -t
#   $3 - subsampling rate label; accepted so the caller's argument layout
#        keeps working, but not used by this function
# Outputs:
#   A "Processing <file>" progress line on stderr.
# Returns:
#   Exit status of the haplocart stage, or non-zero if hc_results/ cannot
#   be created.
#
# NOTE(review): the log name does not include the rate, so BAM files that
# share a basename across rate directories append to the same log - confirm
# that this is intended.
#######################################
process_file_corrected() {
  local bam_file=$1
  local threads_per_job=$2
  local base_name

  # Base name of the BAM file (directory and .bam extension stripped).
  base_name=$(basename "$bam_file" .bam)

  # Log the initiation of the process for this BAM file to stderr.
  echo "Processing $bam_file" >&2

  # The append-redirect below fails before haplocart even starts when the
  # log directory is missing, so make sure it exists first.
  mkdir -p hc_results || return

  # Only the haplocart stage is redirected; samtools' stderr is left alone.
  samtools bam2fq "$bam_file" \
    | ./vgan_corrected/bin/vgan haplocart -np -t "$threads_per_job" -fq1 /dev/stdin \
      --hc-files /home/projects/MAAG/Magpie/Magpie/vgan_corrected/share/vgan/hcfiles \
      >> "hc_results/$base_name.corrected.log" 2>&1
}
#######################################
# Run haplocart from the "uncorrected" vgan build on a single BAM file.
#
# The BAM is converted to FASTQ by samtools and streamed into haplocart on
# stdin. haplocart's stdout and stderr are appended to
# hc_results/<BAM basename without .bam>.uncorrected.log, relative to the
# current working directory (the vgan binary path is relative as well).
#
# Arguments:
#   $1 - path to the input .bam file
#   $2 - number of threads handed to haplocart via -t
#   $3 - subsampling rate label; accepted so the caller's argument layout
#        keeps working, but not used by this function
# Outputs:
#   A "Processing <file>" progress line on stderr.
# Returns:
#   Exit status of the haplocart stage, or non-zero if hc_results/ cannot
#   be created.
#
# NOTE(review): the log name does not include the rate, so BAM files that
# share a basename across rate directories append to the same log - confirm
# that this is intended.
#######################################
process_file_uncorrected() {
  local bam_file=$1
  local threads_per_job=$2
  local base_name

  # Base name of the BAM file (directory and .bam extension stripped).
  base_name=$(basename "$bam_file" .bam)

  # Log the initiation of the process for this BAM file to stderr.
  echo "Processing $bam_file" >&2

  # The append-redirect below fails before haplocart even starts when the
  # log directory is missing, so make sure it exists first.
  mkdir -p hc_results || return

  # Only the haplocart stage is redirected; samtools' stderr is left alone.
  # NOTE(review): --hc-files points into the vgan_corrected tree although the
  # binary comes from vgan_uncorrected - confirm this is deliberate and not a
  # copy-paste leftover.
  samtools bam2fq "$bam_file" \
    | ./vgan_uncorrected/bin/vgan haplocart -np -t "$threads_per_job" -fq1 /dev/stdin \
      --hc-files /home/projects/MAAG/Magpie/Magpie/vgan_corrected/share/vgan/hcfiles \
      >> "hc_results/$base_name.uncorrected.log" 2>&1
}
# Make both worker functions visible to the shells spawned by GNU parallel
export -f process_file_corrected process_file_uncorrected
#######################################
# Run one exported worker function over every BAM file in a directory, one
# GNU parallel job per file, at the lowest scheduling priority.
#
# Globals:
#   total_threads (read) - thread budget split evenly across the jobs
# Arguments:
#   $1 - directory containing the *.bam files (a trailing slash is accepted)
#   $2 - subsampling rate label, forwarded to the worker as its third argument
#   $3 - name of the exported worker function
#        (default: process_file_corrected)
# Outputs:
#   A notice on stderr when the directory holds no BAM files.
# Returns:
#   0 when the directory is skipped because it has no BAM files; otherwise
#   the exit status of the nice/parallel invocation.
#######################################
run_haplocart_for_dir() {
  local dir=${1%/}
  local rate=$2
  local worker=${3:-process_file_corrected}
  local -a bam_files=()
  local bam num_files threads_per_job

  # Collect the files with a glob rather than parsing `ls`, so paths that
  # contain whitespace or glob characters stay intact.
  for bam in "$dir"/*.bam; do
    [[ -e "$bam" ]] || continue   # unmatched glob leaves the literal pattern
    bam_files+=("$bam")
  done
  num_files=${#bam_files[@]}

  # Without this guard an empty directory causes a division by zero below,
  # which aborts the enclosing loop and skips all remaining rates.
  if (( num_files == 0 )); then
    echo "No .bam files found in $dir; skipping rate $rate" >&2
    return 0
  fi

  # Split the thread budget evenly, but give every job at least one thread.
  threads_per_job=$(( total_threads / num_files ))
  (( threads_per_job >= 1 )) || threads_per_job=1

  # `nice -n 19` is the current spelling of the obsolete `nice -19`.
  nice -n 19 parallel --keep-order -j "$num_files" "$worker" \
    ::: "${bam_files[@]}" ::: "$threads_per_job" ::: "$rate"
}

# Loop over all subsampling rates
for rate in "${rates[@]}"; do
  run_haplocart_for_dir "/home/projects/MAAG/Magpie/Magpie/haplocart_ancient/subsampled_reps/$rate/" "$rate"
  #run_haplocart_for_dir "/home/projects/MAAG/Magpie/Magpie/haplocart_ancient/subsampled_reps/$rate/" "$rate" process_file_uncorrected
done