|
#!/usr/bin/env bash
#
# Paired-end read pipeline: for every sample found in INPUT_DIR this script
# runs fastp, FastQC and Unicycler, then runs MultiQC over the FastQC output.

# Strict error handling: abort on unhandled errors (-e), on use of unset
# variables (-u), and when any stage of a pipeline fails (pipefail).
set -euo pipefail
# Split words on newline and tab only, so paths containing spaces stay intact.
IFS=$'\n\t'

# Default directories. A value already present in the environment takes
# precedence over the built-in fallback; the -i / -o options override both.
INPUT_DIR="${INPUT_DIR:-/home/ramsivakumar/nextflow_conversion/fastq}"
OUTPUT_DIR="${OUTPUT_DIR:-/home/ramsivakumar/nextflow_conversion/test_out_bash}"
| 10 | + |
#######################################
# Print the command-line help text and terminate the script.
# Globals:   INPUT_DIR, OUTPUT_DIR (read; shown as the current defaults)
# Arguments: $1 - exit status to terminate with (optional, default 1)
# Outputs:   help text on stdout when the status is 0, on stderr otherwise
# Returns:   never returns; exits with the requested status
#######################################
usage() {
  local status="${1:-1}"

  # Help shown because of a usage error is a diagnostic and belongs on
  # stderr; help that was requested explicitly (status 0) goes to stdout.
  local fd=1
  if [[ "$status" != "0" ]]; then
    fd=2
  fi

  {
    echo "Usage: $0 [-i INPUT_DIR] [-o OUTPUT_DIR] [-h]"
    echo "Options:"
    echo "  -i INPUT_DIR   Directory containing input FASTQ files (default: $INPUT_DIR)"
    echo "  -o OUTPUT_DIR  Output directory for results (default: $OUTPUT_DIR)"
    echo "  -h             Display this help message"
  } >&"$fd"

  exit "$status"
}
| 20 | + |
#######################################
# Verify that every external tool the pipeline invokes can be resolved.
# Globals:   none
# Arguments: none
# Outputs:   on failure, the list of missing tools on stderr
# Returns:   0 when all tools are available; otherwise exits the script
#            with status 1
#######################################
check_dependencies() {
  local -a required_tools=("fastp" "fastqc" "multiqc" "unicycler")
  local -a missing_tools=()
  local tool  # keep the loop variable out of the global scope

  for tool in "${required_tools[@]}"; do
    if ! command -v "$tool" > /dev/null 2>&1; then
      missing_tools+=("$tool")
    fi
  done

  # Report every missing tool at once rather than stopping at the first.
  if (( ${#missing_tools[@]} > 0 )); then
    {
      echo "Error: The following required tools are missing:"
      printf '%s\n' "${missing_tools[@]}"
    } >&2
    exit 1
  fi
}
| 38 | + |
#######################################
# Create the per-tool result directories underneath OUTPUT_DIR.
# Globals:   OUTPUT_DIR (read)
# Arguments: none
# Outputs:   an error message on stderr if a directory cannot be created
# Returns:   0 on success, 1 if any directory could not be created
#######################################
create_output_dirs() {
  local -a dirs=("fastp_output" "fastqc_output" "unicycler_output" "multiqc_output")
  local dir  # keep the loop variable out of the global scope

  for dir in "${dirs[@]}"; do
    # `--` keeps an OUTPUT_DIR that starts with '-' from being read as an option.
    if ! mkdir -p -- "$OUTPUT_DIR/$dir"; then
      echo "Error: Could not create output directory $OUTPUT_DIR/$dir" >&2
      return 1
    fi
  done
}
| 46 | + |
#######################################
# Run fastp, FastQC and Unicycler, in that order, for one paired-end sample.
# Globals:   INPUT_DIR (read), OUTPUT_DIR (read),
#            PIPELINE_THREADS (read, optional; thread count, default 8)
# Arguments: $1 - sample name; the reads are expected at
#                 INPUT_DIR/<name>_R1_001.fastq.gz and
#                 INPUT_DIR/<name>_R2_001.fastq.gz
# Outputs:   progress messages on stdout, error messages on stderr
# Returns:   0 when all three steps succeed, 1 as soon as anything fails
#######################################
process_sample() {
  local sample_name="${1:-}"
  local threads="${PIPELINE_THREADS:-8}"
  local file  # keep the loop variable out of the global scope

  if [[ -z "$sample_name" ]]; then
    echo "Error: process_sample requires a sample name" >&2
    return 1
  fi
  if [[ ! "$threads" =~ ^[1-9][0-9]*$ ]]; then
    echo "Error: Invalid thread count '$threads'" >&2
    return 1
  fi

  local fastq_file_r1="$INPUT_DIR/${sample_name}_R1_001.fastq.gz"
  local fastq_file_r2="$INPUT_DIR/${sample_name}_R2_001.fastq.gz"

  # Check if input files exist
  for file in "$fastq_file_r1" "$fastq_file_r2"; do
    if [[ ! -f "$file" ]]; then
      echo "Error: Input file $file not found" >&2
      return 1
    fi
  done

  echo "Processing sample: $sample_name"

  # Step 1: Run fastp with error handling
  local fastp_output_dir="$OUTPUT_DIR/fastp_output"
  local fastp_output_r1="$fastp_output_dir/${sample_name}_R1.fastp.fastq"
  local fastp_output_r2="$fastp_output_dir/${sample_name}_R2.fastp.fastq"
  local fastp_json="$fastp_output_dir/${sample_name}.json"
  local fastp_html="$fastp_output_dir/${sample_name}.html"

  # When this function is called from a condition (`if ! process_sample ...`),
  # `set -e` is suspended inside it, so each mkdir is checked explicitly.
  if ! mkdir -p -- "$fastp_output_dir"; then
    echo "Error: Could not create $fastp_output_dir" >&2
    return 1
  fi

  if ! fastp -i "$fastq_file_r1" -I "$fastq_file_r2" \
    -o "$fastp_output_r1" -O "$fastp_output_r2" \
    -j "$fastp_json" -h "$fastp_html" \
    --detect_adapter_for_pe \
    --thread "$threads"; then
    echo "Error: fastp processing failed for $sample_name" >&2
    return 1
  fi

  # Step 2: Run FastQC on the fastp output with error handling
  local fastqc_output_dir="$OUTPUT_DIR/fastqc_output/${sample_name}_fastqc"
  if ! mkdir -p -- "$fastqc_output_dir"; then
    echo "Error: Could not create $fastqc_output_dir" >&2
    return 1
  fi

  if ! fastqc "$fastp_output_r1" "$fastp_output_r2" \
    -o "$fastqc_output_dir" \
    -t "$threads"; then
    echo "Error: FastQC analysis failed for $sample_name" >&2
    return 1
  fi

  # Step 3: Run Unicycler on the fastp output with error handling
  local unicycler_output_dir="$OUTPUT_DIR/unicycler_output/${sample_name}_unicycler"
  if ! mkdir -p -- "$unicycler_output_dir"; then
    echo "Error: Could not create $unicycler_output_dir" >&2
    return 1
  fi

  if ! unicycler -1 "$fastp_output_r1" -2 "$fastp_output_r2" \
    -o "$unicycler_output_dir" \
    --threads "$threads"; then
    echo "Error: Unicycler assembly failed for $sample_name" >&2
    return 1
  fi

  echo "Successfully processed sample: $sample_name"
  return 0
}
| 103 | + |
# Parse command line arguments: -i and -o replace the default directories.
while getopts "i:o:h" opt; do
  case "$opt" in
    i) INPUT_DIR="$OPTARG" ;;
    o) OUTPUT_DIR="$OPTARG" ;;
    # Help was requested explicitly, so ask usage for a zero exit status.
    h) usage 0 ;;
    # getopts sets opt to a literal '?' for an unknown option or a missing
    # argument; the backslash stops the pattern from acting as a glob.
    \?) usage ;;
    *) usage ;;
  esac
done
shift $((OPTIND - 1))

# The script takes no positional arguments; point out any that were given
# instead of dropping them without a trace.
if (( $# > 0 )); then
  echo "Warning: Ignoring unexpected arguments: $*" >&2
fi
| 113 | + |
# Validate the input directory: it must already exist.
if [[ ! -d "$INPUT_DIR" ]]; then
  echo "Error: Input directory $INPUT_DIR does not exist" >&2
  exit 1
fi

# Validate the output directory: an empty value (e.g. from -o "") would make
# the result directories land directly under the filesystem root.
if [[ -z "$OUTPUT_DIR" ]]; then
  echo "Error: Output directory must not be empty" >&2
  exit 1
fi

# Check dependencies
check_dependencies

# Create output directories
create_output_dirs
| 125 | + |
# Process all samples
echo "Starting pipeline execution..."
failed_samples=()
sample_count=0

# Every *_R1_001.fastq.gz file in INPUT_DIR identifies one sample; the sample
# name is the file name with that suffix removed.
for fastq_file_r1 in "$INPUT_DIR"/*_R1_001.fastq.gz; do
  # An unmatched glob is left as the literal pattern, which is not a file.
  [[ -f "$fastq_file_r1" ]] || continue

  sample_name="${fastq_file_r1##*/}"
  sample_name="${sample_name%_R1_001.fastq.gz}"
  sample_count=$((sample_count + 1))

  # Remember failures and keep going so one bad sample does not stop the rest.
  if ! process_sample "$sample_name"; then
    failed_samples+=("$sample_name")
  fi
done

# Without this check an input directory holding no matching reads would fall
# through to the success summary with nothing processed.
if (( sample_count == 0 )); then
  echo "Error: No *_R1_001.fastq.gz files found in $INPUT_DIR" >&2
  exit 1
fi
| 139 | + |
# Run MultiQC on all FastQC results. A MultiQC failure is only a warning and
# does not change the exit status of the pipeline.
echo "Running MultiQC on all results..."
multiqc_output_dir="$OUTPUT_DIR/multiqc_output"
if ! multiqc "$OUTPUT_DIR/fastqc_output" -o "$multiqc_output_dir"; then
  echo "Warning: MultiQC analysis failed" >&2
fi

# Print summary: exit non-zero when at least one sample failed.
echo "Pipeline execution completed"
if (( ${#failed_samples[@]} == 0 )); then
  echo "All samples processed successfully"
else
  {
    echo "The following samples failed:"
    printf '%s\n' "${failed_samples[@]}"
  } >&2
  exit 1
fi
0 commit comments