-
Notifications
You must be signed in to change notification settings - Fork 14
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
9db0c7b
commit ba02a97
Showing
1 changed file
with
155 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,155 @@ | ||
#!/bin/bash

# Strict mode: abort on an unhandled command failure (errexit), on use of an
# unset variable (nounset), and when any stage of a pipeline fails (pipefail).
set -o errexit -o nounset -o pipefail
# Split words on newline and tab only, never on spaces.
IFS=$'\n\t'

# Default locations; the -i and -o command-line options replace these values.
INPUT_DIR='/home/ramsivakumar/nextflow_conversion/fastq'
OUTPUT_DIR='/home/ramsivakumar/nextflow_conversion/test_out_bash'
|
||
#######################################
# Print the command-line help text and terminate the script.
# Globals:
#   INPUT_DIR, OUTPUT_DIR (read) - shown as the current default values.
# Arguments:
#   $1 - exit status to terminate with (optional, default 1).
# Outputs:
#   Help text on stdout when the status is 0 (help was requested),
#   otherwise on stderr (the text accompanies a usage error).
# Returns:
#   Never returns; exits with the requested status.
#######################################
usage() {
  local status="${1:-1}"
  local fd=2

  # Requested help is normal output; anything else is a diagnostic.
  if [[ "$status" == "0" ]]; then
    fd=1
  fi

  {
    echo "Usage: $0 [-i INPUT_DIR] [-o OUTPUT_DIR] [-h]"
    echo "Options:"
    echo " -i INPUT_DIR Directory containing input FASTQ files (default: $INPUT_DIR)"
    echo " -o OUTPUT_DIR Output directory for results (default: $OUTPUT_DIR)"
    echo " -h Display this help message"
  } >&"$fd"

  exit "$status"
}
|
||
#######################################
# Verify that every required external tool can be found by the shell.
# Arguments:
#   $@ - tool names to look for (optional). With no arguments the list is
#        fastp, fastqc, multiqc and unicycler.
# Outputs:
#   On failure, an error header followed by one missing tool per line,
#   written to stderr.
# Returns:
#   0 when all tools are found; otherwise exits the script with status 1.
#######################################
check_dependencies() {
  local -a required_tools=("fastp" "fastqc" "multiqc" "unicycler")
  local -a missing_tools=()
  local tool

  if (( $# > 0 )); then
    required_tools=("$@")
  fi

  for tool in "${required_tools[@]}"; do
    if ! command -v -- "$tool" > /dev/null 2>&1; then
      missing_tools+=("$tool")
    fi
  done

  if (( ${#missing_tools[@]} != 0 )); then
    {
      echo "Error: The following required tools are missing:"
      printf '%s\n' "${missing_tools[@]}"
    } >&2
    exit 1
  fi
}
|
||
#######################################
# Create the per-stage result directories underneath OUTPUT_DIR.
# Globals:
#   OUTPUT_DIR (read) - parent directory; must be set and non-empty.
# Arguments:
#   None.
# Returns:
#   0 when every directory exists afterwards, 1 if a directory could not be
#   created. Aborts the shell if OUTPUT_DIR is unset or empty.
#######################################
create_output_dirs() {
  local -a dirs=("fastp_output" "fastqc_output" "unicycler_output" "multiqc_output")
  local dir

  for dir in "${dirs[@]}"; do
    # ":?" refuses an empty OUTPUT_DIR, which would otherwise resolve to "/<dir>".
    mkdir -p -- "${OUTPUT_DIR:?OUTPUT_DIR must not be empty}/$dir" || return 1
  done
}
|
||
#######################################
# Run fastp, FastQC and Unicycler, in that order, on one paired-end sample.
# Globals:
#   INPUT_DIR  (read) - directory holding <sample>_R1_001.fastq.gz and
#                       <sample>_R2_001.fastq.gz.
#   OUTPUT_DIR (read) - parent of the fastp_output, fastqc_output and
#                       unicycler_output directories.
# Arguments:
#   $1 - sample name (the file name without the _R1_001.fastq.gz suffix).
#   $2 - thread count handed to each tool (optional, default 8).
# Outputs:
#   Progress messages on stdout; error messages on stderr.
# Returns:
#   0 when all three steps succeed; 1 as soon as an input file is missing,
#   a directory cannot be created, or a tool fails. Later steps are skipped.
#######################################
process_sample() {
  local sample_name="$1"
  local threads="${2:-8}"
  local fastq_file_r1="$INPUT_DIR/${sample_name}_R1_001.fastq.gz"
  local fastq_file_r2="$INPUT_DIR/${sample_name}_R2_001.fastq.gz"
  local file

  # Both mates must be present before any tool is started.
  for file in "$fastq_file_r1" "$fastq_file_r2"; do
    if [[ ! -f "$file" ]]; then
      echo "Error: Input file $file not found" >&2
      return 1
    fi
  done

  echo "Processing sample: $sample_name"

  # Step 1: fastp on the raw reads.
  local fastp_output_r1="$OUTPUT_DIR/fastp_output/${sample_name}_R1.fastp.fastq"
  local fastp_output_r2="$OUTPUT_DIR/fastp_output/${sample_name}_R2.fastp.fastq"
  local fastp_json="$OUTPUT_DIR/fastp_output/${sample_name}.json"
  local fastp_html="$OUTPUT_DIR/fastp_output/${sample_name}.html"

  if ! fastp -i "$fastq_file_r1" -I "$fastq_file_r2" \
    -o "$fastp_output_r1" -O "$fastp_output_r2" \
    -j "$fastp_json" -h "$fastp_html" \
    --detect_adapter_for_pe \
    --thread "$threads"; then
    echo "Error: fastp processing failed for $sample_name" >&2
    return 1
  fi

  # Step 2: FastQC on the fastp output, in a per-sample directory.
  local fastqc_output_dir="$OUTPUT_DIR/fastqc_output/${sample_name}_fastqc"
  if ! mkdir -p -- "$fastqc_output_dir"; then
    echo "Error: Could not create $fastqc_output_dir" >&2
    return 1
  fi

  if ! fastqc "$fastp_output_r1" "$fastp_output_r2" \
    -o "$fastqc_output_dir" \
    -t "$threads"; then
    echo "Error: FastQC analysis failed for $sample_name" >&2
    return 1
  fi

  # Step 3: Unicycler on the fastp output, in a per-sample directory.
  local unicycler_output_dir="$OUTPUT_DIR/unicycler_output/${sample_name}_unicycler"
  if ! mkdir -p -- "$unicycler_output_dir"; then
    echo "Error: Could not create $unicycler_output_dir" >&2
    return 1
  fi

  if ! unicycler -1 "$fastp_output_r1" -2 "$fastp_output_r2" \
    -o "$unicycler_output_dir" \
    --threads "$threads"; then
    echo "Error: Unicycler assembly failed for $sample_name" >&2
    return 1
  fi

  echo "Successfully processed sample: $sample_name"
  return 0
}
|
||
#######################################
# Parse command line arguments.
# Globals:
#   INPUT_DIR  (written) - set from -i.
#   OUTPUT_DIR (written) - set from -o.
# Arguments:
#   $@ - the script's command-line arguments.
# Returns:
#   0 after all options are consumed. -h and any invalid option hand
#   control to usage, which is expected to terminate the script.
#######################################
parse_args() {
  # Keep getopts state out of the global scope and start from the first word.
  local opt
  local OPTIND=1

  while getopts "i:o:h" opt; do
    case "$opt" in
      i) INPUT_DIR="$OPTARG" ;;
      o) OUTPUT_DIR="$OPTARG" ;;
      # Help was asked for explicitly, so request exit status 0 from usage.
      h) usage 0 ;;
      # getopts reports unknown options and missing arguments as "?";
      # "*" is a true catch-all rather than a one-character glob.
      *) usage ;;
    esac
  done
}

parse_args "$@"
|
||
# Validate input and output directories before any work is started.
# Diagnostics go to stderr so they are not mixed into normal output.
if [[ ! -d "$INPUT_DIR" ]]; then
  echo "Error: Input directory $INPUT_DIR does not exist" >&2
  exit 1
fi

# An empty output path would make the result directories resolve under "/".
if [[ -z "$OUTPUT_DIR" ]]; then
  echo "Error: Output directory must not be empty" >&2
  exit 1
fi

# The output directory may be absent (it is created below), but an existing
# non-directory at that path can never hold the results.
if [[ -e "$OUTPUT_DIR" && ! -d "$OUTPUT_DIR" ]]; then
  echo "Error: Output path $OUTPUT_DIR exists but is not a directory" >&2
  exit 1
fi

# Check dependencies
check_dependencies

# Create output directories
create_output_dirs
|
||
# Process all samples: every *_R1_001.fastq.gz file in INPUT_DIR names one
# sample. Failures are collected so the remaining samples still run.
echo "Starting pipeline execution..."
failed_samples=()
sample_count=0

for fastq_file_r1 in "$INPUT_DIR"/*_R1_001.fastq.gz; do
  # With no match the glob stays literal; the -f test filters that out.
  [[ -f "$fastq_file_r1" ]] || continue

  # Strip the directory and the read-1 suffix to get the sample name.
  sample_name="${fastq_file_r1##*/}"
  sample_name="${sample_name%_R1_001.fastq.gz}"
  sample_count=$((sample_count + 1))

  if ! process_sample "$sample_name"; then
    failed_samples+=("$sample_name")
  fi
done

# Nothing to process is an error, not a successful run.
if (( sample_count == 0 )); then
  echo "Error: No *_R1_001.fastq.gz files found in $INPUT_DIR" >&2
  exit 1
fi

# Run MultiQC on all FastQC results. A MultiQC failure is reported but does
# not change the exit status of the pipeline.
echo "Running MultiQC on all results..."
multiqc_output_dir="$OUTPUT_DIR/multiqc_output"
if ! multiqc "$OUTPUT_DIR/fastqc_output" -o "$multiqc_output_dir"; then
  echo "Warning: MultiQC analysis failed" >&2
fi

# Print summary; exit non-zero when at least one sample failed.
echo "Pipeline execution completed"
if (( ${#failed_samples[@]} == 0 )); then
  echo "All samples processed successfully"
else
  {
    echo "The following samples failed:"
    printf '%s\n' "${failed_samples[@]}"
  } >&2
  exit 1
fi