Create code_optim

STRIDES · Jan 22, 2025 · ba02a97 · ba02a97
1 parent 9db0c7b
commit ba02a97
Showing 1 changed file with 155 additions and 0 deletions.
diff --git a/docs/chatbot_comparison/results/amazon_q/code_optim b/docs/chatbot_comparison/results/amazon_q/code_optim
@@ -0,0 +1,155 @@
+#!/bin/bash
+
+# Strict mode: stop on a failing command, on use of an unset variable, and
+# when any stage of a pipeline fails
+set -o errexit -o nounset -o pipefail
+# Split words on newline and tab only, never on spaces
+IFS=$'\n\t'
+
+# Default locations; the -i and -o options replace them
+INPUT_DIR="/home/ramsivakumar/nextflow_conversion/fastq"
+OUTPUT_DIR="/home/ramsivakumar/nextflow_conversion/test_out_bash"
+
+# Function to display usage information
+usage() {
+    echo "Usage: $0 [-i INPUT_DIR] [-o OUTPUT_DIR] [-h]"
+    echo "Options:"
+    echo "  -i INPUT_DIR   Directory containing input FASTQ files (default: $INPUT_DIR)"
+    echo "  -o OUTPUT_DIR  Output directory for results (default: $OUTPUT_DIR)"
+    echo "  -h            Display this help message"
+    exit 1
+}
+
+#######################################
+# Verify that every external tool the pipeline calls is on PATH.
+# Arguments:
+#   None
+# Outputs:
+#   On failure, the list of missing tools on stderr
+# Returns:
+#   Nothing on success; exits the script with status 1 if a tool is missing
+#######################################
+check_dependencies() {
+    local -a required_tools=("fastp" "fastqc" "multiqc" "unicycler")
+    local -a missing_tools=()
+    # Declared local so the loop variable does not leak into the caller
+    local tool
+
+    for tool in "${required_tools[@]}"; do
+        if ! command -v "$tool" &> /dev/null; then
+            missing_tools+=("$tool")
+        fi
+    done
+
+    # Report all missing tools at once rather than stopping at the first
+    if (( ${#missing_tools[@]} > 0 )); then
+        {
+            echo "Error: The following required tools are missing:"
+            printf '%s\n' "${missing_tools[@]}"
+        } >&2
+        exit 1
+    fi
+}
+
+#######################################
+# Create one sub-directory per pipeline stage under OUTPUT_DIR.
+# Globals:
+#   OUTPUT_DIR (read) - parent directory; created too if it does not exist
+# Arguments:
+#   None
+#######################################
+create_output_dirs() {
+    local -a dirs=("fastp_output" "fastqc_output" "unicycler_output" "multiqc_output")
+    # Declared local so the loop variable does not leak into the caller
+    local dir
+
+    for dir in "${dirs[@]}"; do
+        # "--" keeps an OUTPUT_DIR that starts with "-" from being read as
+        # an option to mkdir
+        mkdir -p -- "$OUTPUT_DIR/$dir"
+    done
+}
+
+# Function to process a single sample
+process_sample() {
+    local sample_name="$1"
+    local fastq_file_r1="$INPUT_DIR/${sample_name}_R1_001.fastq.gz"
+    local fastq_file_r2="$INPUT_DIR/${sample_name}_R2_001.fastq.gz"
+
+    # Check if input files exist
+    for file in "$fastq_file_r1" "$fastq_file_r2"; do
+        if [[ ! -f "$file" ]]; then
+            echo "Error: Input file $file not found"
+            return 1
+        fi
+    done
+
+    echo "Processing sample: $sample_name"
+
+    # Step 1: Run fastp with error handling
+    local fastp_output_r1="$OUTPUT_DIR/fastp_output/${sample_name}_R1.fastp.fastq"
+    local fastp_output_r2="$OUTPUT_DIR/fastp_output/${sample_name}_R2.fastp.fastq"
+    local fastp_json="$OUTPUT_DIR/fastp_output/${sample_name}.json"
+    local fastp_html="$OUTPUT_DIR/fastp_output/${sample_name}.html"
+
+    if ! fastp -i "$fastq_file_r1" -I "$fastq_file_r2" \
+               -o "$fastp_output_r1" -O "$fastp_output_r2" \
+               -j "$fastp_json" -h "$fastp_html" \
+               --detect_adapter_for_pe \
+               --thread 8; then
+        echo "Error: fastp processing failed for $sample_name"
+        return 1
+    fi
+
+    # Step 2: Run FastQC with error handling
+    local fastqc_output_dir="$OUTPUT_DIR/fastqc_output/${sample_name}_fastqc"
+    mkdir -p "$fastqc_output_dir"
+
+    if ! fastqc "$fastp_output_r1" "$fastp_output_r2" \
+                -o "$fastqc_output_dir" \
+                -t 8; then
+        echo "Error: FastQC analysis failed for $sample_name"
+        return 1
+    fi
+
+    # Step 3: Run Unicycler with error handling
+    local unicycler_output_dir="$OUTPUT_DIR/unicycler_output/${sample_name}_unicycler"
+    mkdir -p "$unicycler_output_dir"
+
+    if ! unicycler -1 "$fastp_output_r1" -2 "$fastp_output_r2" \
+                   -o "$unicycler_output_dir" \
+                   --threads 8; then
+        echo "Error: Unicycler assembly failed for $sample_name"
+        return 1
+    fi
+
+    echo "Successfully processed sample: $sample_name"
+    return 0
+}
+
+# Parse command line arguments
+while getopts "i:o:h" opt; do
+    case "$opt" in
+        i) INPUT_DIR="$OPTARG" ;;
+        o) OUTPUT_DIR="$OPTARG" ;;
+        # Help was requested explicitly, so finish with status 0. usage ends
+        # the shell it runs in; running it in a subshell lets the exit below
+        # decide the final status.
+        h) ( usage 0 ) || true; exit 0 ;;
+        # getopts sets opt to "?" for an unknown option or a missing argument
+        *) usage ;;
+    esac
+done
+
+# Validate input and output directories
+if [[ ! -d "$INPUT_DIR" ]]; then
+    echo "Error: Input directory $INPUT_DIR does not exist"
+    exit 1
+fi
+
+# Check dependencies
+check_dependencies
+
+# Create output directories
+create_output_dirs
+
+# Process all samples
+echo "Starting pipeline execution..."
+failed_samples=()
+
+# Find all R1 files and extract sample names
+for fastq_file_r1 in "$INPUT_DIR"/*_R1_001.fastq.gz; do
+    if [[ -f "$fastq_file_r1" ]]; then
+        sample_name=$(basename "$fastq_file_r1" _R1_001.fastq.gz)
+        if ! process_sample "$sample_name"; then
+            failed_samples+=("$sample_name")
+        fi
+    fi
+done
+
+# Summarise the FastQC output directory with MultiQC. A MultiQC failure is
+# only reported as a warning; the script carries on.
+echo "Running MultiQC on all results..."
+multiqc_output_dir="$OUTPUT_DIR/multiqc_output"
+multiqc "$OUTPUT_DIR/fastqc_output" -o "$multiqc_output_dir" \
+    || echo "Warning: MultiQC analysis failed"
+
+# Print summary
+echo "Pipeline execution completed"
+if [ ${#failed_samples[@]} -eq 0 ]; then
+    echo "All samples processed successfully"
+else
+    echo "The following samples failed:"
+    printf '%s\n' "${failed_samples[@]}"
+    exit 1
+fi