#!/bin/bash
#
# Paired-end read QC and assembly pipeline.
#
# For every sample that has both <sample>_R1_001.fastq.gz and
# <sample>_R2_001.fastq.gz in INPUT_DIR, this script runs fastp, then FastQC
# on the fastp output, then Unicycler on the fastp output. After all samples
# have been attempted it runs MultiQC over the FastQC output directory.
#
# Provenance: contents of docs/chatbot_comparison/results/amazon_q/code_optim
# as added by commit ba02a9749af6d01aa2b4a420efaa5c74ca07b862
# ("Create code_optim").
#
# Usage: code_optim [-i INPUT_DIR] [-o OUTPUT_DIR] [-t THREADS] [-h]
# Exit status: 0 if every sample succeeded, 1 otherwise.

# Set strict error handling
set -euo pipefail
IFS=$'\n\t'

# Defaults, kept separate from the live settings so that usage() always
# reports the real defaults even after -i/-o/-t have been parsed.
readonly DEFAULT_INPUT_DIR="/home/ramsivakumar/nextflow_conversion/fastq"
readonly DEFAULT_OUTPUT_DIR="/home/ramsivakumar/nextflow_conversion/test_out_bash"
readonly DEFAULT_THREADS=8

# Live settings; overridden by command line options in main().
INPUT_DIR="$DEFAULT_INPUT_DIR"
OUTPUT_DIR="$DEFAULT_OUTPUT_DIR"
THREADS="$DEFAULT_THREADS"

#######################################
# Print the help text to stdout.
# Globals:   DEFAULT_INPUT_DIR, DEFAULT_OUTPUT_DIR, DEFAULT_THREADS (read)
# Arguments: none
#######################################
print_usage() {
  echo "Usage: $0 [-i INPUT_DIR] [-o OUTPUT_DIR] [-t THREADS] [-h]"
  echo "Options:"
  echo "  -i INPUT_DIR    Directory containing input FASTQ files (default: $DEFAULT_INPUT_DIR)"
  echo "  -o OUTPUT_DIR   Output directory for results (default: $DEFAULT_OUTPUT_DIR)"
  echo "  -t THREADS      Threads passed to fastp, fastqc and unicycler (default: $DEFAULT_THREADS)"
  echo "  -h              Display this help message"
}

#######################################
# Display usage information and exit.
# Arguments: $1 - exit status (default 1). With status 0 the text goes to
#                 stdout (explicit help request); otherwise to stderr.
#######################################
usage() {
  local status="${1:-1}"
  if (( status == 0 )); then
    print_usage
  else
    print_usage >&2
  fi
  exit "$status"
}

#######################################
# Verify that every external tool the pipeline calls is on PATH.
# Exits with status 1, listing the missing tools on stderr, if any is absent.
#######################################
check_dependencies() {
  local required_tools=("fastp" "fastqc" "multiqc" "unicycler")
  local missing_tools=()
  local tool

  for tool in "${required_tools[@]}"; do
    if ! command -v "$tool" &> /dev/null; then
      missing_tools+=("$tool")
    fi
  done

  if (( ${#missing_tools[@]} != 0 )); then
    echo "Error: The following required tools are missing:" >&2
    printf '%s\n' "${missing_tools[@]}" >&2
    exit 1
  fi
}

#######################################
# Create the per-tool output directories under OUTPUT_DIR.
# Globals: OUTPUT_DIR (read)
#######################################
create_output_dirs() {
  local dirs=("fastp_output" "fastqc_output" "unicycler_output" "multiqc_output")
  local dir
  for dir in "${dirs[@]}"; do
    mkdir -p "$OUTPUT_DIR/$dir"
  done
}

#######################################
# Run fastp, FastQC and Unicycler for a single sample.
# Globals:   INPUT_DIR, OUTPUT_DIR, THREADS (read)
# Arguments: $1 - sample name, i.e. the file name without _R1_001.fastq.gz
# Returns:   0 on success, 1 as soon as any step fails
#
# This function is invoked as `if ! process_sample ...`, a context in which
# bash ignores `set -e`; every command whose failure matters is therefore
# checked explicitly.
#######################################
process_sample() {
  local sample_name="$1"
  local fastq_file_r1="$INPUT_DIR/${sample_name}_R1_001.fastq.gz"
  local fastq_file_r2="$INPUT_DIR/${sample_name}_R2_001.fastq.gz"
  local file

  # Check if input files exist
  for file in "$fastq_file_r1" "$fastq_file_r2"; do
    if [[ ! -f "$file" ]]; then
      echo "Error: Input file $file not found" >&2
      return 1
    fi
  done

  echo "Processing sample: $sample_name"

  # Step 1: Run fastp with error handling
  local fastp_output_r1="$OUTPUT_DIR/fastp_output/${sample_name}_R1.fastp.fastq"
  local fastp_output_r2="$OUTPUT_DIR/fastp_output/${sample_name}_R2.fastp.fastq"
  local fastp_json="$OUTPUT_DIR/fastp_output/${sample_name}.json"
  local fastp_html="$OUTPUT_DIR/fastp_output/${sample_name}.html"

  if ! fastp -i "$fastq_file_r1" -I "$fastq_file_r2" \
    -o "$fastp_output_r1" -O "$fastp_output_r2" \
    -j "$fastp_json" -h "$fastp_html" \
    --detect_adapter_for_pe \
    --thread "$THREADS"; then
    echo "Error: fastp processing failed for $sample_name" >&2
    return 1
  fi

  # Step 2: Run FastQC with error handling
  local fastqc_output_dir="$OUTPUT_DIR/fastqc_output/${sample_name}_fastqc"
  if ! mkdir -p "$fastqc_output_dir"; then
    echo "Error: cannot create $fastqc_output_dir" >&2
    return 1
  fi

  if ! fastqc "$fastp_output_r1" "$fastp_output_r2" \
    -o "$fastqc_output_dir" \
    -t "$THREADS"; then
    echo "Error: FastQC analysis failed for $sample_name" >&2
    return 1
  fi

  # Step 3: Run Unicycler with error handling
  local unicycler_output_dir="$OUTPUT_DIR/unicycler_output/${sample_name}_unicycler"
  if ! mkdir -p "$unicycler_output_dir"; then
    echo "Error: cannot create $unicycler_output_dir" >&2
    return 1
  fi

  if ! unicycler -1 "$fastp_output_r1" -2 "$fastp_output_r2" \
    -o "$unicycler_output_dir" \
    --threads "$THREADS"; then
    echo "Error: Unicycler assembly failed for $sample_name" >&2
    return 1
  fi

  echo "Successfully processed sample: $sample_name"
  return 0
}

#######################################
# Entry point: parse options, validate the environment, process every
# sample found in INPUT_DIR, run MultiQC, and print a summary.
# Globals:   INPUT_DIR, OUTPUT_DIR, THREADS (written by option parsing)
# Arguments: the script's command line
#######################################
main() {
  local opt

  # Parse command line arguments. getopts runs in its default (non-silent)
  # mode, so it prints its own message for unknown options or missing
  # arguments before the usage text is shown.
  while getopts "i:o:t:h" opt; do
    case "$opt" in
      i) INPUT_DIR="$OPTARG" ;;
      o) OUTPUT_DIR="$OPTARG" ;;
      t) THREADS="$OPTARG" ;;
      h) usage 0 ;;
      *) usage 1 ;;
    esac
  done

  # Validate settings
  if [[ ! "$THREADS" =~ ^[1-9][0-9]*$ ]]; then
    echo "Error: THREADS must be a positive integer, got '$THREADS'" >&2
    exit 1
  fi

  if [[ ! -d "$INPUT_DIR" ]]; then
    echo "Error: Input directory $INPUT_DIR does not exist" >&2
    exit 1
  fi

  # Check dependencies
  check_dependencies

  # Create output directories
  create_output_dirs

  # Process all samples
  echo "Starting pipeline execution..."
  local failed_samples=()
  local sample_count=0
  local fastq_file_r1 sample_name

  # Find all R1 files and extract sample names
  for fastq_file_r1 in "$INPUT_DIR"/*_R1_001.fastq.gz; do
    # An unmatched glob stays as the literal pattern; the -f test skips it.
    [[ -f "$fastq_file_r1" ]] || continue
    sample_count=$((sample_count + 1))

    sample_name="${fastq_file_r1##*/}"
    sample_name="${sample_name%_R1_001.fastq.gz}"

    if ! process_sample "$sample_name"; then
      failed_samples+=("$sample_name")
    fi
  done

  # Without this check an empty input directory would fall through to the
  # summary and be reported as a fully successful run.
  if (( sample_count == 0 )); then
    echo "Error: No *_R1_001.fastq.gz files found in $INPUT_DIR" >&2
    exit 1
  fi

  # Run MultiQC on all FastQC results. A failure here is deliberately only
  # a warning: the per-sample outputs are already on disk.
  echo "Running MultiQC on all results..."
  local multiqc_output_dir="$OUTPUT_DIR/multiqc_output"
  if ! multiqc "$OUTPUT_DIR/fastqc_output" -o "$multiqc_output_dir"; then
    echo "Warning: MultiQC analysis failed" >&2
  fi

  # Print summary
  echo "Pipeline execution completed"
  if (( ${#failed_samples[@]} == 0 )); then
    echo "All samples processed successfully"
  else
    echo "The following samples failed:" >&2
    printf '%s\n' "${failed_samples[@]}" >&2
    exit 1
  fi
}

main "$@"