Skip to content

Commit

Permalink
Create code_optim
Browse files Browse the repository at this point in the history
  • Loading branch information
RamiyapriyaS authored Jan 22, 2025
1 parent 9db0c7b commit ba02a97
Showing 1 changed file with 155 additions and 0 deletions.
155 changes: 155 additions & 0 deletions docs/chatbot_comparison/results/amazon_q/code_optim
Original file line number Diff line number Diff line change
@@ -0,0 +1,155 @@
#!/bin/bash

# Strict mode, spelled out with the long option names:
#   errexit  - stop on an unhandled non-zero exit status
#   nounset  - expanding an unset variable is an error
#   pipefail - a pipeline fails as soon as any stage of it fails
set -o errexit -o nounset -o pipefail
# Split words on newline and tab only, never on spaces.
IFS=$'\n\t'

# Built-in default locations; the -i and -o options replace them at runtime.
INPUT_DIR='/home/ramsivakumar/nextflow_conversion/fastq'
OUTPUT_DIR='/home/ramsivakumar/nextflow_conversion/test_out_bash'

#######################################
# Print the command-line help text and terminate the script.
# Globals:   INPUT_DIR, OUTPUT_DIR (read; shown as the current defaults)
# Arguments: $1 - exit status to terminate with (optional, default: 1;
#                 anything that is not a non-negative integer is treated as 1)
# Outputs:   help text on stdout when the status is 0, on stderr otherwise
# Returns:   never returns; exits with the requested status
#######################################
usage() {
  local exit_code="${1:-1}"
  # Guard against a non-numeric argument, which would make 'exit' itself fail.
  [[ "$exit_code" =~ ^[0-9]+$ ]] || exit_code=1

  local -a help_lines=(
    "Usage: $0 [-i INPUT_DIR] [-o OUTPUT_DIR] [-h]"
    "Options:"
    " -i INPUT_DIR Directory containing input FASTQ files (default: $INPUT_DIR)"
    " -o OUTPUT_DIR Output directory for results (default: $OUTPUT_DIR)"
    " -h Display this help message"
  )

  # Requested help (status 0) is normal output; a usage error is a diagnostic.
  if (( exit_code == 0 )); then
    printf '%s\n' "${help_lines[@]}"
  else
    printf '%s\n' "${help_lines[@]}" >&2
  fi
  exit "$exit_code"
}

#######################################
# Verify that every required external tool can be found by the shell.
# Arguments: $@ - tool names to look for (optional; defaults to
#                 fastp, fastqc, multiqc and unicycler)
# Outputs:   on failure, the list of missing tools on stderr
# Returns:   0 when all tools are found; exits the script with status 1
#            when at least one is missing
#######################################
check_dependencies() {
  local -a required_tools=("fastp" "fastqc" "multiqc" "unicycler")
  local -a missing_tools=()
  # Declared local so the loop does not overwrite a caller's 'tool' variable.
  local tool

  if (( $# > 0 )); then
    required_tools=("$@")
  fi

  for tool in "${required_tools[@]}"; do
    if ! command -v "$tool" &> /dev/null; then
      missing_tools+=("$tool")
    fi
  done

  if (( ${#missing_tools[@]} != 0 )); then
    echo "Error: The following required tools are missing:" >&2
    printf '%s\n' "${missing_tools[@]}" >&2
    exit 1
  fi
}

#######################################
# Create the per-tool result directories underneath OUTPUT_DIR.
# Globals:   OUTPUT_DIR (read)
# Arguments: none
# Returns:   mkdir's exit status; aborts the shell if OUTPUT_DIR is empty
#######################################
create_output_dirs() {
  # An empty OUTPUT_DIR would otherwise create the directories under "/".
  : "${OUTPUT_DIR:?OUTPUT_DIR must not be empty}"
  # '--' keeps a path that starts with '-' from being parsed as an option.
  mkdir -p -- \
    "$OUTPUT_DIR"/{fastp_output,fastqc_output,unicycler_output,multiqc_output}
}

#######################################
# Run the per-sample steps in order: fastp on the paired reads, FastQC on
# the fastp outputs, then Unicycler on the same fastp outputs.
# Globals:   INPUT_DIR (read), OUTPUT_DIR (read)
# Arguments: $1 - sample name; the reads are looked up as
#                 $INPUT_DIR/<name>_R1_001.fastq.gz and
#                 $INPUT_DIR/<name>_R2_001.fastq.gz
#            $2 - thread count passed to each tool (optional, default: 8)
# Outputs:   progress messages on stdout, error messages on stderr
# Returns:   0 on success; 1 if an input file is missing, a directory
#            cannot be created, or any tool fails
#######################################
process_sample() {
  local sample_name="$1"
  local threads="${2:-8}"
  local fastq_file_r1="$INPUT_DIR/${sample_name}_R1_001.fastq.gz"
  local fastq_file_r2="$INPUT_DIR/${sample_name}_R2_001.fastq.gz"
  # Declared local so the loop does not overwrite a caller's 'file' variable.
  local file

  # Check if input files exist
  for file in "$fastq_file_r1" "$fastq_file_r2"; do
    if [[ ! -f "$file" ]]; then
      echo "Error: Input file $file not found" >&2
      return 1
    fi
  done

  echo "Processing sample: $sample_name"

  # Step 1: fastp. Outputs are written into $OUTPUT_DIR/fastp_output.
  local fastp_output_r1="$OUTPUT_DIR/fastp_output/${sample_name}_R1.fastp.fastq"
  local fastp_output_r2="$OUTPUT_DIR/fastp_output/${sample_name}_R2.fastp.fastq"
  local fastp_json="$OUTPUT_DIR/fastp_output/${sample_name}.json"
  local fastp_html="$OUTPUT_DIR/fastp_output/${sample_name}.html"

  if ! fastp -i "$fastq_file_r1" -I "$fastq_file_r2" \
    -o "$fastp_output_r1" -O "$fastp_output_r2" \
    -j "$fastp_json" -h "$fastp_html" \
    --detect_adapter_for_pe \
    --thread "$threads"; then
    echo "Error: fastp processing failed for $sample_name" >&2
    return 1
  fi

  # Step 2: FastQC on the fastp outputs.
  local fastqc_output_dir="$OUTPUT_DIR/fastqc_output/${sample_name}_fastqc"
  # Checked explicitly: errexit is not in effect when this function is
  # called from a condition such as 'if ! process_sample ...'.
  if ! mkdir -p -- "$fastqc_output_dir"; then
    echo "Error: Could not create directory $fastqc_output_dir" >&2
    return 1
  fi

  if ! fastqc "$fastp_output_r1" "$fastp_output_r2" \
    -o "$fastqc_output_dir" \
    -t "$threads"; then
    echo "Error: FastQC analysis failed for $sample_name" >&2
    return 1
  fi

  # Step 3: Unicycler on the fastp outputs.
  local unicycler_output_dir="$OUTPUT_DIR/unicycler_output/${sample_name}_unicycler"
  if ! mkdir -p -- "$unicycler_output_dir"; then
    echo "Error: Could not create directory $unicycler_output_dir" >&2
    return 1
  fi

  if ! unicycler -1 "$fastp_output_r1" -2 "$fastp_output_r2" \
    -o "$unicycler_output_dir" \
    --threads "$threads"; then
    echo "Error: Unicycler assembly failed for $sample_name" >&2
    return 1
  fi

  echo "Successfully processed sample: $sample_name"
  return 0
}

#######################################
# Entry point: parse the options, validate the directories, check the
# required tools, process every sample found in INPUT_DIR, run MultiQC over
# the FastQC results, and print a summary.
# Globals:   INPUT_DIR, OUTPUT_DIR (overwritten by -i / -o, then read)
# Arguments: the script's command-line arguments
# Outputs:   progress on stdout; errors and warnings on stderr
# Returns:   exits 1 on invalid usage, a missing input directory, no samples
#            found, or at least one failed sample; otherwise finishes with 0
#######################################
main() {
  local opt
  while getopts "i:o:h" opt; do
    case "$opt" in
      i) INPUT_DIR="$OPTARG" ;;
      o) OUTPUT_DIR="$OPTARG" ;;
      h)
        # Asking for help is not an error. usage terminates the shell it runs
        # in, so call it in a subshell and exit 0 here whatever status it used.
        (usage 0) || true
        exit 0
        ;;
      *) usage ;;
    esac
  done
  shift $((OPTIND - 1))

  # getopts stops at the first non-option word; anything left over would be
  # silently ignored, including options placed after it.
  if (( $# > 0 )); then
    echo "Error: Unexpected argument: $1" >&2
    usage
  fi

  # Validate input and output directories
  if [[ ! -d "$INPUT_DIR" ]]; then
    echo "Error: Input directory $INPUT_DIR does not exist" >&2
    exit 1
  fi
  if [[ -z "$OUTPUT_DIR" ]]; then
    echo "Error: Output directory must not be empty" >&2
    exit 1
  fi

  # Check dependencies
  check_dependencies

  # Create output directories
  create_output_dirs

  # Process all samples
  echo "Starting pipeline execution..."
  local -a failed_samples=()
  local sample_count=0
  local fastq_file_r1 sample_name

  # Find all R1 files and extract sample names
  for fastq_file_r1 in "$INPUT_DIR"/*_R1_001.fastq.gz; do
    # An unmatched glob is left as the literal pattern; -f filters it out.
    [[ -f "$fastq_file_r1" ]] || continue
    sample_name=$(basename -- "$fastq_file_r1" _R1_001.fastq.gz)
    sample_count=$((sample_count + 1))
    if ! process_sample "$sample_name"; then
      failed_samples+=("$sample_name")
    fi
  done

  # Without this check an input directory holding no matching reads would be
  # reported as a fully successful run.
  if (( sample_count == 0 )); then
    echo "Error: No *_R1_001.fastq.gz files found in $INPUT_DIR" >&2
    exit 1
  fi

  # Run MultiQC on all FastQC results
  echo "Running MultiQC on all results..."
  local multiqc_output_dir="$OUTPUT_DIR/multiqc_output"
  if ! multiqc "$OUTPUT_DIR/fastqc_output" -o "$multiqc_output_dir"; then
    # Best effort: a failed aggregate report does not fail the pipeline.
    echo "Warning: MultiQC analysis failed" >&2
  fi

  # Print summary
  echo "Pipeline execution completed"
  if (( ${#failed_samples[@]} == 0 )); then
    echo "All samples processed successfully"
  else
    echo "The following samples failed:" >&2
    printf '%s\n' "${failed_samples[@]}" >&2
    exit 1
  fi
}

main "$@"

0 comments on commit ba02a97

Please sign in to comment.