Skip to content

Commit ba02a97

Browse files
authored
Create code_optim
1 parent 9db0c7b commit ba02a97

File tree

1 file changed

+155
-0
lines changed
  • docs/chatbot_comparison/results/amazon_q

1 file changed

+155
-0
lines changed
Lines changed: 155 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,155 @@
1+
#!/bin/bash
#
# Paired-end read pipeline: for every sample found in INPUT_DIR this script
# runs fastp, FastQC and Unicycler, then builds one MultiQC report over the
# FastQC results. See usage() for the command line options.

# Strict mode: abort on unhandled command failures (-e), on expansion of
# unset variables (-u), and when any stage of a pipeline fails (pipefail).
set -euo pipefail
# Word-split on newline and tab only, so paths that contain spaces survive.
IFS=$'\n\t'

# Default directories. A value already present in the environment wins over
# the built-in fallback; the -i / -o command line options override both.
INPUT_DIR="${INPUT_DIR:-/home/ramsivakumar/nextflow_conversion/fastq}"
OUTPUT_DIR="${OUTPUT_DIR:-/home/ramsivakumar/nextflow_conversion/test_out_bash}"
10+
11+
#######################################
# Print the command line synopsis and option summary, then terminate.
# Globals:
#   INPUT_DIR, OUTPUT_DIR (read) - shown as the current default values
# Arguments:
#   None
# Outputs:
#   Help text on stdout
# Returns:
#   Never returns; exits the script with status 1
#######################################
usage() {
  local -a help_lines=(
    "Usage: $0 [-i INPUT_DIR] [-o OUTPUT_DIR] [-h]"
    "Options:"
    " -i INPUT_DIR Directory containing input FASTQ files (default: $INPUT_DIR)"
    " -o OUTPUT_DIR Output directory for results (default: $OUTPUT_DIR)"
    " -h Display this help message"
  )

  # One printf call emits every line, each terminated by a newline.
  printf '%s\n' "${help_lines[@]}"
  exit 1
}
20+
21+
#######################################
# Verify that every external tool the pipeline invokes can be resolved.
# Globals:
#   None
# Arguments:
#   None
# Outputs:
#   On failure: an error header followed by one missing tool name per line,
#   written to stderr
# Returns:
#   0 when fastp, fastqc, multiqc and unicycler all resolve via `command -v`;
#   otherwise exits the script with status 1
#######################################
check_dependencies() {
  local -a required_tools=("fastp" "fastqc" "multiqc" "unicycler")
  local -a missing_tools=()
  local tool # keep the loop variable out of the global namespace

  for tool in "${required_tools[@]}"; do
    if ! command -v "$tool" > /dev/null 2>&1; then
      missing_tools+=("$tool")
    fi
  done

  if ((${#missing_tools[@]} > 0)); then
    # Diagnostics belong on stderr so they are not mixed into captured output.
    {
      echo "Error: The following required tools are missing:"
      printf '%s\n' "${missing_tools[@]}"
    } >&2
    exit 1
  fi
}
38+
39+
#######################################
# Create the per-tool result directories underneath OUTPUT_DIR.
# Globals:
#   OUTPUT_DIR (read) - parent directory; must be non-empty
# Arguments:
#   None
# Outputs:
#   Nothing on success; mkdir/parameter errors go to stderr
# Returns:
#   0 when all four directories exist afterwards, 1 if a mkdir fails.
#   Exits the shell if OUTPUT_DIR is unset or empty.
#######################################
create_output_dirs() {
  local -a dirs=("fastp_output" "fastqc_output" "unicycler_output" "multiqc_output")
  local dir # keep the loop variable out of the global namespace

  # An empty OUTPUT_DIR would otherwise expand to "/fastp_output" etc. and
  # create the directories at the filesystem root.
  : "${OUTPUT_DIR:?OUTPUT_DIR must not be empty}"

  for dir in "${dirs[@]}"; do
    # Explicit check: do not depend on the caller running under `set -e`.
    mkdir -p -- "$OUTPUT_DIR/$dir" || return 1
  done
}
46+
47+
#######################################
# Run the per-sample workflow: fastp on the raw read pair, then FastQC and
# Unicycler on the two files fastp wrote.
# Globals:
#   INPUT_DIR  (read) - directory holding <sample>_R1_001.fastq.gz and
#                       <sample>_R2_001.fastq.gz
#   OUTPUT_DIR (read) - parent of fastp_output/, fastqc_output/ and
#                       unicycler_output/
#   THREADS    (read, optional) - thread count used when $2 is not given
# Arguments:
#   $1 - sample name (file name without the _R1_001.fastq.gz suffix)
#   $2 - optional thread count passed to each tool (default: $THREADS, or 8)
# Outputs:
#   Progress messages on stdout, error messages on stderr
# Returns:
#   0 on success; 1 if an input file is missing, an output directory cannot
#   be created, or any of the three tools fails
#######################################
process_sample() {
  local sample_name="$1"
  local threads="${2:-${THREADS:-8}}"
  local fastq_file_r1="$INPUT_DIR/${sample_name}_R1_001.fastq.gz"
  local fastq_file_r2="$INPUT_DIR/${sample_name}_R2_001.fastq.gz"
  local file # keep the loop variable out of the global namespace

  # Both mates must be present before any tool is started.
  for file in "$fastq_file_r1" "$fastq_file_r2"; do
    if [[ ! -f "$file" ]]; then
      echo "Error: Input file $file not found" >&2
      return 1
    fi
  done

  echo "Processing sample: $sample_name"

  # NOTE: the main script calls this function inside `if ! ...`, where
  # `set -e` is disabled, so every step below checks its own exit status.

  # Step 1: fastp
  local fastp_output_dir="$OUTPUT_DIR/fastp_output"
  local fastp_output_r1="$fastp_output_dir/${sample_name}_R1.fastp.fastq"
  local fastp_output_r2="$fastp_output_dir/${sample_name}_R2.fastp.fastq"
  local fastp_json="$fastp_output_dir/${sample_name}.json"
  local fastp_html="$fastp_output_dir/${sample_name}.html"

  if ! mkdir -p -- "$fastp_output_dir"; then
    echo "Error: Cannot create directory $fastp_output_dir" >&2
    return 1
  fi

  if ! fastp -i "$fastq_file_r1" -I "$fastq_file_r2" \
    -o "$fastp_output_r1" -O "$fastp_output_r2" \
    -j "$fastp_json" -h "$fastp_html" \
    --detect_adapter_for_pe \
    --thread "$threads"; then
    echo "Error: fastp processing failed for $sample_name" >&2
    return 1
  fi

  # Step 2: FastQC on the fastp output
  local fastqc_output_dir="$OUTPUT_DIR/fastqc_output/${sample_name}_fastqc"
  if ! mkdir -p -- "$fastqc_output_dir"; then
    echo "Error: Cannot create directory $fastqc_output_dir" >&2
    return 1
  fi

  if ! fastqc "$fastp_output_r1" "$fastp_output_r2" \
    -o "$fastqc_output_dir" \
    -t "$threads"; then
    echo "Error: FastQC analysis failed for $sample_name" >&2
    return 1
  fi

  # Step 3: Unicycler on the fastp output
  local unicycler_output_dir="$OUTPUT_DIR/unicycler_output/${sample_name}_unicycler"
  if ! mkdir -p -- "$unicycler_output_dir"; then
    echo "Error: Cannot create directory $unicycler_output_dir" >&2
    return 1
  fi

  if ! unicycler -1 "$fastp_output_r1" -2 "$fastp_output_r2" \
    -o "$unicycler_output_dir" \
    --threads "$threads"; then
    echo "Error: Unicycler assembly failed for $sample_name" >&2
    return 1
  fi

  echo "Successfully processed sample: $sample_name"
  return 0
}
103+
104+
# Parse command line arguments.
# -i / -o override the default directories; -h prints the help text.
# For an unknown option or a missing option argument, getopts prints its own
# diagnostic and sets opt to "?", which is handled by the catch-all arm.
while getopts "i:o:h" opt; do
  case "$opt" in
    i) INPUT_DIR="$OPTARG" ;;
    o) OUTPUT_DIR="$OPTARG" ;;
    h)
      # usage() always terminates with status 1. Run it in a subshell so an
      # explicit help request ends the script with status 0 instead.
      (usage) || true
      exit 0
      ;;
    *) usage ;;
  esac
done
113+
114+
# Validate the input directory. The output directories are not validated
# here; they are created further down by create_output_dirs.
if [[ ! -d "$INPUT_DIR" ]]; then
  echo "Error: Input directory $INPUT_DIR does not exist" >&2
  exit 1
fi

# Check dependencies (exits with status 1 if a required tool is missing)
check_dependencies

# Create output directories
create_output_dirs
125+
126+
# Process all samples
echo "Starting pipeline execution..."
failed_samples=()
sample_count=0

# Every *_R1_001.fastq.gz file in INPUT_DIR defines one sample; the sample
# name is the file name with that suffix removed.
for fastq_file_r1 in "$INPUT_DIR"/*_R1_001.fastq.gz; do
  # When nothing matches, the glob is left as the literal pattern; skip it
  # (and anything else that is not a regular file).
  [[ -f "$fastq_file_r1" ]] || continue

  sample_name="${fastq_file_r1##*/}"
  sample_name="${sample_name%_R1_001.fastq.gz}"
  sample_count=$((sample_count + 1))

  # A failed sample is recorded and the loop continues with the next one.
  if ! process_sample "$sample_name"; then
    failed_samples+=("$sample_name")
  fi
done

# Without this check an empty input directory would end in
# "All samples processed successfully" although nothing was processed.
if ((sample_count == 0)); then
  echo "Error: No *_R1_001.fastq.gz files found in $INPUT_DIR" >&2
  exit 1
fi
139+
140+
# Run MultiQC on all FastQC results.
# A MultiQC failure is reported as a warning only; it does not change the
# exit status of the script.
echo "Running MultiQC on all results..."
multiqc_output_dir="$OUTPUT_DIR/multiqc_output"
if ! multiqc "$OUTPUT_DIR/fastqc_output" -o "$multiqc_output_dir"; then
  echo "Warning: MultiQC analysis failed" >&2
fi
146+
147+
# Print summary.
# The script exits with status 1 when at least one sample failed; the list
# of failed samples goes to stderr, one name per line.
echo "Pipeline execution completed"
if ((${#failed_samples[@]} == 0)); then
  echo "All samples processed successfully"
else
  {
    echo "The following samples failed:"
    printf '%s\n' "${failed_samples[@]}"
  } >&2
  exit 1
fi

0 commit comments

Comments
 (0)