Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Check insertion and deletion blast files #13

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 13 additions & 7 deletions mumandco_v2.4.2.sh
Original file line number Diff line number Diff line change
Expand Up @@ -162,7 +162,7 @@ echo ""
# > ""$prefix"_query".coordsg_matched

##get average quality
average_quality=$(tail -n +5 "$alignments_folder/""$prefix"_ref".delta_filter.coordsg" | awk '{sum+=$7} END{print sum/NR}')
average_quality=$(tail -n +5 "$alignments_folder/""$prefix"_ref".delta_filter.coordsg" | awk '{sum+=$7} END{if(NR){print sum/NR}}')

### filter for alignment less than 10kb (limit on translocation size)
### filter for any alignment overlapping another with more than 50bp and with lower quality
Expand Down Expand Up @@ -244,7 +244,7 @@ tail -n +5 "$alignments_folder/""$prefix"_ref".delta_filter.coordsg" |\
#

##filter any remaining alignments with less than (1.25 x standard deviation) less than the average of the remaining alignments
average_quality=$(cat ""$prefix"_pre_list.txt" | awk '{sum+=$7} END{print sum/NR}')
average_quality=$(cat ""$prefix"_pre_list.txt" | awk '{sum+=$7} END{if(NR){print sum/NR}}')

if [[ "$average_quality" == "100" ]]
then
Expand All @@ -256,7 +256,7 @@ if [[ "$average_quality" == "100" ]]

else

stdev125=$(cat ""$prefix"_pre_list.txt" | awk '{sum=sum+$7 ; sumX2+=(($7)^2)} END{print 1.25*(sqrt(sumX2/(NR) - ((sum/NR)^2)))}')
stdev125=$(cat ""$prefix"_pre_list.txt" | awk '{sum=sum+$7 ; sumX2+=(($7)^2)} END{if(NR){print 1.25*(sqrt(sumX2/(NR) - ((sum/NR)^2)))}}')
quality_filt=$(echo $average_quality - $stdev125 | bc)
cat ""$prefix"_pre_list.txt" | awk -v quality_filt="$quality_filt" '{if($7 > quality_filt) print $14"\t"$15}' | sort | uniq > ""$prefix"_chromosome_pairs.txt"

Expand Down Expand Up @@ -1082,7 +1082,13 @@ then

cat $prefix.filteredcalls | awk '{if($6!="insertion" && $6!="deletion") print $0}' > $prefix.noINDEL
echo "ref_chr query_chr ref_start ref_stop size SV_type query_start query_stop" > $prefix.SVs_all.tsv
cat $prefix.insertion_blast $prefix.deletion_blast $prefix.noINDEL | sort -k1,1 -k3V >> $prefix.SVs_all.tsv
if [ -f $prefix.insertion_blast ]; then
sort -k1,1 -k3V $prefix.insertion_blast >> $prefix.SVs_all.tsv
fi
if [ -f $prefix.deletion_blast ]; then
sort -k1,1 -k3V $prefix.deletion_blast >> $prefix.SVs_all.tsv
fi
sort -k1,1 -k3V $prefix.noINDEL >> $prefix.SVs_all.tsv

rm query_segment.fa
rm ref_segment.fa
Expand Down Expand Up @@ -1110,7 +1116,7 @@ echo ""
total=$(cat $prefix.filteredcalls | awk '{print $0}' | wc -l)
total_del=$(cat $prefix.filteredcalls | awk '/deletion/{print $0}' | wc -l)
total_ins=$(cat $prefix.filteredcalls | awk '/insertion/{print $0}' | wc -l)
#total_INDELmean=$(cat $prefix.filteredcalls | awk 'BEGIN{sum=0} !/double/&&/deletion/||/insertion/{sum+=$5} END{print sum/NR}')
#total_INDELmean=$(cat $prefix.filteredcalls | awk 'BEGIN{sum=0} !/double/&&/deletion/||/insertion/{sum+=$5} END{if(NR){print sum/NR}}')
total_dup=$(cat $prefix.filteredcalls | awk '/duplication/{print $0}' | wc -l)
total_inv=$(cat $prefix.filteredcalls | awk '/inversion/{print $0}' | wc -l)
total_trans=$(cat $prefix.filteredcalls | awk '!/complicated/&&/transloc/{print $0}' | wc -l)
Expand Down Expand Up @@ -1172,8 +1178,8 @@ then
rm $prefix.deletions
rm $prefix.blast_in
rm $prefix.blast_del
rm $prefix.deletion_blast
rm $prefix.insertion_blast
rm -f $prefix.deletion_blast
rm -f $prefix.insertion_blast
fi
fi

Expand Down