diff --git a/polishTE b/polishTE index af2219e..ab888b8 100755 --- a/polishTE +++ b/polishTE @@ -314,7 +314,8 @@ function get_blast_hit() { # run blastn echo "#qseqid sseqid pident length mismatch qstart qend sstart send sstrand" > $1.blast.o - blastn -query $2 -db $3 -outfmt "6 qseqid sseqid pident length mismatch qstart qend sstart send sstrand" -evalue $4 | awk -v "ml=$5" '{OFS="\t"; if ($4 > ml) {print $0}}' >> $1.blast.o + blastn -query $2 -db $3 -outfmt "6 qseqid sseqid pident length mismatch qstart qend sstart send sstrand" \ + -evalue $4 | awk -v "ml=$5" '{OFS="\t"; if ($4 > ml) {print $0}}' >> $1.blast.o # filter blastn results awk -v cons_len="$8" '{OFS="\t"; if ($1~/^#/) {} else {if ($3 > 80 && $7-$6<3*cons_len) {print}}}' < $1.blast.o > $1.blast.tmp.o diff --git a/utils/query_coverage.R b/utils/query_coverage.R index 320df21..048228d 100644 --- a/utils/query_coverage.R +++ b/utils/query_coverage.R @@ -9,8 +9,9 @@ cons_len = as.numeric(Args[7]) out_file = as.character(Args[8]) cons_coverage=function(blast_file=NULL, cons_len=NULL, out_file=NULL){ - blast = read.table(blast_file, sep="\t") - new_rownames <- c(paste(blast$V2,"-",as.character(blast$V8),":",as.character(blast$V9), sep = "")) + blast = read.table(blast_file, sep="\t", comment.char = "", skip=1) + + new_rownames <- c(paste(blast$V2,"-",as.character(blast$V8),":",as.character(blast$V9), sep = "")) #make the coverage matrix coverage = matrix(rep(0, length(blast$V1)*as.numeric(cons_len)), byrow = T, ncol = as.numeric(cons_len)) @@ -26,6 +27,7 @@ cons_coverage=function(blast_file=NULL, cons_len=NULL, out_file=NULL){ # number of full copies full=blast[abs(blast$V6-blast$V7) >= 0.9*as.numeric(cons_len),] + cat(nrow(full)) }