diff --git a/Dockerfile b/Dockerfile index 51fb4f2..19ca29c 100644 --- a/Dockerfile +++ b/Dockerfile @@ -19,8 +19,8 @@ RUN cd /usr ; git clone https://github.com/TGAC/NanoOK ENV NANOOK_DIR="/usr/NanoOK" RUN echo "export PATH=/usr/NanoOK/bin:${PATH}" >> ~/.bashrc RUN Rscript -e "install.packages('ggplot2', repos='https://cran.ma.imperial.ac.uk/')" -RUN Rscript -e "install.packages('ggmap’, repos='https://cran.ma.imperial.ac.uk/')" -RUN Rscript -e "install.packages('plyr’, repos='https://cran.ma.imperial.ac.uk/')" +RUN Rscript -e "install.packages('ggmap', repos='https://cran.ma.imperial.ac.uk/')" +RUN Rscript -e "install.packages('plyr', repos='https://cran.ma.imperial.ac.uk/')" RUN Rscript -e "install.packages('scales', repos='https://cran.ma.imperial.ac.uk/')" RUN Rscript -e "install.packages('gridExtra', repos='https://cran.ma.imperial.ac.uk/')" RUN Rscript -e "install.packages('reshape', repos='https://cran.ma.imperial.ac.uk/')" \ No newline at end of file diff --git a/bin/nanook_plot_lengths.R b/bin/nanook_plot_lengths.R index f7a12fd..7c237c2 100755 --- a/bin/nanook_plot_lengths.R +++ b/bin/nanook_plot_lengths.R @@ -35,37 +35,53 @@ for (t in 1:3) { # Count vs length filename_lengths <- paste(analysisdir, "/", "all_",type,"_lengths.txt", sep=""); + filename_kmers <- paste(analysisdir, "/", "all_",type,"_kmers.txt", sep=""); if (file.exists(filename_lengths)) { data_lengths = read.table(filename_lengths, col.name=c("name", "length")) - if (format=="png") { - lengths_png <- paste(graphsdir, "/", "all_",type,"_lengths.png", sep=""); - png(lengths_png, width=1200, height=800) - print(ggplot(data_lengths, aes(x=data_lengths$length), xlab="Length") + geom_histogram(binwidth=1000, fill=colourcode) + xlab("Length") +ylab("Count") + scale_x_continuous(limits=c(0, 35000)) + ggtitle(type) + theme(text = element_text(size=textsize)) + theme(plot.margin = unit(c(0.02,0.02,0.04,0.02), "npc")) + theme(axis.title.x=element_text(vjust=-xvjust)) + theme(axis.title.y=element_text(vjust=yvjust))) + if (nrow(data_lengths) > 1) { + if (format=="png") { + lengths_png <- paste(graphsdir, "/", "all_",type,"_lengths.png", sep=""); + png(lengths_png, width=1200, height=800) + print(ggplot(data_lengths, aes(x=data_lengths$length), xlab="Length") + geom_histogram(binwidth=1000, fill=colourcode) + xlab("Length") +ylab("Count") + scale_x_continuous(limits=c(0, 35000)) + ggtitle(type) + theme(text = element_text(size=textsize)) + theme(plot.margin = unit(c(0.02,0.02,0.04,0.02), "npc")) + theme(axis.title.x=element_text(vjust=-xvjust)) + theme(axis.title.y=element_text(vjust=yvjust))) + } else { + lengths_pdf <- paste(graphsdir, "/", "all_",type,"_lengths.pdf", sep=""); + pdf(lengths_pdf, width=6, height=4) + print(ggplot(data_lengths, aes(x=data_lengths$length), xlab="Length") + geom_histogram(binwidth=1000, fill=colourcode) + xlab("Length") +ylab("Count") + scale_x_continuous(limits=c(0, 35000)) + ggtitle(type) + theme(text = element_text(size=textsize)) + theme(plot.margin = unit(c(0.02,0.02,0.04,0.02), "npc")) + theme(axis.title.x=element_text(vjust=-xvjust)) + theme(axis.title.y=element_text(vjust=yvjust))) + } + garbage <- dev.off() } else { - lengths_pdf <- paste(graphsdir, "/", "all_",type,"_lengths.pdf", sep=""); - pdf(lengths_pdf, width=6, height=4) - print(ggplot(data_lengths, aes(x=data_lengths$length), xlab="Length") + geom_histogram(binwidth=1000, fill=colourcode) + xlab("Length") +ylab("Count") + scale_x_continuous(limits=c(0, 35000)) + ggtitle(type) + theme(text = element_text(size=textsize)) + theme(plot.margin = unit(c(0.02,0.02,0.04,0.02), "npc")) + theme(axis.title.x=element_text(vjust=-xvjust)) + theme(axis.title.y=element_text(vjust=yvjust))) + cat("WARNING: No data in ", filename_lengths, "\n"); } - garbage <- dev.off() - - # Number of perfect 21mers verses length scatter - filename_kmers <- paste(analysisdir, "/", "all_",type,"_kmers.txt", sep=""); - data_alignments = read.table(filename_kmers, header=TRUE) + } else { + cat("WARNING: Couldn't find", filename_lengths, "\n"); + } + + # Number of perfect 21mers verses length scatter + if (file.exists(filename_kmers)) { + data_kmers = try(read.table(filename_kmers, header=TRUE), silent=TRUE) - if (format=="png") { - kmers_png <- paste(graphsdir, "/", "all_",type,"_21mers.png", sep=""); - png(kmers_png, width=1200, height=800) - print(ggplot(data_alignments, aes(x=data_alignments$Length, y=data_alignments$nk21), xlab="Read length") + geom_point(shape=pointshape, size=pointsize, alpha=pointalpha, color=colourcode) + xlab("Read length") +ylab("Number of perfect 21mers") + ggtitle(type) + theme(text = element_text(size=textsize)) + scale_x_continuous(breaks=seq(0, 40000, 4000)) + scale_y_continuous(breaks=seq(0, 400, 20)) + theme(plot.margin = unit(c(0.02,0.02,0.04,0.02), "npc")) + theme(axis.title.x=element_text(vjust=-xvjust)) + theme(axis.title.y=element_text(vjust=yvjust))) - #grid.edit("geom_point.points", grep = TRUE, gp = gpar(lwd = pointwidth)) + if (inherits(data_kmers, "try-error")) { + cat("WARNING: Couldn't read", filename_kmers,"\n"); } else { - kmers_pdf <- paste(graphsdir, "/", "all_",type,"_21mers.pdf", sep=""); - pdf(kmers_pdf, width=6, height=4) - print(ggplot(data_alignments, aes(x=data_alignments$Length, y=data_alignments$nk21), xlab="Read length") + geom_point(shape=pointshape, alpha=pointalpha, color=colourcode) + xlab("Read length") +ylab("Number of perfect 21mers") + ggtitle(type) + theme(text = element_text(size=textsize)) + scale_x_continuous(breaks=seq(0, 40000, 4000)) + scale_y_continuous(breaks=seq(0, 400, 20)) + theme(plot.margin = unit(c(0.02,0.02,0.04,0.02), "npc")) + theme(axis.title.x=element_text(vjust=-xvjust)) + theme(axis.title.y=element_text(vjust=yvjust))) + if (nrow(data_kmers) > 1) { + if (format=="png") { + kmers_png <- paste(graphsdir, "/", "all_",type,"_21mers.png", sep=""); + png(kmers_png, width=1200, height=800) + print(ggplot(data_kmers, aes(x=data_kmers$Length, y=data_kmers$nk21), xlab="Read length") + geom_point(shape=pointshape, size=pointsize, alpha=pointalpha, color=colourcode) + xlab("Read length") +ylab("Number of perfect 21mers") + ggtitle(type) + theme(text = element_text(size=textsize)) + scale_x_continuous(breaks=seq(0, 40000, 4000)) + scale_y_continuous(breaks=seq(0, 400, 20)) + theme(plot.margin = unit(c(0.02,0.02,0.04,0.02), "npc")) + theme(axis.title.x=element_text(vjust=-xvjust)) + theme(axis.title.y=element_text(vjust=yvjust))) + #grid.edit("geom_point.points", grep = TRUE, gp = gpar(lwd = pointwidth)) + } else { + kmers_pdf <- paste(graphsdir, "/", "all_",type,"_21mers.pdf", sep=""); + pdf(kmers_pdf, width=6, height=4) + print(ggplot(data_kmers, aes(x=data_kmers$Length, y=data_kmers$nk21), xlab="Read length") + geom_point(shape=pointshape, alpha=pointalpha, color=colourcode) + xlab("Read length") +ylab("Number of perfect 21mers") + ggtitle(type) + theme(text = element_text(size=textsize)) + scale_x_continuous(breaks=seq(0, 40000, 4000)) + scale_y_continuous(breaks=seq(0, 400, 20)) + theme(plot.margin = unit(c(0.02,0.02,0.04,0.02), "npc")) + theme(axis.title.x=element_text(vjust=-xvjust)) + theme(axis.title.y=element_text(vjust=yvjust))) + } + garbage <- dev.off() + } else { + cat("WARNING: No data in ", filename_kmers, "\n"); + } } - garbage <- dev.off() } else { - cat("WARNING: Couldn't find", filename_lengths, "\n"); + cat("WARNING: Couldn't find", filename_kmers, "\n"); } } diff --git a/bin/nanook_plot_reference.R b/bin/nanook_plot_reference.R index 088546d..43ef583 100755 --- a/bin/nanook_plot_reference.R +++ b/bin/nanook_plot_reference.R @@ -38,19 +38,29 @@ if (format=="png") { # Plot GC% vs position data_gc_filename <- paste(analysisdir, "/", refid, "/", refid, "_gc.txt", sep=""); -data_gc = read.table(data_gc_filename, col.name=c("Position", "Coverage")) -if (format=="png") { - png_gc <- paste(graphsdir, "/", refid, "/", refid, "_gc.png", sep=""); - cat("Writing", png_gc, "\n"); - png(png_gc, width=1600, height=400) - print(ggplot(data_gc, aes(x=data_gc$Position, y=data_gc$Coverage)) + geom_line(color="black") + ggtitle("GC content") + theme(text = element_text(size=textsize)) + xlab("Position") + ylab("GC %") + scale_y_continuous(limits=c(0, 100)) + theme(plot.margin = unit(c(0.02,0.02,0.04,0.02), "npc")) + theme(axis.title.x=element_text(vjust=-xvjust)) + theme(axis.title.y=element_text(vjust=yvjust))) + +if (file.exists(data_gc_filename)) { + data_gc = read.table(data_gc_filename, col.name=c("Position", "Coverage")) + + if (nrow(data_gc) > 1) { + if (format=="png") { + png_gc <- paste(graphsdir, "/", refid, "/", refid, "_gc.png", sep=""); + cat("Writing", png_gc, "\n"); + png(png_gc, width=1600, height=400) + print(ggplot(data_gc, aes(x=data_gc$Position, y=data_gc$Coverage)) + geom_line(color="black") + ggtitle("GC content") + theme(text = element_text(size=textsize)) + xlab("Position") + ylab("GC %") + scale_y_continuous(limits=c(0, 100)) + theme(plot.margin = unit(c(0.02,0.02,0.04,0.02), "npc")) + theme(axis.title.x=element_text(vjust=-xvjust)) + theme(axis.title.y=element_text(vjust=yvjust))) + } else { + pdf_gc <- paste(graphsdir, "/", refid, "/", refid, "_gc.pdf", sep=""); + cat("Writing", pdf_gc, "\n"); + pdf(pdf_gc, width=16, height=4) + print(ggplot(data_gc, aes(x=data_gc$Position, y=data_gc$Coverage)) + geom_line(color="black") + ggtitle("GC content") + theme(text = element_text(size=textsize)) + xlab("Position") + ylab("GC %") + theme(plot.margin = unit(c(0.02,0.02,0.04,0.02), "npc")) + theme(axis.title.y=element_text(vjust=0.2)) + theme(axis.title.x=element_text(vjust=-0.2)) + scale_y_continuous(limits=c(0, 100)) + theme(plot.margin = unit(c(0.02,0.02,0.04,0.02), "npc")) + theme(axis.title.x=element_text(vjust=-xvjust)) + theme(axis.title.y=element_text(vjust=yvjust))) + } + garbage <- dev.off() + } else { + cat("WARNING: No data in ", data_gc_filename, "\n"); + } } else { - pdf_gc <- paste(graphsdir, "/", refid, "/", refid, "_gc.pdf", sep=""); - cat("Writing", pdf_gc, "\n"); - pdf(pdf_gc, width=16, height=4) - print(ggplot(data_gc, aes(x=data_gc$Position, y=data_gc$Coverage)) + geom_line(color="black") + ggtitle("GC content") + theme(text = element_text(size=textsize)) + xlab("Position") + ylab("GC %") + theme(plot.margin = unit(c(0.02,0.02,0.04,0.02), "npc")) + theme(axis.title.y=element_text(vjust=0.2)) + theme(axis.title.x=element_text(vjust=-0.2)) + scale_y_continuous(limits=c(0, 100)) + theme(plot.margin = unit(c(0.02,0.02,0.04,0.02), "npc")) + theme(axis.title.x=element_text(vjust=-xvjust)) + theme(axis.title.y=element_text(vjust=yvjust))) + cat("WARNING: Couldn't find", data_gc_filename, "\n"); } -garbage <- dev.off() listOfDataFrames <- NULL; count <-c(1); @@ -77,6 +87,8 @@ for (t in 1:3) { } } } + } else { + cat("WARNING: No data in ", data_perfect_cumulative_filename, "\n"); } } } @@ -96,18 +108,22 @@ for (t in 1:3) { if (file.exists(data_coverage_filename)) { data_coverage = read.table(data_coverage_filename, col.name=c("Position", "Coverage")) - if (format=="png") { - png_coverage <- paste(graphsdir, "/", refid, "/", refid, "_",type,"_coverage.png", sep=""); - cat("Writing", png_coverage, "\n"); - png(png_coverage, width=1600, height=400) - print(ggplot(data_coverage, aes(x=data_coverage$Position, y=data_coverage$Coverage)) + geom_line(color=colourcode) + ggtitle(type) + theme(text = element_text(size=textsize)) + xlab("Position") + ylab("Mean coverage") + expand_limits(y = 0) + theme(plot.margin = unit(c(0.02,0.02,0.04,0.02), "npc")) + theme(axis.title.x=element_text(vjust=-xvjust)) + theme(axis.title.y=element_text(vjust=yvjust))) + if (nrow(data_coverage) > 0) { + if (format=="png") { + png_coverage <- paste(graphsdir, "/", refid, "/", refid, "_",type,"_coverage.png", sep=""); + cat("Writing", png_coverage, "\n"); + png(png_coverage, width=1600, height=400) + print(ggplot(data_coverage, aes(x=data_coverage$Position, y=data_coverage$Coverage)) + geom_line(color=colourcode) + ggtitle(type) + theme(text = element_text(size=textsize)) + xlab("Position") + ylab("Mean coverage") + expand_limits(y = 0) + theme(plot.margin = unit(c(0.02,0.02,0.04,0.02), "npc")) + theme(axis.title.x=element_text(vjust=-xvjust)) + theme(axis.title.y=element_text(vjust=yvjust))) + } else { + pdf_coverage <- paste(graphsdir, "/", refid, "/", refid, "_",type,"_coverage.pdf", sep=""); + cat("Writing", pdf_coverage, "\n"); + pdf(pdf_coverage, width=16, height=4) + print(ggplot(data_coverage, aes(x=data_coverage$Position, y=data_coverage$Coverage)) + geom_line(color=colourcode) + ggtitle(type) + theme(text = element_text(size=textsize)) + xlab("Position") + ylab("Mean coverage") + theme(plot.margin = unit(c(0.02,0.02,0.04,0.02), "npc")) + theme(axis.title.y=element_text(vjust=0.2)) + theme(axis.title.x=element_text(vjust=-0.2)) + expand_limits(y = 0) + theme(plot.margin = unit(c(0.02,0.02,0.04,0.02), "npc")) + theme(axis.title.x=element_text(vjust=-xvjust)) + theme(axis.title.y=element_text(vjust=yvjust))) + } + garbage <- dev.off() } else { - pdf_coverage <- paste(graphsdir, "/", refid, "/", refid, "_",type,"_coverage.pdf", sep=""); - cat("Writing", pdf_coverage, "\n"); - pdf(pdf_coverage, width=16, height=4) - print(ggplot(data_coverage, aes(x=data_coverage$Position, y=data_coverage$Coverage)) + geom_line(color=colourcode) + ggtitle(type) + theme(text = element_text(size=textsize)) + xlab("Position") + ylab("Mean coverage") + theme(plot.margin = unit(c(0.02,0.02,0.04,0.02), "npc")) + theme(axis.title.y=element_text(vjust=0.2)) + theme(axis.title.x=element_text(vjust=-0.2)) + expand_limits(y = 0) + theme(plot.margin = unit(c(0.02,0.02,0.04,0.02), "npc")) + theme(axis.title.x=element_text(vjust=-xvjust)) + theme(axis.title.y=element_text(vjust=yvjust))) + cat("WARNING: No data in ", data_coverage_filename, "\n"); } - garbage <- dev.off() } else { cat("WARNING: Couldn't find", data_coverage_filename, "\n"); } @@ -118,19 +134,22 @@ for (t in 1:3) { if (file.exists(data_perfect_cumulative_filename)) { cat("Reading", data_perfect_cumulative_filename, "\n"); data_perfect_cumulative = read.table(data_perfect_cumulative_filename, col.name=c("Size", "n", "Perfect")) - - if (format=="png") { - png_perfect_cumulative <- paste(graphsdir, "/", refid, "/", refid, "_",type,"_cumulative_perfect_kmers.png", sep=""); - cat("Writing", png_perfect_cumulative, "\n"); - png(png_perfect_cumulative, width=1200, height=800) - print(ggplot(data_perfect_cumulative, aes(x=data_perfect_cumulative$Size, y=data_perfect_cumulative$Perfect)) + geom_bar(stat="identity", width=0.7, fill=colourcode) + ggtitle(type) + theme(text = element_text(size=textsize)) + xlab("kmer size") + ylab("% reads with perfect kmer") + scale_x_continuous(limits=c(0, cum_maxk), breaks=seq(0,cum_maxk,by=50)) + theme(text = element_text(size=textsize)) + theme(plot.margin = unit(c(0.02,0.02,0.04,0.02), "npc")) + theme(axis.title.x=element_text(vjust=-xvjust)) + theme(axis.title.y=element_text(vjust=yvjust))) + if (nrow(data_perfect_cumulative) > 0) { + if (format=="png") { + png_perfect_cumulative <- paste(graphsdir, "/", refid, "/", refid, "_",type,"_cumulative_perfect_kmers.png", sep=""); + cat("Writing", png_perfect_cumulative, "\n"); + png(png_perfect_cumulative, width=1200, height=800) + print(ggplot(data_perfect_cumulative, aes(x=data_perfect_cumulative$Size, y=data_perfect_cumulative$Perfect)) + geom_bar(stat="identity", width=0.7, fill=colourcode) + ggtitle(type) + theme(text = element_text(size=textsize)) + xlab("kmer size") + ylab("% reads with perfect kmer") + scale_x_continuous(limits=c(0, cum_maxk), breaks=seq(0,cum_maxk,by=50)) + theme(text = element_text(size=textsize)) + theme(plot.margin = unit(c(0.02,0.02,0.04,0.02), "npc")) + theme(axis.title.x=element_text(vjust=-xvjust)) + theme(axis.title.y=element_text(vjust=yvjust))) + } else { + pdf_perfect_cumulative <- paste(graphsdir, "/", refid, "/", refid, "_",type,"_cumulative_perfect_kmers.pdf", sep=""); + cat("Writing", pdf_perfect_cumulative, "\n"); + pdf(pdf_perfect_cumulative, width=6, height=4) + print(ggplot(data_perfect_cumulative, aes(x=data_perfect_cumulative$Size, y=data_perfect_cumulative$Perfect)) + geom_bar(stat="identity", width=0.7, fill=colourcode) + ggtitle(type) + theme(text = element_text(size=textsize)) + xlab("kmer size") + ylab("% reads with perfect kmer") + theme(plot.margin = unit(c(0.02,0.02,0.04,0.02), "npc")) + theme(axis.title.y=element_text(vjust=0.2)) + theme(axis.title.x=element_text(vjust=-0.2)) + scale_x_continuous(limits=c(0, cum_maxk), breaks=seq(0,cum_maxk,by=50)) + theme(text = element_text(size=textsize)) + theme(plot.margin = unit(c(0.02,0.02,0.04,0.02), "npc")) + theme(axis.title.x=element_text(vjust=-xvjust)) + theme(axis.title.y=element_text(vjust=yvjust))) + } + garbage <- dev.off() } else { - pdf_perfect_cumulative <- paste(graphsdir, "/", refid, "/", refid, "_",type,"_cumulative_perfect_kmers.pdf", sep=""); - cat("Writing", pdf_perfect_cumulative, "\n"); - pdf(pdf_perfect_cumulative, width=6, height=4) - print(ggplot(data_perfect_cumulative, aes(x=data_perfect_cumulative$Size, y=data_perfect_cumulative$Perfect)) + geom_bar(stat="identity", width=0.7, fill=colourcode) + ggtitle(type) + theme(text = element_text(size=textsize)) + xlab("kmer size") + ylab("% reads with perfect kmer") + theme(plot.margin = unit(c(0.02,0.02,0.04,0.02), "npc")) + theme(axis.title.y=element_text(vjust=0.2)) + theme(axis.title.x=element_text(vjust=-0.2)) + scale_x_continuous(limits=c(0, cum_maxk), breaks=seq(0,cum_maxk,by=50)) + theme(text = element_text(size=textsize)) + theme(plot.margin = unit(c(0.02,0.02,0.04,0.02), "npc")) + theme(axis.title.x=element_text(vjust=-xvjust)) + theme(axis.title.y=element_text(vjust=yvjust))) + cat("WARNING: No data in ", data_perfect_cumulative_filename, "\n"); } - garbage <- dev.off() } else { cat("WARNING: Couldn't find", data_perfect_cumulative_filename, "\n"); } @@ -156,20 +175,23 @@ for (t in 1:3) { data_insertions_filename <- paste(analysisdir, "/", refid, "/", refid, "_",type,"_insertions.txt", sep=""); cat("Reading", data_insertions_filename, "\n"); if (file.exists(data_insertions_filename)) { - if (format=="png") { - png_insertions <- paste(graphsdir, "/", refid, "/", refid, "_",type,"_insertions.png", sep=""); - cat("Writing", png_insertions, "\n"); - png(png_insertions, width=1200, height=800) - data_insertions = read.table(data_insertions_filename, col.name=c("Size", "Percent")) - print(ggplot(data_insertions, aes(x=data_insertions$Size, y=data_insertions$Percent)) + geom_bar(stat="identity", fill=colourcode) + ggtitle(type) + theme(text = element_text(size=textsize)) + xlab("Insertion size") + ylab("%") + theme(plot.margin = unit(c(0.02,0.02,0.04,0.02), "npc")) + theme(axis.title.x=element_text(vjust=-xvjust)) + theme(axis.title.y=element_text(vjust=yvjust))) + data_insertions = read.table(data_insertions_filename, col.name=c("Size", "Percent")) + if (nrow(data_insertions) > 0) { + if (format=="png") { + png_insertions <- paste(graphsdir, "/", refid, "/", refid, "_",type,"_insertions.png", sep=""); + cat("Writing", png_insertions, "\n"); + png(png_insertions, width=1200, height=800) + print(ggplot(data_insertions, aes(x=data_insertions$Size, y=data_insertions$Percent)) + geom_bar(stat="identity", fill=colourcode) + ggtitle(type) + theme(text = element_text(size=textsize)) + xlab("Insertion size") + ylab("%") + theme(plot.margin = unit(c(0.02,0.02,0.04,0.02), "npc")) + theme(axis.title.x=element_text(vjust=-xvjust)) + theme(axis.title.y=element_text(vjust=yvjust))) + } else { + pdf_insertions <- paste(graphsdir, "/", refid, "/", refid, "_",type,"_insertions.pdf", sep=""); + cat("Writing", pdf_insertions, "\n"); + pdf(pdf_insertions, width=6, height=4) + print(ggplot(data_insertions, aes(x=data_insertions$Size, y=data_insertions$Percent)) + geom_bar(stat="identity", fill=colourcode) + ggtitle(type) + theme(text = element_text(size=textsize)) + xlab("Insertion size") + ylab("%") + theme(plot.margin = unit(c(0.02,0.02,0.04,0.02), "npc")) + theme(axis.title.y=element_text(vjust=0.2)) + theme(axis.title.x=element_text(vjust=-0.2)) + theme(plot.margin = unit(c(0.02,0.02,0.04,0.02), "npc")) + theme(axis.title.x=element_text(vjust=-xvjust)) + theme(axis.title.y=element_text(vjust=yvjust))) + } + garbage <- dev.off() } else { - pdf_insertions <- paste(graphsdir, "/", refid, "/", refid, "_",type,"_insertions.pdf", sep=""); - cat("Writing", pdf_insertions, "\n"); - pdf(pdf_insertions, width=6, height=4) - data_insertions = read.table(data_insertions_filename, col.name=c("Size", "Percent")) - print(ggplot(data_insertions, aes(x=data_insertions$Size, y=data_insertions$Percent)) + geom_bar(stat="identity", fill=colourcode) + ggtitle(type) + theme(text = element_text(size=textsize)) + xlab("Insertion size") + ylab("%") + theme(plot.margin = unit(c(0.02,0.02,0.04,0.02), "npc")) + theme(axis.title.y=element_text(vjust=0.2)) + theme(axis.title.x=element_text(vjust=-0.2)) + theme(plot.margin = unit(c(0.02,0.02,0.04,0.02), "npc")) + theme(axis.title.x=element_text(vjust=-xvjust)) + theme(axis.title.y=element_text(vjust=yvjust))) + cat("WARNING: No data in ", data_insertions_filename, "\n"); } - garbage <- dev.off() } else { cat("WARNING: Couldn't find", data_insertions_filename, "\n"); } @@ -178,20 +200,23 @@ for (t in 1:3) { data_deletions_filename <- paste(analysisdir, "/", refid, "/", refid, "_",type,"_deletions.txt", sep=""); cat("Reading", data_deletions_filename, "\n"); if (file.exists(data_deletions_filename)) { - if (format=="png") { - png_deletions <- paste(graphsdir, "/", refid, "/", refid, "_",type,"_deletions.png", sep=""); - cat("Writing", png_deletions, "\n"); - png(png_deletions, width=1200, height=800) - data_deletions = read.table(data_deletions_filename, col.name=c("Size", "Percent")) - print(ggplot(data_deletions, aes(x=data_deletions$Size, y=data_deletions$Percent)) + geom_bar(stat="identity", fill=colourcode) + ggtitle(type) + theme(text = element_text(size=textsize)) + xlab("Deletion size") + ylab("%") + theme(plot.margin = unit(c(0.02,0.02,0.04,0.02), "npc")) + theme(axis.title.x=element_text(vjust=-xvjust)) + theme(axis.title.y=element_text(vjust=yvjust))) + data_deletions = read.table(data_deletions_filename, col.name=c("Size", "Percent")) + if (nrow(data_deletions) > 0) { + if (format=="png") { + png_deletions <- paste(graphsdir, "/", refid, "/", refid, "_",type,"_deletions.png", sep=""); + cat("Writing", png_deletions, "\n"); + png(png_deletions, width=1200, height=800) + print(ggplot(data_deletions, aes(x=data_deletions$Size, y=data_deletions$Percent)) + geom_bar(stat="identity", fill=colourcode) + ggtitle(type) + theme(text = element_text(size=textsize)) + xlab("Deletion size") + ylab("%") + theme(plot.margin = unit(c(0.02,0.02,0.04,0.02), "npc")) + theme(axis.title.x=element_text(vjust=-xvjust)) + theme(axis.title.y=element_text(vjust=yvjust))) + } else { + pdf_deletions <- paste(graphsdir, "/", refid, "/", refid, "_",type,"_deletions.pdf", sep=""); + cat("Writing", pdf_deletions, "\n"); + pdf(pdf_deletions, width=6, height=4) + print(ggplot(data_deletions, aes(x=data_deletions$Size, y=data_deletions$Percent)) + geom_bar(stat="identity", fill=colourcode) + ggtitle(type) + theme(text = element_text(size=textsize)) + xlab("Deletion size") + ylab("%") + theme(plot.margin = unit(c(0.02,0.02,0.04,0.02), "npc")) + theme(axis.title.y=element_text(vjust=0.2)) + theme(axis.title.x=element_text(vjust=-0.2)) + theme(plot.margin = unit(c(0.02,0.02,0.04,0.02), "npc")) + theme(axis.title.x=element_text(vjust=-xvjust)) + theme(axis.title.y=element_text(vjust=yvjust))) + } + garbage <- dev.off() } else { - pdf_deletions <- paste(graphsdir, "/", refid, "/", refid, "_",type,"_deletions.pdf", sep=""); - cat("Writing", pdf_deletions, "\n"); - pdf(pdf_deletions, width=6, height=4) - data_deletions = read.table(data_deletions_filename, col.name=c("Size", "Percent")) - print(ggplot(data_deletions, aes(x=data_deletions$Size, y=data_deletions$Percent)) + geom_bar(stat="identity", fill=colourcode) + ggtitle(type) + theme(text = element_text(size=textsize)) + xlab("Deletion size") + ylab("%") + theme(plot.margin = unit(c(0.02,0.02,0.04,0.02), "npc")) + theme(axis.title.y=element_text(vjust=0.2)) + theme(axis.title.x=element_text(vjust=-0.2)) + theme(plot.margin = unit(c(0.02,0.02,0.04,0.02), "npc")) + theme(axis.title.x=element_text(vjust=-xvjust)) + theme(axis.title.y=element_text(vjust=yvjust))) + cat("WARNING: No data in ", data_deletions_filename, "\n"); } - garbage <- dev.off() } else { cat("WARNING: Couldn't find", data_deletions_filename, "\n"); } @@ -203,200 +228,203 @@ for (t in 1:3) { if (file.exists(input_filename)) { data_alignments = read.table(input_filename, header=TRUE); - - # Length vs Identity histograms - if (format=="png") { - identity_hist_png <- paste(graphsdir, "/", refid, "/", refid, "_",type,"_length_vs_identity_hist.png", sep="") - cat("Writing", identity_hist_png, "\n"); - png(identity_hist_png, width=1200, height=800) - print(ggplot(data_alignments, aes(x=data_alignments$QueryPercentIdentity)) + geom_histogram(fill=colourcode) + xlab("Read identity %") +ylab("Count") + ggtitle(type) + theme(text = element_text(size=textsize)) + theme(plot.margin = unit(c(0.02,0.02,0.04,0.02), "npc")) + theme(axis.title.x=element_text(vjust=-xvjust)) + theme(axis.title.y=element_text(vjust=yvjust)) ) - } else { - identity_hist_pdf <- paste(graphsdir, "/", refid, "/", refid, "_",type,"_length_vs_identity_hist.pdf", sep="") - cat("Writing", identity_hist_pdf, "\n"); - pdf(identity_hist_pdf, width=6, height=4) - print(ggplot(data_alignments, aes(x=data_alignments$QueryPercentIdentity)) + geom_histogram(fill=colourcode) + xlab("Read identity %") +ylab("Count") + ggtitle(type) + theme(text = element_text(size=textsize)) + theme(plot.margin = unit(c(0.02,0.02,0.04,0.02), "npc")) + theme(axis.title.x=element_text(vjust=-xvjust)) + theme(axis.title.y=element_text(vjust=yvjust))) - } - garbage <- dev.off() - - # GC histogram - if (format=="png") { - identity_hist_png <- paste(graphsdir, "/", refid, "/", refid, "_",type,"_GC_hist.png", sep="") - cat("Writing", identity_hist_png, "\n"); - png(identity_hist_png, width=1200, height=800) - print(ggplot(data_alignments, aes(x=data_alignments$QueryGC)) + geom_histogram(fill=colourcode, binwidth=1) + xlab("GC %") +ylab("Read count") + ggtitle(type) + theme(text = element_text(size=textsize)) + theme(plot.margin = unit(c(0.02,0.02,0.04,0.02), "npc")) + theme(axis.title.x=element_text(vjust=-xvjust)) + theme(axis.title.y=element_text(vjust=yvjust)) + scale_x_continuous(limits=c(0, 100)) ) - } else { - identity_hist_pdf <- paste(graphsdir, "/", refid, "/", refid, "_",type,"_GC_hist.pdf", sep="") - cat("Writing", identity_hist_pdf, "\n"); - pdf(identity_hist_pdf, width=6, height=4) - print(ggplot(data_alignments, aes(x=data_alignments$QueryGC)) + geom_histogram(fill=colourcode, binwidth = 1) + xlab("GC %") +ylab("Read count") + ggtitle(type) + theme(text = element_text(size=textsize)) + theme(plot.margin = unit(c(0.02,0.02,0.04,0.02), "npc")) + theme(axis.title.x=element_text(vjust=-xvjust)) + theme(axis.title.y=element_text(vjust=yvjust)) + scale_x_continuous(limits=c(0, 100))) - } - garbage <- dev.off() - - # Identity vs Length Scatter plots - if (format=="png") { - identity_scatter_pdf <- paste(graphsdir, "/", refid, "/", refid, "_",type,"_length_vs_identity_scatter.png", sep=""); - cat("Writing", identity_scatter_pdf, "\n"); - png(identity_scatter_pdf, width=1200, height=800) - print(ggplot(data_alignments, aes(x=data_alignments$QueryLength, y=data_alignments$QueryPercentIdentity)) + geom_point(shape=pointshape, size=pointsize, alpha=pointalpha, color=colourcode) + xlab("Length") +ylab("Read identity %") + ggtitle(type) + theme(text = element_text(size=textsize)) + scale_y_continuous(limits=c(0, 100)) + theme(plot.margin = unit(c(0.02,0.02,0.04,0.02), "npc")) + theme(axis.title.x=element_text(vjust=-xvjust)) + theme(axis.title.y=element_text(vjust=yvjust))) - #grid.edit("geom_point.points", grep = TRUE, gp = gpar(lwd = pointwidth)) - } else { - identity_scatter_pdf <- paste(graphsdir, "/", refid, "/", refid, "_",type,"_length_vs_identity_scatter.pdf", sep=""); - cat("Writing", identity_scatter_pdf, "\n"); - pdf(identity_scatter_pdf, width=6, height=4) - print(ggplot(data_alignments, aes(x=data_alignments$QueryLength, y=data_alignments$QueryPercentIdentity)) + geom_point(shape=pointshape, alpha=pointalpha, color=colourcode) + xlab("Length") +ylab("Read identity %") + ggtitle(type) + theme(text = element_text(size=textsize)) + scale_y_continuous(limits=c(0, 100)) + theme(plot.margin = unit(c(0.02,0.02,0.04,0.02), "npc")) + theme(axis.title.x=element_text(vjust=-xvjust)) + theme(axis.title.y=element_text(vjust=yvjust))) - } - garbage <- dev.off() - - # Identity vs Length heatmap - if (format=="png") { - identity_heatmap_png <- paste(graphsdir, "/", refid, "/", refid, "_",type,"_length_vs_identity_heatmap.png", sep=""); - cat("Writing", identity_heatmap_png, "\n"); - png(identity_heatmap_png, width=1200, height=800) - print(ggplot(data_alignments, aes(x=data_alignments$QueryLength, y=data_alignments$QueryPercentIdentity)) + geom_bin2d(drop=TRUE, binwidth=c(500,2)) + xlab("Length") +ylab("Read identity %") + ggtitle(type) + theme(text = element_text(size=(textsize*0.75))) + scale_y_continuous(limits=c(0, 100)) + theme(plot.margin = unit(c(0.02,0.02,0.04,0.02), "npc")) + theme(axis.title.x=element_text(vjust=-xvjust)) + theme(axis.title.y=element_text(vjust=yvjust)) + scale_fill_gradientn(colours=rev(rainbow(n=30, end=4/6)), na.value='blue')) - #limits=c(0,50), breaks=seq(0, 40, by=10), colours=rainbow(4) - } else { - identity_heatmap_pdf <- paste(graphsdir, "/", refid, "/", refid, "_",type,"_length_vs_identity_heatmap.pdf", sep=""); - cat("Writing", identity_heatmap_pdf, "\n"); - pdf(identity_heatmap_pdf, width=6, height=4) - print(ggplot(data_alignments, aes(x=data_alignments$QueryLength, y=data_alignments$QueryPercentIdentity)) + geom_bin2d(drop=TRUE, binwidth=c(500,2)) + xlab("Length") +ylab("Read identity %") + ggtitle(type) + theme(text = element_text(size=(textsize*0.75))) + scale_y_continuous(limits=c(0, 100)) + theme(plot.margin = unit(c(0.02,0.02,0.04,0.02), "npc")) + theme(axis.title.x=element_text(vjust=-xvjust)) + theme(axis.title.y=element_text(vjust=yvjust))+ scale_fill_gradientn(colours=rev(rainbow(n=30, end=4/6)), na.value='blue')) - } - garbage <- dev.off() - - # Identity vs Length heatmap zoomed - if (format=="png") { - identity_heatmap_png <- paste(graphsdir, "/", refid, "/", refid, "_",type,"_length_vs_identity_heatmap_zoom.png", sep=""); - cat("Writing", identity_heatmap_png, "\n"); - png(identity_heatmap_png, width=1200, height=800) - print(ggplot(data_alignments, aes(x=data_alignments$QueryLength, y=data_alignments$QueryPercentIdentity)) + geom_bin2d(drop=TRUE, binwidth=c(500,1)) + xlab("Length") +ylab("Read identity %") + ggtitle(type) + theme(text = element_text(size=(textsize*0.75))) + theme(plot.margin = unit(c(0.02,0.02,0.04,0.02), "npc")) + theme(axis.title.x=element_text(vjust=-xvjust)) + theme(axis.title.y=element_text(vjust=yvjust)) + scale_fill_gradientn(colours=rev(rainbow(n=30, end=4/6)), na.value='blue')+ scale_y_continuous(limits=c(60, 100))) - #limits=c(0,50), breaks=seq(0, 40, by=10), colours=rainbow(4) - } else { - identity_heatmap_pdf <- paste(graphsdir, "/", refid, "/", refid, "_",type,"_length_vs_identity_heatmap_zoom.pdf", sep=""); - cat("Writing", identity_heatmap_pdf, "\n"); - pdf(identity_heatmap_pdf, width=6, height=4) - print(ggplot(data_alignments, aes(x=data_alignments$QueryLength, y=data_alignments$QueryPercentIdentity)) + geom_bin2d(drop=TRUE, binwidth=c(500,1)) + xlab("Length") +ylab("Read identity %") + ggtitle(type) + theme(text = element_text(size=(textsize*0.75))) + theme(plot.margin = unit(c(0.02,0.02,0.04,0.02), "npc")) + theme(axis.title.x=element_text(vjust=-xvjust)) + theme(axis.title.y=element_text(vjust=yvjust))+ scale_fill_gradientn(colours=rev(rainbow(n=30, end=4/6)), na.value='blue')+ scale_y_continuous(limits=c(60, 100))) - } - garbage <- dev.off() - - # Identity boxplot - listOfDataFrames[[count]] <- data.frame(Readset=type, Variable=data_alignments$QueryPercentIdentity); - count <- count + 1; - - # Alignment identity vs. Fraction of read aligned scatter plots - if (format=="png") { - aid_scatter_png <- paste(graphsdir, "/", refid, "/", refid, "_",type,"_read_fraction_vs_alignment_identity_scatter.png", sep=""); - cat("Writing", aid_scatter_png, "\n"); - png(aid_scatter_png, width=1200, height=800) - print(ggplot(data_alignments, aes(x=data_alignments$PercentQueryAligned, y=data_alignments$AlignmentPercentIdentity)) + geom_point(shape=pointshape, size=pointsize, alpha=pointalpha, color=colourcode) + xlab("Percentage of read aligned") +ylab("Alignment identity %") + ggtitle(type) + theme(text = element_text(size=textsize)) + scale_x_continuous(limits=c(0, 105)) + scale_y_continuous(limits=c(0, 100)) + theme(plot.margin = unit(c(0.02,0.02,0.04,0.02), "npc")) + theme(axis.title.x=element_text(vjust=-xvjust)) + theme(axis.title.y=element_text(vjust=yvjust)) ) - #grid.edit("geom_point.points", grep = TRUE, gp = gpar(lwd = pointwidth)) - } else { - aid_scatter_pdf <- paste(graphsdir, "/", refid, "/", refid, "_",type,"_read_fraction_vs_alignment_identity_scatter.pdf", sep=""); - cat("Writing", aid_scatter_pdf, "\n"); - pdf(aid_scatter_pdf, width=6, height=4) - print(ggplot(data_alignments, aes(x=data_alignments$PercentQueryAligned, y=data_alignments$AlignmentPercentIdentity)) + geom_point(shape=pointshape, alpha=0.4, color=colourcode) + xlab("Percentage of read aligned") +ylab("Alignment identity %") + ggtitle(type) + theme(text = element_text(size=textsize)) + scale_x_continuous(limits=c(0, 105)) + scale_y_continuous(limits=c(0, 100)) + theme(plot.margin = unit(c(0.02,0.02,0.04,0.02), "npc")) + theme(axis.title.x=element_text(vjust=-xvjust)) + theme(axis.title.y=element_text(vjust=yvjust))) - } - garbage <- dev.off() - - # Query identity vs. Fraction of read aligned scatter plots - if (format=="png") { - qid_scatter_png <- paste(graphsdir, "/", refid, "/", refid, "_",type,"_read_fraction_vs_query_identity_scatter.png", sep=""); - cat("Writing", qid_scatter_png, "\n"); - png(qid_scatter_png, width=1200, height=800) - print(ggplot(data_alignments, aes(x=data_alignments$PercentQueryAligned, y=data_alignments$QueryPercentIdentity)) + geom_point(shape=pointshape, size=pointsize, alpha=pointalpha, color=colourcode) + xlab("Percentage of read aligned") +ylab("Alignment identity %") + ggtitle(type) + theme(text = element_text(size=textsize)) + scale_x_continuous(limits=c(0, 105)) + scale_y_continuous(limits=c(0, 100)) + theme(plot.margin = unit(c(0.02,0.02,0.04,0.02), "npc")) + theme(axis.title.x=element_text(vjust=-xvjust)) + theme(axis.title.y=element_text(vjust=yvjust))) - #grid.edit("geom_point.points", grep = TRUE, gp = gpar(lwd = pointwidth)) - } else { - qid_scatter_pdf <- paste(graphsdir, "/", refid, "/", refid, "_",type,"_read_fraction_vs_query_identity_scatter.pdf", sep=""); - cat("Writing", qid_scatter_pdf, "\n"); - pdf(qid_scatter_pdf, width=6, height=4) - print(ggplot(data_alignments, aes(x=data_alignments$PercentQueryAligned, y=data_alignments$QueryPercentIdentity)) + geom_point(shape=pointshape, alpha=pointalpha, color=colourcode) + xlab("Percentage of read aligned") +ylab("Alignment identity %") + ggtitle(type) + theme(text = element_text(size=textsize)) + scale_x_continuous(limits=c(0, 105)) + scale_y_continuous(limits=c(0, 100)) + theme(plot.margin = unit(c(0.02,0.02,0.04,0.02), "npc")) + theme(axis.title.x=element_text(vjust=-xvjust)) + theme(axis.title.y=element_text(vjust=yvjust))) - } - garbage <- dev.off() - - # Best perfect sequence vs. length scatters - if (format=="png") { - best_perf_scatter_png <- paste(graphsdir, "/", refid, "/", refid, "_",type,"_longest_perfect_vs_length_scatter.png", sep=""); - cat("Writing", best_perf_scatter_png, "\n"); - png(best_perf_scatter_png, width=1200, height=800) - print(ggplot(data_alignments, aes(x=data_alignments$QueryLength, y=data_alignments$LongestPerfectKmer)) + geom_point(shape=pointshape, size=pointsize, alpha=pointalpha, color=colourcode) + xlab("Read length") +ylab("Longest perfect kmer") + ggtitle(type) + theme(text = element_text(size=textsize)) + theme(plot.margin = unit(c(0.02,0.02,0.04,0.02), "npc")) + theme(axis.title.x=element_text(vjust=-xvjust)) + theme(axis.title.y=element_text(vjust=yvjust))) - #grid.edit("geom_point.points", grep = TRUE, gp = gpar(lwd = pointwidth)) - } else { - best_perf_scatter_pdf <- paste(graphsdir, "/", refid, "/", refid, "_",type,"_longest_perfect_vs_length_scatter.pdf", sep=""); - cat("Writing", best_perf_scatter_pdf, "\n"); - pdf(best_perf_scatter_pdf, width=6, height=4) - print(ggplot(data_alignments, aes(x=data_alignments$QueryLength, y=data_alignments$LongestPerfectKmer)) + geom_point(shape=pointshape, alpha=pointalpha, color=colourcode) + xlab("Read length") +ylab("Longest perfect kmer") + ggtitle(type) + theme(text = element_text(size=textsize)) + theme(plot.margin = unit(c(0.02,0.02,0.04,0.02), "npc")) + theme(axis.title.x=element_text(vjust=-xvjust)) + theme(axis.title.y=element_text(vjust=yvjust))) - } - garbage <- dev.off() - - # Best perfect sequence vs. length scatters zoomed - if (format=="png") { - best_perf_zoom_scatter_png <- paste(graphsdir, "/", refid, "/", refid, "_",type,"_longest_perfect_vs_length_zoom_scatter.png", sep=""); - cat("Writing", best_perf_zoom_scatter_png, "\n"); - png(best_perf_zoom_scatter_png, width=1200, height=800) - print(ggplot(data_alignments, aes(x=data_alignments$QueryLength, y=data_alignments$LongestPerfectKmer)) + geom_point(shape=pointshape, size=pointsize, alpha=pointalpha, color=colourcode) + xlab("Read length") +ylab("Longest perfect kmer") + ggtitle(type) + theme(text = element_text(size=textsize)) + scale_x_continuous(limits=c(0, 10000)) + theme(plot.margin = unit(c(0.02,0.02,0.04,0.02), "npc")) + theme(axis.title.x=element_text(vjust=-xvjust)) + theme(axis.title.y=element_text(vjust=yvjust))) - #grid.edit("geom_point.points", grep = TRUE, gp = gpar(lwd = pointwidth)) - } else { - best_perf_zoom_scatter_pdf <- paste(graphsdir, "/", refid, "/", refid, "_",type,"_longest_perfect_vs_length_zoom_scatter.pdf", sep=""); - cat("Writing", best_perf_zoom_scatter_pdf, "\n"); - pdf(best_perf_zoom_scatter_pdf, width=6, height=4) - print(ggplot(data_alignments, aes(x=data_alignments$QueryLength, y=data_alignments$LongestPerfectKmer)) + geom_point(shape=pointshape, alpha=pointalpha, color=colourcode) + xlab("Read length") +ylab("Longest perfect kmer") + ggtitle(type) + theme(text = element_text(size=textsize)) + scale_x_continuous(limits=c(0, 10000)) + theme(plot.margin = unit(c(0.02,0.02,0.04,0.02), "npc")) + theme(axis.title.x=element_text(vjust=-xvjust)) + theme(axis.title.y=element_text(vjust=yvjust))) - } - garbage <- dev.off() - - # Plot %reads vs best perfect kmer - cat(data_alignments$LongestPerfectKmer); - - hdf <- hist(breaks=seq(0,maxk,by=10), x=data_alignments$LongestPerfectKmer, plot=FALSE, right=FALSE); # bins are 0-9, 10-19, 20-29 etc. - hdf$density = hdf$counts/sum(hdf$counts)*100 - tdf <- data.frame(Pos=hdf$mids, Counts=hdf$density); - - if (format=="png") { - png_perfect_best <- paste(graphsdir, "/", refid, "/", refid, "_",type,"_best_perfect_kmers.png", sep=""); - cat("Writing", png_perfect_best, "\n"); - png(png_perfect_best, width=1200, height=800) - print(ggplot(tdf, aes(Pos, Counts)) + geom_bar(stat="identity", fill=colourcode) + ggtitle(type) + theme(text = element_text(size=textsize)) + xlab("Best perfect kmer") + ylab("% reads") + theme(plot.margin = unit(c(0.02,0.02,0.04,0.02), "npc")) + theme(axis.title.y=element_text(vjust=0.2)) + theme(axis.title.x=element_text(vjust=-0.2)) + scale_x_continuous(limits=c(0, cum_maxk), breaks=seq(0,cum_maxk,by=50)) + theme(text = element_text(size=textsize)) + theme(plot.margin = unit(c(0.02,0.02,0.04,0.02), "npc")) + theme(axis.title.x=element_text(vjust=-xvjust)) + theme(axis.title.y=element_text(vjust=yvjust))) - } else { - pdf_perfect_best <- paste(graphsdir, "/", refid, "/", refid, "_",type,"_best_perfect_kmers.pdf", sep=""); - cat("Writing", pdf_perfect_best, "\n"); - pdf(pdf_perfect_best, width=6, height=4) - print(ggplot(tdf, aes(Pos, Counts)) + geom_bar(stat="identity", fill=colourcode) + ggtitle(type) + theme(text = element_text(size=textsize)) + xlab("Best perfect kmer") + ylab("% reads") + theme(plot.margin = unit(c(0.02,0.02,0.04,0.02), "npc")) + theme(axis.title.y=element_text(vjust=0.2)) + theme(axis.title.x=element_text(vjust=-0.2)) + scale_x_continuous(limits=c(0, cum_maxk), breaks=seq(0,cum_maxk,by=50)) + theme(text = element_text(size=textsize)) + theme(plot.margin = unit(c(0.02,0.02,0.04,0.02), "npc")) + theme(axis.title.x=element_text(vjust=-xvjust)) + theme(axis.title.y=element_text(vjust=yvjust))) - } - garbage <- dev.off() - - - # Number of perfect 21mers verses length scatter - if (format=="png") { - nk21_scatter_png <- paste(graphsdir, "/", refid, "/", refid, "_",type,"_nk21_vs_length_scatter.png", sep=""); - cat("Writing", nk21_scatter_png, "\n"); - png(nk21_scatter_png, width=1200, height=800) - print(ggplot(data_alignments, aes(x=data_alignments$QueryLength, y=data_alignments$nk21)) + geom_point(shape=pointshape, size=pointsize, alpha=pointalpha, color=colourcode) + xlab("Read length") +ylab("Number of perfect 21mers") + ggtitle(type) + theme(text = element_text(size=textsize)) + theme(plot.margin = unit(c(0.02,0.02,0.04,0.02), "npc")) + theme(axis.title.x=element_text(vjust=-xvjust)) + theme(axis.title.y=element_text(vjust=yvjust))) - #grid.edit("geom_point.points", grep = TRUE, gp = gpar(lwd = pointwidth)) - } else { - nk21_scatter_pdf <- paste(graphsdir, "/", refid, "/", refid, "_",type,"_nk21_vs_length_scatter.pdf", sep=""); - cat("Writing", nk21_scatter_pdf, "\n"); - pdf(nk21_scatter_pdf, width=6, height=4) - print(ggplot(data_alignments, aes(x=data_alignments$QueryLength, y=data_alignments$nk21)) + geom_point(shape=pointshape, alpha=pointalpha, color=colourcode) + xlab("Read length") +ylab("Number of perfect 21mers") + ggtitle(type) + theme(text = element_text(size=textsize)) + theme(plot.margin = unit(c(0.02,0.02,0.04,0.02), "npc")) + theme(axis.title.x=element_text(vjust=-xvjust)) + theme(axis.title.y=element_text(vjust=yvjust))) - } - garbage <- dev.off() - - # Mean perfect sequence vs. length scatters - #mean_perf_scatter_pdf <- paste(graphsdir, "/", refid, "/", refid, "_",type,"_mean_perfect_vs_length_scatter.pdf", sep=""); - #pdf(mean_perf_scatter_pdf, height=4, width=6) - #print(ggplot(data_alignments, aes(x=data_alignments$QueryLength, y=data_alignments$MeanPerfectKmer), xlab="Read length") + geom_point(shape=pointshape, alpha=pointalpha) + xlab("Read length") +ylab("Mean perfect kmer") + ggtitle(type) + theme(text = element_text(size=textsize)) + scale_x_continuous(limits=c(0, 10000))) - #garbage <- dev.off() - - # Percentage of read aligned vs read length - if (format=="png") { - output_png <- paste(graphsdir, "/", refid, "/", refid, "_",type,"_percent_aligned_vs_length_scatter.png", sep=""); - cat("Writing", output_png, "\n"); - png(output_png, width=1200, height=800) - print(ggplot(data_alignments, aes(x=data_alignments$QueryLength, y=data_alignments$PercentQueryAligned)) + geom_point(shape=pointshape, size=pointsize, alpha=pointalpha, color=colourcode) + xlab("Read length") +ylab("Percentage of read aligned") + ggtitle(type) + theme(text = element_text(size=textsize)) + theme(plot.margin = unit(c(0.02,0.02,0.04,0.02), "npc")) + theme(axis.title.x=element_text(vjust=-xvjust)) + theme(axis.title.y=element_text(vjust=yvjust))) - #grid.edit("geom_point.points", grep = TRUE, gp = gpar(lwd = pointwidth)) + if (nrow(data_alignments) > 1) { + # Length vs Identity histograms + if (format=="png") { + identity_hist_png <- paste(graphsdir, "/", refid, "/", refid, "_",type,"_length_vs_identity_hist.png", sep="") + cat("Writing", identity_hist_png, "\n"); + png(identity_hist_png, width=1200, height=800) + print(ggplot(data_alignments, aes(x=data_alignments$QueryPercentIdentity)) + geom_histogram(fill=colourcode) + xlab("Read identity %") +ylab("Count") + ggtitle(type) + theme(text = element_text(size=textsize)) + theme(plot.margin = unit(c(0.02,0.02,0.04,0.02), "npc")) + theme(axis.title.x=element_text(vjust=-xvjust)) + theme(axis.title.y=element_text(vjust=yvjust)) ) + } else { + identity_hist_pdf <- paste(graphsdir, "/", refid, "/", refid, "_",type,"_length_vs_identity_hist.pdf", sep="") + cat("Writing", identity_hist_pdf, "\n"); + pdf(identity_hist_pdf, width=6, height=4) + print(ggplot(data_alignments, aes(x=data_alignments$QueryPercentIdentity)) + geom_histogram(fill=colourcode) + xlab("Read identity %") +ylab("Count") + ggtitle(type) + theme(text = element_text(size=textsize)) + theme(plot.margin = unit(c(0.02,0.02,0.04,0.02), "npc")) + theme(axis.title.x=element_text(vjust=-xvjust)) + theme(axis.title.y=element_text(vjust=yvjust))) + } + garbage <- dev.off() + + # GC histogram + if (format=="png") { + identity_hist_png <- paste(graphsdir, "/", refid, "/", refid, "_",type,"_GC_hist.png", sep="") + cat("Writing", identity_hist_png, "\n"); + png(identity_hist_png, width=1200, height=800) + print(ggplot(data_alignments, aes(x=data_alignments$QueryGC)) + geom_histogram(fill=colourcode, binwidth=1) + xlab("GC %") +ylab("Read count") + ggtitle(type) + theme(text = element_text(size=textsize)) + theme(plot.margin = unit(c(0.02,0.02,0.04,0.02), "npc")) + theme(axis.title.x=element_text(vjust=-xvjust)) + theme(axis.title.y=element_text(vjust=yvjust)) + scale_x_continuous(limits=c(0, 100)) ) + } else { + identity_hist_pdf <- paste(graphsdir, "/", refid, "/", refid, "_",type,"_GC_hist.pdf", sep="") + cat("Writing", identity_hist_pdf, "\n"); + pdf(identity_hist_pdf, width=6, height=4) + print(ggplot(data_alignments, aes(x=data_alignments$QueryGC)) + geom_histogram(fill=colourcode, binwidth = 1) + xlab("GC %") +ylab("Read count") + ggtitle(type) + theme(text = element_text(size=textsize)) + theme(plot.margin = unit(c(0.02,0.02,0.04,0.02), "npc")) + theme(axis.title.x=element_text(vjust=-xvjust)) + theme(axis.title.y=element_text(vjust=yvjust)) + scale_x_continuous(limits=c(0, 100))) + } + garbage <- dev.off() + + # Identity vs Length Scatter plots + if (format=="png") { + identity_scatter_pdf <- paste(graphsdir, "/", refid, "/", refid, "_",type,"_length_vs_identity_scatter.png", sep=""); + cat("Writing", identity_scatter_pdf, "\n"); + png(identity_scatter_pdf, width=1200, height=800) + print(ggplot(data_alignments, aes(x=data_alignments$QueryLength, y=data_alignments$QueryPercentIdentity)) + geom_point(shape=pointshape, size=pointsize, alpha=pointalpha, color=colourcode) + xlab("Length") +ylab("Read identity %") + ggtitle(type) + theme(text = element_text(size=textsize)) + scale_y_continuous(limits=c(0, 100)) + theme(plot.margin = unit(c(0.02,0.02,0.04,0.02), "npc")) + theme(axis.title.x=element_text(vjust=-xvjust)) + theme(axis.title.y=element_text(vjust=yvjust))) + #grid.edit("geom_point.points", grep = TRUE, gp = gpar(lwd = pointwidth)) + } else { + identity_scatter_pdf <- paste(graphsdir, "/", refid, "/", refid, "_",type,"_length_vs_identity_scatter.pdf", sep=""); + cat("Writing", identity_scatter_pdf, "\n"); + pdf(identity_scatter_pdf, width=6, height=4) + print(ggplot(data_alignments, aes(x=data_alignments$QueryLength, y=data_alignments$QueryPercentIdentity)) + geom_point(shape=pointshape, alpha=pointalpha, color=colourcode) + xlab("Length") +ylab("Read identity %") + ggtitle(type) + theme(text = element_text(size=textsize)) + scale_y_continuous(limits=c(0, 100)) + theme(plot.margin = unit(c(0.02,0.02,0.04,0.02), "npc")) + theme(axis.title.x=element_text(vjust=-xvjust)) + theme(axis.title.y=element_text(vjust=yvjust))) + } + garbage <- dev.off() + + # Identity vs Length heatmap + if (format=="png") { + identity_heatmap_png <- paste(graphsdir, "/", refid, "/", refid, "_",type,"_length_vs_identity_heatmap.png", sep=""); + cat("Writing", identity_heatmap_png, "\n"); + png(identity_heatmap_png, width=1200, height=800) + print(ggplot(data_alignments, aes(x=data_alignments$QueryLength, y=data_alignments$QueryPercentIdentity)) + geom_bin2d(drop=TRUE, binwidth=c(500,2)) + xlab("Length") +ylab("Read identity %") + ggtitle(type) + theme(text = element_text(size=(textsize*0.75))) + scale_y_continuous(limits=c(0, 100)) + theme(plot.margin = unit(c(0.02,0.02,0.04,0.02), "npc")) + theme(axis.title.x=element_text(vjust=-xvjust)) + theme(axis.title.y=element_text(vjust=yvjust)) + scale_fill_gradientn(colours=rev(rainbow(n=30, end=4/6)), na.value='blue')) + #limits=c(0,50), breaks=seq(0, 40, by=10), colours=rainbow(4) + } else { + identity_heatmap_pdf <- paste(graphsdir, "/", refid, "/", refid, "_",type,"_length_vs_identity_heatmap.pdf", sep=""); + cat("Writing", identity_heatmap_pdf, "\n"); + pdf(identity_heatmap_pdf, width=6, height=4) + print(ggplot(data_alignments, aes(x=data_alignments$QueryLength, y=data_alignments$QueryPercentIdentity)) + geom_bin2d(drop=TRUE, binwidth=c(500,2)) + xlab("Length") +ylab("Read identity %") + ggtitle(type) + theme(text = element_text(size=(textsize*0.75))) + scale_y_continuous(limits=c(0, 100)) + theme(plot.margin = unit(c(0.02,0.02,0.04,0.02), "npc")) + theme(axis.title.x=element_text(vjust=-xvjust)) + theme(axis.title.y=element_text(vjust=yvjust))+ scale_fill_gradientn(colours=rev(rainbow(n=30, end=4/6)), na.value='blue')) + } + garbage <- dev.off() + + # Identity vs Length heatmap zoomed + if (format=="png") { + identity_heatmap_png <- paste(graphsdir, "/", refid, "/", refid, "_",type,"_length_vs_identity_heatmap_zoom.png", sep=""); + cat("Writing", identity_heatmap_png, "\n"); + png(identity_heatmap_png, width=1200, height=800) + print(ggplot(data_alignments, aes(x=data_alignments$QueryLength, y=data_alignments$QueryPercentIdentity)) + geom_bin2d(drop=TRUE, binwidth=c(500,1)) + xlab("Length") +ylab("Read identity %") + ggtitle(type) + theme(text = element_text(size=(textsize*0.75))) + theme(plot.margin = unit(c(0.02,0.02,0.04,0.02), "npc")) + theme(axis.title.x=element_text(vjust=-xvjust)) + theme(axis.title.y=element_text(vjust=yvjust)) + scale_fill_gradientn(colours=rev(rainbow(n=30, end=4/6)), na.value='blue')+ scale_y_continuous(limits=c(60, 100))) + #limits=c(0,50), breaks=seq(0, 40, by=10), colours=rainbow(4) + } else { + identity_heatmap_pdf <- paste(graphsdir, "/", refid, "/", refid, "_",type,"_length_vs_identity_heatmap_zoom.pdf", sep=""); + cat("Writing", identity_heatmap_pdf, "\n"); + pdf(identity_heatmap_pdf, width=6, height=4) + print(ggplot(data_alignments, aes(x=data_alignments$QueryLength, y=data_alignments$QueryPercentIdentity)) + geom_bin2d(drop=TRUE, binwidth=c(500,1)) + xlab("Length") +ylab("Read identity %") + ggtitle(type) + theme(text = element_text(size=(textsize*0.75))) + theme(plot.margin = unit(c(0.02,0.02,0.04,0.02), "npc")) + theme(axis.title.x=element_text(vjust=-xvjust)) + theme(axis.title.y=element_text(vjust=yvjust))+ scale_fill_gradientn(colours=rev(rainbow(n=30, end=4/6)), na.value='blue')+ scale_y_continuous(limits=c(60, 100))) + } + garbage <- dev.off() + + # Identity boxplot + listOfDataFrames[[count]] <- data.frame(Readset=type, Variable=data_alignments$QueryPercentIdentity); + count <- count + 1; + + # Alignment identity vs. Fraction of read aligned scatter plots + if (format=="png") { + aid_scatter_png <- paste(graphsdir, "/", refid, "/", refid, "_",type,"_read_fraction_vs_alignment_identity_scatter.png", sep=""); + cat("Writing", aid_scatter_png, "\n"); + png(aid_scatter_png, width=1200, height=800) + print(ggplot(data_alignments, aes(x=data_alignments$PercentQueryAligned, y=data_alignments$AlignmentPercentIdentity)) + geom_point(shape=pointshape, size=pointsize, alpha=pointalpha, color=colourcode) + xlab("Percentage of read aligned") +ylab("Alignment identity %") + ggtitle(type) + theme(text = element_text(size=textsize)) + scale_x_continuous(limits=c(0, 105)) + scale_y_continuous(limits=c(0, 100)) + theme(plot.margin = unit(c(0.02,0.02,0.04,0.02), "npc")) + theme(axis.title.x=element_text(vjust=-xvjust)) + theme(axis.title.y=element_text(vjust=yvjust)) ) + #grid.edit("geom_point.points", grep = TRUE, gp = gpar(lwd = pointwidth)) + } else { + aid_scatter_pdf <- paste(graphsdir, "/", refid, "/", refid, "_",type,"_read_fraction_vs_alignment_identity_scatter.pdf", sep=""); + cat("Writing", aid_scatter_pdf, "\n"); + pdf(aid_scatter_pdf, width=6, height=4) + print(ggplot(data_alignments, aes(x=data_alignments$PercentQueryAligned, y=data_alignments$AlignmentPercentIdentity)) + geom_point(shape=pointshape, alpha=0.4, color=colourcode) + xlab("Percentage of read aligned") +ylab("Alignment identity %") + ggtitle(type) + theme(text = element_text(size=textsize)) + scale_x_continuous(limits=c(0, 105)) + scale_y_continuous(limits=c(0, 100)) + theme(plot.margin = unit(c(0.02,0.02,0.04,0.02), "npc")) + theme(axis.title.x=element_text(vjust=-xvjust)) + theme(axis.title.y=element_text(vjust=yvjust))) + } + garbage <- dev.off() + + # Query identity vs. Fraction of read aligned scatter plots + if (format=="png") { + qid_scatter_png <- paste(graphsdir, "/", refid, "/", refid, "_",type,"_read_fraction_vs_query_identity_scatter.png", sep=""); + cat("Writing", qid_scatter_png, "\n"); + png(qid_scatter_png, width=1200, height=800) + print(ggplot(data_alignments, aes(x=data_alignments$PercentQueryAligned, y=data_alignments$QueryPercentIdentity)) + geom_point(shape=pointshape, size=pointsize, alpha=pointalpha, color=colourcode) + xlab("Percentage of read aligned") +ylab("Alignment identity %") + ggtitle(type) + theme(text = element_text(size=textsize)) + scale_x_continuous(limits=c(0, 105)) + scale_y_continuous(limits=c(0, 100)) + theme(plot.margin = unit(c(0.02,0.02,0.04,0.02), "npc")) + theme(axis.title.x=element_text(vjust=-xvjust)) + theme(axis.title.y=element_text(vjust=yvjust))) + #grid.edit("geom_point.points", grep = TRUE, gp = gpar(lwd = pointwidth)) + } else { + qid_scatter_pdf <- paste(graphsdir, "/", refid, "/", refid, "_",type,"_read_fraction_vs_query_identity_scatter.pdf", sep=""); + cat("Writing", qid_scatter_pdf, "\n"); + pdf(qid_scatter_pdf, width=6, height=4) + print(ggplot(data_alignments, aes(x=data_alignments$PercentQueryAligned, y=data_alignments$QueryPercentIdentity)) + geom_point(shape=pointshape, alpha=pointalpha, color=colourcode) + xlab("Percentage of read aligned") +ylab("Alignment identity %") + ggtitle(type) + theme(text = element_text(size=textsize)) + scale_x_continuous(limits=c(0, 105)) + scale_y_continuous(limits=c(0, 100)) + theme(plot.margin = unit(c(0.02,0.02,0.04,0.02), "npc")) + theme(axis.title.x=element_text(vjust=-xvjust)) + theme(axis.title.y=element_text(vjust=yvjust))) + } + garbage <- dev.off() + + # Best perfect sequence vs. length scatters + if (format=="png") { + best_perf_scatter_png <- paste(graphsdir, "/", refid, "/", refid, "_",type,"_longest_perfect_vs_length_scatter.png", sep=""); + cat("Writing", best_perf_scatter_png, "\n"); + png(best_perf_scatter_png, width=1200, height=800) + print(ggplot(data_alignments, aes(x=data_alignments$QueryLength, y=data_alignments$LongestPerfectKmer)) + geom_point(shape=pointshape, size=pointsize, alpha=pointalpha, color=colourcode) + xlab("Read length") +ylab("Longest perfect kmer") + ggtitle(type) + theme(text = element_text(size=textsize)) + theme(plot.margin = unit(c(0.02,0.02,0.04,0.02), "npc")) + theme(axis.title.x=element_text(vjust=-xvjust)) + theme(axis.title.y=element_text(vjust=yvjust))) + #grid.edit("geom_point.points", grep = TRUE, gp = gpar(lwd = pointwidth)) + } else { + best_perf_scatter_pdf <- paste(graphsdir, "/", refid, "/", refid, "_",type,"_longest_perfect_vs_length_scatter.pdf", sep=""); + cat("Writing", best_perf_scatter_pdf, "\n"); + pdf(best_perf_scatter_pdf, width=6, height=4) + print(ggplot(data_alignments, aes(x=data_alignments$QueryLength, y=data_alignments$LongestPerfectKmer)) + geom_point(shape=pointshape, alpha=pointalpha, color=colourcode) + xlab("Read length") +ylab("Longest perfect kmer") + ggtitle(type) + theme(text = element_text(size=textsize)) + theme(plot.margin = unit(c(0.02,0.02,0.04,0.02), "npc")) + theme(axis.title.x=element_text(vjust=-xvjust)) + theme(axis.title.y=element_text(vjust=yvjust))) + } + garbage <- dev.off() + + # Best perfect sequence vs. length scatters zoomed + if (format=="png") { + best_perf_zoom_scatter_png <- paste(graphsdir, "/", refid, "/", refid, "_",type,"_longest_perfect_vs_length_zoom_scatter.png", sep=""); + cat("Writing", best_perf_zoom_scatter_png, "\n"); + png(best_perf_zoom_scatter_png, width=1200, height=800) + print(ggplot(data_alignments, aes(x=data_alignments$QueryLength, y=data_alignments$LongestPerfectKmer)) + geom_point(shape=pointshape, size=pointsize, alpha=pointalpha, color=colourcode) + xlab("Read length") +ylab("Longest perfect kmer") + ggtitle(type) + theme(text = element_text(size=textsize)) + scale_x_continuous(limits=c(0, 10000)) + theme(plot.margin = unit(c(0.02,0.02,0.04,0.02), "npc")) + theme(axis.title.x=element_text(vjust=-xvjust)) + theme(axis.title.y=element_text(vjust=yvjust))) + #grid.edit("geom_point.points", grep = TRUE, gp = gpar(lwd = pointwidth)) + } else { + best_perf_zoom_scatter_pdf <- paste(graphsdir, "/", refid, "/", refid, "_",type,"_longest_perfect_vs_length_zoom_scatter.pdf", sep=""); + cat("Writing", best_perf_zoom_scatter_pdf, "\n"); + pdf(best_perf_zoom_scatter_pdf, width=6, height=4) + print(ggplot(data_alignments, aes(x=data_alignments$QueryLength, y=data_alignments$LongestPerfectKmer)) + geom_point(shape=pointshape, alpha=pointalpha, color=colourcode) + xlab("Read length") +ylab("Longest perfect kmer") + ggtitle(type) + theme(text = element_text(size=textsize)) + scale_x_continuous(limits=c(0, 10000)) + theme(plot.margin = unit(c(0.02,0.02,0.04,0.02), "npc")) + theme(axis.title.x=element_text(vjust=-xvjust)) + theme(axis.title.y=element_text(vjust=yvjust))) + } + garbage <- dev.off() + + # Plot %reads vs best perfect kmer + cat(data_alignments$LongestPerfectKmer); + + hdf <- hist(breaks=seq(0,maxk,by=10), x=data_alignments$LongestPerfectKmer, plot=FALSE, right=FALSE); # bins are 0-9, 10-19, 20-29 etc. + hdf$density = hdf$counts/sum(hdf$counts)*100 + tdf <- data.frame(Pos=hdf$mids, Counts=hdf$density); + + if (format=="png") { + png_perfect_best <- paste(graphsdir, "/", refid, "/", refid, "_",type,"_best_perfect_kmers.png", sep=""); + cat("Writing", png_perfect_best, "\n"); + png(png_perfect_best, width=1200, height=800) + print(ggplot(tdf, aes(Pos, Counts)) + geom_bar(stat="identity", fill=colourcode) + ggtitle(type) + theme(text = element_text(size=textsize)) + xlab("Best perfect kmer") + ylab("% reads") + theme(plot.margin = unit(c(0.02,0.02,0.04,0.02), "npc")) + theme(axis.title.y=element_text(vjust=0.2)) + theme(axis.title.x=element_text(vjust=-0.2)) + scale_x_continuous(limits=c(0, cum_maxk), breaks=seq(0,cum_maxk,by=50)) + theme(text = element_text(size=textsize)) + theme(plot.margin = unit(c(0.02,0.02,0.04,0.02), "npc")) + theme(axis.title.x=element_text(vjust=-xvjust)) + theme(axis.title.y=element_text(vjust=yvjust))) + } else { + pdf_perfect_best <- paste(graphsdir, "/", refid, "/", refid, "_",type,"_best_perfect_kmers.pdf", sep=""); + cat("Writing", pdf_perfect_best, "\n"); + pdf(pdf_perfect_best, width=6, height=4) + print(ggplot(tdf, aes(Pos, Counts)) + geom_bar(stat="identity", fill=colourcode) + ggtitle(type) + theme(text = element_text(size=textsize)) + xlab("Best perfect kmer") + ylab("% reads") + theme(plot.margin = unit(c(0.02,0.02,0.04,0.02), "npc")) + theme(axis.title.y=element_text(vjust=0.2)) + theme(axis.title.x=element_text(vjust=-0.2)) + scale_x_continuous(limits=c(0, cum_maxk), breaks=seq(0,cum_maxk,by=50)) + theme(text = element_text(size=textsize)) + theme(plot.margin = unit(c(0.02,0.02,0.04,0.02), "npc")) + theme(axis.title.x=element_text(vjust=-xvjust)) + theme(axis.title.y=element_text(vjust=yvjust))) + } + garbage <- dev.off() + + + # Number of perfect 21mers verses length scatter + if (format=="png") { + nk21_scatter_png <- paste(graphsdir, "/", refid, "/", refid, "_",type,"_nk21_vs_length_scatter.png", sep=""); + cat("Writing", nk21_scatter_png, "\n"); + png(nk21_scatter_png, width=1200, height=800) + print(ggplot(data_alignments, aes(x=data_alignments$QueryLength, y=data_alignments$nk21)) + geom_point(shape=pointshape, size=pointsize, alpha=pointalpha, color=colourcode) + xlab("Read length") +ylab("Number of perfect 21mers") + ggtitle(type) + theme(text = element_text(size=textsize)) + theme(plot.margin = unit(c(0.02,0.02,0.04,0.02), "npc")) + theme(axis.title.x=element_text(vjust=-xvjust)) + theme(axis.title.y=element_text(vjust=yvjust))) + #grid.edit("geom_point.points", grep = TRUE, gp = gpar(lwd = pointwidth)) + } else { + nk21_scatter_pdf <- paste(graphsdir, "/", refid, "/", refid, "_",type,"_nk21_vs_length_scatter.pdf", sep=""); + cat("Writing", nk21_scatter_pdf, "\n"); + pdf(nk21_scatter_pdf, width=6, height=4) + print(ggplot(data_alignments, aes(x=data_alignments$QueryLength, y=data_alignments$nk21)) + geom_point(shape=pointshape, alpha=pointalpha, color=colourcode) + xlab("Read length") +ylab("Number of perfect 21mers") + ggtitle(type) + theme(text = element_text(size=textsize)) + theme(plot.margin = unit(c(0.02,0.02,0.04,0.02), "npc")) + theme(axis.title.x=element_text(vjust=-xvjust)) + theme(axis.title.y=element_text(vjust=yvjust))) + } + garbage <- dev.off() + + # Mean perfect sequence vs. length scatters + #mean_perf_scatter_pdf <- paste(graphsdir, "/", refid, "/", refid, "_",type,"_mean_perfect_vs_length_scatter.pdf", sep=""); + #pdf(mean_perf_scatter_pdf, height=4, width=6) + #print(ggplot(data_alignments, aes(x=data_alignments$QueryLength, y=data_alignments$MeanPerfectKmer), xlab="Read length") + geom_point(shape=pointshape, alpha=pointalpha) + xlab("Read length") +ylab("Mean perfect kmer") + ggtitle(type) + theme(text = element_text(size=textsize)) + scale_x_continuous(limits=c(0, 10000))) + #garbage <- dev.off() + + # Percentage of read aligned vs read length + if (format=="png") { + output_png <- paste(graphsdir, "/", refid, "/", refid, "_",type,"_percent_aligned_vs_length_scatter.png", sep=""); + cat("Writing", output_png, "\n"); + png(output_png, width=1200, height=800) + print(ggplot(data_alignments, aes(x=data_alignments$QueryLength, y=data_alignments$PercentQueryAligned)) + geom_point(shape=pointshape, size=pointsize, alpha=pointalpha, color=colourcode) + xlab("Read length") +ylab("Percentage of read aligned") + ggtitle(type) + theme(text = element_text(size=textsize)) + theme(plot.margin = unit(c(0.02,0.02,0.04,0.02), "npc")) + theme(axis.title.x=element_text(vjust=-xvjust)) + theme(axis.title.y=element_text(vjust=yvjust))) + #grid.edit("geom_point.points", grep = TRUE, gp = gpar(lwd = pointwidth)) + } else { + output_pdf <- paste(graphsdir, "/", refid, "/", refid, "_",type,"_percent_aligned_vs_length_scatter.pdf", sep=""); + cat("Writing", output_pdf, "\n"); + pdf(output_pdf, width=6, height=4) + print(ggplot(data_alignments, aes(x=data_alignments$QueryLength, y=data_alignments$PercentQueryAligned)) + geom_point(shape=pointshape, alpha=pointalpha, color=colourcode) + xlab("Read length") +ylab("Percentage of read aligned") + ggtitle(type) + theme(text = element_text(size=textsize)) + theme(plot.margin = unit(c(0.02,0.02,0.04,0.02), "npc")) + theme(axis.title.x=element_text(vjust=-xvjust)) + theme(axis.title.y=element_text(vjust=yvjust))) + } + garbage <- dev.off() } else { - output_pdf <- paste(graphsdir, "/", refid, "/", refid, "_",type,"_percent_aligned_vs_length_scatter.pdf", sep=""); - cat("Writing", output_pdf, "\n"); - pdf(output_pdf, width=6, height=4) - print(ggplot(data_alignments, aes(x=data_alignments$QueryLength, y=data_alignments$PercentQueryAligned)) + geom_point(shape=pointshape, alpha=pointalpha, color=colourcode) + xlab("Read length") +ylab("Percentage of read aligned") + ggtitle(type) + theme(text = element_text(size=textsize)) + theme(plot.margin = unit(c(0.02,0.02,0.04,0.02), "npc")) + theme(axis.title.x=element_text(vjust=-xvjust)) + theme(axis.title.y=element_text(vjust=yvjust))) + cat("WARNING: No data in ", input_filename, "\n"); } - garbage <- dev.off() } else { cat("WARNING: Couldn't find", input_filename, "\n"); } @@ -409,19 +437,23 @@ for (t in 1:3) { if (file.exists(input_kmers)) { data_kmers = read.table(input_kmers, header=TRUE); - if (format=="png") { - kmer_scatter_png <- paste(graphsdir, "/", refid, "/", refid, "_",type,"_kmer_scatter.png", sep=""); - cat("Writing", kmer_scatter_png, "\n"); - png(kmer_scatter_png, width=1200, height=1200) - print(ggplot(data_kmers, aes(x=data_kmers$RefPc, y=data_kmers$ReadPc)) + geom_point(shape=pointshape, size=pointsize, alpha=pointalpha, color=colourcode) + xlab("Reference abundance %") +ylab("Reads abundance %") + ggtitle(type) + theme(text = element_text(size=textsize)) + scale_x_continuous(limits=c(0, 0.3)) + scale_y_continuous(limits=c(0, 0.3)) + geom_text(aes(label=data_kmers$Kmer), size=4) + theme(plot.margin = unit(c(0.02,0.02,0.04,0.02), "npc")) + theme(axis.title.x=element_text(vjust=-xvjust)) + theme(axis.title.y=element_text(vjust=yvjust)) ) - #grid.edit("geom_point.points", grep = TRUE, gp = gpar(lwd = pointwidth)) + if (nrow(data_kmers) > 1) { + if (format=="png") { + kmer_scatter_png <- paste(graphsdir, "/", refid, "/", refid, "_",type,"_kmer_scatter.png", sep=""); + cat("Writing", kmer_scatter_png, "\n"); + png(kmer_scatter_png, width=1200, height=1200) + print(ggplot(data_kmers, aes(x=data_kmers$RefPc, y=data_kmers$ReadPc)) + geom_point(shape=pointshape, size=pointsize, alpha=pointalpha, color=colourcode) + xlab("Reference abundance %") +ylab("Reads abundance %") + ggtitle(type) + theme(text = element_text(size=textsize)) + scale_x_continuous(limits=c(0, 0.3)) + scale_y_continuous(limits=c(0, 0.3)) + geom_text(aes(label=data_kmers$Kmer), size=4) + theme(plot.margin = unit(c(0.02,0.02,0.04,0.02), "npc")) + theme(axis.title.x=element_text(vjust=-xvjust)) + theme(axis.title.y=element_text(vjust=yvjust)) ) + #grid.edit("geom_point.points", grep = TRUE, gp = gpar(lwd = pointwidth)) + } else { + kmer_scatter_pdf <- paste(graphsdir, "/", refid, "/", refid, "_",type,"_kmer_scatter.pdf", sep=""); + cat("Writing", kmer_scatter_pdf, "\n"); + pdf(kmer_scatter_pdf, width=6, height=6) + print(ggplot(data_kmers, aes(x=data_kmers$RefPc, y=data_kmers$ReadPc)) + geom_point(shape=pointshape, alpha=pointalpha, color=colourcode) + xlab("Reference abundance %") +ylab("Reads abundance %") + ggtitle(type) + theme(text = element_text(size=textsize)) + scale_x_continuous(limits=c(0, 0.3)) + scale_y_continuous(limits=c(0, 0.3)) + geom_text(aes(label=data_kmers$Kmer), size=1) + theme(plot.margin = unit(c(0.02,0.02,0.04,0.02), "npc")) + theme(axis.title.x=element_text(vjust=-xvjust)) + theme(axis.title.y=element_text(vjust=yvjust)) ) + } + garbage <- dev.off() } else { - kmer_scatter_pdf <- paste(graphsdir, "/", refid, "/", refid, "_",type,"_kmer_scatter.pdf", sep=""); - cat("Writing", kmer_scatter_pdf, "\n"); - pdf(kmer_scatter_pdf, width=6, height=6) - print(ggplot(data_kmers, aes(x=data_kmers$RefPc, y=data_kmers$ReadPc)) + geom_point(shape=pointshape, alpha=pointalpha, color=colourcode) + xlab("Reference abundance %") +ylab("Reads abundance %") + ggtitle(type) + theme(text = element_text(size=textsize)) + scale_x_continuous(limits=c(0, 0.3)) + scale_y_continuous(limits=c(0, 0.3)) + geom_text(aes(label=data_kmers$Kmer), size=1) + theme(plot.margin = unit(c(0.02,0.02,0.04,0.02), "npc")) + theme(axis.title.x=element_text(vjust=-xvjust)) + theme(axis.title.y=element_text(vjust=yvjust)) ) + cat("WARNING: No data in ", input_kmers, "\n"); } - garbage <- dev.off() } else { cat("WARNING: Couldn't find", input_kmers, "\n"); } diff --git a/dist/NanoOK.jar b/dist/NanoOK.jar index 8af76d5..15dea86 100644 Binary files a/dist/NanoOK.jar and b/dist/NanoOK.jar differ diff --git a/src/nanook/NanoOK.java b/src/nanook/NanoOK.java index 6f36e77..d35b695 100644 --- a/src/nanook/NanoOK.java +++ b/src/nanook/NanoOK.java @@ -25,7 +25,7 @@ * @author Richard Leggett */ public class NanoOK { - public final static String VERSION_STRING = "v1.09"; + public final static String VERSION_STRING = "v1.10"; public final static long SERIAL_VERSION = 3L; /** diff --git a/src/nanook/NanoOKOptions.java b/src/nanook/NanoOKOptions.java index a2259f9..45fce12 100644 --- a/src/nanook/NanoOKOptions.java +++ b/src/nanook/NanoOKOptions.java @@ -1246,6 +1246,9 @@ void readProcessFile() { } else if (tokens[0].compareToIgnoreCase("Reference") == 0) { referenceFile = tokens[1]; System.out.println(" Reference "+tokens[1]); + } else if (tokens[0].compareToIgnoreCase("Sample") == 0) { + sampleDirectory = tokens[1]; + System.out.println(" Sample "+tokens[1]); } else if (tokens[0].compareToIgnoreCase("Analysis") == 0) { parsingReads = true; System.out.println(" Analysis "+tokens[1]);