Fixes for comparison mode

TGAC · May 5, 2017 · a3e9bb8 · a3e9bb8
1 parent 5df32e6
commit a3e9bb8
Show file tree

Hide file tree

Showing 9 changed files with 119 additions and 34 deletions.
diff --git a/bin/nanook_plot_comparison.R b/bin/nanook_plot_comparison.R
@@ -6,9 +6,11 @@ library(reshape2)
 
 # Filenames
 args <- commandArgs(TRUE)
-samplelist <- args[1];
-outdir <- args[2];
-format <- args[3];
+analysisdir <- args[1];
+graphsdir <- args[2];
+samplelist <- args[3];
+outdir <- args[4];
+format <- args[5];
 
 types = c("2D", "Template", "Complement");
 colours = c("#68B5B9", "#CF746D", "#91A851");
@@ -44,7 +46,7 @@ for (t in 1:3) {
     for (i in 1:nrow(data_samples)) {
         type = types[t];
         sampledir <- data_samples[i, "SampleDir"];
-        filename_lengths <- paste(sampledir, "/analysis/", "all_",type,"_lengths.txt", sep="");
+        filename_lengths <- paste(sampledir, "/", analysisdir, "/", "all_",type,"_lengths.txt", sep="");
         data_lengths = read.table(filename_lengths, col.name=c("name", "length"));
         #df$size <- data_lengths$length;
         thisid <- data_samples[i, "SampleName"];
@@ -57,7 +59,7 @@ for (t in 1:3) {
     # Read lengths
     imagewidth <- 1 + (nrow(data_samples) * 0.5);
     df <- do.call("rbind", listOfDataFrames);
-    output_file <- paste(outdir, "/graphs/", type, "_lengths.pdf", sep="");
+    output_file <- paste(graphsdir, "/", type, "_lengths.pdf", sep="");
     message(output_file);
     pdf(output_file, width=imagewidth, height = 4);
     print(ggplot(df, aes(x=Sample, y=Length, fill=Sample)) + geom_boxplot() + theme(axis.text.x = element_text(angle = 45, hjust = 1)) + guides(fill=FALSE) + theme(text = element_text(size=textsize)) + ggtitle(types[t]));
@@ -66,10 +68,11 @@ for (t in 1:3) {
     # Bar stacked plot of mapping
     imagewidth <- 1 + (nrow(data_samples) * 0.5) + 1.5;
     filename_maps <- paste(outdir, "/", type,"_map_summary.txt", sep="");
+    message(filename_maps)
     #filename_maps <- c("~/temp/2D_map_summary.txt");
     data_maps = read.table(filename_maps, header=TRUE);
     df <- melt(data_maps, id.var="Sample")
-    output_file <- paste(outdir, "/graphs/", type, "_maps.pdf", sep="");
+    output_file <- paste(graphsdir, "/", type, "_maps.pdf", sep="");
     message(output_file);
     pdf(output_file, width=imagewidth, height = 4);
     print(ggplot(df, aes(x = Sample, y = value, fill = variable)) + geom_bar(stat = "identity") + theme(axis.text.x = element_text(angle = 45, hjust = 1)) + theme(text = element_text(size=textsize)) + ggtitle(types[t]) + ylab("%"));
@@ -80,15 +83,15 @@ for (t in 1:3) {
     # Number of reads
     filename_comparison <- paste(outdir, "/", type,"_comparison.txt", sep="");
     data_comparison = read.table(filename_comparison, header=TRUE);
-    output_file <- paste(outdir, "/graphs/", type, "_number_of_reads.pdf", sep="");
+    output_file <- paste(graphsdir, "/", type, "_number_of_reads.pdf", sep="");
     message(output_file);
     pdf(output_file, width=imagewidth, height = 4);
     print(ggplot(data_comparison, aes(x=data_comparison$Name, y=data_comparison$NumReads)) + geom_bar(stat="identity", fill=colourcode) + ggtitle(type) + theme(text = element_text(size=textsize)) + xlab("Sample") + ylab("Number of reads") + theme(plot.margin = unit(c(0.02,0.02,0.04,0.02), "npc")) + theme(axis.title.y=element_text(vjust=0.2)) + theme(axis.title.x=element_text(vjust=-0.2)) + theme(plot.margin = unit(c(0.02,0.02,0.04,0.02), "npc")) + theme(axis.title.x=element_text(vjust=-xvjust)) + theme(axis.title.y=element_text(vjust=yvjust)) + theme(axis.text.x = element_text(angle = 45, hjust = 1)))
     garbage <- dev.off();
 
     # Total bases
     data_comparison = read.table(filename_comparison, header=TRUE);
-    output_file <- paste(outdir, "/graphs/", type, "_total_bases.pdf", sep="");
+    output_file <- paste(graphsdir, "/", type, "_total_bases.pdf", sep="");
     message(output_file);
     pdf(output_file, width=imagewidth, height = 4);
     print(ggplot(data_comparison, aes(x=data_comparison$Name, y=data_comparison$TotalBases)) + geom_bar(stat="identity", fill=colourcode) + ggtitle(type) + theme(text = element_text(size=textsize)) + xlab("Sample") + ylab("Total bases") + theme(plot.margin = unit(c(0.02,0.02,0.04,0.02), "npc")) + theme(axis.title.y=element_text(vjust=0.2)) + theme(axis.title.x=element_text(vjust=-0.2)) + theme(plot.margin = unit(c(0.02,0.02,0.04,0.02), "npc")) + theme(axis.title.x=element_text(vjust=-xvjust)) + theme(axis.title.y=element_text(vjust=yvjust)) + theme(axis.text.x = element_text(angle = 45, hjust = 1)))

diff --git a/bin/nanook_plot_comparison_reference.R b/bin/nanook_plot_comparison_reference.R
@@ -5,10 +5,12 @@ library(gridExtra)
 
 # Filenames
 args <- commandArgs(TRUE)
-samplelist <- args[1];
-outdir <- args[2];
-reference <- args[3];
-format <- args[4];
+analysisdir <- args[1];
+graphsdir <- args[2];
+samplelist <- args[3];
+outdir <- args[4];
+reference <- args[5];
+format <- args[6];
 
 types = c("2D", "Template", "Complement");
 colours = c("#68B5B9", "#CF746D", "#91A851");
@@ -44,7 +46,8 @@ for (t in 1:3) {
     for (i in 1:nrow(data_samples)) {
         type = types[t];
         sampledir <- data_samples[i, "SampleDir"];
-        filename_data <- paste(sampledir, "/analysis/", reference, "/", reference, "_",type,"_alignments.txt", sep="");
+        filename_data <- paste(sampledir, "/", analysisdir, "/", reference, "/", reference, "_",type,"_alignments.txt", sep="");
+        message(filename_data);
         if (file.exists(filename_data)) {
             data_field = read.table(filename_data, header=TRUE);
             message(nrow(data_field));
@@ -57,13 +60,13 @@ for (t in 1:3) {
     }
 
     df <- do.call("rbind", listOfDataFrames);
-    output_file <- paste(outdir, "/graphs/", reference, "_", type, "_query_identity.pdf", sep="");
+    output_file <- paste(graphsdir, "/", reference, "_", type, "_query_identity.pdf", sep="");
     message(output_file);
     pdf(output_file, width=imagewidth, height = 4);
     print(ggplot(df, aes(x=Sample, y=Variable, fill=Sample)) + geom_boxplot() + theme(axis.text.x = element_text(angle = 45, hjust = 1)) + guides(fill=FALSE) + theme(text = element_text(size=textsize)) + ggtitle(types[t]) + ylab("Read identity %"));
     garbage <- dev.off();
 
-    output_file <- paste(outdir, "/graphs/", reference, "_", type, "_query_identity_zoom.pdf", sep="");
+    output_file <- paste(graphsdir, "/", reference, "_", type, "_query_identity_zoom.pdf", sep="");
     message(output_file);
     pdf(output_file, width=imagewidth, height = 4);
     print(ggplot(df, aes(x=Sample, y=Variable, fill=Sample)) + geom_boxplot() + theme(axis.text.x = element_text(angle = 45, hjust = 1)) + guides(fill=FALSE) + theme(text = element_text(size=textsize)) + ggtitle(types[t]) + ylab("Read identity %") + scale_y_continuous(limits=c(60, 100)));
@@ -78,7 +81,7 @@ for (t in 1:3) {
     for (i in 1:nrow(data_samples)) {
         type = types[t];
         sampledir <- data_samples[i, "SampleDir"];
-        filename_data <- paste(sampledir, "/analysis/", reference, "/", reference, "_",type,"_alignments.txt", sep="");
+        filename_data <- paste(sampledir, "/", analysisdir ,"/", reference, "/", reference, "_",type,"_alignments.txt", sep="");
         if (file.exists(filename_data)) {
             data_field = read.table(filename_data, header=TRUE);
             message(nrow(data_field));
@@ -91,7 +94,7 @@ for (t in 1:3) {
     }
 
     df <- do.call("rbind", listOfDataFrames);
-    output_file <- paste(outdir, "/graphs/", reference, "_", type, "_query_gc.pdf", sep="");
+    output_file <- paste(graphsdir, "/", reference, "_", type, "_query_gc.pdf", sep="");
     message(output_file);
     pdf(output_file, width=imagewidth, height = 4);
     print(ggplot(df, aes(x=Sample, y=Variable, fill=Sample)) + geom_boxplot() + theme(axis.text.x = element_text(angle = 45, hjust = 1)) + guides(fill=FALSE) + theme(text = element_text(size=textsize)) + ggtitle(types[t]) + ylab("Read GC %"));
@@ -106,7 +109,7 @@ for (t in 1:3) {
     for (i in 1:nrow(data_samples)) {
         type = types[t];
         sampledir <- data_samples[i, "SampleDir"];
-        filename_data <- paste(sampledir, "/analysis/", reference, "/", reference, "_",type,"_alignments.txt", sep="");
+        filename_data <- paste(sampledir, "/", analysisdir, "/", reference, "/", reference, "_",type,"_alignments.txt", sep="");
         if (file.exists(filename_data)) {
             data_field = read.table(filename_data, header=TRUE);
             if (nrow(data_field) > 0) {
@@ -118,7 +121,7 @@ for (t in 1:3) {
     }
 
     df <- do.call("rbind", listOfDataFrames);
-    output_file <- paste(outdir, "/graphs/", reference, "_", type, "_best_perfect_kmer.pdf", sep="");
+    output_file <- paste(graphsdir, "/", reference, "_", type, "_best_perfect_kmer.pdf", sep="");
     message(output_file);
     pdf(output_file, width=imagewidth, height = 4);
     print(ggplot(df, aes(x=Sample, y=Variable, fill=Sample)) + geom_boxplot() + theme(axis.text.x = element_text(angle = 45, hjust = 1)) + guides(fill=FALSE) + theme(text = element_text(size=textsize)) + ggtitle(types[t]) + ylab("Best perfect kmer"));
@@ -133,7 +136,7 @@ for (t in 1:3) {
     for (i in 1:nrow(data_samples)) {
         type = types[t];
         sampledir <- data_samples[i, "SampleDir"];
-        filename_data <- paste(sampledir, "/analysis/", reference, "/", reference, "_",type,"_alignments.txt", sep="");
+        filename_data <- paste(sampledir, "/", analysisdir, "/", reference, "/", reference, "_",type,"_alignments.txt", sep="");
         if (file.exists(filename_data)) {
             data_field = read.table(filename_data, header=TRUE);
             if (nrow(data_field) > 0) {
@@ -145,13 +148,13 @@ for (t in 1:3) {
     }
 
     df <- do.call("rbind", listOfDataFrames);
-    output_file <- paste(outdir, "/graphs/", reference, "_", type, "_percent_query_aligned.pdf", sep="");
+    output_file <- paste(graphsdir, "/", reference, "_", type, "_percent_query_aligned.pdf", sep="");
     message(output_file);
     pdf(output_file, width=imagewidth, height = 4);
     print(ggplot(df, aes(x=Sample, y=Variable, fill=Sample)) + geom_boxplot() + theme(axis.text.x = element_text(angle = 45, hjust = 1)) + guides(fill=FALSE) + theme(text = element_text(size=textsize)) + ggtitle(types[t]) + ylab("% read aligned"));
     garbage <- dev.off();
 
-    output_file <- paste(outdir, "/graphs/", reference, "_", type, "_percent_query_aligned_zoom.pdf", sep="");
+    output_file <- paste(graphsdir, "/", reference, "_", type, "_percent_query_aligned_zoom.pdf", sep="");
     message(output_file);
     pdf(output_file, width=imagewidth, height = 4);
     print(ggplot(df, aes(x=Sample, y=Variable, fill=Sample)) + geom_boxplot() + theme(axis.text.x = element_text(angle = 45, hjust = 1)) + guides(fill=FALSE) + theme(text = element_text(size=textsize)) + ggtitle(types[t]) + ylab("% read aligned") + scale_y_continuous(limits=c(75, 100)));
@@ -166,7 +169,7 @@ for (t in 1:3) {
     for (i in 1:nrow(data_samples)) {
         type = types[t];
         sampledir <- data_samples[i, "SampleDir"];
-        filename_data <- paste(sampledir, "/analysis/", reference, "/", reference, "_",type,"_alignments.txt", sep="");
+        filename_data <- paste(sampledir, "/", analysisdir, "/", reference, "/", reference, "_",type,"_alignments.txt", sep="");
         if (file.exists(filename_data)) {
             data_field = read.table(filename_data, header=TRUE);
             if (nrow(data_field) > 0) {
@@ -178,7 +181,7 @@ for (t in 1:3) {
     }
 
     df <- do.call("rbind", listOfDataFrames);
-    output_file <- paste(outdir, "/graphs/", reference, "_", type, "_alignment_size.pdf", sep="");
+    output_file <- paste(graphsdir, "/", reference, "_", type, "_alignment_size.pdf", sep="");
     message(output_file);
     pdf(output_file, width=imagewidth, height = 4);
     print(ggplot(df, aes(x=Sample, y=Variable, fill=Sample)) + geom_boxplot() + theme(axis.text.x = element_text(angle = 45, hjust = 1)) + guides(fill=FALSE) + theme(text = element_text(size=textsize)) + ggtitle(types[t]) + ylab("Alignment size"));
@@ -209,4 +212,4 @@ for (t in 1:3) {
 #    pdf(output_file, width=imagewidth, height = 4);
 #    print(ggplot(df, aes(x=Sample, y=Variable, fill=Sample)) + geom_boxplot() + theme(axis.text.x = element_text(angle = 45, hjust = 1)) + guides(fill=FALSE) + theme(text = element_text(size=textsize)) + ggtitle(types[t]) + ylab("Alignment identity %"));
 #    garbage <- dev.off();
-#}
+#}
diff --git a/bin/slurmit b/bin/slurmit
@@ -0,0 +1,62 @@
+#!/bin/bash
+# slurmit - wrap a command in a small reporting script and submit it to SLURM via sbatch.
+commandtorun=""
+nodes=1
+ntasks=1
+mem=2G
+maxtime="6-23:00"
+outfile=""
+partition=""
+cpuspertask=1
+# Print the help text to stdout.
+function usage
+{
+cat << EOF
+
+Submit commands to SLURM
+
+Usage: slurmit [options] "command to execute"
+
+Submission script for SLURM
+
+OPTIONS:
+        -h      Show this message
+        -c      Number of processors per task (--cpus-per-task parameter) (default 1)
+        -m      memory required per node (--mem parameter) (default "2G")
+        -n      maximum number of tasks (--ntasks parameter) (default 1)
+        -o      Output file (stdout and stderr) (default undefined)
+        -p      Partition (e.g. "tgac-medium") (default undefined)
+        -t      Time limit (--time parameter) (default "6-23:00")
+        -N      minimum number of nodes (--nodes parameter) (default 1)
+
+Example: slurmit -o logfile.txt "ls -l"
+
+Don't forget to backslash dollar variables, as appropriate.
+
+EOF
+}
+
+# Parse options. -o and -p store their flag with the value so they expand to nothing when unset.
+while getopts c:hm:n:o:p:t:N: OPTION
+do
+    case $OPTION in
+                c) cpuspertask=$OPTARG;;
+                h) usage ; exit 1 ;;
+                m) mem=$OPTARG;;
+                n) ntasks=$OPTARG;;
+                o) outfile=" -o $OPTARG";;
+                p) partition=" -p $OPTARG";;
+                t) maxtime=$OPTARG;;
+                N) nodes=$OPTARG;;
+                *) usage >&2 ; exit 1 ;;  # unknown option or missing argument: do not submit
+    esac
+done
+shift $((OPTIND-1))
+# Everything left after the options is the command; join it into one string for --wrap.
+commandtorun="$*"
+if [ -z "$commandtorun" ] ; then
+    echo "You must specify a command to run" >&2
+    exit 1
+fi
+# outfile/partition stay unquoted on purpose: each carries its own flag and must word-split.
+sbatch --nodes "${nodes}" --cpus-per-task="${cpuspertask}" --ntasks "${ntasks}" --time "${maxtime}" --mem "${mem}"${outfile}${partition} --wrap="echo \"SLURM job output\" ; echo \"\" ; echo \"Command: ${commandtorun}\" ; echo \"Job ID: \${SLURM_JOB_ID}\" ; echo -n \"Start time: \" ; date ; printf \"%0.s-\" {1..70} ; printf \"\n\n\" ; ${commandtorun} ; printf \"\n\" ; printf \"%0.s-\" {1..70} ; printf \"\n\n\" ; sstat -j \${SLURM_JOB_ID}.batch ; printf \"\n\" ; echo \"SLURM ended\"; echo -n \"End time: \" ; date"
diff --git a/dist/NanoOK.jar b/dist/NanoOK.jar
diff --git a/src/nanook/CIGARString.java b/src/nanook/CIGARString.java
@@ -169,6 +169,7 @@ public boolean processString() {
                         break;
                     case 'I':
                         if (n > 100) {
+                            // DEBUG MODE TURNS OFF THIS
                             System.out.println("");
                             System.out.println("Error: large I ("+n+") - read "+queryID+" ignored");
                             processed = false;

diff --git a/src/nanook/ComparisonReportWriter.java b/src/nanook/ComparisonReportWriter.java
@@ -172,7 +172,10 @@ private void writeReferenceSection() {
         ArrayList<ReferenceSequence> sortedRefs = options.getReferences().getSortedReferences();
         for (int i=0; i<sortedRefs.size(); i++) {
             ReferenceSequence rs = sortedRefs.get(i);
-            writeReferenceSection(rs);
+
+            if ((options.debugMode() && (!rs.getName().equalsIgnoreCase("DNA_CS")))) {
+                writeReferenceSection(rs);
+            }
         }
     }   
 

diff --git a/src/nanook/NanoOK.java b/src/nanook/NanoOK.java
@@ -25,7 +25,7 @@
  * @author Richard Leggett
  */
 public class NanoOK {
-    public final static String VERSION_STRING = "v1.21";
+    public final static String VERSION_STRING = "v1.22";
     public final static long SERIAL_VERSION = 3L;
 
     /**

diff --git a/src/nanook/NanoOKOptions.java b/src/nanook/NanoOKOptions.java
@@ -204,6 +204,7 @@ public void parseArgs(String[] args) {
             System.out.println("    -t|-numthreads <number> specifies the number of threads to use (default 1)");
             System.out.println("    -log <filename> enables debug logging to file");
             System.out.println("    -force to force NanoOK to ignore warnings");
+            System.out.println("    -timeout to set the number of seconds before giving up waiting for new reads (default 2)");
             System.out.println("");
             System.exit(0);
         }
@@ -572,22 +573,22 @@ public void checkAndMakeComparisonDirs() {
             f.mkdir();
         }
 
-        f = new File(comparisonDir+File.separator+"graphs");
+        f = new File(this.getGraphsDir());
         if (!f.exists()) {
             f.mkdir();
         }
 
-        f = new File(comparisonDir+File.separator+"latex");
+        f = new File(this.getLatexDir());
         if (!f.exists()) {
             f.mkdir();
         }
 
-        f = new File(comparisonDir+File.separator+"logs");
+        f = new File(this.getLogsDir());
         if (!f.exists()) {
             f.mkdir();
         }        
 
-        f = new File(comparisonDir+File.separator+"logs"+File.separator+"R");
+        f = new File(this.getLogsDir()+File.separator+"R");
         if (!f.exists()) {
             f.mkdir();
         }            
@@ -1373,4 +1374,8 @@ public double getMinQ() {
     public void initialiseReadMerger() {
         readFileMerger = new ReadFileMerger(this);
     }
+
+    /**
+     * Reports whether debug mode is enabled.
+     *
+     * @return always {@code false}; the value is hard-coded here.
+     *         NOTE(review): ComparisonReportWriter.writeReferenceSection() only writes a
+     *         reference section when this returns {@code true} (and the reference is not
+     *         "DNA_CS"), so with this value no reference sections are written - confirm
+     *         that is intended.
+     */
+    public boolean debugMode() {
+        return false;
+    }
 }
diff --git a/src/nanook/RGraphPlotter.java b/src/nanook/RGraphPlotter.java
@@ -74,13 +74,19 @@ public void runScript(boolean fComparison, String scriptName, String logPrefix,
 
         args.add("Rscript");
         args.add(options.getScriptsDir() + File.separator + scriptName);
+
+        if (fComparison) {
+            File f = new File(options.getAnalysisDir());
+            args.add(f.getName());
+        } else {
+            args.add(options.getAnalysisDir());
+        }
+
+        args.add(options.getGraphsDir());
 
         if (fComparison) {
             args.add(options.getSampleList());
             args.add(options.getComparisonDir());
-        } else {
-            args.add(options.getAnalysisDir());
-            args.add(options.getGraphsDir());
         }
 
         if (refName != null) {
@@ -89,6 +95,8 @@ public void runScript(boolean fComparison, String scriptName, String logPrefix,
         }
 
         args.add(options.getImageFormat());
+
+        //System.out.println(args);
 
         executor.execute(new RGraphRunnable("Rscript", args, logFilename + ".txt"));
         writeProgress();