Skip to content

Commit

Permalink
Fixes for comparison mode
Browse files Browse the repository at this point in the history
  • Loading branch information
richardmleggett committed May 5, 2017
1 parent 5df32e6 commit a3e9bb8
Show file tree
Hide file tree
Showing 9 changed files with 119 additions and 34 deletions.
19 changes: 11 additions & 8 deletions bin/nanook_plot_comparison.R
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,11 @@ library(reshape2)

# Filenames
args <- commandArgs(TRUE)
samplelist <- args[1];
outdir <- args[2];
format <- args[3];
analysisdir <- args[1];
graphsdir <- args[2];
samplelist <- args[3];
outdir <- args[4];
format <- args[5];

types = c("2D", "Template", "Complement");
colours = c("#68B5B9", "#CF746D", "#91A851");
Expand Down Expand Up @@ -44,7 +46,7 @@ for (t in 1:3) {
for (i in 1:nrow(data_samples)) {
type = types[t];
sampledir <- data_samples[i, "SampleDir"];
filename_lengths <- paste(sampledir, "/analysis/", "all_",type,"_lengths.txt", sep="");
filename_lengths <- paste(sampledir, "/", analysisdir, "/", "all_",type,"_lengths.txt", sep="");
data_lengths = read.table(filename_lengths, col.name=c("name", "length"));
#df$size <- data_lengths$length;
thisid <- data_samples[i, "SampleName"];
Expand All @@ -57,7 +59,7 @@ for (t in 1:3) {
# Read lengths
imagewidth <- 1 + (nrow(data_samples) * 0.5);
df <- do.call("rbind", listOfDataFrames);
output_file <- paste(outdir, "/graphs/", type, "_lengths.pdf", sep="");
output_file <- paste(graphsdir, "/", type, "_lengths.pdf", sep="");
message(output_file);
pdf(output_file, width=imagewidth, height = 4);
print(ggplot(df, aes(x=Sample, y=Length, fill=Sample)) + geom_boxplot() + theme(axis.text.x = element_text(angle = 45, hjust = 1)) + guides(fill=FALSE) + theme(text = element_text(size=textsize)) + ggtitle(types[t]));
Expand All @@ -66,10 +68,11 @@ for (t in 1:3) {
# Bar stacked plot of mapping
imagewidth <- 1 + (nrow(data_samples) * 0.5) + 1.5;
filename_maps <- paste(outdir, "/", type,"_map_summary.txt", sep="");
message(filename_maps)
#filename_maps <- c("~/temp/2D_map_summary.txt");
data_maps = read.table(filename_maps, header=TRUE);
df <- melt(data_maps, id.var="Sample")
output_file <- paste(outdir, "/graphs/", type, "_maps.pdf", sep="");
output_file <- paste(graphsdir, "/", type, "_maps.pdf", sep="");
message(output_file);
pdf(output_file, width=imagewidth, height = 4);
print(ggplot(df, aes(x = Sample, y = value, fill = variable)) + geom_bar(stat = "identity") + theme(axis.text.x = element_text(angle = 45, hjust = 1)) + theme(text = element_text(size=textsize)) + ggtitle(types[t]) + ylab("%"));
Expand All @@ -80,15 +83,15 @@ for (t in 1:3) {
# Number of reads
filename_comparison <- paste(outdir, "/", type,"_comparison.txt", sep="");
data_comparison = read.table(filename_comparison, header=TRUE);
output_file <- paste(outdir, "/graphs/", type, "_number_of_reads.pdf", sep="");
output_file <- paste(graphsdir, "/", type, "_number_of_reads.pdf", sep="");
message(output_file);
pdf(output_file, width=imagewidth, height = 4);
print(ggplot(data_comparison, aes(x=data_comparison$Name, y=data_comparison$NumReads)) + geom_bar(stat="identity", fill=colourcode) + ggtitle(type) + theme(text = element_text(size=textsize)) + xlab("Sample") + ylab("Number of reads") + theme(plot.margin = unit(c(0.02,0.02,0.04,0.02), "npc")) + theme(axis.title.y=element_text(vjust=0.2)) + theme(axis.title.x=element_text(vjust=-0.2)) + theme(plot.margin = unit(c(0.02,0.02,0.04,0.02), "npc")) + theme(axis.title.x=element_text(vjust=-xvjust)) + theme(axis.title.y=element_text(vjust=yvjust)) + theme(axis.text.x = element_text(angle = 45, hjust = 1)))
garbage <- dev.off();

# Total bases
data_comparison = read.table(filename_comparison, header=TRUE);
output_file <- paste(outdir, "/graphs/", type, "_total_bases.pdf", sep="");
output_file <- paste(graphsdir, "/", type, "_total_bases.pdf", sep="");
message(output_file);
pdf(output_file, width=imagewidth, height = 4);
print(ggplot(data_comparison, aes(x=data_comparison$Name, y=data_comparison$TotalBases)) + geom_bar(stat="identity", fill=colourcode) + ggtitle(type) + theme(text = element_text(size=textsize)) + xlab("Sample") + ylab("Total bases") + theme(plot.margin = unit(c(0.02,0.02,0.04,0.02), "npc")) + theme(axis.title.y=element_text(vjust=0.2)) + theme(axis.title.x=element_text(vjust=-0.2)) + theme(plot.margin = unit(c(0.02,0.02,0.04,0.02), "npc")) + theme(axis.title.x=element_text(vjust=-xvjust)) + theme(axis.title.y=element_text(vjust=yvjust)) + theme(axis.text.x = element_text(angle = 45, hjust = 1)))
Expand Down
37 changes: 20 additions & 17 deletions bin/nanook_plot_comparison_reference.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,12 @@ library(gridExtra)

# Filenames
args <- commandArgs(TRUE)
samplelist <- args[1];
outdir <- args[2];
reference <- args[3];
format <- args[4];
analysisdir <- args[1];
graphsdir <- args[2];
samplelist <- args[3];
outdir <- args[4];
reference <- args[5];
format <- args[6];

types = c("2D", "Template", "Complement");
colours = c("#68B5B9", "#CF746D", "#91A851");
Expand Down Expand Up @@ -44,7 +46,8 @@ for (t in 1:3) {
for (i in 1:nrow(data_samples)) {
type = types[t];
sampledir <- data_samples[i, "SampleDir"];
filename_data <- paste(sampledir, "/analysis/", reference, "/", reference, "_",type,"_alignments.txt", sep="");
filename_data <- paste(sampledir, "/", analysisdir, "/", reference, "/", reference, "_",type,"_alignments.txt", sep="");
message(filename_data);
if (file.exists(filename_data)) {
data_field = read.table(filename_data, header=TRUE);
message(nrow(data_field));
Expand All @@ -57,13 +60,13 @@ for (t in 1:3) {
}

df <- do.call("rbind", listOfDataFrames);
output_file <- paste(outdir, "/graphs/", reference, "_", type, "_query_identity.pdf", sep="");
output_file <- paste(graphsdir, "/", reference, "_", type, "_query_identity.pdf", sep="");
message(output_file);
pdf(output_file, width=imagewidth, height = 4);
print(ggplot(df, aes(x=Sample, y=Variable, fill=Sample)) + geom_boxplot() + theme(axis.text.x = element_text(angle = 45, hjust = 1)) + guides(fill=FALSE) + theme(text = element_text(size=textsize)) + ggtitle(types[t]) + ylab("Read identity %"));
garbage <- dev.off();

output_file <- paste(outdir, "/graphs/", reference, "_", type, "_query_identity_zoom.pdf", sep="");
output_file <- paste(graphsdir, "/", reference, "_", type, "_query_identity_zoom.pdf", sep="");
message(output_file);
pdf(output_file, width=imagewidth, height = 4);
print(ggplot(df, aes(x=Sample, y=Variable, fill=Sample)) + geom_boxplot() + theme(axis.text.x = element_text(angle = 45, hjust = 1)) + guides(fill=FALSE) + theme(text = element_text(size=textsize)) + ggtitle(types[t]) + ylab("Read identity %") + scale_y_continuous(limits=c(60, 100)));
Expand All @@ -78,7 +81,7 @@ for (t in 1:3) {
for (i in 1:nrow(data_samples)) {
type = types[t];
sampledir <- data_samples[i, "SampleDir"];
filename_data <- paste(sampledir, "/analysis/", reference, "/", reference, "_",type,"_alignments.txt", sep="");
filename_data <- paste(sampledir, "/", analysisdir ,"/", reference, "/", reference, "_",type,"_alignments.txt", sep="");
if (file.exists(filename_data)) {
data_field = read.table(filename_data, header=TRUE);
message(nrow(data_field));
Expand All @@ -91,7 +94,7 @@ for (t in 1:3) {
}

df <- do.call("rbind", listOfDataFrames);
output_file <- paste(outdir, "/graphs/", reference, "_", type, "_query_gc.pdf", sep="");
output_file <- paste(graphsdir, "/", reference, "_", type, "_query_gc.pdf", sep="");
message(output_file);
pdf(output_file, width=imagewidth, height = 4);
print(ggplot(df, aes(x=Sample, y=Variable, fill=Sample)) + geom_boxplot() + theme(axis.text.x = element_text(angle = 45, hjust = 1)) + guides(fill=FALSE) + theme(text = element_text(size=textsize)) + ggtitle(types[t]) + ylab("Read GC %"));
Expand All @@ -106,7 +109,7 @@ for (t in 1:3) {
for (i in 1:nrow(data_samples)) {
type = types[t];
sampledir <- data_samples[i, "SampleDir"];
filename_data <- paste(sampledir, "/analysis/", reference, "/", reference, "_",type,"_alignments.txt", sep="");
filename_data <- paste(sampledir, "/", analysisdir, "/", reference, "/", reference, "_",type,"_alignments.txt", sep="");
if (file.exists(filename_data)) {
data_field = read.table(filename_data, header=TRUE);
if (nrow(data_field) > 0) {
Expand All @@ -118,7 +121,7 @@ for (t in 1:3) {
}

df <- do.call("rbind", listOfDataFrames);
output_file <- paste(outdir, "/graphs/", reference, "_", type, "_best_perfect_kmer.pdf", sep="");
output_file <- paste(graphsdir, "/", reference, "_", type, "_best_perfect_kmer.pdf", sep="");
message(output_file);
pdf(output_file, width=imagewidth, height = 4);
print(ggplot(df, aes(x=Sample, y=Variable, fill=Sample)) + geom_boxplot() + theme(axis.text.x = element_text(angle = 45, hjust = 1)) + guides(fill=FALSE) + theme(text = element_text(size=textsize)) + ggtitle(types[t]) + ylab("Best perfect kmer"));
Expand All @@ -133,7 +136,7 @@ for (t in 1:3) {
for (i in 1:nrow(data_samples)) {
type = types[t];
sampledir <- data_samples[i, "SampleDir"];
filename_data <- paste(sampledir, "/analysis/", reference, "/", reference, "_",type,"_alignments.txt", sep="");
filename_data <- paste(sampledir, "/", analysisdir, "/", reference, "/", reference, "_",type,"_alignments.txt", sep="");
if (file.exists(filename_data)) {
data_field = read.table(filename_data, header=TRUE);
if (nrow(data_field) > 0) {
Expand All @@ -145,13 +148,13 @@ for (t in 1:3) {
}

df <- do.call("rbind", listOfDataFrames);
output_file <- paste(outdir, "/graphs/", reference, "_", type, "_percent_query_aligned.pdf", sep="");
output_file <- paste(graphsdir, "/", reference, "_", type, "_percent_query_aligned.pdf", sep="");
message(output_file);
pdf(output_file, width=imagewidth, height = 4);
print(ggplot(df, aes(x=Sample, y=Variable, fill=Sample)) + geom_boxplot() + theme(axis.text.x = element_text(angle = 45, hjust = 1)) + guides(fill=FALSE) + theme(text = element_text(size=textsize)) + ggtitle(types[t]) + ylab("% read aligned"));
garbage <- dev.off();

output_file <- paste(outdir, "/graphs/", reference, "_", type, "_percent_query_aligned_zoom.pdf", sep="");
output_file <- paste(graphsdir, "/", reference, "_", type, "_percent_query_aligned_zoom.pdf", sep="");
message(output_file);
pdf(output_file, width=imagewidth, height = 4);
print(ggplot(df, aes(x=Sample, y=Variable, fill=Sample)) + geom_boxplot() + theme(axis.text.x = element_text(angle = 45, hjust = 1)) + guides(fill=FALSE) + theme(text = element_text(size=textsize)) + ggtitle(types[t]) + ylab("% read aligned") + scale_y_continuous(limits=c(75, 100)));
Expand All @@ -166,7 +169,7 @@ for (t in 1:3) {
for (i in 1:nrow(data_samples)) {
type = types[t];
sampledir <- data_samples[i, "SampleDir"];
filename_data <- paste(sampledir, "/analysis/", reference, "/", reference, "_",type,"_alignments.txt", sep="");
filename_data <- paste(sampledir, "/", analysisdir, "/", reference, "/", reference, "_",type,"_alignments.txt", sep="");
if (file.exists(filename_data)) {
data_field = read.table(filename_data, header=TRUE);
if (nrow(data_field) > 0) {
Expand All @@ -178,7 +181,7 @@ for (t in 1:3) {
}

df <- do.call("rbind", listOfDataFrames);
output_file <- paste(outdir, "/graphs/", reference, "_", type, "_alignment_size.pdf", sep="");
output_file <- paste(graphsdir, "/", reference, "_", type, "_alignment_size.pdf", sep="");
message(output_file);
pdf(output_file, width=imagewidth, height = 4);
print(ggplot(df, aes(x=Sample, y=Variable, fill=Sample)) + geom_boxplot() + theme(axis.text.x = element_text(angle = 45, hjust = 1)) + guides(fill=FALSE) + theme(text = element_text(size=textsize)) + ggtitle(types[t]) + ylab("Alignment size"));
Expand Down Expand Up @@ -209,4 +212,4 @@ for (t in 1:3) {
# pdf(output_file, width=imagewidth, height = 4);
# print(ggplot(df, aes(x=Sample, y=Variable, fill=Sample)) + geom_boxplot() + theme(axis.text.x = element_text(angle = 45, hjust = 1)) + guides(fill=FALSE) + theme(text = element_text(size=textsize)) + ggtitle(types[t]) + ylab("Alignment identity %"));
# garbage <- dev.off();
#}
#}
62 changes: 62 additions & 0 deletions bin/slurmit
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
#!/bin/bash

# Default settings for the submission. Each one (except commandtorun) can be
# overridden by a command-line option handled in the getopts loop further down.
# commandtorun: the command to submit; filled in from the non-option arguments.
commandtorun=""
# nodes: value passed to sbatch --nodes (option -N).
nodes=1
# ntasks: value passed to sbatch --ntasks (option -n).
ntasks=1
# mem: value passed to sbatch --mem (option -m).
mem=2G
# maxtime: value passed to sbatch --time (option -t).
maxtime="6-23:00"
# outfile: empty, or " -o <file>" once option -o has been given.
outfile=""
# partition: empty, or " -p <name>" once option -p has been given.
partition=""
# cpuspertask: value passed to sbatch --cpus-per-task (option -c).
cpuspertask=1

# Print the command-line help for slurmit on stdout.
# Takes no arguments and does not exit; the caller decides what happens next.
function usage
{
cat << EOF
Submit commands to SLURM
Usage: slurmit [options] "command to execute"
Submission script for SLURM
OPTIONS:
-h Show this message
-c Number of processors per task (--cpus-per-task parameter) (default 1)
-m memory required per node (--mem parameter) (default "2G")
-n maximum number of tasks (--ntasks parameter) (default 1)
-o Output file (stdout and stderr) (default undefined)
-p Partition (e.g. "tgac-medium") (default undefined)
-t Time limit (--time parameter) (default "6-23:00")
-N minimum number of nodes (--nodes parameter) (default 1)
Example: slurmit -o logfile.txt "ls -l"
Don't forget to backslash dollar variables, as appropriate.
EOF
}


# Parse the command-line options; each flag overrides one of the script's
# default settings. -o and -p store the complete sbatch flag, including a
# leading space, so that they expand to nothing when the option was not given.
while getopts c:hm:n:o:p:t:N: OPTION
do
    case $OPTION in
        c) cpuspertask=$OPTARG;;
        h) usage ; exit 1 ;;
        m) mem=$OPTARG;;
        n) ntasks=$OPTARG;;
        o) outfile=" -o $OPTARG";;
        p) partition=" -p $OPTARG";;
        t) maxtime=$OPTARG;;
        N) nodes=$OPTARG;;
        # getopts reports an unknown option or a missing argument as "?".
        # Stop here instead of submitting a job with settings the user did
        # not intend.
        \?) usage >&2 ; exit 1 ;;
    esac
done
# Drop the options consumed by getopts so only the command to run remains.
shift $((OPTIND-1))

# Join the remaining arguments into one string; "$*" makes the join explicit
# instead of relying on an unquoted $@ in an assignment.
commandtorun="$*"

# Refuse to submit an empty job. The message goes to stderr and the script
# exits non-zero so that callers can detect the failure.
if [ -z "$commandtorun" ] ; then
    echo "You must specify a command to run" >&2
    exit 1
fi

# Submit the job with sbatch. The user's command is embedded in a --wrap script
# that prints a header (the command, the job ID and the start time), a rule of
# 70 dashes, runs the command, prints another rule, then the sstat output for
# the job's .batch step, and finally the end time.
#
# Quoting notes:
#  - ${outfile} and ${partition} are appended directly after ${mem} and left
#    unquoted on purpose: they are either empty or hold " -o <file>" /
#    " -p <name>", and rely on word splitting to become separate arguments.
#  - ${commandtorun} is expanded now, at submission time, whereas
#    \${SLURM_JOB_ID} is escaped so that it is expanded only when the wrapped
#    script runs.
#  - The unescaped "" after the second echo closes and reopens the outer double
#    quotes, so the wrapped script contains a bare `echo` (prints an empty line).
sbatch --nodes ${nodes} --cpus-per-task=${cpuspertask} --ntasks ${ntasks} --time ${maxtime} --mem ${mem}${outfile}${partition} --wrap="echo \"SLURM job output\" ; echo "" ; echo \"Command: ${commandtorun}\" ; echo \"Job ID: \${SLURM_JOB_ID}\" ; echo -n \"Start time: \" ; date ; printf \"%0.s-\" {1..70} ; printf \"\n\n\" ; ${commandtorun} ; printf \"\n\" ; printf \"%0.s-\" {1..70} ; printf \"\n\n\" ; sstat -j \${SLURM_JOB_ID}.batch ; printf \"\n\" ; echo \"SLURM ended\"; echo -n \"End time: \" ; date"
Binary file modified dist/NanoOK.jar
Binary file not shown.
1 change: 1 addition & 0 deletions src/nanook/CIGARString.java
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,7 @@ public boolean processString() {
break;
case 'I':
if (n > 100) {
// DEBUG MODE TURNS OFF THIS
System.out.println("");
System.out.println("Error: large I ("+n+") - read "+queryID+" ignored");
processed = false;
Expand Down
5 changes: 4 additions & 1 deletion src/nanook/ComparisonReportWriter.java
Original file line number Diff line number Diff line change
Expand Up @@ -172,7 +172,10 @@ private void writeReferenceSection() {
ArrayList<ReferenceSequence> sortedRefs = options.getReferences().getSortedReferences();
for (int i=0; i<sortedRefs.size(); i++) {
ReferenceSequence rs = sortedRefs.get(i);
writeReferenceSection(rs);

if ((options.debugMode() && (!rs.getName().equalsIgnoreCase("DNA_CS")))) {
writeReferenceSection(rs);
}
}
}

Expand Down
2 changes: 1 addition & 1 deletion src/nanook/NanoOK.java
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
* @author Richard Leggett
*/
public class NanoOK {
public final static String VERSION_STRING = "v1.21";
public final static String VERSION_STRING = "v1.22";
public final static long SERIAL_VERSION = 3L;

/**
Expand Down
13 changes: 9 additions & 4 deletions src/nanook/NanoOKOptions.java
Original file line number Diff line number Diff line change
Expand Up @@ -204,6 +204,7 @@ public void parseArgs(String[] args) {
System.out.println(" -t|-numthreads <number> specifies the number of threads to use (default 1)");
System.out.println(" -log <filename> enables debug logging to file");
System.out.println(" -force to force NanoOK to ignore warnings");
System.out.println(" -timeout to set the number of seconds before giving up waiting for new reads (default 2)");
System.out.println("");
System.exit(0);
}
Expand Down Expand Up @@ -572,22 +573,22 @@ public void checkAndMakeComparisonDirs() {
f.mkdir();
}

f = new File(comparisonDir+File.separator+"graphs");
f = new File(this.getGraphsDir());
if (!f.exists()) {
f.mkdir();
}

f = new File(comparisonDir+File.separator+"latex");
f = new File(this.getLatexDir());
if (!f.exists()) {
f.mkdir();
}

f = new File(comparisonDir+File.separator+"logs");
f = new File(this.getLogsDir());
if (!f.exists()) {
f.mkdir();
}

f = new File(comparisonDir+File.separator+"logs"+File.separator+"R");
f = new File(this.getLogsDir()+File.separator+"R");
if (!f.exists()) {
f.mkdir();
}
Expand Down Expand Up @@ -1373,4 +1374,8 @@ public double getMinQ() {
public void initialiseReadMerger() {
readFileMerger = new ReadFileMerger(this);
}

/**
 * Debug-mode switch. The value is hard-coded here rather than read from
 * any option or field.
 *
 * @return always {@code false}
 */
public boolean debugMode() {
return false;
}
}
14 changes: 11 additions & 3 deletions src/nanook/RGraphPlotter.java
Original file line number Diff line number Diff line change
Expand Up @@ -74,13 +74,19 @@ public void runScript(boolean fComparison, String scriptName, String logPrefix,

args.add("Rscript");
args.add(options.getScriptsDir() + File.separator + scriptName);

if (fComparison) {
File f = new File(options.getAnalysisDir());
args.add(f.getName());
} else {
args.add(options.getAnalysisDir());
}

args.add(options.getGraphsDir());

if (fComparison) {
args.add(options.getSampleList());
args.add(options.getComparisonDir());
} else {
args.add(options.getAnalysisDir());
args.add(options.getGraphsDir());
}

if (refName != null) {
Expand All @@ -89,6 +95,8 @@ public void runScript(boolean fComparison, String scriptName, String logPrefix,
}

args.add(options.getImageFormat());

//System.out.println(args);

executor.execute(new RGraphRunnable("Rscript", args, logFilename + ".txt"));
writeProgress();
Expand Down

0 comments on commit a3e9bb8

Please sign in to comment.