Skip to content

Commit

Permalink
Added gzip output for DNA shape tools
Browse files Browse the repository at this point in the history
  • Loading branch information
benjaminbeer256 committed Aug 28, 2023
1 parent 0d4277f commit 9e250c7
Show file tree
Hide file tree
Showing 12 changed files with 74 additions and 38 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,8 @@ public class DNAShapefromBEDCLI implements Callable<Integer> {
@Option(names = { "-o",
"--output" }, description = "Specify basename for output files, files for each shape indicated will share this name with a different suffix")
private String outputBasename = null;
@Option(names = {"-z", "--gzip"}, description = "output compressed output (default=false)")
private boolean zip = false;
@Option(names = { "--avg-composite" }, description = "Save average composite")
private boolean avgComposite = false;
@Option(names = { "-n", "--no-force" }, description = "don't force-strandedness (default is to force strandedness)")
Expand Down Expand Up @@ -74,7 +76,7 @@ public Integer call() throws Exception {
try {
// Generate Composite Plot
DNAShapefromBED script_obj = new DNAShapefromBED(genomeFASTA, bedFile, outputBasename, OUTPUT_TYPE,
forceStrand, new PrintStream[] { null, null, null, null });
forceStrand, new PrintStream[] { null, null, null, null }, zip);
script_obj.run();

if (avgComposite) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,8 @@ public class DNAShapefromFASTACLI implements Callable<Integer> {
private String outputBasename = null;
@Option(names = { "--avg-composite" }, description = "Save average composite")
private boolean avgComposite = false;
@Option(names = {"-z", "--gzip"}, description = "output compressed output (default=false)")
private boolean zip = false;

@ArgGroup(validate = false, heading = "Shape Options%n")
ShapeType shape = new ShapeType();
Expand Down Expand Up @@ -68,7 +70,7 @@ public Integer call() throws Exception {

// Generate Composite Plot
DNAShapefromFASTA script_obj = new DNAShapefromFASTA(fastaFile, outputBasename, OUTPUT_TYPE,
new PrintStream[] { null, null, null, null });
new PrintStream[] { null, null, null, null }, zip);
script_obj.run();

// Print Composite Scores
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@ public class RandomizeFASTACLI implements Callable<Integer> {
private File output;
@Option(names = {"-s", "--seed"}, description = "specify an integer seed for reproducible outputs")
private Integer seed = null;
@Option(names = {"-z", "--gzip"}, description = "output compressed output (default=false)")
private boolean zip = false;

@Override
public Integer call() throws Exception {
Expand All @@ -44,7 +46,7 @@ public Integer call() throws Exception {
System.exit(1);
}

RandomizeFASTA.randomizeFASTA(fastaFile, output, seed);
RandomizeFASTA.randomizeFASTA(fastaFile, output, seed, zip);

System.err.println("Randomization Complete.");
return (0);
Expand Down
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
package scriptmanager.scripts.Figure_Generation;

import java.awt.Color;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Scanner;

import org.jfree.chart.ChartUtils;
import org.jfree.chart.JFreeChart;
Expand All @@ -16,6 +16,7 @@
import scriptmanager.charts.CompositePlot;
import scriptmanager.util.ColorSeries;
import scriptmanager.util.ExtensionFileFilter;
import scriptmanager.util.GZipUtilities;

/**
* The script class to create/display line plot images based on the output files
Expand Down Expand Up @@ -64,11 +65,13 @@ public class PlotComposite {
* @throws FileNotFoundException
*/
public static JFreeChart plotCompositeFile(File input, File OUT_PATH, boolean outputImage, String title, ArrayList<Color> COLORS, boolean legend, int pxHeight, int pxWidth) throws IOException, IllegalArgumentException, FileNotFoundException {
Scanner scan = new Scanner(input);
BufferedReader br = GZipUtilities.makeReader(input);
// parse x values
String[] tokens = scan.nextLine().split("\t");
String line;
String[] tokens = {""};
if ((line = br.readLine()) != null){ tokens = line.split("\t"); }
if (!tokens[0].equals("")) {
scan.close();
br.close();
throw new IllegalArgumentException("(!) First row of input file must have an empty first column (as x-values)");
}
double[] x = new double[tokens.length - 1];
Expand All @@ -81,11 +84,11 @@ public static JFreeChart plotCompositeFile(File input, File OUT_PATH, boolean ou

XYSeries s;
// line-by-line through file
while (scan.hasNextLine()) {
tokens = scan.nextLine().split("\t");
while ((line = br.readLine()) != null) {
tokens = line.split("\t");
// check for format consistency: number of x-values matches y-values
if (tokens.length - 1 != x.length) {
scan.close();
br.close();
throw new IllegalArgumentException("(!) Check number of x-values matches number of y-values");
}
// skip any rows with blank labels
Expand All @@ -94,7 +97,7 @@ public static JFreeChart plotCompositeFile(File input, File OUT_PATH, boolean ou
if (x[i - 1] != Double.parseDouble(tokens[i])) {
System.err.println(x[i - 1]);
System.err.println(tokens[i]);
scan.close();
br.close();
throw new IllegalArgumentException("(!) Check dataseries based on same x-scale file");
}
}
Expand All @@ -108,7 +111,7 @@ public static JFreeChart plotCompositeFile(File input, File OUT_PATH, boolean ou
}
dataset.addSeries(s);
}
scan.close();
br.close();

// Set-up colors
if (COLORS==null) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ public class DNAShapefromBED {
private String OUTBASENAME = null;
private boolean[] OUTPUT_TYPE = null;
private File BED = null;
private boolean GZIP_OUTPUT;

private boolean STRAND = true;
private boolean INDEX = true;
Expand Down Expand Up @@ -73,16 +74,18 @@ public class DNAShapefromBED {
* @param str force strandedness (true=forced, false=not forced)
* @param ps list of four PrintStream objects corresponding to each shape type
* (for GUI)
* @param gzOutput whether to gzip-compress the output files
* @throws IOException
*/
public DNAShapefromBED(File gen, File b, String out, boolean[] type, boolean str, PrintStream[] ps)
public DNAShapefromBED(File gen, File b, String out, boolean[] type, boolean str, PrintStream[] ps, boolean gzOutput)
throws IOException {
GENOME = gen;
BED = b;
OUTBASENAME = out;
OUTPUT_TYPE = type;
STRAND = str;
PS = ps;
GZIP_OUTPUT = gzOutput;

File FAI = new File(GENOME + ".fai");
// Check if FAI index file exists
Expand Down Expand Up @@ -394,18 +397,18 @@ private void openOutputFiles() {
// Open Output File
try {
if (OUTPUT_TYPE[0]) {
OUT_M = new PrintStream(new File(OUTBASENAME + "_MGW.cdt"));
OUT_M = GZipUtilities.makePrintStream(new File(OUTBASENAME + "_MGW.cdt"), GZIP_OUTPUT);
}
if (OUTPUT_TYPE[1]) {
OUT_P = new PrintStream(new File(OUTBASENAME + "_PropT.cdt"));
OUT_P = GZipUtilities.makePrintStream(new File(OUTBASENAME + "_PropT.cdt"), GZIP_OUTPUT);
}
if (OUTPUT_TYPE[2]) {
OUT_H = new PrintStream(new File(OUTBASENAME + "_HelT.cdt"));
OUT_H = GZipUtilities.makePrintStream(new File(OUTBASENAME + "_HelT.cdt"), GZIP_OUTPUT);
}
if (OUTPUT_TYPE[3]) {
OUT_R = new PrintStream(new File(OUTBASENAME + "_Roll.cdt"));
OUT_R = GZipUtilities.makePrintStream(new File(OUTBASENAME + "_Roll.cdt"), GZIP_OUTPUT);
}
} catch (FileNotFoundException e) {
} catch (IOException e) {
e.printStackTrace();
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,14 @@
import java.awt.Component;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.PrintStream;
import java.sql.Timestamp;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import java.util.Map;

import scriptmanager.charts.CompositePlot;
import scriptmanager.util.DNAShapeReference;
import scriptmanager.util.GZipUtilities;
Expand All @@ -29,6 +29,7 @@ public class DNAShapefromFASTA {
private String OUTBASENAME = null;
private boolean[] OUTPUT_TYPE = null;
private File FASTA = null;
private boolean GZIP_OUTPUT;

private PrintStream OUT_M = null;
private PrintStream OUT_P = null;
Expand Down Expand Up @@ -58,12 +59,14 @@ public class DNAShapefromFASTA {
* (no enforcement on size)
* @param ps list of four PrintStream objects corresponding to each shape type
* (for GUI)
* @param gzOutput whether to gzip-compress the output files
*/
public DNAShapefromFASTA(File fa, String out, boolean[] type, PrintStream[] ps) {
public DNAShapefromFASTA(File fa, String out, boolean[] type, PrintStream[] ps, boolean gzOutput) {
FASTA = fa;
OUTBASENAME = out;
OUTPUT_TYPE = type;
PS = ps;
GZIP_OUTPUT = gzOutput;

STRUCTURE = DNAShapeReference.InitializeStructure();
}
Expand Down Expand Up @@ -304,18 +307,18 @@ private void openOutputFiles() {
// Open Output File
try {
if (OUTPUT_TYPE[0]) {
OUT_M = new PrintStream(new File(OUTBASENAME + "_MGW.cdt"));
OUT_M = GZipUtilities.makePrintStream(new File(OUTBASENAME + "_MGW.cdt"), GZIP_OUTPUT);
}
if (OUTPUT_TYPE[1]) {
OUT_P = new PrintStream(new File(OUTBASENAME + "_PropT.cdt"));
OUT_P = GZipUtilities.makePrintStream(new File(OUTBASENAME + "_PropT.cdt"), GZIP_OUTPUT);
}
if (OUTPUT_TYPE[2]) {
OUT_H = new PrintStream(new File(OUTBASENAME + "_HelT.cdt"));
OUT_H = GZipUtilities.makePrintStream(new File(OUTBASENAME + "_HelT.cdt"), GZIP_OUTPUT);
}
if (OUTPUT_TYPE[3]) {
OUT_R = new PrintStream(new File(OUTBASENAME + "_Roll.cdt"));
OUT_R = GZipUtilities.makePrintStream(new File(OUTBASENAME + "_Roll.cdt"), GZIP_OUTPUT);
}
} catch (FileNotFoundException e) {
} catch (IOException e) {
e.printStackTrace();
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,10 @@

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.PrintStream;
import java.util.ArrayList;
import java.util.Random;
import java.util.zip.GZIPInputStream;

import scriptmanager.util.GZipUtilities;

/**
Expand All @@ -28,16 +24,17 @@ public class RandomizeFASTA {
* @param FASTA filepath to FASTA-formatted sequences to randomize
* @param RANDOUT filepath to write randomized sequences to
* @param seed set a random seed
* @param gzOutput Whether to output a compressed file
* @return name of output filename
* @throws IOException
*/
public static File randomizeFASTA(File FASTA, File RANDOUT, Integer seed) throws IOException {
public static File randomizeFASTA(File FASTA, File RANDOUT, Integer seed, boolean gzOutput) throws IOException {
Random randnum = new Random();
if( seed != null) {
System.err.println("Set Seed=" + seed);
randnum.setSeed(seed);
}
PrintStream OUT = new PrintStream(RANDOUT);
PrintStream OUT = GZipUtilities.makePrintStream(RANDOUT, gzOutput);

// Check if file is gzipped and instantiate appropriate BufferedReader
BufferedReader br = GZipUtilities.makeReader(FASTA);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ public class DNAShapefromBEDOutput extends JFrame {
private File OUT_DIR = null;
private boolean[] OUTPUT_TYPE = null;
private ArrayList<File> BED = null;
private boolean OUTPUT_GZIP;

private boolean STRAND = true;

Expand All @@ -51,8 +52,9 @@ public class DNAShapefromBEDOutput extends JFrame {
* @param out_dir the output directory to save output files to
* @param type the information on the shape types to generate
* @param str the force-strandedness to pass to the script
* @param gzOutput whether the script should gzip-compress its output files
*/
public DNAShapefromBEDOutput(File gen, ArrayList<File> b, File out_dir, boolean[] type, boolean str) {
public DNAShapefromBEDOutput(File gen, ArrayList<File> b, File out_dir, boolean[] type, boolean str, boolean gzOutput) {
setTitle("DNA Shape Prediction Composite");
setDefaultCloseOperation(JFrame.DISPOSE_ON_CLOSE);
setBounds(150, 150, 800, 600);
Expand Down Expand Up @@ -80,6 +82,7 @@ public DNAShapefromBEDOutput(File gen, ArrayList<File> b, File out_dir, boolean[
OUT_DIR = out_dir;
OUTPUT_TYPE = type;
STRAND = str;
OUTPUT_GZIP = gzOutput;
}

/**
Expand Down Expand Up @@ -134,7 +137,7 @@ public void run() throws IOException, InterruptedException {
}

// Initialize Script Object and execute calculations
DNAShapefromBED script_obj = new DNAShapefromBED(GENOME, BED.get(x), BASENAME, OUTPUT_TYPE, STRAND, PS);
DNAShapefromBED script_obj = new DNAShapefromBED(GENOME, BED.get(x), BASENAME, OUTPUT_TYPE, STRAND, PS, OUTPUT_GZIP);
script_obj.run();

// Exit if FAI failed checks
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ public class DNAShapefromBEDWindow extends JFrame implements ActionListener, Pro

private JCheckBox chckbxStrand;
private JCheckBox chckbxAll;
private JCheckBox chckbxGzipOutput;
private JCheckBox chckbxMinorGrooveWidth;
private JCheckBox chckbxRoll;
private JCheckBox chckbxHelicalTwist;
Expand Down Expand Up @@ -93,7 +94,7 @@ public Void doInBackground() throws IOException {
OUTPUT_TYPE[3] = chckbxRoll.isSelected();

DNAShapefromBEDOutput signal = new DNAShapefromBEDOutput(INPUT, BEDFiles, OUT_DIR, OUTPUT_TYPE,
chckbxStrand.isSelected());
chckbxStrand.isSelected(), chckbxGzipOutput.isSelected());

signal.addPropertyChangeListener("fa", new PropertyChangeListener() {
public void propertyChange(PropertyChangeEvent propertyChangeEvent) {
Expand Down Expand Up @@ -182,6 +183,11 @@ public void actionPerformed(ActionEvent arg0) {
sl_contentPane.putConstraint(SpringLayout.SOUTH, btnCalculate, -10, SpringLayout.SOUTH, contentPane);
contentPane.add(btnCalculate);

chckbxGzipOutput = new JCheckBox("Output Gzip");
sl_contentPane.putConstraint(SpringLayout.NORTH, chckbxGzipOutput, 0, SpringLayout.NORTH, btnCalculate);
sl_contentPane.putConstraint(SpringLayout.WEST, chckbxGzipOutput, 30, SpringLayout.WEST, contentPane);
contentPane.add(chckbxGzipOutput);

progressBar = new JProgressBar();
sl_contentPane.putConstraint(SpringLayout.SOUTH, progressBar, -10, SpringLayout.SOUTH, contentPane);
sl_contentPane.putConstraint(SpringLayout.EAST, progressBar, 0, SpringLayout.EAST, scrollPane);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ public class DNAShapefromFASTAOutput extends JFrame {
private File OUT_DIR = null;
private boolean[] OUTPUT_TYPE = null;
private ArrayList<File> FASTA = null;
private boolean OUTPUT_GZIP;

final JLayeredPane layeredPane;
final JTabbedPane tabbedPane;
Expand All @@ -43,8 +44,9 @@ public class DNAShapefromFASTAOutput extends JFrame {
* @param fa the FASTA-formatted sequences to generate the shape scores from
* @param out_dir the output directory to save output files to
* @param type the shape types to generate
* @param gzOutput whether the script should gzip-compress its output files
*/
public DNAShapefromFASTAOutput(ArrayList<File> fa, File out_dir, boolean[] type) {
public DNAShapefromFASTAOutput(ArrayList<File> fa, File out_dir, boolean[] type, boolean gzOutput) {
setTitle("DNA Shape Prediction Composite");
setDefaultCloseOperation(JFrame.DISPOSE_ON_CLOSE);
setBounds(150, 150, 800, 600);
Expand All @@ -70,6 +72,7 @@ public DNAShapefromFASTAOutput(ArrayList<File> fa, File out_dir, boolean[] type)
FASTA = fa;
OUT_DIR = out_dir;
OUTPUT_TYPE = type;
OUTPUT_GZIP = gzOutput;
}

/**
Expand Down Expand Up @@ -122,7 +125,7 @@ public void run() throws IOException, InterruptedException {
}

// Initialize Script Object and execute calculations
DNAShapefromFASTA script_obj = new DNAShapefromFASTA(FASTA.get(x), BASENAME, OUTPUT_TYPE, PS);
DNAShapefromFASTA script_obj = new DNAShapefromFASTA(FASTA.get(x), BASENAME, OUTPUT_TYPE, PS, OUTPUT_GZIP);
script_obj.run();

// Convert average and statistics to output tabs panes
Expand Down
Loading

0 comments on commit 9e250c7

Please sign in to comment.