Skip to content

Commit

Permalink
Merge pull request #143 from CEGRcode/gzip-support
Browse the repository at this point in the history
Added gzip support to plain-text tools, addressing #91
  • Loading branch information
owlang authored Nov 27, 2023
2 parents 7cbfeed + b818d09 commit abcc167
Show file tree
Hide file tree
Showing 91 changed files with 685 additions and 517 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,8 @@ public BAMtoBEDCLI(){}
private File output = null;
@Option(names = {"-s", "--stdout"}, description = "stream output file to STDOUT (cannot be used with \"-o\" flag)" )
private boolean stdout = false;
@Option(names = {"-z", "--gzip"}, description = "gzip output (default=false)")
private boolean gzOutput = false;

//Read
@ArgGroup(exclusive = true, multiplicity = "0..1", heading = "%nSelect Read to output:%n\t@|fg(red) (select no more than one of these options)|@%n")
Expand Down Expand Up @@ -81,7 +83,7 @@ public Integer call() throws Exception {
System.exit(1);
}

BAMtoBED script_obj = new BAMtoBED(bamFile, output, STRAND, PAIR, MIN_INSERT, MAX_INSERT, null);
BAMtoBED script_obj = new BAMtoBED(bamFile, output, STRAND, PAIR, MIN_INSERT, MAX_INSERT, null, gzOutput);
script_obj.run();

System.err.println("Conversion Complete");
Expand Down Expand Up @@ -123,7 +125,7 @@ private String validateInput() throws IOException {
}else if(stdout){
if(output!=null){ r += "(!)Cannot use -s flag with -o.\n"; }
//check output filename is valid
}else{
} else {
//check directory
if(output.getParent()==null){
// System.err.println("default to current directory");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,8 @@ public BAMtoGFFCLI(){}
private File output = null;
@Option(names = {"-s", "--stdout"}, description = "stream output file to STDOUT (cannot be used with \"-o\" flag)" )
private boolean stdout = false;
@Option(names = {"-z", "--gzip"}, description = "gzip output (default=false)")
private boolean gzOutput = false;

//Read
@ArgGroup(exclusive = true, multiplicity = "0..1", heading = "%nSelect Read to output:%n\t@|fg(red) (select no more than one of these options)|@%n")
Expand Down Expand Up @@ -81,7 +83,7 @@ public Integer call() throws Exception {
System.exit(1);
}

BAMtoGFF script_obj = new BAMtoGFF(bamFile, output, STRAND, PAIR, MIN_INSERT, MAX_INSERT, null);
BAMtoGFF script_obj = new BAMtoGFF(bamFile, output, STRAND, PAIR, MIN_INSERT, MAX_INSERT, null, gzOutput);
script_obj.run();

System.err.println("Conversion Complete");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,9 @@ public BAMtobedGraphCLI(){}

@Option(names = {"-o", "--output"}, description = "specify output directory (name will be same as original with _<strand>.bedgraph ext)" )
private String outputBasename = null;

@Option(names = {"-z", "--gzip"}, description = "gzip output (default=false)")
private boolean gzOutput = false;

//Read
@ArgGroup(exclusive = true, multiplicity = "0..1", heading = "%nSelect Read to output:%n\t@|fg(red) (select no more than one of these options)|@%n")
Expand Down Expand Up @@ -77,7 +80,7 @@ public Integer call() throws Exception {
System.exit(1);
}

BAMtobedGraph script_obj = new BAMtobedGraph(bamFile, outputBasename, STRAND, PAIR, MIN_INSERT, MAX_INSERT, null);
BAMtobedGraph script_obj = new BAMtobedGraph(bamFile, outputBasename, STRAND, PAIR, MIN_INSERT, MAX_INSERT, null, gzOutput);
script_obj.run();

System.err.println("Conversion Complete");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,8 @@ public BAMtoscIDXCLI(){}
private File output = null;
@Option(names = {"-s", "--stdout"}, description = "stream output file to STDOUT (cannot be used with \"-o\" flag)" )
private boolean stdout = false;

@Option(names = {"-z", "--gzip"}, description = "gzip output (default=false)")
private boolean gzOutput = false;
//Read
@ArgGroup(exclusive = true, multiplicity = "0..1", heading = "%nSelect Read to output:%n\t@|fg(red) (select no more than one of these options)|@%n")
ReadType readType = new ReadType();
Expand Down Expand Up @@ -78,8 +79,8 @@ public Integer call() throws Exception {
System.err.println("Invalid input. Check usage using '-h' or '--help'");
System.exit(1);
}
BAMtoscIDX script_obj = new BAMtoscIDX(bamFile, output, STRAND, PAIR, MIN_INSERT, MAX_INSERT, null);

BAMtoscIDX script_obj = new BAMtoscIDX(bamFile, output, STRAND, PAIR, MIN_INSERT, MAX_INSERT, null, gzOutput);
script_obj.run();

System.err.println("Conversion Complete");
Expand Down Expand Up @@ -120,7 +121,7 @@ private String validateInput() throws IOException {
}else if(stdout){
if(output!=null){ r += "(!)Cannot use -s flag with -o.\n"; }
//check output filename is valid
}else{
} else {
//check directory
if(output.getParent()==null){
// System.err.println("default to current directory");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ private String validateInput() throws IOException {
}
//set default output filename
if(output==null && !stdout){
String NAME = ExtensionFileFilter.stripExtension(bedFile) + ".gff";
String NAME = ExtensionFileFilter.stripExtensionIgnoreGZ(bedFile) + ".gff";
NAME += gzOutput ? ".gz" : "";
output = new File(NAME);
//check stdout and output not both selected
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,6 @@ private String validateInput() throws IOException {
if (!stdout) {
String NAME = ExtensionFileFilter.stripExtension(bedFile);
NAME += byCenter ? "_" + Integer.toString(SIZE) + "bp.bed" : "_border_" + Integer.toString(SIZE) + "bp.bed";
NAME += gzOutput ? ".gz" : "";
output = new File(NAME);
//check stdout and output not both selected
} else {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ private String validateInput() throws IOException {

//set default output filename
if(outputBasename==null){
outputBasename = ExtensionFileFilter.stripExtension(bedFile) + "_SORT";
outputBasename = ExtensionFileFilter.stripExtensionIgnoreGZ(bedFile) + "_SORT";
//check output filename is valid
}else{
//no extension check
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,8 +38,10 @@ public ExpandGFFCLI(){}

@Option(names = {"-o", "--output"}, description = "specify output filename (name will be same as original with coordinate info appended)")
private File output = null;
@Option(names = {"-s", "--stdout"}, description = "output bed to STDOUT")
@Option(names = {"-s", "--stdout"}, description = "output gff to STDOUT")
private boolean stdout = false;
@Option(names = {"-z", "--gzip"}, description = "gzip output (default=false)")
private boolean gzOutput = false;

@ArgGroup(validate = false, heading = "%nType of Expansion%n")
ExpandType expandType = new ExpandType();
Expand Down Expand Up @@ -67,7 +69,7 @@ public Integer call() throws Exception {
System.exit(1);
}

ExpandGFF.expandGFFBorders(output, gffFile, SIZE, byCenter);
ExpandGFF.expandGFFBorders(output, gffFile, SIZE, byCenter, gzOutput);

System.err.println("Expansion Complete");
return(0);
Expand All @@ -82,7 +84,7 @@ private String validateInput() throws IOException {
return(r);
}
//check input extensions
if(!"gff".equals(ExtensionFileFilter.getExtension(gffFile))){
if(!"gff".equals(ExtensionFileFilter.getExtensionIgnoreGZ(gffFile))){
r += "(!)Is this a GFF file? Check extension: " + gffFile.getName() + "\n";
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,8 @@ public class GFFtoBEDCLI implements Callable<Integer> {
private File output = null;
@Option(names = {"-s", "--stdout"}, description = "output bed to STDOUT")
private boolean stdout = false;
@Option(names = {"-z", "--gzip"}, description = "gzip output (default=false)")
private boolean gzOutput = false;

/**
* Runs when this subcommand is called, running script in respective script package with user defined arguments
Expand All @@ -49,7 +51,7 @@ public Integer call() throws Exception {
System.exit(1);
}

GFFtoBED.convertGFFtoBED(output, gffFile);
GFFtoBED.convertGFFtoBED(output, gffFile, gzOutput);

System.err.println("Conversion Complete");
return(0);
Expand All @@ -65,7 +67,7 @@ private String validateInput() throws IOException {
}
//set default output filename
if(output==null && !stdout){
output = new File(ExtensionFileFilter.stripExtension(gffFile) + ".bed");
output = new File(ExtensionFileFilter.stripExtensionIgnoreGZ(gffFile) + ".bed");
//check stdout and output not both selected
}else if(stdout){
if(output!=null){ r += "(!)Cannot use -s flag with -o.\n"; }
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,8 @@ public class SortGFFCLI implements Callable<Integer> {

@Option(names = {"-o", "--output"}, description = "specify output file basename with no .cdt/.gff/.jtv extension (default=<gffFile>_SORT")
private String outputBasename = null;
@Option(names = {"-z", "--gzip"}, description = "gzip output (default=false)")
private boolean gzOutput = false;
@Option(names = {"-c", "--center"}, description = "sort by center on the input size of expansion in bins (default=100)")
private int center = -999;
@Option(names = {"-x", "--index"}, description = "sort by index from the specified start to the specified stop (0-indexed and half-open interval)",
Expand Down Expand Up @@ -64,7 +66,7 @@ public Integer call() throws Exception {
index[1] = (CDT_SIZE / 2) + (center / 2);
}

SortGFF.sortGFFbyCDT(outputBasename, gffFile, cdtFile, index[0], index[1]);
SortGFF.sortGFFbyCDT(outputBasename, gffFile, cdtFile, index[0], index[1], gzOutput);

System.err.println("Sort Complete");
return(0);
Expand All @@ -82,10 +84,10 @@ private String validateInput() throws IOException {
}
if(!"".equals(r)){ return(r); }
//check input extensions
if(!"gff".equals(ExtensionFileFilter.getExtension(gffFile))){
if(!"gff".equals(ExtensionFileFilter.getExtensionIgnoreGZ(gffFile))){
r += "(!)Is this a GFF file? Check extension: " + gffFile.getName() + "\n";
}
if(!"cdt".equals(ExtensionFileFilter.getExtension(cdtFile))){
if(!"cdt".equals(ExtensionFileFilter.getExtensionIgnoreGZ(cdtFile))){
r += "(!)Is this a CDT file? Check extension: " + cdtFile.getName() + "\n";
}
// validate CDT as file, with consistent row size, and save row_size value
Expand All @@ -98,7 +100,7 @@ private String validateInput() throws IOException {

//set default output filename
if(outputBasename==null){
outputBasename = ExtensionFileFilter.stripExtension(gffFile) + "_SORT";
outputBasename = ExtensionFileFilter.stripExtensionIgnoreGZ(gffFile) + "_SORT";
//check output filename is valid
}else{
//no extension check
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -78,8 +78,7 @@ private String validateInput() throws IOException {
if(outputFilepath==null){
String SUFFIX = shift < 0 ? "_shift" + shift + "bp." : "_shift+" + shift + "bp.";
SUFFIX += isGFF ? "gff" : "bed";
SUFFIX += gzOutput ? ".gz" : "";
outputFilepath = ExtensionFileFilter.stripExtension(input) + SUFFIX;
outputFilepath = ExtensionFileFilter.stripExtensionIgnoreGZ(input) + SUFFIX;
//check output filename is valid
}else{
//no extension check
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ public ConvertBEDChrNamesCLI(){}

@Option(names = {"-m", "--chrmt"}, description = "converter will map \"chrM\" --> \"chrmt\" (default with no flag is \"chrmt\" --> \"chrM\")")
private boolean useChrmt = false;

@Option(names = {"-z", "--gzip"}, description = "gzip output (default=false)")
private boolean gzOutput = false;

Expand Down Expand Up @@ -88,13 +89,8 @@ private String validateInput() throws IOException {
if (output == null) {
// Set suffix format
String SUFFIX = toArabic ? "_toRoman.gff" : "_toArabic.gff";
SUFFIX += gzOutput ? ".gz" : "";
// Set output filepath with name and output directory
String OUTPUT = ExtensionFileFilter.stripExtension(coordFile);
// Strip second extension if input has ".gz" first extension
if (coordFile.getName().endsWith(".gff.gz")) {
OUTPUT = ExtensionFileFilter.stripExtensionPath(new File(OUTPUT)) ;
}
String OUTPUT = ExtensionFileFilter.stripExtensionIgnoreGZ(coordFile);
output = new File(OUTPUT + SUFFIX);
}else{
//check directory
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ public ConvertGFFChrNamesCLI(){}

@Option(names = {"-m", "--chrmt"}, description = "converter will map \"chrM\" --> \"chrmt\" (default with no flag is \"chrmt\" --> \"chrM\")")
private boolean useChrmt = false;

@Option(names = {"-z", "--gzip"}, description = "gzip output (default=false)")
private boolean gzOutput = false;

Expand Down Expand Up @@ -89,13 +90,8 @@ private String validateInput() throws IOException {
if (output == null) {
// Set suffix format
String SUFFIX = toArabic ? "_toRoman.bed" : "_toArabic.bed";
SUFFIX += gzOutput ? ".gz" : "";
// Set output filepath with name and output directory
String OUTPUT = ExtensionFileFilter.stripExtension(coordFile);
// Strip second extension if input has ".gz" first extension
if (coordFile.getName().endsWith(".bed.gz")) {
OUTPUT = ExtensionFileFilter.stripExtensionPath(new File(OUTPUT)) ;
}
String OUTPUT = ExtensionFileFilter.stripExtensionIgnoreGZ(coordFile);
output = new File(OUTPUT + SUFFIX);
}else{
//check directory
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,8 @@ public BEDPeakAligntoRefCLI(){}

@Option(names = {"-o", "--output"}, description = "Specify output file (default = <peakBED>_<refBED>_Output.cdt)")
private File output = null;
@Option(names = {"-z", "--gzip"}, description = "gzip output (default=false)")
private boolean gzOutput = false;

/**
* Runs when this subcommand is called, running script in respective script package with user defined arguments
Expand All @@ -53,7 +55,7 @@ public Integer call() throws Exception {
System.exit(1);
}

BEDPeakAligntoRef script_obj = new BEDPeakAligntoRef(refBED, peakBED, output, null);
BEDPeakAligntoRef script_obj = new BEDPeakAligntoRef(refBED, peakBED, output, null, gzOutput);
script_obj.run();

System.err.println( "Peak Align Complete." );
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,8 @@ public class FilterBEDbyProximityCLI implements Callable<Integer> {

@Option(names = {"-o", "--output"}, description = "Specify basename for output files (default = <bedFilename>_<exclusionNum>bp)")
private String outputBasename = null;
@Option(names = {"-z", "--gzip"}, description = "gzip output (default=false)")
private boolean gzOutput = false;
@Option(names = {"-e", "--exclusion"}, description = "exclusion distance in bp (default=100)")
private int exclusion = 100;

Expand All @@ -49,7 +51,7 @@ public Integer call() throws Exception {
System.exit(1);
}

FilterBEDbyProximity script_obj = new FilterBEDbyProximity(bedFile, exclusion, outputBasename, null);
FilterBEDbyProximity script_obj = new FilterBEDbyProximity(bedFile, exclusion, outputBasename, null, gzOutput);
script_obj.run();

System.err.println( "Filter Complete." );
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,8 @@ public class RandomCoordinateCLI implements Callable<Integer> {

@Option(names = {"-o", "--output"}, description = "Specify output directory (default = current working directory), file name will be random_coordinates_<genomeName>_<window>bp.<ext>")
private File output = null;
@Option(names = {"-z", "--gzip"}, description = "gzip output (default=false)")
private boolean gzOutput = false;
@Option(names = {"-f", "--gff"}, description = "file format output as GFF (default format as BED)")
private boolean formatIsBed = true;
@Option(names = {"-n", "--num-sites"}, description = "number of sites (default=1000)")
Expand All @@ -53,7 +55,7 @@ public Integer call() throws Exception {
System.exit(1);
}

RandomCoordinate.execute(genomeName, numSites, window, formatIsBed, output);
RandomCoordinate.execute(genomeName, numSites, window, formatIsBed, output, gzOutput);

System.err.println( "Random Coordinate Generation Complete." );
return(0);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,8 @@ public class TileGenomeCLI implements Callable<Integer> {

@Option(names = {"-o", "--output"}, description = "Specify output directory (default = current working directory), file name will be genome_tiles_<genomeName>_<window>bp.<ext>")
private File output = null;
@Option(names = {"-z", "--gzip"}, description = "gzip output (default=false)")
private boolean gzOutput = false;
@Option(names = {"-f", "--gff"}, description = "file format output as GFF (default format as BED)")
private boolean formatIsBed = true;
@Option(names = {"-w", "--window"}, description = "window size in bp (default=200)")
Expand All @@ -51,7 +53,7 @@ public Integer call() throws Exception {
System.exit(1);
}

TileGenome.execute(genomeName, window, formatIsBed, output);
TileGenome.execute(genomeName, window, formatIsBed, output, gzOutput);

System.err.println( "Genomic Tiling Complete." );
return(0);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,8 @@ public AggregateDataCLI() {}
private boolean fileList = false;
@Option(names = {"-o", "--output"}, description = "Specify output file (default = <input1>_SCORES.out, <input2_SCORES.out, ... or ALL_SCORES.out if -m flag is used)")
private File output;
@Option(names = {"-z", "--gzip"}, description = "gzip output (default=false)")
private boolean gzOutput = false;

@ArgGroup(exclusive = true, heading = "Aggregation Method%n")
AggType aggr = new AggType();
Expand Down Expand Up @@ -84,7 +86,7 @@ public Integer call() throws Exception {
System.exit(1);
}

AggregateData script_obj = new AggregateData(matFiles, output, merge, startROW, startCOL, aggType);
AggregateData script_obj = new AggregateData(matFiles, output, merge, startROW, startCOL, aggType, gzOutput);
script_obj.run();

System.err.println(script_obj.getMessage());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,8 @@ public class ScaleMatrixCLI implements Callable<Integer> {

@Option(names = {"-o", "--output"}, description = "Specify output file (default = <matrixFilename>_SCALE.tab)")
private File output = null;
@Option(names = {"-z", "--gzip"}, description = "gzip output (default=false)")
private boolean gzOutput = false;
@Option(names = {"-s", "--scaling-factor"}, description = "scaling factor (default=1)")
private double scale = 1;
@Option(names = {"-r", "--start-row"}, description = "")
Expand All @@ -54,7 +56,7 @@ public Integer call() throws Exception {
System.exit(1);
}

ScaleMatrix script_obj = new ScaleMatrix(matrixFile, output, scale, startROW, startCOL);
ScaleMatrix script_obj = new ScaleMatrix(matrixFile, output, scale, startROW, startCOL, gzOutput);
script_obj.run();

System.err.println("All Matrices Scaled.");
Expand All @@ -72,7 +74,7 @@ private String validateInput() throws IOException {
//no check ext
//set default output filename
if(output==null){
output = new File(ExtensionFileFilter.stripExtension(matrixFile) + "_SCALE." + ExtensionFileFilter.getExtension(matrixFile));
output = new File(ExtensionFileFilter.stripExtensionIgnoreGZ(matrixFile) + "_SCALE." + ExtensionFileFilter.getExtensionIgnoreGZ(matrixFile));
//check output filename is valid
}else if( output.isDirectory() ){
r += "(!)Must indicate file (not a directory) for your output.";
Expand Down
Loading

0 comments on commit abcc167

Please sign in to comment.