Skip to content

Commit

Permalink
added vcluster --out-unvarying-clusters
Browse files Browse the repository at this point in the history
  • Loading branch information
rsuchecki committed Jun 9, 2021
1 parent 3ede924 commit ffb7eef
Show file tree
Hide file tree
Showing 6 changed files with 607 additions and 210 deletions.
2 changes: 1 addition & 1 deletion build.xml
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@

<target name="-pre-init">
<!-- <property name="project.name" value="NAME" />-->
<property name="version.num" value="0.9.3" />
<property name="version.num" value="0.9.4" />
<tstamp>
<format property="NOW" pattern="yyyy-MM-dd HH:mm:ss z" />
</tstamp>
Expand Down
755 changes: 567 additions & 188 deletions nbproject/build-impl.xml

Large diffs are not rendered by default.

6 changes: 3 additions & 3 deletions nbproject/genfiles.properties
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,6 @@ build.xml.script.CRC32=17cdf327
build.xml.stylesheet.CRC32=8064a381@1.75.2.48
# This file is used by a NetBeans-based IDE to track changes in generated files such as build-impl.xml.
# Do not edit this file. You may delete it but then the IDE will never regenerate such files for you.
nbproject/build-impl.xml.data.CRC32=4397f2f8
nbproject/build-impl.xml.script.CRC32=2022ac8e
nbproject/build-impl.xml.stylesheet.CRC32=830a3534@1.80.1.48
nbproject/build-impl.xml.data.CRC32=37bf6ae2
nbproject/build-impl.xml.script.CRC32=6318acc5
nbproject/build-impl.xml.stylesheet.CRC32=d549e5cc@1.98.0.48
17 changes: 16 additions & 1 deletion nbproject/project.properties
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,12 @@ build.test.results.dir=${build.dir}/test/results
#debug.transport=dt_socket
debug.classpath=\
${run.classpath}
debug.modulepath=\
${run.modulepath}
debug.test.classpath=\
${run.test.classpath}
debug.test.modulepath=\
${run.test.modulepath}
# Files in build.classes.dir which should be excluded from distribution jar
dist.archive.excludes=
# This directory is removed when the project is cleaned:
Expand All @@ -38,18 +42,23 @@ javac.classpath=
javac.compilerargs=
javac.deprecation=false
javac.external.vm=false
javac.modulepath=
javac.processormodulepath=
javac.processorpath=\
${javac.classpath}
javac.source=1.8
javac.target=1.8
javac.test.classpath=\
${javac.classpath}:\
${build.classes.dir}
javac.test.modulepath=\
${javac.modulepath}
javac.test.processorpath=\
${javac.test.classpath}
javadoc.additionalparam=
javadoc.author=false
javadoc.encoding=${source.encoding}
javadoc.html5=false
javadoc.noindex=false
javadoc.nonavbar=false
javadoc.notree=false
Expand All @@ -58,6 +67,8 @@ javadoc.splitindex=true
javadoc.use=true
javadoc.version=false
javadoc.windowtitle=
jlink.launcher=false
jlink.launcher.name=yakat
jnlp.codebase.type=no.codebase
jnlp.descriptor=application
jnlp.enabled=false
Expand All @@ -79,7 +90,7 @@ manifest.custom.permissions=
manifest.file=manifest.mf
meta.inf.dir=${src.dir}/META-INF
mkdist.disabled=false
platform.active=default_platform
platform.active=JDK_8
project.license=apache20
run.classpath=\
${javac.classpath}:\
Expand All @@ -88,9 +99,13 @@ run.classpath=\
# You may also define separate properties like run-sys-prop.name=value instead of -Dname=value.
# To set system properties for unit tests define test-sys-prop.name=value:
run.jvmargs=
run.modulepath=\
${javac.modulepath}
run.test.classpath=\
${javac.test.classpath}:\
${build.test.classes.dir}
run.test.modulepath=\
${javac.test.modulepath}
source.encoding=UTF-8
src.dir=src
test.src.dir=test
1 change: 1 addition & 0 deletions nbproject/project.xml
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
<configuration>
<data xmlns="http://www.netbeans.org/ns/j2se-project/3">
<name>yakat</name>
<explicit-platform explicit-source-supported="true"/>
<source-roots>
<root id="src.dir"/>
</source-roots>
Expand Down
36 changes: 19 additions & 17 deletions src/vsearchprocess/VsearchClustersCaller.java
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ public VsearchClustersCaller(String[] args, String callerName, String toolName)

private OptSet populateOptSet() {
OptSet optSet = new OptSet("Parse MSA output of VSEARCH clustering, call variants within each cluster. "
+ "Variants printed to stdout.");
+ "Variants printed to stdout.");

//CONSIDER SETTINGS
// INPUT LABELS TO DISTINGUISH SAMPLES, PREVENT CALLING SNPS WITHIN SAMPLES
Expand All @@ -79,7 +79,7 @@ private OptSet populateOptSet() {
//INPUT
optSet.setListingGroupLabel("[Input settings]");
optSet.addOpt(new Opt(null, "sample-ids", "Space separated sample identifiers which form the prefices of the input FASTA identifiers")
.setMinValueArgs(1).setMaxValueArgs(Integer.MAX_VALUE).setRequired(true));
.setMinValueArgs(1).setMaxValueArgs(Integer.MAX_VALUE).setRequired(true));
optSet.addOpt(new Opt(null, "clusters-msa", "The vsearch cluster msaout file, alternatively use stdin", 1));
// optSet.addOpt(new Opt(null, "original-fasta", "The original FASTA file given to vsearch, if specified it will be used to extract the input sequences' lengths", 1));
// optSet.incrementLisitngGroup();
Expand Down Expand Up @@ -120,6 +120,7 @@ private OptSet populateOptSet() {
optSet.addOpt(new Opt(null, "out-clusters-fasta", "Output clustered sequences (for which SNPs were called) to <arg> FASTA file", 1));
optSet.addOpt(new Opt(null, "out-unclustered-fasta", "Output unclustered sequences to <arg> FASTA file", 1));
optSet.addOpt(new Opt(null, "out-unclustered-min-len", "Minimum length required to output an unclustered sequence", 1).setMinValue(1).setDefaultValue(100));
optSet.addOpt(new Opt(null, "out-unvarying-clusters", "Output clusters (FASTA and/or MSA) for which no SNPs/indels called. Not reported by default"));
optSet.addOpt(new Opt('o', "stdout-redirect", "Redirect stdout to this file", 1));
optSet.addOpt(new Opt('e', "stderr-redirect", "Redirect stderr to this file", 1));
//// String headerNote = "Can be useful for external parallelization (print header once)";
Expand Down Expand Up @@ -280,25 +281,26 @@ public void readAndProcessMSASequencesFromFasta(String fileName, OptSet optSet)

}

private int processCluster(ClusteredSequencesMSA clusteredSeqs, OptSet optSet, int clusterNumber, BufferedWriter clustersFastaOut,
private int processCluster(ClusteredSequencesMSA clusteredSeqs, OptSet optSet, int clusterNumber, BufferedWriter clustersFastaOut,
BufferedWriter clustersMsaOut) throws IOException {

int minSamplesClustered = (int) optSet.getOpt("min-samples-clustered").getValueOrDefault();
int minSeqsClusteredIn = (int) optSet.getOpt("min-seqs-clustered-in").getValueOrDefault();
int maxSeqsClusteredIn = (int) optSet.getOpt("max-seqs-clustered-in").getValueOrDefault();

int maxSeqsClusteredOut = (int) optSet.getOpt("max-seqs-clustered-out").getValueOrDefault();
int maxIntraSnps = (int) optSet.getOpt("max-intra-snps").getValueOrDefault();
int maxInterSnps = (int) optSet.getOpt("max-inter-snps").getValueOrDefault(Integer.MAX_VALUE);

int maxIndelLength = (int) optSet.getOpt("max-indel-length").getValueOrDefault();
int minIndelDistFromEnds = (int) optSet.getOpt("min-indel-distance").getValueOrDefault();

double minInterIdentity = (double) optSet.getOpt("min-inter-identity").getValueOrDefault();

boolean reverseLex = optSet.getOpt("reverse-lex-order").getOptFlag();
boolean supressIntra = optSet.getOpt("supress-intra-snps").getOptFlag();
boolean supressInter = optSet.getOpt("supress-inter-snps").getOptFlag();
boolean outputUnvaryingClusters = optSet.getOpt("out-unvarying-clusters").getOptFlag();

boolean appendSequencesToSnpList = true;
int size = clusteredSeqs.size();
Expand All @@ -315,9 +317,9 @@ private int processCluster(ClusteredSequencesMSA clusteredSeqs, OptSet optSet, i
//MERGE NON-CONFLICTING SEQUENCES WITHIN EACH SAMPLE
if (clusteredSeqs.mergeSequencesWithinSamples()) {
// clusterString = "MERGED";
suffix= "MERGED";
suffix = "MERGED";
}

//CALL BETWEEN SAMPLES
clusteredSeqs.callSNPsBetweenAllSamples(maxIndelLength, minIndelDistFromEnds);
// boolean hasInter = false;
Expand All @@ -332,22 +334,22 @@ private int processCluster(ClusteredSequencesMSA clusteredSeqs, OptSet optSet, i
// } catch (NoSuchElementException e) {
// int x =0;
// }

int intra = clusteredSeqs.getIntraSnps().size();
int inter = clusteredSeqs.getInterSnps().size();
// clusteredSeqs.printInterSnps(clusterNumber, reverseLex, DELIMITER, suffix, 0, appendSequencesToSnpList);

//PRINT
if (intra <= maxIntraSnps && inter <= maxInterSnps && clusteredSeqs.size() <= maxSeqsClusteredOut) {
if (intra <= maxIntraSnps && inter <= maxInterSnps && clusteredSeqs.size() <= maxSeqsClusteredOut) {
++clusterNumber;
// clusteredSeqs.printCluster((clusterNumber) + " " + clusterString, maxIndelLength);
if (clustersFastaOut != null && ((hasIntra && !supressIntra) || (hasInter && !supressInter))) {
clustersFastaOut.write(clusteredSeqs.getClusterForPrint(clusterNumber, true).toString());
// clustersFastaOut.newLine();
}
if (clustersMsaOut != null && ((hasIntra && !supressIntra) || (hasInter && !supressInter))) {
clustersMsaOut.write(clusteredSeqs.getClusterForPrint(clusterNumber, false).toString());
// clustersFastaOut.newLine();
if ((hasIntra && !supressIntra) || (hasInter && !supressInter) || outputUnvaryingClusters) {
if (clustersFastaOut != null) {
clustersFastaOut.write(clusteredSeqs.getClusterForPrint(clusterNumber, true).toString());
}
if (clustersMsaOut != null) {
clustersMsaOut.write(clusteredSeqs.getClusterForPrint(clusterNumber, false).toString());
}
}
if (!supressIntra) {
clusteredSeqs.printIntraSnps(clusterNumber, reverseLex, DELIMITER, "INTRA", appendSequencesToSnpList);
Expand Down

0 comments on commit ffb7eef

Please sign in to comment.