From f3582a413d53420c0a8a08150ef30f08b9e76534 Mon Sep 17 00:00:00 2001 From: steuernb Date: Mon, 18 Jun 2018 10:41:58 +0100 Subject: [PATCH] -s parameter added to cli The -s parameter allows to filter for a set of identifiers. Docu updated --- Readme.md | 3 +++ src/mutantHunter/MutChromSeq.java | 41 ++++++++++++++++++++++++++++--- 2 files changed, 41 insertions(+), 3 deletions(-) diff --git a/Readme.md b/Readme.md index 8885360..6ea3d81 100644 --- a/Readme.md +++ b/Readme.md @@ -96,6 +96,8 @@ parameter | argument | description This will provide the candidate contigs. You will have to allocate a bit of memory to the java vm. To add e.g. 16 Gb or RAM you would write `java -Xmx16000M -jar ...` +The `-s` parameter allows you to load a list of identifiers that refer to sequences in the reference assembly. Further processing is only done for those sequences. If `-s` is omitted, the entire set of sequences is used. The argument to `-s` is either a text file containing one identifier per line or a TSV file where the first column contains identifiers. + ``` java -jar MutChromSeq.jar -w wildtype.pileup.xml -m mutant1.pileup.xml mutant2.plieup.xml [...] -o output.txt -n 6 -c 10 -a 0.01 -z 2 ``` @@ -108,6 +110,7 @@ parameter | argument | description **-c** | *int* | Minimum coverage for position to be regarded. Default is 10 **-n** | *int* | Minimum number of mutants to report a contig. Default is 2 **-z** | *int* | Number mutant lines that are allowed to have SNV in same position.
Default is 2 +**-s** | *Str* | A file with a list of sequence identifiers diff --git a/src/mutantHunter/MutChromSeq.java b/src/mutantHunter/MutChromSeq.java index ac4f2a1..69b3faf 100644 --- a/src/mutantHunter/MutChromSeq.java +++ b/src/mutantHunter/MutChromSeq.java @@ -47,13 +47,13 @@ /** * - * @version 3.0 + * @version 3.1 * @author steuernb * */ public class MutChromSeq { - public static final double version = 3.0; + public static final double version = 4; public static final String wildtype = "wildtype"; Hashtable targetContigs; @@ -145,6 +145,27 @@ public void filterContigList(HashSet contigs){ + /** Restricts targetContigs to the identifiers found in the first tab-separated column of each line of contigList. */ + public void filterContigList(File contigList)throws IOException{ + HashSet contigs = new HashSet(); + BufferedReader in = new BufferedReader(new FileReader(contigList)); + try { + for (String inputline = in.readLine(); inputline != null; inputline = in.readLine()) { + String[] split = inputline.split("\t"); + contigs.add(split[0].trim()); + } + } finally { + in.close(); /* release the file handle even if reading fails */ + } + Hashtable h = new Hashtable(); + for(Enumeration myenum = targetContigs.keys(); myenum.hasMoreElements();){ + String key = myenum.nextElement(); + if(contigs.contains(key)){ + h.put(key, targetContigs.get(key)); + } + } + this.targetContigs = h; + } @@ -787,6 +808,16 @@ public static void main(String[] args){ mutChromSeq.addXMLQuick(new File(mutant),false); } + if(cli.hasOption("s")){ + File contigList = new File(cli.getArg("s")); + if(!contigList.exists()){ + throw new CLIParseException("File " + contigList.getName() + " does not exist. 
Aborting."); + } + + mutChromSeq.filterContigList(contigList); + } + + Vector v = new Vector(); for(Iterator iterator = mutChromSeq.mutantLines.iterator(); iterator.hasNext();){ String s = iterator.next(); @@ -797,6 +828,9 @@ public static void main(String[] args){ } + + + int minCoverageToConsiderSNP = 15; double maxReferenceAlleleFrequency = 0.01; int minNumberOfTotalMutants = 5; @@ -867,7 +901,8 @@ public static void main(String[] args){ "-n \t\t\tMinimum number of mutants to report a contig. Default is 2\n"+ "-c \t\t\tMininum coverage for mappings to be regarded. Default is 10\n"+ "-a \t\t\tMaximum reference allele frequency to consider a SNP. Default is 0.01\n"+ - "-z \t\t\tNumber mutant lines that are allowed to have SNV in same position. Default is 2\n"; + "-z \t\t\tNumber mutant lines that are allowed to have SNV in same position. Default is 2\n"+ + "-s \t\tRestrict analysis to subset of contigs listed in file contiglist.txt"; System.err.println(s);