diff --git a/src/main/java/com/astrazeneca/vardict/VarDictLauncher.java b/src/main/java/com/astrazeneca/vardict/VarDictLauncher.java index 04230d4..f90f6d2 100644 --- a/src/main/java/com/astrazeneca/vardict/VarDictLauncher.java +++ b/src/main/java/com/astrazeneca/vardict/VarDictLauncher.java @@ -4,6 +4,7 @@ import com.astrazeneca.vardict.data.ReferenceResource; import com.astrazeneca.vardict.data.Region; import com.astrazeneca.vardict.data.scopedata.GlobalReadOnlyScope; +import com.astrazeneca.vardict.exception.RegionMissedSourceException; import com.astrazeneca.vardict.modes.*; import htsjdk.samtools.SAMFileHeader; import htsjdk.samtools.SAMSequenceRecord; @@ -73,6 +74,9 @@ public void start(Configuration config) { */ private void initResources(Configuration conf) { try { + if (conf.regionOfInterest == null && conf.bed == null) { + throw new RegionMissedSourceException(); + } Map chrLengths = readChr(conf.bam.getBamX()); Tuple.Tuple2 samples; if ((conf.regionOfInterest != null) && (conf.bam.hasBam2())) { diff --git a/src/main/java/com/astrazeneca/vardict/data/Patterns.java b/src/main/java/com/astrazeneca/vardict/data/Patterns.java index 0ba3b83..ecbcba4 100644 --- a/src/main/java/com/astrazeneca/vardict/data/Patterns.java +++ b/src/main/java/com/astrazeneca/vardict/data/Patterns.java @@ -158,4 +158,7 @@ public class Patterns { public static final jregex.Pattern BEGIN_NUM_S_OR_BEGIN_NUM_H = new jregex.Pattern("^(\\d+)S|^\\d+H"); public static final jregex.Pattern END_NUM_S_OR_NUM_H = new jregex.Pattern("(\\d+)S$|H$"); + //Exception patterns + public static final Pattern UNABLE_FIND_CONTIG = Pattern.compile("Unable to find entry for contig"); + public static final Pattern WRONG_START_OR_END = Pattern.compile("Malformed query"); } diff --git a/src/main/java/com/astrazeneca/vardict/data/ReferenceResource.java b/src/main/java/com/astrazeneca/vardict/data/ReferenceResource.java index ac2b86d..9701b34 100644 --- a/src/main/java/com/astrazeneca/vardict/data/ReferenceResource.java +++ b/src/main/java/com/astrazeneca/vardict/data/ReferenceResource.java @@ -1,6 +1,9 @@ package com.astrazeneca.vardict.data; import com.astrazeneca.vardict.Configuration; +import com.astrazeneca.vardict.exception.RegionBoundariesException; +import com.astrazeneca.vardict.exception.WrongFastaOrBamException; +import htsjdk.samtools.SAMException; import htsjdk.samtools.reference.IndexedFastaSequenceFile; import htsjdk.samtools.reference.ReferenceSequence; @@ -9,6 +12,8 @@ import java.time.LocalDateTime; import java.util.*; +import static com.astrazeneca.vardict.data.Patterns.UNABLE_FIND_CONTIG; +import static com.astrazeneca.vardict.data.Patterns.WRONG_START_OR_END; import static com.astrazeneca.vardict.data.scopedata.GlobalReadOnlyScope.instance; import static com.astrazeneca.vardict.Utils.substr; @@ -43,10 +48,20 @@ synchronized private IndexedFastaSequenceFile fetchFasta(String file) { * @return array of nucleotide bases in the region of fasta */ public String[] retrieveSubSeq(String fasta, String chr, int start, int end) { - IndexedFastaSequenceFile idx = fetchFasta(fasta); - ReferenceSequence seq = idx.getSubsequenceAt(chr, start, end); - byte[] bases = seq.getBases(); - return new String[] { ">" + chr + ":" + start + "-" + end, bases != null ? new String(bases) : "" }; + try { + IndexedFastaSequenceFile idx = fetchFasta(fasta); + ReferenceSequence seq = idx.getSubsequenceAt(chr, start, end); + byte[] bases = seq.getBases(); + return new String[]{">" + chr + ":" + start + "-" + end, bases != null ? new String(bases) : ""}; + } catch (SAMException e){ + if (UNABLE_FIND_CONTIG.matcher(e.getMessage()).find()){ + throw new WrongFastaOrBamException(chr, e); + } else if (WRONG_START_OR_END.matcher(e.getMessage()).find()){ + throw new RegionBoundariesException(chr, start, end, e); + } else { + throw e; + } + } } /** diff --git a/src/main/java/com/astrazeneca/vardict/exception/RegionBoundariesException.java b/src/main/java/com/astrazeneca/vardict/exception/RegionBoundariesException.java new file mode 100644 index 0000000..c115f37 --- /dev/null +++ b/src/main/java/com/astrazeneca/vardict/exception/RegionBoundariesException.java @@ -0,0 +1,14 @@ +package com.astrazeneca.vardict.exception; + + +import java.util.Locale; + +public class RegionBoundariesException extends RuntimeException { + public final static String RegionBoundariesExceptionMessage = "The region %s:%d-%d is wrong. " + + "We have problem while reading it, possible the start is after the end of the region or " + + "the fasta doesn't contain this region."; + + public RegionBoundariesException(String chr, int start, int end, Throwable e) { + super(String.format(Locale.US, RegionBoundariesExceptionMessage, chr, start, end) , e); + } +} \ No newline at end of file diff --git a/src/main/java/com/astrazeneca/vardict/exception/RegionMissedSourceException.java b/src/main/java/com/astrazeneca/vardict/exception/RegionMissedSourceException.java new file mode 100644 index 0000000..24bc4b5 --- /dev/null +++ b/src/main/java/com/astrazeneca/vardict/exception/RegionMissedSourceException.java @@ -0,0 +1,11 @@ +package com.astrazeneca.vardict.exception; + + +public class RegionMissedSourceException extends RuntimeException { + public final static String RegionSourceMissedMessage = "The required BED file or region missed, please, set it " + + "with path to BED or with -R option."; + + public RegionMissedSourceException() { + super(RegionSourceMissedMessage); + } +} \ No newline at end of file diff --git a/src/main/java/com/astrazeneca/vardict/exception/WrongFastaOrBamException.java b/src/main/java/com/astrazeneca/vardict/exception/WrongFastaOrBamException.java new file mode 100644 index 0000000..93de2a4 --- /dev/null +++ b/src/main/java/com/astrazeneca/vardict/exception/WrongFastaOrBamException.java @@ -0,0 +1,14 @@ +package com.astrazeneca.vardict.exception; + + +import java.util.Locale; + +public class WrongFastaOrBamException extends RuntimeException { + public final static String WrongFastaOrBamExceptionMeassage = "The name of this chromosome \"%s\" is missing in your" + + " fasta file. Please be sure that chromosome names in BAM, fasta and BED are in correspondence " + + "with each other and you use correct fasta for your BAM (can be checked in BAM header)."; + + public WrongFastaOrBamException(String chr, Throwable e) { + super(String.format(Locale.US, WrongFastaOrBamExceptionMeassage, chr), e); + } +} \ No newline at end of file diff --git a/src/main/java/com/astrazeneca/vardict/modes/AbstractMode.java b/src/main/java/com/astrazeneca/vardict/modes/AbstractMode.java index 8739b80..55699bb 100644 --- a/src/main/java/com/astrazeneca/vardict/modes/AbstractMode.java +++ b/src/main/java/com/astrazeneca/vardict/modes/AbstractMode.java @@ -147,4 +147,13 @@ void process() { */ public abstract void printHeader(); + public Reference tryToGetReference(Region region) { + Reference reference = new Reference(); + try { + reference = referenceResource.getReference(region); + } catch (Exception ex) { + stopVardictWithException(region, ex); + } + return reference; + } } diff --git a/src/main/java/com/astrazeneca/vardict/modes/AmpliconMode.java b/src/main/java/com/astrazeneca/vardict/modes/AmpliconMode.java index 118c595..460eaf8 100644 --- a/src/main/java/com/astrazeneca/vardict/modes/AmpliconMode.java +++ b/src/main/java/com/astrazeneca/vardict/modes/AmpliconMode.java @@ -1,5 +1,6 @@ package com.astrazeneca.vardict.modes; +import com.astrazeneca.vardict.data.Reference; import com.astrazeneca.vardict.data.ReferenceResource; import com.astrazeneca.vardict.collection.ConcurrentHashSet; import com.astrazeneca.vardict.collection.DirectThreadExecutor; @@ -58,7 +59,7 @@ public void notParallel() { list.add(tuple(ampliconNumber, region)); } Scope initialScope = new Scope<>(instance().conf.bam.getBam1(), region, - referenceResource.getReference(region), referenceResource, 0, splice, + tryToGetReference(region), referenceResource, 0, splice, variantPrinter, new InitialData()); CompletableFuture> pipeline = pipeline(initialScope, new DirectThreadExecutor()); @@ -93,8 +94,9 @@ void produceTasks() throws InterruptedException, ExecutionException { list.add(tuple(j, region)); } VariantPrinter variantPrinter = VariantPrinter.createPrinter(instance().printerTypeOut); + Reference reference = tryToGetReference(region); Scope initialScope = new Scope<>(instance().conf.bam.getBam1(), region, - referenceResource.getReference(region), referenceResource, 0, splice, + reference, referenceResource, 0, splice, variantPrinter, new InitialData()); CompletableFuture> pipeline = pipeline(initialScope, executor); diff --git a/src/main/java/com/astrazeneca/vardict/modes/SimpleMode.java b/src/main/java/com/astrazeneca/vardict/modes/SimpleMode.java index 08c4eb8..19d610c 100644 --- a/src/main/java/com/astrazeneca/vardict/modes/SimpleMode.java +++ b/src/main/java/com/astrazeneca/vardict/modes/SimpleMode.java @@ -95,9 +95,9 @@ public OutputStream call() { * @param out variant printer used for output */ private void processBamInPipeline(Region region, VariantPrinter out) { - Reference ref = referenceResource.getReference(region); + Reference reference = tryToGetReference(region); Scope initialScope = new Scope<>(instance().conf.bam.getBam1(), region, - ref, referenceResource, 0, new HashSet<>(), + reference, referenceResource, 0, new HashSet<>(), out, new InitialData()); CompletableFuture> pipeline = pipeline(initialScope, new DirectThreadExecutor()); diff --git a/src/main/java/com/astrazeneca/vardict/modes/SomaticMode.java b/src/main/java/com/astrazeneca/vardict/modes/SomaticMode.java index 36b1f4f..2465b8f 100644 --- a/src/main/java/com/astrazeneca/vardict/modes/SomaticMode.java +++ b/src/main/java/com/astrazeneca/vardict/modes/SomaticMode.java @@ -45,7 +45,7 @@ public void notParallel() { for (List list : segments) { for (Region region : list) { final Set splice = new ConcurrentHashSet<>(); - Reference ref = referenceResource.getReference(region); + Reference ref = tryToGetReference(region); processBothBamsInPipeline(variantPrinter, region, splice, ref); } } @@ -62,7 +62,7 @@ void produceTasks() throws InterruptedException, ExecutionException { for (List list : segments) { for (Region region : list) { final Set splice = new ConcurrentHashSet<>(); - Reference ref1 = referenceResource.getReference(region); + Reference ref1 = tryToGetReference(region); Future f2 = executor.submit(new SomdictWorker(region, splice, ref1)); toPrint.put(f2); } diff --git a/src/main/java/com/astrazeneca/vardict/modes/SplicingMode.java b/src/main/java/com/astrazeneca/vardict/modes/SplicingMode.java index b08548d..b790eea 100644 --- a/src/main/java/com/astrazeneca/vardict/modes/SplicingMode.java +++ b/src/main/java/com/astrazeneca/vardict/modes/SplicingMode.java @@ -51,9 +51,9 @@ public void notParallel() { * @param out variant printer used for output */ private void processRegion(Region region, VariantPrinter out) { - Reference ref = referenceResource.getReference(region); + Reference reference = tryToGetReference(region); Scope initialScope = new Scope<>(instance().conf.bam.getBam1(), region, - ref, referenceResource, 0, new HashSet<>(), + reference, referenceResource, 0, new HashSet<>(), out, new InitialData()); CompletableFuture> pipeline = splicingPipeline(initialScope, new DirectThreadExecutor());