Skip to content

Commit

Permalink
Add handling of exceptions for fasta and BED: in multithreading mode …
Browse files Browse the repository at this point in the history
…and single modes.
  • Loading branch information
PolinaBevad committed Apr 12, 2019
1 parent e2995c9 commit 8b72904
Show file tree
Hide file tree
Showing 11 changed files with 84 additions and 12 deletions.
4 changes: 4 additions & 0 deletions src/main/java/com/astrazeneca/vardict/VarDictLauncher.java
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import com.astrazeneca.vardict.data.ReferenceResource;
import com.astrazeneca.vardict.data.Region;
import com.astrazeneca.vardict.data.scopedata.GlobalReadOnlyScope;
import com.astrazeneca.vardict.exception.RegionMissedSourceException;
import com.astrazeneca.vardict.modes.*;
import htsjdk.samtools.SAMFileHeader;
import htsjdk.samtools.SAMSequenceRecord;
Expand Down Expand Up @@ -73,6 +74,9 @@ public void start(Configuration config) {
*/
private void initResources(Configuration conf) {
try {
if (conf.regionOfInterest == null && conf.bed == null) {
throw new RegionMissedSourceException();
}
Map<String, Integer> chrLengths = readChr(conf.bam.getBamX());
Tuple.Tuple2<String, String> samples;
if ((conf.regionOfInterest != null) && (conf.bam.hasBam2())) {
Expand Down
3 changes: 3 additions & 0 deletions src/main/java/com/astrazeneca/vardict/data/Patterns.java
Original file line number Diff line number Diff line change
Expand Up @@ -158,4 +158,7 @@ public class Patterns {
public static final jregex.Pattern BEGIN_NUM_S_OR_BEGIN_NUM_H = new jregex.Pattern("^(\\d+)S|^\\d+H");
public static final jregex.Pattern END_NUM_S_OR_NUM_H = new jregex.Pattern("(\\d+)S$|H$");

// Exception patterns: fragments searched for (via matcher(...).find()) in the messages of
// exceptions caught while reading the reference, so that a known failure can be rethrown
// as a more descriptive VarDict exception.
// Message fragment for a contig/chromosome that could not be found.
public static final Pattern UNABLE_FIND_CONTIG = Pattern.compile("Unable to find entry for contig");
// Message fragment for a malformed query, used to detect wrong region start/end.
public static final Pattern WRONG_START_OR_END = Pattern.compile("Malformed query");
}
23 changes: 19 additions & 4 deletions src/main/java/com/astrazeneca/vardict/data/ReferenceResource.java
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
package com.astrazeneca.vardict.data;

import com.astrazeneca.vardict.Configuration;
import com.astrazeneca.vardict.exception.RegionBoundariesException;
import com.astrazeneca.vardict.exception.WrongFastaOrBamException;
import htsjdk.samtools.SAMException;
import htsjdk.samtools.reference.IndexedFastaSequenceFile;
import htsjdk.samtools.reference.ReferenceSequence;

Expand All @@ -9,6 +12,8 @@
import java.time.LocalDateTime;
import java.util.*;

import static com.astrazeneca.vardict.data.Patterns.UNABLE_FIND_CONTIG;
import static com.astrazeneca.vardict.data.Patterns.WRONG_START_OR_END;
import static com.astrazeneca.vardict.data.scopedata.GlobalReadOnlyScope.instance;
import static com.astrazeneca.vardict.Utils.substr;

Expand Down Expand Up @@ -43,10 +48,20 @@ synchronized private IndexedFastaSequenceFile fetchFasta(String file) {
* @return array of nucleotide bases in the region of fasta
*/
public String[] retrieveSubSeq(String fasta, String chr, int start, int end) {
    try {
        IndexedFastaSequenceFile idx = fetchFasta(fasta);
        ReferenceSequence seq = idx.getSubsequenceAt(chr, start, end);
        byte[] bases = seq.getBases();
        // First element is a fasta-like header ">chr:start-end", second is the sequence ("" if no bases).
        return new String[]{">" + chr + ":" + start + "-" + end, bases != null ? new String(bases) : ""};
    } catch (SAMException e) {
        // An exception may carry no message at all; matching a pattern against null would
        // raise a NullPointerException and hide the real failure, so rethrow it untouched.
        String message = e.getMessage();
        if (message == null) {
            throw e;
        }
        if (UNABLE_FIND_CONTIG.matcher(message).find()) {
            // The requested chromosome name is not present in the fasta.
            throw new WrongFastaOrBamException(chr, e);
        }
        if (WRONG_START_OR_END.matcher(message).find()) {
            // The region boundaries were rejected as a malformed query.
            throw new RegionBoundariesException(chr, start, end, e);
        }
        // Unrecognised failure: propagate the original exception unchanged.
        throw e;
    }
}

/**
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
package com.astrazeneca.vardict.exception;


import java.util.Locale;

/**
 * Unchecked exception reporting that a region could not be read, e.g. because its start
 * lies after its end or the fasta does not contain the region.
 */
public class RegionBoundariesException extends RuntimeException {
    /** Message template; the placeholders are the chromosome name, the region start and the region end. */
    public final static String RegionBoundariesExceptionMessage = "The region %s:%d-%d is wrong. "
            + "We have problem while reading it, possible the start is after the end of the region or "
            + "the fasta doesn't contain this region.";

    /**
     * @param chr   chromosome name of the offending region
     * @param start start position of the offending region
     * @param end   end position of the offending region
     * @param e     underlying cause, kept as the cause of this exception
     */
    public RegionBoundariesException(String chr, int start, int end, Throwable e) {
        super(describe(chr, start, end), e);
    }

    /** Fills the message template with the region coordinates using the US locale. */
    private static String describe(String chr, int start, int end) {
        return String.format(Locale.US, RegionBoundariesExceptionMessage, chr, start, end);
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
package com.astrazeneca.vardict.exception;


/**
 * Unchecked exception reporting that neither a BED file nor a region (-R option) was provided.
 */
public class RegionMissedSourceException extends RuntimeException {
    /** Fixed message used for every instance of this exception. */
    public final static String RegionSourceMissedMessage =
            "The required BED file or region missed, please, set it with path to BED or with -R option.";

    /** Creates the exception with the fixed {@link #RegionSourceMissedMessage} and no cause. */
    public RegionMissedSourceException() {
        super(RegionSourceMissedMessage);
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
package com.astrazeneca.vardict.exception;


import java.util.Locale;

/**
 * Unchecked exception reporting that a chromosome name is missing in the fasta file,
 * which usually points to mismatching BAM, fasta and BED inputs.
 */
public class WrongFastaOrBamException extends RuntimeException {
    /** Message template; the single placeholder is the chromosome name. */
    public final static String WrongFastaOrBamExceptionMeassage =
            "The name of this chromosome \"%s\" is missing in your fasta file. "
            + "Please be sure that chromosome names in BAM, fasta and BED are in correspondence "
            + "with each other and you use correct fasta for your BAM (can be checked in BAM header).";

    /**
     * @param chr chromosome name that could not be found
     * @param e   underlying cause, kept as the cause of this exception
     */
    public WrongFastaOrBamException(String chr, Throwable e) {
        super(describe(chr), e);
    }

    /** Fills the message template with the chromosome name using the US locale. */
    private static String describe(String chr) {
        return String.format(Locale.US, WrongFastaOrBamExceptionMeassage, chr);
    }
}
9 changes: 9 additions & 0 deletions src/main/java/com/astrazeneca/vardict/modes/AbstractMode.java
Original file line number Diff line number Diff line change
Expand Up @@ -147,4 +147,13 @@ void process() {
*/
public abstract void printHeader();

/**
 * Loads the reference for the given region through {@code referenceResource}.
 * Any exception raised while loading is handed to {@code stopVardictWithException}
 * together with the region; if that call returns normally, an empty {@code Reference}
 * is returned instead.
 *
 * @param region region to load the reference for
 * @return the loaded reference, or an empty one when loading failed
 */
public Reference tryToGetReference(Region region) {
    // Created up front so it is available as the result on the failure path.
    Reference emptyReference = new Reference();
    try {
        return referenceResource.getReference(region);
    } catch (Exception failure) {
        stopVardictWithException(region, failure);
        return emptyReference;
    }
}
}
6 changes: 4 additions & 2 deletions src/main/java/com/astrazeneca/vardict/modes/AmpliconMode.java
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
package com.astrazeneca.vardict.modes;

import com.astrazeneca.vardict.data.Reference;
import com.astrazeneca.vardict.data.ReferenceResource;
import com.astrazeneca.vardict.collection.ConcurrentHashSet;
import com.astrazeneca.vardict.collection.DirectThreadExecutor;
Expand Down Expand Up @@ -58,7 +59,7 @@ public void notParallel() {
list.add(tuple(ampliconNumber, region));
}
Scope<InitialData> initialScope = new Scope<>(instance().conf.bam.getBam1(), region,
referenceResource.getReference(region), referenceResource, 0, splice,
tryToGetReference(region), referenceResource, 0, splice,
variantPrinter, new InitialData());
CompletableFuture<Scope<AlignedVarsData>> pipeline = pipeline(initialScope,
new DirectThreadExecutor());
Expand Down Expand Up @@ -93,8 +94,9 @@ void produceTasks() throws InterruptedException, ExecutionException {
list.add(tuple(j, region));
}
VariantPrinter variantPrinter = VariantPrinter.createPrinter(instance().printerTypeOut);
Reference reference = tryToGetReference(region);
Scope<InitialData> initialScope = new Scope<>(instance().conf.bam.getBam1(), region,
referenceResource.getReference(region), referenceResource, 0, splice,
reference, referenceResource, 0, splice,
variantPrinter, new InitialData());

CompletableFuture<Scope<AlignedVarsData>> pipeline = pipeline(initialScope, executor);
Expand Down
4 changes: 2 additions & 2 deletions src/main/java/com/astrazeneca/vardict/modes/SimpleMode.java
Original file line number Diff line number Diff line change
Expand Up @@ -95,9 +95,9 @@ public OutputStream call() {
* @param out variant printer used for output
*/
private void processBamInPipeline(Region region, VariantPrinter out) {
Reference ref = referenceResource.getReference(region);
Reference reference = tryToGetReference(region);
Scope<InitialData> initialScope = new Scope<>(instance().conf.bam.getBam1(), region,
ref, referenceResource, 0, new HashSet<>(),
reference, referenceResource, 0, new HashSet<>(),
out, new InitialData());

CompletableFuture<Scope<AlignedVarsData>> pipeline = pipeline(initialScope, new DirectThreadExecutor());
Expand Down
4 changes: 2 additions & 2 deletions src/main/java/com/astrazeneca/vardict/modes/SomaticMode.java
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ public void notParallel() {
for (List<Region> list : segments) {
for (Region region : list) {
final Set<String> splice = new ConcurrentHashSet<>();
Reference ref = referenceResource.getReference(region);
Reference ref = tryToGetReference(region);
processBothBamsInPipeline(variantPrinter, region, splice, ref);
}
}
Expand All @@ -62,7 +62,7 @@ void produceTasks() throws InterruptedException, ExecutionException {
for (List<Region> list : segments) {
for (Region region : list) {
final Set<String> splice = new ConcurrentHashSet<>();
Reference ref1 = referenceResource.getReference(region);
Reference ref1 = tryToGetReference(region);
Future<OutputStream> f2 = executor.submit(new SomdictWorker(region, splice, ref1));
toPrint.put(f2);
}
Expand Down
4 changes: 2 additions & 2 deletions src/main/java/com/astrazeneca/vardict/modes/SplicingMode.java
Original file line number Diff line number Diff line change
Expand Up @@ -51,9 +51,9 @@ public void notParallel() {
* @param out variant printer used for output
*/
private void processRegion(Region region, VariantPrinter out) {
Reference ref = referenceResource.getReference(region);
Reference reference = tryToGetReference(region);
Scope<InitialData> initialScope = new Scope<>(instance().conf.bam.getBam1(), region,
ref, referenceResource, 0, new HashSet<>(),
reference, referenceResource, 0, new HashSet<>(),
out, new InitialData());

CompletableFuture<Scope<VariationData>> pipeline = splicingPipeline(initialScope, new DirectThreadExecutor());
Expand Down

0 comments on commit 8b72904

Please sign in to comment.