From 231be5a935e312ae8b05677067e9aca998b065a8 Mon Sep 17 00:00:00 2001 From: Louis Bergelson Date: Mon, 6 Jan 2020 14:03:10 -0500 Subject: [PATCH 1/9] Adding a new GATKTool level argument to control which if any output variants are filtered. --- .../hellbender/engine/GATKTool.java | 55 +++++-- .../tools/walkers/GenotypeGVCFs.java | 52 +++---- .../gnarlyGenotyper/GnarlyGenotyper.java | 28 +--- .../writers/IntervalFilteringVcfWriter.java | 140 ++++++++++++++++++ .../engine/GatkToolIntegrationTest.java | 73 +++++++++ .../walkers/GenotypeGVCFsIntegrationTest.java | 12 +- .../GnarlyGenotyperIntegrationTest.java | 9 +- .../variantutils/ReblockGVCFUnitTest.java | 7 +- .../variant/writers/GVCFWriterUnitTest.java | 82 +++------- .../IntervalFilteringVcfWriterUnitTest.java | 91 ++++++++++++ .../writers/SomaticGVCFWriterUnitTest.java | 6 +- .../utils/variant/writers/MockVcfWriter.java | 42 ++++++ 12 files changed, 458 insertions(+), 139 deletions(-) create mode 100644 src/main/java/org/broadinstitute/hellbender/utils/variant/writers/IntervalFilteringVcfWriter.java create mode 100644 src/test/java/org/broadinstitute/hellbender/utils/variant/writers/IntervalFilteringVcfWriterUnitTest.java create mode 100644 src/testUtils/java/org/broadinstitute/hellbender/utils/variant/writers/MockVcfWriter.java diff --git a/src/main/java/org/broadinstitute/hellbender/engine/GATKTool.java b/src/main/java/org/broadinstitute/hellbender/engine/GATKTool.java index fc15bcd9425..5607e56d33b 100644 --- a/src/main/java/org/broadinstitute/hellbender/engine/GATKTool.java +++ b/src/main/java/org/broadinstitute/hellbender/engine/GATKTool.java @@ -16,8 +16,12 @@ import java.util.*; import java.util.stream.Stream; + +import org.broadinstitute.barclay.argparser.Advanced; import org.broadinstitute.barclay.argparser.Argument; import org.broadinstitute.barclay.argparser.ArgumentCollection; +import org.broadinstitute.barclay.argparser.CommandLineException; +import 
org.broadinstitute.barclay.argparser.CommandLineException; import org.broadinstitute.barclay.argparser.CommandLinePluginDescriptor; import org.broadinstitute.hellbender.cmdline.CommandLineProgram; import org.broadinstitute.hellbender.cmdline.GATKPlugin.GATKAnnotationPluginDescriptor; @@ -45,6 +49,11 @@ import org.broadinstitute.hellbender.utils.reference.ReferenceUtils; import org.broadinstitute.hellbender.utils.variant.GATKVariantContextUtils; import org.broadinstitute.hellbender.utils.variant.writers.ShardingVCFWriter; +import org.broadinstitute.hellbender.utils.variant.writers.IntervalFilteringVcfWriter; + +//TODO: +//UserException overloads +//VCF outs /** * Base class for all GATK tools. Tool authors that want to write a "GATK" tool but not use one of @@ -127,6 +136,14 @@ public abstract class GATKTool extends CommandLineProgram { doc = "If true, don't emit genotype fields when writing vcf file output.", optional = true) public boolean outputSitesOnlyVCFs = false; + public static final String VARIANT_OUTPUT_INTERVAL_FILTERING_MODE = "variant-output-interval-filtering-mode"; + @Argument(fullName = VARIANT_OUTPUT_INTERVAL_FILTERING_MODE, + doc = "Restrict the output variants to ones that match the specified intervals according to the specified matching mode.", + optional = true) + @Advanced + public IntervalFilteringVcfWriter.Mode outputVariantIntervalFilteringMode = getDefaultVariantOutputFilterMode(); + + /** * Master sequence dictionary to be used instead of all other dictionaries (if provided). */ @@ -417,6 +434,13 @@ public int getDefaultCloudIndexPrefetchBufferSize() { */ public String getProgressMeterRecordLabel() { return ProgressMeter.DEFAULT_RECORD_LABEL; } + /** + * @return Default interval filtering mode for variant output. Subclasses may override this to set a different default. 
+ */ + public IntervalFilteringVcfWriter.Mode getDefaultVariantOutputFilterMode(){ + return null; + } + protected List transformTraversalIntervals(final List getIntervals, final SAMSequenceDictionary sequenceDictionary) { return getIntervals; } @@ -600,7 +624,7 @@ public boolean requiresIntervals() { /** * Does this tool want to disable the progress meter? If so, override here to return true - * + * * @return true if this tools wants to disable progress meter output, otherwise false */ public boolean disableProgressMeter() { @@ -727,12 +751,16 @@ protected void onStartup() { initializeIntervals(); // Must be initialized after reference, reads and features, since intervals currently require a sequence dictionary from another data source - if ( seqValidationArguments.performSequenceDictionaryValidation()) { + if (seqValidationArguments.performSequenceDictionaryValidation()) { validateSequenceDictionaries(); } checkToolRequirements(); + if (outputVariantIntervalFilteringMode != null && userIntervals == null){ + throw new CommandLineException.MissingArgument("-L or -XL", "Intervals are required if --" + VARIANT_OUTPUT_INTERVAL_FILTERING_MODE + " was specified."); + } + initializeProgressMeter(getProgressMeterRecordLabel()); } @@ -911,20 +939,27 @@ public VariantContextWriter createVCFWriter(final Path outPath) { if (outputSitesOnlyVCFs) { options.add(Options.DO_NOT_WRITE_GENOTYPES); } - + final VariantContextWriter unfilteredWriter; if (maxVariantsPerShard > 0) { - return new ShardingVCFWriter( + unfilteredWriter = new ShardingVCFWriter( outPath, maxVariantsPerShard, sequenceDictionary, createOutputVariantMD5, - options.toArray(new Options[options.size()])); + options.toArray(new Options[0])); + } else { + unfilteredWriter = GATKVariantContextUtils.createVCFWriter( + outPath, + sequenceDictionary, + createOutputVariantMD5, + options.toArray(new Options[0])); } - return GATKVariantContextUtils.createVCFWriter( - outPath, - sequenceDictionary, - createOutputVariantMD5, - 
options.toArray(new Options[options.size()])); + + return outputVariantIntervalFilteringMode== null ? + unfilteredWriter : + new IntervalFilteringVcfWriter(unfilteredWriter, + intervalArgumentCollection.getIntervals(getBestAvailableSequenceDictionary()), + outputVariantIntervalFilteringMode); } /** diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/GenotypeGVCFs.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/GenotypeGVCFs.java index cec6dbec129..bdb98b48574 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/GenotypeGVCFs.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/GenotypeGVCFs.java @@ -7,7 +7,10 @@ import htsjdk.variant.variantcontext.writer.VariantContextWriter; import htsjdk.variant.vcf.VCFHeader; import htsjdk.variant.vcf.VCFHeaderLine; -import org.broadinstitute.barclay.argparser.*; +import org.broadinstitute.barclay.argparser.Argument; +import org.broadinstitute.barclay.argparser.ArgumentCollection; +import org.broadinstitute.barclay.argparser.CommandLineException; +import org.broadinstitute.barclay.argparser.CommandLineProgramProperties; import org.broadinstitute.barclay.help.DocumentedFeature; import org.broadinstitute.hellbender.cmdline.GATKPlugin.GATKAnnotationPluginDescriptor; import org.broadinstitute.hellbender.cmdline.GATKPlugin.GATKReadFilterPluginDescriptor; @@ -15,6 +18,7 @@ import org.broadinstitute.hellbender.cmdline.argumentcollections.DbsnpArgumentCollection; import org.broadinstitute.hellbender.cmdline.programgroups.ShortVariantDiscoveryProgramGroup; import org.broadinstitute.hellbender.engine.FeatureContext; +import org.broadinstitute.hellbender.engine.GATKTool; import org.broadinstitute.hellbender.engine.GATKPath; import org.broadinstitute.hellbender.engine.ReadsContext; import org.broadinstitute.hellbender.engine.ReferenceContext; @@ -29,11 +33,22 @@ import org.broadinstitute.hellbender.tools.walkers.annotator.VariantAnnotatorEngine; import 
org.broadinstitute.hellbender.tools.walkers.genotyper.GenotypeCalculationArgumentCollection; import org.broadinstitute.hellbender.tools.walkers.mutect.M2ArgumentCollection; -import org.broadinstitute.hellbender.utils.*; +import org.broadinstitute.hellbender.utils.GenomeLoc; +import org.broadinstitute.hellbender.utils.GenomeLocParser; +import org.broadinstitute.hellbender.utils.GenomeLocSortedSet; +import org.broadinstitute.hellbender.utils.IntervalMergingRule; +import org.broadinstitute.hellbender.utils.IntervalSetRule; +import org.broadinstitute.hellbender.utils.IntervalUtils; +import org.broadinstitute.hellbender.utils.SimpleInterval; import org.broadinstitute.hellbender.utils.variant.GATKVariantContextUtils; +import org.broadinstitute.hellbender.utils.variant.writers.IntervalFilteringVcfWriter; import org.broadinstitute.hellbender.tools.walkers.annotator.allelespecific.ReducibleAnnotation; -import java.util.*; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; +import java.util.Set; import java.util.stream.Collectors; /** @@ -114,7 +129,7 @@ public final class GenotypeGVCFs extends VariantLocusWalker { /** * Import all data between specified intervals. Improves performance using large lists of intervals, as in exome * sequencing, especially if GVCF data only exists for specified intervals. Use with - * --only-output-calls-starting-in-intervals if input GVCFs contain calls outside the specified intervals. + * --{@value GATKTool#VARIANT_OUTPUT_INTERVAL_FILTERING_MODE} if input GVCFs contain calls outside the specified intervals. 
*/ @Argument(fullName = GenomicsDBImport.MERGE_INPUT_INTERVALS_LONG_NAME, shortName = GenomicsDBImport.MERGE_INPUT_INTERVALS_LONG_NAME, @@ -155,16 +170,6 @@ public final class GenotypeGVCFs extends VariantLocusWalker { @ArgumentCollection private GenomicsDBArgumentCollection genomicsdbArgs = new GenomicsDBArgumentCollection(); - /** - * This option can only be activated if intervals are specified. - */ - @Advanced - @Argument(fullName= ONLY_OUTPUT_CALLS_STARTING_IN_INTERVALS_FULL_NAME, - doc="Restrict variant output to sites that start within provided intervals", - optional=true) - private boolean onlyOutputCallsStartingInIntervals = false; - - @Argument(fullName = FORCE_OUTPUT_INTERVALS_NAME, suppressFileExpansion = true, doc = "sites at which to output genotypes even if non-variant in samples", optional = true) protected final List forceOutputIntervalStrings = new ArrayList<>(); @@ -186,9 +191,6 @@ public final class GenotypeGVCFs extends VariantLocusWalker { private VariantContextWriter vcfWriter; - /** these are used when {@link #onlyOutputCallsStartingInIntervals) is true */ - private List intervals; - private OverlapDetector forceOutputIntervals; private boolean forceOutputIntervalsPresent; @@ -269,15 +271,6 @@ public void onTraversalStart() { final VCFHeader inputVCFHeader = getHeaderForVariants(); - if(onlyOutputCallsStartingInIntervals) { - if( !hasUserSuppliedIntervals()) { - throw new CommandLineException.MissingArgument("-L or -XL", "Intervals are required if --" + ONLY_OUTPUT_CALLS_STARTING_IN_INTERVALS_FULL_NAME + " was specified."); - } - } - - intervals = hasUserSuppliedIntervals() ? 
intervalArgumentCollection.getIntervals(getBestAvailableSequenceDictionary()) : - Collections.emptyList(); - final Collection variantAnnotations = makeVariantAnnotations(); final Set annotationsToKeep = getAnnotationsToKeep(); annotationEngine = new VariantAnnotatorEngine(variantAnnotations, dbsnp.dbsnp, Collections.emptyList(), false, keepCombined, annotationsToKeep); @@ -285,7 +278,7 @@ public void onTraversalStart() { merger = new ReferenceConfidenceVariantContextMerger(annotationEngine, getHeaderForVariants(), somaticInput, false, true); //methods that cannot be called in engine bc its protected - Set defaultToolVCFHeaderLines = getDefaultToolVCFHeaderLines(); + final Set defaultToolVCFHeaderLines = getDefaultToolVCFHeaderLines(); vcfWriter = createVCFWriter(outputFile); //create engine object @@ -294,7 +287,6 @@ public void onTraversalStart() { //call initialize method in engine class that creates VCFWriter object and writes a header to it vcfWriter = gvcfEngine.setupVCFWriter(defaultToolVCFHeaderLines, keepCombined, dbsnp, vcfWriter); - } private Set getAnnotationsToKeep() { @@ -316,9 +308,7 @@ public void apply(final Locatable loc, List variants, ReadsConte final VariantContext regenotypedVC = gvcfEngine.callRegion(loc, variants, ref, features, merger, somaticInput, tlodThreshold, afTolerance, forceOutput); if (regenotypedVC != null) { - final SimpleInterval variantStart = new SimpleInterval(regenotypedVC.getContig(), regenotypedVC.getStart(), regenotypedVC.getStart()); - if ((forceOutput || !GATKVariantContextUtils.isSpanningDeletionOnly(regenotypedVC)) && - (!onlyOutputCallsStartingInIntervals || intervals.stream().anyMatch(interval -> interval.contains (variantStart)))) { + if ((forceOutput || !GATKVariantContextUtils.isSpanningDeletionOnly(regenotypedVC))) { vcfWriter.add(regenotypedVC); } } diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/gnarlyGenotyper/GnarlyGenotyper.java 
b/src/main/java/org/broadinstitute/hellbender/tools/walkers/gnarlyGenotyper/GnarlyGenotyper.java index b6f192cb735..f69fe38061a 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/gnarlyGenotyper/GnarlyGenotyper.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/gnarlyGenotyper/GnarlyGenotyper.java @@ -108,15 +108,6 @@ public final class GnarlyGenotyper extends VariantWalker { @Argument(fullName = "keep-all-sites", doc="Retain low quality and non-variant sites, applying appropriate filters", optional=true) private boolean keepAllSites = false; - /** - * This option can only be activated if intervals are specified. - */ - @Advanced - @Argument(fullName = GenotypeGVCFs.ONLY_OUTPUT_CALLS_STARTING_IN_INTERVALS_FULL_NAME, - doc="Restrict variant output to sites that start within provided intervals", - optional=true) - private boolean onlyOutputCallsStartingInIntervals = false; - @Argument(fullName = GenomicsDBImport.MERGE_INPUT_INTERVALS_LONG_NAME, shortName = GenomicsDBImport.MERGE_INPUT_INTERVALS_LONG_NAME, doc = "Boolean flag to read in all data in between intervals. 
Improves performance reading from GenomicsDB " + @@ -145,9 +136,6 @@ public final class GnarlyGenotyper extends VariantWalker { private final RMSMappingQuality mqCalculator = RMSMappingQuality.getInstance(); private final Set> allAlleleSpecificAnnotations = new HashSet<>(); - /** these are used when {@link #onlyOutputCallsStartingInIntervals) is true */ - private List intervals; - @Override public boolean requiresReference() { return true; @@ -182,14 +170,6 @@ protected GenomicsDBOptions getGenomicsDBOptions() { public void onTraversalStart() { final VCFHeader inputVCFHeader = getHeaderForVariants(); - if(onlyOutputCallsStartingInIntervals) { - if( !intervalArgumentCollection.intervalsSpecified()) { - throw new CommandLineException.MissingArgument("-L or -XL", "Intervals are required if --" + GenotypeGVCFs.ONLY_OUTPUT_CALLS_STARTING_IN_INTERVALS_FULL_NAME + " was specified."); - } - } - intervals = intervalArgumentCollection.intervalsSpecified() ? intervalArgumentCollection.getIntervals(getBestAvailableSequenceDictionary()) : - Collections.emptyList(); - final SampleList samples = new IndexedSampleList(inputVCFHeader.getGenotypeSamples()); setupVCFWriter(inputVCFHeader, samples); @@ -260,11 +240,11 @@ private void setupVCFWriter(VCFHeader inputVCFHeader, SampleList samples) { @SuppressWarnings({"unchecked", "rawtypes"}) @Override public void apply(VariantContext variant, ReadsContext reads, ReferenceContext ref, FeatureContext features) { - SimpleInterval variantStart = new SimpleInterval(variant.getContig(), variant.getStart(), variant.getStart()); //return early if there's no non-symbolic ALT since GDB already did the merging if ( !variant.isVariant() || !GATKVariantContextUtils.isProperlyPolymorphic(variant) - || variant.getAttributeAsInt(VCFConstants.DEPTH_KEY,0) == 0 - || (onlyOutputCallsStartingInIntervals && !intervals.stream().anyMatch(interval -> interval.contains(variantStart)))) { + || variant.getAttributeAsInt(VCFConstants.DEPTH_KEY,0) == 0 ) + // todo 
this change is a slight de-optimization since we will now process some sites which were previously ignored + { if (keepAllSites) { VariantContextBuilder builder = new VariantContextBuilder(mqCalculator.finalizeRawMQ(variant)); //don't fill in QUAL here because there's no alt data builder.filter(GATKVCFConstants.LOW_QUAL_FILTER_NAME); @@ -291,7 +271,7 @@ public void apply(VariantContext variant, ReadsContext reads, ReferenceContext r finalizedVC = genotyperEngine.finalizeGenotype(variant); } //could return null if the variant didn't pass the genotyping arg calling/emission threshold - if (finalizedVC != null && (!onlyOutputCallsStartingInIntervals || intervals.stream().anyMatch(interval -> interval.contains(variantStart)))) { + if (finalizedVC != null) { vcfWriter.add(finalizedVC); } } diff --git a/src/main/java/org/broadinstitute/hellbender/utils/variant/writers/IntervalFilteringVcfWriter.java b/src/main/java/org/broadinstitute/hellbender/utils/variant/writers/IntervalFilteringVcfWriter.java new file mode 100644 index 00000000000..6f81da1efd6 --- /dev/null +++ b/src/main/java/org/broadinstitute/hellbender/utils/variant/writers/IntervalFilteringVcfWriter.java @@ -0,0 +1,140 @@ +package org.broadinstitute.hellbender.utils.variant.writers; + +import htsjdk.samtools.util.Locatable; +import htsjdk.samtools.util.OverlapDetector; +import htsjdk.variant.variantcontext.VariantContext; +import htsjdk.variant.variantcontext.writer.VariantContextWriter; +import htsjdk.variant.vcf.VCFHeader; +import org.broadinstitute.hellbender.utils.SimpleInterval; +import org.broadinstitute.hellbender.utils.Utils; + +import java.util.List; +import java.util.Set; + +/** + * A {@link VariantContextWriter} decorator which filters out variants that don't match a given set of intervals. + */ +public class IntervalFilteringVcfWriter implements VariantContextWriter { + + /** + * Comparison modes which allow matching intervals in different ways.
+ */ + public enum Mode { + + /** + * Matches if the query starts within any of the given intervals. + */ + STARTS_IN{ + @Override + boolean test(OverlapDetector detector, final VariantContext query) { + final SimpleInterval startPosition = new SimpleInterval(query.getContig(), query.getStart(), query.getStart()); + return detector.overlapsAny(startPosition); + } + }, + + /** + * Matches if the query ends within any of the given intervals + */ + ENDS_IN{ + @Override + boolean test(final OverlapDetector detector, final VariantContext query) { + final SimpleInterval endPosition = new SimpleInterval(query.getContig(), query.getEnd(), query.getEnd()); + return detector.overlapsAny(endPosition); + } + }, + + /** + * Matches if any part of the query overlaps any one of the given intervals + */ + OVERLAPS{ + @Override + boolean test(final OverlapDetector detector, final VariantContext query) { + return detector.overlapsAny(query); + } + }, + + /** + * Matches if the entirety of the query is contained within one of the intervals + */ + CONTAINED { + @Override + boolean test(final OverlapDetector detector, final VariantContext query) { + final Set overlaps = detector.getOverlaps(query); + for( final Locatable loc : overlaps){ + if(loc.contains(query)){ + return true; + } + } + return false; + } + }, + + /** + * Always matches, may be used to not perform any filtering. + */ + ANYWHERE { + @Override + boolean test(final OverlapDetector detector, final VariantContext query) { + return true; + } + }; + + /** + * @param detector The OverlapDetector to compare against + * @param query The variant being tested + * @return true iff the variant matches the given intervals + */ + abstract boolean test(OverlapDetector detector, VariantContext query); + } + + private final VariantContextWriter writer; + private final OverlapDetector detector; + private final Mode mode; + + /** + * @param writer the writer to wrap + * @param intervals the intervals to compare against,
note that these are not merged so if they should be merged then the input list should be preprocessed + * @param mode the matching mode to use + */ + public IntervalFilteringVcfWriter(final VariantContextWriter writer, List intervals, Mode mode) { + Utils.nonNull(writer); + Utils.nonEmpty(intervals); + Utils.nonNull(mode); + + this.writer = writer; + this.detector = OverlapDetector.create(intervals); + this.mode = mode; + } + + @Override + public void writeHeader(final VCFHeader header) { + writer.writeHeader(header); + } + + @Override + public void setHeader(final VCFHeader header) { + writer.setHeader(header); + } + + @Override + public void close() { + writer.close(); + } + + @Override + public boolean checkError() { + return writer.checkError(); + } + + /** + * Add the given variant to the writer and output it if it matches. + * @param vc the variant to potentially write + */ + @Override + public void add(final VariantContext vc) { + if(mode.test(detector, vc)) { + writer.add(vc); + } + } + +} diff --git a/src/test/java/org/broadinstitute/hellbender/engine/GatkToolIntegrationTest.java b/src/test/java/org/broadinstitute/hellbender/engine/GatkToolIntegrationTest.java index 7e1164b89c9..6031808e4e0 100644 --- a/src/test/java/org/broadinstitute/hellbender/engine/GatkToolIntegrationTest.java +++ b/src/test/java/org/broadinstitute/hellbender/engine/GatkToolIntegrationTest.java @@ -1,18 +1,29 @@ package org.broadinstitute.hellbender.engine; import htsjdk.samtools.reference.ReferenceSequenceFileFactory; +import htsjdk.samtools.util.Locatable; import htsjdk.samtools.util.FileExtensions; import htsjdk.variant.variantcontext.VariantContext; +import htsjdk.variant.variantcontext.VariantContextBuilder; +import htsjdk.variant.variantcontext.writer.VariantContextWriter; import htsjdk.variant.vcf.VCFHeader; import org.apache.commons.lang3.tuple.Pair; +import org.broadinstitute.barclay.argparser.Argument; +import
org.broadinstitute.barclay.argparser.CommandLineProgramProperties; import org.broadinstitute.hellbender.CommandLineProgramTest; import org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions; +import org.broadinstitute.hellbender.cmdline.TestProgramGroup; +import org.broadinstitute.hellbender.testutils.ArgumentsBuilder; import org.broadinstitute.hellbender.testutils.VariantContextTestUtils; import org.broadinstitute.hellbender.tools.walkers.mutect.Mutect2; import org.broadinstitute.hellbender.tools.walkers.variantutils.SelectVariants; +import org.broadinstitute.hellbender.testutils.VariantContextTestUtils; +import org.broadinstitute.hellbender.utils.SimpleInterval; import org.broadinstitute.hellbender.utils.io.IOUtils; +import org.broadinstitute.hellbender.utils.variant.writers.IntervalFilteringVcfWriter; import org.broadinstitute.hellbender.utils.variant.writers.ShardingVCFWriter; import org.testng.Assert; +import org.testng.annotations.DataProvider; import org.testng.annotations.Test; import java.io.File; @@ -91,4 +102,66 @@ public void testSharding() { Assert.assertTrue(Files.exists(Paths.get(firstShard + FileExtensions.COMPRESSED_VCF_INDEX))); Assert.assertTrue(Files.exists(Paths.get(secondShard + FileExtensions.COMPRESSED_VCF_INDEX))); } + + @CommandLineProgramProperties(summary = "testTool which emits specific variants", + oneLineSummary = "Test tool", + programGroup = TestProgramGroup.class) + public static class VariantEmitter extends GATKTool{ + @Argument(fullName = StandardArgumentDefinitions.OUTPUT_LONG_NAME) + File output; + + @Override + public void traverse() { + //nope + } + + @Override + public void onTraversalStart() { + try(final VariantContextWriter vcfWriter = createVCFWriter(output)){ + vcfWriter.writeHeader(new VCFHeader()); + final VariantContextBuilder vcb = new VariantContextBuilder(); + vcb.alleles("AAAAAA", "A").chr("1"); + + vcfWriter.add(vcb.start(10).stop(15).make()); + vcfWriter.add(vcb.start(100).stop(105).make()); + 
vcfWriter.add(vcb.start(1000).stop(1005).make()); + vcfWriter.add(vcb.start(10000).stop(10005).make()); + + vcb.chr("2"); + vcfWriter.add(vcb.start(20).stop(25).make()); + vcfWriter.add(vcb.start(200).stop(205).make()); + vcfWriter.add(vcb.start(2000).stop(2005).make()); + vcfWriter.add(vcb.start(20000).stop(20005).make()); + } + } + } + + @DataProvider + public Object[][] getIntervalsAndOverlapMode(){ + return new Object[][]{ + {Arrays.asList(new SimpleInterval("1", 101, 10001), new SimpleInterval("2", 201, 20001)), IntervalFilteringVcfWriter.Mode.ANYWHERE, 8}, + {Arrays.asList(new SimpleInterval("1", 101, 10001), new SimpleInterval("2", 201, 20001)), IntervalFilteringVcfWriter.Mode.OVERLAPS, 6}, + {Arrays.asList(new SimpleInterval("1", 101, 10001), new SimpleInterval("2", 201, 20001)), IntervalFilteringVcfWriter.Mode.STARTS_IN, 4}, + {Arrays.asList(new SimpleInterval("1", 101, 10001), new SimpleInterval("2", 201, 20001)), IntervalFilteringVcfWriter.Mode.ENDS_IN, 4}, + {Arrays.asList(new SimpleInterval("1", 101, 10001), new SimpleInterval("2", 201, 20001)), IntervalFilteringVcfWriter.Mode.CONTAINED, 2}, + {Arrays.asList(new SimpleInterval("1", 101, 10001), new SimpleInterval("2", 201, 20001)), null, 8}, + }; + } + + @Test(dataProvider = "getIntervalsAndOverlapMode") + public void testVcfOutputFilterMode(List intervals, IntervalFilteringVcfWriter.Mode mode, int variantsIncluded){ + final ArgumentsBuilder args = new ArgumentsBuilder(); + final File out = createTempFile("out", ".vcf"); + args.addOutput(out); + intervals.forEach(args::addInterval); + args.addReference(b37Reference); + if( mode != null) { + args.add(GATKTool.VARIANT_OUTPUT_INTERVAL_FILTERING_MODE, mode); + } + + runCommandLine(args, VariantEmitter.class.getSimpleName()); + final Pair> vcfHeaderListPair = VariantContextTestUtils.readEntireVCFIntoMemory(out.toString()); + + Assert.assertEquals(vcfHeaderListPair.getRight().size(), variantsIncluded); + } } diff --git 
a/src/test/java/org/broadinstitute/hellbender/tools/walkers/GenotypeGVCFsIntegrationTest.java b/src/test/java/org/broadinstitute/hellbender/tools/walkers/GenotypeGVCFsIntegrationTest.java index 21205489283..bee52cf9aed 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/walkers/GenotypeGVCFsIntegrationTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/walkers/GenotypeGVCFsIntegrationTest.java @@ -15,6 +15,7 @@ import org.broadinstitute.barclay.argparser.CommandLineException; import org.broadinstitute.hellbender.CommandLineProgramTest; import org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions; +import org.broadinstitute.hellbender.engine.GATKTool; import org.broadinstitute.hellbender.exceptions.UserException; import org.broadinstitute.hellbender.testutils.ArgumentsBuilder; import org.broadinstitute.hellbender.testutils.GenomicsDBTestUtils; @@ -29,6 +30,7 @@ import org.broadinstitute.hellbender.utils.io.IOUtils; import org.broadinstitute.hellbender.utils.runtime.ProcessController; import org.broadinstitute.hellbender.utils.variant.GATKVCFConstants; +import org.broadinstitute.hellbender.utils.variant.writers.IntervalFilteringVcfWriter; import org.broadinstitute.hellbender.utils.variant.VariantContextGetters; import org.testng.Assert; import org.testng.annotations.DataProvider; @@ -83,7 +85,7 @@ public Object[][] gvcfsToGenotype() { //combine not supported yet, see https://github.com/broadinstitute/gatk/issues/2429 and https://github.com/broadinstitute/gatk/issues/2584 //{"combine.single.sample.pipeline.1.vcf", null, Arrays.asList("-V", getTestFile("combine.single.sample.pipeline.2.vcf").toString() , "-V", getTestFile("combine.single.sample.pipeline.3.vcf").toString()), b37_reference_20_21}, - {getTestFile("leadingDeletion.g.vcf"), getTestFile("leadingDeletionRestrictToStartExpected.vcf"), Arrays.asList("-L", "20:69512-69513", "--"+GenotypeGVCFs.ONLY_OUTPUT_CALLS_STARTING_IN_INTERVALS_FULL_NAME), b37_reference_20_21}, + 
{getTestFile("leadingDeletion.g.vcf"), getTestFile("leadingDeletionRestrictToStartExpected.vcf"), Arrays.asList("-L", "20:69512-69513", "--"+GATKTool.VARIANT_OUTPUT_INTERVAL_FILTERING_MODE, IntervalFilteringVcfWriter.Mode.STARTS_IN.toString()), b37_reference_20_21}, {getTestFile("leadingDeletion.g.vcf"), getTestFile("leadingDeletionExpected.vcf"), Arrays.asList("-L", "20:69512-69513"), b37_reference_20_21}, {getTestFile(BASE_PAIR_GVCF), getTestFile( BASE_PAIR_EXPECTED), NO_EXTRA_ARGS, b37_reference_20_21}, //base pair level gvcf {getTestFile("testUpdatePGT.gvcf"), getTestFile( "testUpdatePGT.gatk3.7_30_ga4f720357.output.vcf"), NO_EXTRA_ARGS, b37_reference_20_21}, //testUpdatePGT @@ -421,7 +423,7 @@ private void runAndCheckGenomicsDBOutput(final ArgumentsBuilder args, final File } @Test(dataProvider = "getGVCFsForGenomicsDBOverMultipleIntervals") - public void testGenotypeGVCFsMultiIntervalGDBQuery(File input, File expected, List intervals, String reference) throws IOException { + public void testGenotypeGVCFsMultiIntervalGDBQuery(File input, File expected, List intervals, String reference) { final File tempGenomicsDB = GenomicsDBTestUtils.createTempGenomicsDB(input, intervals, true); final String genomicsDBUri = GenomicsDBTestUtils.makeGenomicsDBUri(tempGenomicsDB); @@ -432,8 +434,8 @@ public void testGenotypeGVCFsMultiIntervalGDBQuery(File input, File expected, Li .add("V", genomicsDBUri); args.addOutput(output); intervals.forEach(args::addInterval); - args.addRaw("--" + GenomicsDBImport.MERGE_INPUT_INTERVALS_LONG_NAME); - args.addRaw("--only-output-calls-starting-in-intervals"); //note that this will restrict calls to just the specified intervals + args.add(GenomicsDBImport.MERGE_INPUT_INTERVALS_LONG_NAME, true); + args.add(GATKTool.VARIANT_OUTPUT_INTERVAL_FILTERING_MODE, IntervalFilteringVcfWriter.Mode.STARTS_IN); //note that this will restrict calls to just the specified intervals runAndCheckGenomicsDBOutput(args, expected, output); @@ -570,7 +572,7 @@ public 
void testIntervalsAndOnlyOutputCallsStartingInIntervalsAreMutuallyRequire .addVCF(getTestFile("leadingDeletion.g.vcf")) .addReference(new File(b37_reference_20_21)) .addOutput( createTempFile("tmp",".vcf")) - .add(GenotypeGVCFs.ONLY_OUTPUT_CALLS_STARTING_IN_INTERVALS_FULL_NAME, true); + .add(GATKTool.VARIANT_OUTPUT_INTERVAL_FILTERING_MODE, IntervalFilteringVcfWriter.Mode.STARTS_IN); Assert.assertThrows(CommandLineException.MissingArgument.class, () -> runCommandLine(args)); args.add("L", "20:69512-69513"); diff --git a/src/test/java/org/broadinstitute/hellbender/tools/walkers/GnarlyGenotyperIntegrationTest.java b/src/test/java/org/broadinstitute/hellbender/tools/walkers/GnarlyGenotyperIntegrationTest.java index 6f80f8904cf..764467059a8 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/walkers/GnarlyGenotyperIntegrationTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/walkers/GnarlyGenotyperIntegrationTest.java @@ -10,12 +10,15 @@ import org.broadinstitute.hellbender.GATKBaseTest; import org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions; import org.broadinstitute.hellbender.engine.FeatureDataSource; +import org.broadinstitute.hellbender.engine.GATKTool; +import org.broadinstitute.hellbender.tools.walkers.annotator.allelespecific.AS_RMSMappingQuality; import org.broadinstitute.hellbender.utils.IntervalUtils; import org.broadinstitute.hellbender.utils.SimpleInterval; import org.broadinstitute.hellbender.testutils.ArgumentsBuilder; import org.broadinstitute.hellbender.testutils.GenomicsDBTestUtils; import org.broadinstitute.hellbender.testutils.VariantContextTestUtils; import org.broadinstitute.hellbender.utils.Utils; +import org.broadinstitute.hellbender.utils.variant.writers.IntervalFilteringVcfWriter; import org.broadinstitute.hellbender.utils.variant.GATKVCFConstants; import org.testng.Assert; import org.testng.annotations.DataProvider; @@ -68,10 +71,10 @@ public Object[][] getVCFdata() { // Simple Test, spanning 
deletions; standard calling confidence //No variants outside requested intervals; no SNPs with QUAL < 60, no INDELs with QUAL < 69?; has star alleles after deletion at chr20:263497; has AC, AF, AN, DP, ExcessHet, FS, MQ, (MQRankSum), (ReadPosRankSum), SOR, QD; has called genotypes {new File[]{getTestFile("sample1.vcf"), getTestFile("sample2.vcf"), getTestFile("sample3.vcf"), getTestFile("sample4.vcf"), getTestFile("sample5.vcf")}, - getTestFile("fiveSampleTest.vcf"), null, Arrays.asList(new SimpleInterval("chr20", 251370, 252000), new SimpleInterval("chr20", 263000, 265600)), Arrays.asList("--merge-input-intervals", "--only-output-calls-starting-in-intervals"), b38_reference_20_21}, + getTestFile("fiveSampleTest.vcf"), null, Arrays.asList(new SimpleInterval("chr20", 251370, 252000), new SimpleInterval("chr20", 263000, 265600)), Arrays.asList("--merge-input-intervals", "--"+GATKTool.VARIANT_OUTPUT_INTERVAL_FILTERING_MODE, IntervalFilteringVcfWriter.Mode.STARTS_IN.toString()), b38_reference_20_21}, // Same as above, but with GenomicsDB using BCF2Codec for interchange {new File[]{getTestFile("sample1.vcf"), getTestFile("sample2.vcf"), getTestFile("sample3.vcf"), getTestFile("sample4.vcf"), getTestFile("sample5.vcf")}, - getTestFile("fiveSampleTest.vcf"), null, Arrays.asList(new SimpleInterval("chr20", 251370, 252000), new SimpleInterval("chr20", 263000, 265600)), Arrays.asList("--merge-input-intervals", "--only-output-calls-starting-in-intervals", "--genomicsdb-use-bcf-codec"), b38_reference_20_21}, + getTestFile("fiveSampleTest.vcf"), null, Arrays.asList(new SimpleInterval("chr20", 251370, 252000), new SimpleInterval("chr20", 263000, 265600)), Arrays.asList("--merge-input-intervals", "--"+GATKTool.VARIANT_OUTPUT_INTERVAL_FILTERING_MODE, IntervalFilteringVcfWriter.Mode.STARTS_IN.toString(), "--genomicsdb-use-bcf-codec"), b38_reference_20_21}, //lower calling confidence //same as above except (different intervals and) with SNPs with 40 < QUAL < 60 and INDELs with 49 < 
QUAL < 69 {new File[]{getTestFile("sample1.vcf"), getTestFile("sample2.vcf"), getTestFile("sample3.vcf"), getTestFile("sample4.vcf"), getTestFile("sample5.vcf")}, @@ -176,7 +179,7 @@ public void testOnHailOutput() { args.addReference(new File(hg38Reference)) .add("V", input) .add("L", "chr20:10000000-10030000") - .add("only-output-calls-starting-in-intervals", true) + .add(GATKTool.VARIANT_OUTPUT_INTERVAL_FILTERING_MODE, IntervalFilteringVcfWriter.Mode.STARTS_IN) .add("keep-all-sites", true) .addOutput(outputPath) .add(StandardArgumentDefinitions.ADD_OUTPUT_VCF_COMMANDLINE, "false"); diff --git a/src/test/java/org/broadinstitute/hellbender/tools/walkers/variantutils/ReblockGVCFUnitTest.java b/src/test/java/org/broadinstitute/hellbender/tools/walkers/variantutils/ReblockGVCFUnitTest.java index 3a15b204383..8bdc501bf8e 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/walkers/variantutils/ReblockGVCFUnitTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/walkers/variantutils/ReblockGVCFUnitTest.java @@ -24,6 +24,7 @@ import org.broadinstitute.hellbender.utils.variant.GATKVCFConstants; import org.broadinstitute.hellbender.utils.variant.writers.GVCFWriter; import org.broadinstitute.hellbender.utils.variant.writers.GVCFWriterUnitTest; +import org.broadinstitute.hellbender.utils.variant.writers.MockVcfWriter; import org.broadinstitute.hellbender.utils.variant.writers.ReblockingGVCFWriter; import org.broadinstitute.hellbender.utils.variant.writers.ReblockingOptions; import org.testng.Assert; @@ -50,7 +51,7 @@ public void testCleanUpHighQualityVariant() { //We need an annotation engine for cleanUpHighQualityVariant(), but this is just a dummy; annotations won't initialize properly without runCommandLine() reblocker.createAnnotationEngine(); //...and a vcfwriter - reblocker.vcfWriter = new ReblockingGVCFWriter(new GVCFWriterUnitTest.MockWriter(), Arrays.asList(20, 100), true, null, new ReblockingOptions()); + reblocker.vcfWriter = new 
ReblockingGVCFWriter(new MockVcfWriter(), Arrays.asList(20, 100), true, null, new ReblockingOptions()); reblocker.dropLowQuals = true; reblocker.doQualApprox = true; @@ -93,7 +94,7 @@ public void testCleanUpHighQualityVariant() { @Test public void testLowQualVariantToGQ0HomRef() { final ReblockGVCF reblocker = new ReblockGVCF(); - reblocker.vcfWriter = new ReblockingGVCFWriter(new GVCFWriterUnitTest.MockWriter(), Arrays.asList(20, 100), true, null, new ReblockingOptions()); + reblocker.vcfWriter = new ReblockingGVCFWriter(new MockVcfWriter(), Arrays.asList(20, 100), true, null, new ReblockingOptions()); reblocker.dropLowQuals = true; final Genotype g = VariantContextTestUtils.makeG("sample1", 11, LONG_REF, Allele.NON_REF_ALLELE, 200, 100, 200, 11, 0, 37); @@ -188,7 +189,7 @@ public void testBadCalls() { @Test public void testPosteriors() { final ReblockGVCF reblocker = new ReblockGVCF(); - reblocker.vcfWriter = new ReblockingGVCFWriter(new GVCFWriterUnitTest.MockWriter(), Arrays.asList(20, 100), true, null, new ReblockingOptions()); + reblocker.vcfWriter = new ReblockingGVCFWriter(new MockVcfWriter(), Arrays.asList(20, 100), true, null, new ReblockingOptions()); reblocker.posteriorsKey = "GP"; final GenotypeBuilder gb = new GenotypeBuilder("sample1", Arrays.asList(LONG_REF, LONG_REF)); diff --git a/src/test/java/org/broadinstitute/hellbender/utils/variant/writers/GVCFWriterUnitTest.java b/src/test/java/org/broadinstitute/hellbender/utils/variant/writers/GVCFWriterUnitTest.java index 9abea785e40..b971a109e8b 100644 --- a/src/test/java/org/broadinstitute/hellbender/utils/variant/writers/GVCFWriterUnitTest.java +++ b/src/test/java/org/broadinstitute/hellbender/utils/variant/writers/GVCFWriterUnitTest.java @@ -37,53 +37,17 @@ public class GVCFWriterUnitTest extends GATKBaseTest { private static final String CHR1 = "1"; private static final String CHR2 = "2"; - private static final Allele REF = Allele.create("G", true); - private static final Allele ALT = 
Allele.create("A"); - private static final List ALLELES = ImmutableList.of(REF, Allele.NON_REF_ALLELE); + private static final List ALLELES = ImmutableList.of(Allele.REF_G, Allele.NON_REF_ALLELE); private static final String SAMPLE_NAME = "XXYYZZ"; - static public final class MockWriter implements VariantContextWriter { - final List emitted = new ArrayList<>(); - boolean headerWritten = false; - boolean closed = false; - boolean error = false; - boolean headerSet = false; - - @Override - public void writeHeader(VCFHeader header) { - headerSet = true; - headerWritten = true; - } - - @Override - public void close() { - closed = true; - } - - @Override - public boolean checkError() { - return error; - } - - @Override - public void add(VariantContext vc) { - emitted.add(vc); - } - - @Override - public void setHeader(VCFHeader header) { - headerSet = true; - } - } - private static final List standardPartition = ImmutableList.of(1, 10, 20); private static final List highConfLowConf = ImmutableList.of(20,100); @Test public void testHeaderWriting() { - final MockWriter mockWriter = new MockWriter(); + final MockVcfWriter mockWriter = new MockVcfWriter(); final GVCFWriter writer = new GVCFWriter(mockWriter, standardPartition); writer.writeHeader(new VCFHeader()); Assert.assertTrue(mockWriter.headerSet); @@ -92,7 +56,7 @@ public void testHeaderWriting() { @Test public void testHeaderSetting(){ - final MockWriter mockWriter = new MockWriter(); + final MockVcfWriter mockWriter = new MockVcfWriter(); final GVCFWriter writer = new GVCFWriter(mockWriter, standardPartition); writer.setHeader(new VCFHeader()); Assert.assertTrue(mockWriter.headerSet); @@ -101,17 +65,15 @@ public void testHeaderSetting(){ @Test public void testClose() { - final MockWriter mockWriter = new MockWriter(); + final MockVcfWriter mockWriter = new MockVcfWriter(); final GVCFWriter writer = new GVCFWriter(mockWriter, standardPartition); writer.close(); Assert.assertTrue(mockWriter.closed); } - - @Test public 
void testCloseEmitsLastVariant() { - final MockWriter mockWriter = new MockWriter(); + final MockVcfWriter mockWriter = new MockVcfWriter(); final GVCFWriter writer = new GVCFWriter(mockWriter, standardPartition); writer.add(makeHomRef(1)); @@ -124,7 +86,7 @@ public void testCloseEmitsLastVariant() { @Test public void testCloseDoesntEmitsLastVariantWhenNonRef() { - final MockWriter mockWriter = new MockWriter(); + final MockVcfWriter mockWriter = new MockVcfWriter(); final GVCFWriter writer = new GVCFWriter(mockWriter, standardPartition); writer.add(makeNonRef(CHR1, 1)); @@ -137,7 +99,7 @@ public void testCloseDoesntEmitsLastVariantWhenNonRef() { @Test public void testCrossingContigBoundaryRef() { - final MockWriter mockWriter = new MockWriter(); + final MockVcfWriter mockWriter = new MockVcfWriter(); final GVCFWriter writer = new GVCFWriter(mockWriter, standardPartition); writer.add(makeHomRef(1)); @@ -154,7 +116,7 @@ public void testCrossingContigBoundaryRef() { @Test public void testCrossingContigBoundaryToLowerPositionsRef() { - final MockWriter mockWriter = new MockWriter(); + final MockVcfWriter mockWriter = new MockVcfWriter(); final GVCFWriter writer = new GVCFWriter(mockWriter, standardPartition); writer.add(makeHomRef(30)); @@ -171,7 +133,7 @@ public void testCrossingContigBoundaryToLowerPositionsRef() { @Test public void testCrossingContigBoundaryFromNonRefToLowerPositionsRef() { - final MockWriter mockWriter = new MockWriter(); + final MockVcfWriter mockWriter = new MockVcfWriter(); final GVCFWriter writer = new GVCFWriter(mockWriter, standardPartition); writer.add(makeNonRef(CHR1, 20)); @@ -187,7 +149,7 @@ public void testCrossingContigBoundaryFromNonRefToLowerPositionsRef() { @Test public void testCrossingContigBoundaryNonRef() { - final MockWriter mockWriter = new MockWriter(); + final MockVcfWriter mockWriter = new MockVcfWriter(); final GVCFWriter writer = new GVCFWriter(mockWriter, standardPartition); writer.add(makeHomRef(1)); @@ -201,7 +163,7 @@ 
public void testCrossingContigBoundaryNonRef() { @Test public void testCrossingContigBoundaryNonRefThenNonRef() { - final MockWriter mockWriter = new MockWriter(); + final MockVcfWriter mockWriter = new MockVcfWriter(); final GVCFWriter writer = new GVCFWriter(mockWriter, standardPartition); writer.add(makeNonRef(CHR1, 1)); @@ -250,7 +212,7 @@ private static void assertGoodVC(final VariantContext vc, final String contig, f @Test public void testVariantForcesNonRef() { - final MockWriter mockWriter = new MockWriter(); + final MockVcfWriter mockWriter = new MockVcfWriter(); final GVCFWriter writer = new GVCFWriter(mockWriter, standardPartition); writer.add(makeHomRef(1)); @@ -268,7 +230,7 @@ public void testVariantForcesNonRef() { @Test public void testEmittingTwoBands() { - final MockWriter mockWriter = new MockWriter(); + final MockVcfWriter mockWriter = new MockVcfWriter(); final GVCFWriter writer = new GVCFWriter(mockWriter, standardPartition); writer.add(makeHomRef(1)); @@ -285,14 +247,14 @@ public void testEmittingTwoBands() { @Test public void testBandingUsingPP() { - final MockWriter mockWriter = new MockWriter(); + final MockVcfWriter mockWriter = new MockVcfWriter(); final GVCFWriter writer = new GVCFWriter(mockWriter, standardPartition); int[] PPs1 = {0,63,128}; int[] PPs2 = {0,67,145}; writer.add(makeVariantContext(new VariantContextBuilder("test", CHR1, 10000, 10000, - ALLELES), Arrays.asList(REF, REF), 2, PPs1)); - writer.add(makeVariantContext(new VariantContextBuilder("test", CHR1, 10001, 10001, ALLELES), Arrays.asList(REF, REF), 21, PPs2)); + ALLELES), Arrays.asList(Allele.REF_G, Allele.REF_G), 2, PPs1)); + writer.add(makeVariantContext(new VariantContextBuilder("test", CHR1, 10001, 10001, ALLELES), Arrays.asList(Allele.REF_G, Allele.REF_G), 21, PPs2)); writer.close(); Assert.assertEquals(mockWriter.emitted.size(), 1); assertGoodVCwithPPs(mockWriter.emitted.get(0), CHR1, 10000, 10001, false); @@ -301,7 +263,7 @@ public void testBandingUsingPP() { 
@Test public void testNonContiguousBlocks() { - final MockWriter mockWriter = new MockWriter(); + final MockVcfWriter mockWriter = new MockVcfWriter(); final GVCFWriter writer = new GVCFWriter(mockWriter, standardPartition); writer.add(makeHomRef(1)); @@ -316,7 +278,7 @@ public void testNonContiguousBlocks() { @Test public void testInputBlocks() { - final MockWriter mockWriter = new MockWriter(); + final MockVcfWriter mockWriter = new MockVcfWriter(); final GVCFWriter writer = new GVCFWriter(mockWriter, highConfLowConf); writer.add(makeHomRef("20", 1, 16, 600)); @@ -328,7 +290,7 @@ public void testInputBlocks() { @Test public void testDeletion() { - final MockWriter mockWriter = new MockWriter(); + final MockVcfWriter mockWriter = new MockVcfWriter(); final GVCFWriter writer = new GVCFWriter(mockWriter, standardPartition); writer.add(makeHomRef(1)); @@ -347,7 +309,7 @@ public void testDeletion() { @Test public void testHomRefAlt() { - final MockWriter mockWriter = new MockWriter(); + final MockVcfWriter mockWriter = new MockVcfWriter(); final GVCFWriter writer = new GVCFWriter(mockWriter, standardPartition); writer.add(makeHomRef(1)); @@ -406,7 +368,7 @@ public void testBadPartitionsThrowException(final List partitions){ @Test public void testCheckError(){ - final MockWriter mockWriter = new MockWriter(); + final MockVcfWriter mockWriter = new MockVcfWriter(); final GVCFWriter gvcfWriter = new GVCFWriter(mockWriter, standardPartition); mockWriter.error = false; Assert.assertEquals(gvcfWriter.checkError(), mockWriter.checkError()); @@ -636,7 +598,7 @@ public void testOverlappingDeletions() { .genotypes(gb.make()).attribute(VCFConstants.END_KEY, 10025); final VariantContext block2 = (new HomRefBlock(vcb.make(), 20, 100, 2).toVariantContext(SAMPLE_NAME, false)); - final MockWriter mockWriter = new MockWriter(); + final MockVcfWriter mockWriter = new MockVcfWriter(); final GVCFWriter writer = new GVCFWriter(mockWriter, Arrays.asList(20,100)); writer.add(deletion1); 
writer.add(block2); diff --git a/src/test/java/org/broadinstitute/hellbender/utils/variant/writers/IntervalFilteringVcfWriterUnitTest.java b/src/test/java/org/broadinstitute/hellbender/utils/variant/writers/IntervalFilteringVcfWriterUnitTest.java new file mode 100644 index 00000000000..f96e937c7c4 --- /dev/null +++ b/src/test/java/org/broadinstitute/hellbender/utils/variant/writers/IntervalFilteringVcfWriterUnitTest.java @@ -0,0 +1,91 @@ +package org.broadinstitute.hellbender.utils.variant.writers; + +import htsjdk.samtools.util.OverlapDetector; +import htsjdk.variant.variantcontext.Allele; +import htsjdk.variant.variantcontext.VariantContext; +import htsjdk.variant.variantcontext.VariantContextBuilder; +import htsjdk.variant.vcf.VCFHeader; +import org.broadinstitute.hellbender.GATKBaseTest; +import org.broadinstitute.hellbender.utils.SimpleInterval; +import org.broadinstitute.hellbender.utils.Utils; +import org.testng.Assert; +import org.testng.annotations.DataProvider; +import org.testng.annotations.Test; + +import java.util.Arrays; +import java.util.Collections; +import java.util.List; + +public class IntervalFilteringVcfWriterUnitTest extends GATKBaseTest { + + + @DataProvider + public Object[][] getIntervalsAndMode(){ + final VariantContext noOverlap = new VariantContextBuilder("test", "1", 200, 300, Arrays.asList(Allele.create(Utils.repeatChars('A', 101), true), Allele.ALT_A)).make(); + final VariantContext contained = new VariantContextBuilder("test", "1", 101, 104, Arrays.asList(Allele.create(Utils.repeatChars('A', 4), true), Allele.ALT_A)).make(); + final VariantContext overlaps = new VariantContextBuilder("test", "1", 90, 120, Arrays.asList(Allele.create(Utils.repeatChars('A', 31), true), Allele.ALT_A)).make(); + final VariantContext startsIn = new VariantContextBuilder("test", "1", 103, 140, Arrays.asList(Allele.create(Utils.repeatChars('A', 38), true), Allele.ALT_A)).make(); + final VariantContext endsIn = new VariantContextBuilder("test", "1", 90, 103, 
Arrays.asList(Allele.create(Utils.repeatChars('A', 14), true), Allele.ALT_A)).make(); + final VariantContext anotherContig = new VariantContextBuilder("test", "2", 90, 140, Arrays.asList(Allele.create(Utils.repeatChars('A', 51), true), Allele.ALT_A)).make(); + final List vcs = Arrays.asList(noOverlap, contained, overlaps, startsIn, endsIn, anotherContig); + + final SimpleInterval interval = new SimpleInterval("1", 100, 105); + + return new Object[][]{ + // no overlap, contained, overlaps, starts in, ends in, another contig + {interval, vcs, IntervalFilteringVcfWriter.Mode.ANYWHERE, new boolean[]{ true, true, true, true, true, true}}, + {interval, vcs, IntervalFilteringVcfWriter.Mode.CONTAINED, new boolean[]{ false, true, false, false, false, false}}, + {interval, vcs, IntervalFilteringVcfWriter.Mode.OVERLAPS, new boolean[]{ false, true, true, true, true, false}}, + {interval, vcs, IntervalFilteringVcfWriter.Mode.STARTS_IN, new boolean[]{ false, true, false, true, false, false}}, + {interval, vcs, IntervalFilteringVcfWriter.Mode.ENDS_IN, new boolean[]{ false, true, false, false, true, false}}, + }; + } + + @Test(dataProvider = "getIntervalsAndMode") + public void testModes(SimpleInterval interval, List vcs, IntervalFilteringVcfWriter.Mode mode, boolean[] expected) { + final OverlapDetector detector = OverlapDetector.create(Collections.singletonList(interval)); + for(int i = 0; i < expected.length; i++){ + Assert.assertEquals(mode.test(detector,vcs.get(i)), expected[i], "mode " + mode + " mismatches at " + i); + } + } + + @Test + public void testHeaderWriting() { + final MockVcfWriter mockWriter = new MockVcfWriter(); + final List intervals = Arrays.asList(new SimpleInterval("1", 10, 100), new SimpleInterval("2", 100, 500)); + final IntervalFilteringVcfWriter writer = new IntervalFilteringVcfWriter(mockWriter, intervals, IntervalFilteringVcfWriter.Mode.OVERLAPS); + writer.writeHeader(new VCFHeader()); + Assert.assertTrue(mockWriter.headerSet); + 
Assert.assertTrue(mockWriter.headerWritten); + } + + @Test + public void testHeaderSetting(){ + final MockVcfWriter mockWriter = new MockVcfWriter(); + final List intervals = Arrays.asList(new SimpleInterval("1", 10, 100), new SimpleInterval("2", 100, 500)); + final IntervalFilteringVcfWriter writer = new IntervalFilteringVcfWriter(mockWriter, intervals, IntervalFilteringVcfWriter.Mode.OVERLAPS); + writer.setHeader(new VCFHeader()); + Assert.assertTrue(mockWriter.headerSet); + Assert.assertFalse(mockWriter.headerWritten); + } + + @Test + public void testClose() { + final MockVcfWriter mockWriter = new MockVcfWriter(); + final List intervals = Arrays.asList(new SimpleInterval("1", 10, 100), new SimpleInterval("2", 100, 500)); + final IntervalFilteringVcfWriter writer = new IntervalFilteringVcfWriter(mockWriter, intervals, IntervalFilteringVcfWriter.Mode.OVERLAPS); + writer.close(); + Assert.assertTrue(mockWriter.closed); + } + + @Test + public void testCheckError(){ + final MockVcfWriter mockWriter = new MockVcfWriter(); + final List intervals = Arrays.asList(new SimpleInterval("1", 10, 100), new SimpleInterval("2", 100, 500)); + final IntervalFilteringVcfWriter writer = new IntervalFilteringVcfWriter(mockWriter, intervals, IntervalFilteringVcfWriter.Mode.OVERLAPS); + Assert.assertFalse(writer.checkError()); + mockWriter.error = true; + Assert.assertTrue(writer.checkError()); + } + +} \ No newline at end of file diff --git a/src/test/java/org/broadinstitute/hellbender/utils/variant/writers/SomaticGVCFWriterUnitTest.java b/src/test/java/org/broadinstitute/hellbender/utils/variant/writers/SomaticGVCFWriterUnitTest.java index 1fb3522a4a6..4b51da5f2fe 100644 --- a/src/test/java/org/broadinstitute/hellbender/utils/variant/writers/SomaticGVCFWriterUnitTest.java +++ b/src/test/java/org/broadinstitute/hellbender/utils/variant/writers/SomaticGVCFWriterUnitTest.java @@ -22,7 +22,7 @@ public class SomaticGVCFWriterUnitTest { @Test public void testValueBinning() { - final 
GVCFWriterUnitTest.MockWriter mockWriter = new GVCFWriterUnitTest.MockWriter(); + final MockVcfWriter mockWriter = new MockVcfWriter(); SomaticGVCFWriter writer = new SomaticGVCFWriter(mockWriter, standardPartition); //derives partitionPrecision 1 from standardPartition values Assert.assertTrue(writer.convertLODtoInt(2.3) == 23); @@ -46,7 +46,7 @@ public void testValueBinning() { @Test public void testAddingAndMerging() { - final GVCFWriterUnitTest.MockWriter mockWriter = new GVCFWriterUnitTest.MockWriter(); + final MockVcfWriter mockWriter = new MockVcfWriter(); final SomaticGVCFWriter writer = new SomaticGVCFWriter(mockWriter, standardPartition); final GenotypeBuilder gb = new GenotypeBuilder(SAMPLE_NAME, Arrays.asList(REF, REF)); int pos = 1; @@ -89,7 +89,7 @@ public void testAddingAndMerging() { @Test public void testPrecision() { - final GVCFWriterUnitTest.MockWriter mockWriter = new GVCFWriterUnitTest.MockWriter(); + final MockVcfWriter mockWriter = new MockVcfWriter(); SomaticGVCFWriter writer = new SomaticGVCFWriter(mockWriter, precisionTwoPartition); Assert.assertTrue(((SomaticGVCFBlockCombiner)writer.gvcfBlockCombiner).partitionPrecision == 2); diff --git a/src/testUtils/java/org/broadinstitute/hellbender/utils/variant/writers/MockVcfWriter.java b/src/testUtils/java/org/broadinstitute/hellbender/utils/variant/writers/MockVcfWriter.java new file mode 100644 index 00000000000..4009c710530 --- /dev/null +++ b/src/testUtils/java/org/broadinstitute/hellbender/utils/variant/writers/MockVcfWriter.java @@ -0,0 +1,42 @@ +package org.broadinstitute.hellbender.utils.variant.writers; + +import htsjdk.variant.variantcontext.VariantContext; +import htsjdk.variant.variantcontext.writer.VariantContextWriter; +import htsjdk.variant.vcf.VCFHeader; + +import java.util.ArrayList; +import java.util.List; + +public final class MockVcfWriter implements VariantContextWriter { + final List emitted = new ArrayList<>(); + boolean headerWritten = false; + boolean closed = false; + 
boolean error = false; + boolean headerSet = false; + + @Override + public void writeHeader(VCFHeader header) { + headerSet = true; + headerWritten = true; + } + + @Override + public void close() { + closed = true; + } + + @Override + public boolean checkError() { + return error; + } + + @Override + public void add(VariantContext vc) { + emitted.add(vc); + } + + @Override + public void setHeader(VCFHeader header) { + headerSet = true; + } +} From e96d43941ebca7a6febea5c1562a6fa8a8b78120 Mon Sep 17 00:00:00 2001 From: Louis Bergelson Date: Mon, 13 Dec 2021 12:33:29 -0500 Subject: [PATCH 2/9] adding doc strings --- .../writers/IntervalFilteringVcfWriter.java | 31 ++++++++++++++----- 1 file changed, 23 insertions(+), 8 deletions(-) diff --git a/src/main/java/org/broadinstitute/hellbender/utils/variant/writers/IntervalFilteringVcfWriter.java b/src/main/java/org/broadinstitute/hellbender/utils/variant/writers/IntervalFilteringVcfWriter.java index 6f81da1efd6..a44370af501 100644 --- a/src/main/java/org/broadinstitute/hellbender/utils/variant/writers/IntervalFilteringVcfWriter.java +++ b/src/main/java/org/broadinstitute/hellbender/utils/variant/writers/IntervalFilteringVcfWriter.java @@ -5,6 +5,7 @@ import htsjdk.variant.variantcontext.VariantContext; import htsjdk.variant.variantcontext.writer.VariantContextWriter; import htsjdk.variant.vcf.VCFHeader; +import org.broadinstitute.barclay.argparser.CommandLineParser; import org.broadinstitute.hellbender.utils.SimpleInterval; import org.broadinstitute.hellbender.utils.Utils; @@ -19,12 +20,13 @@ public class IntervalFilteringVcfWriter implements VariantContextWriter { /** * Comparison modes which allow matching intervals in different ways. */ - public enum Mode { + public enum Mode implements CommandLineParser.ClpEnum { /** * Matches if the query starts within any of the given intervals. 
*/ - STARTS_IN{ + STARTS_IN("starts within any of the given intervals"){ + @Override boolean test(OverlapDetector detector, final VariantContext query) { final SimpleInterval startPosition = new SimpleInterval(query.getContig(), query.getStart(), query.getStart()); @@ -35,7 +37,7 @@ boolean test(OverlapDetector detector, final VariantContext /** * Matches if the query ends within any of the given intervals */ - ENDS_IN{ + ENDS_IN("ends within any of the given intervals"){ @Override boolean test(final OverlapDetector detector, final VariantContext query) { final SimpleInterval endPosition = new SimpleInterval(query.getContig(), query.getEnd(), query.getEnd()); @@ -46,17 +48,19 @@ boolean test(final OverlapDetector detector, final VariantC /** * Matches if any part of the query overlaps any one of the given intervals */ - OVERLAPS{ + OVERLAPS("overlaps any of the given intervals"){ @Override boolean test(final OverlapDetector detector, final VariantContext query) { return detector.overlapsAny(query); } }, + // TODO finish this exception here... /** - * Matches if the entirety of the query is contained within one of the intervals + * Matches if the entirety of the query is contained within one of the intervals. 
Note that adjacent intervals + * may be merged into a single interval depending on the values in */ - CONTAINED { + CONTAINED("contained completely within a contiguous block of intervals without overlap") { @Override boolean test(final OverlapDetector detector, final VariantContext query) { final Set overlaps = detector.getOverlaps(query); @@ -70,21 +74,32 @@ boolean test(final OverlapDetector detector, final VariantC }, /** - * Always matches, may be used to not perform any filtering, alternatively a + * Always matches, may be used to not perform any filtering */ - ANYWHERE { + ANYWHERE("no filtering") { @Override boolean test(final OverlapDetector detector, final VariantContext query) { return true; } }; + private final String doc; + /** * @param detector The OverlapDetector to compare against * @param query The variant being tested * @return true iff the variant matches the given intervals */ abstract boolean test(OverlapDetector detector, VariantContext query); + + private Mode(String doc){ + this.doc = doc; + + } + @Override + public String getHelpDoc() { + return doc; + } } private final VariantContextWriter writer; From 578a951094caf0c8dad495e621e350cc148c435a Mon Sep 17 00:00:00 2001 From: Louis Bergelson Date: Mon, 19 Aug 2024 12:53:27 -0400 Subject: [PATCH 3/9] Rebase and add better tests --- .../cmdline/StandardArgumentDefinitions.java | 2 +- .../hellbender/engine/GATKTool.java | 10 +--- .../tools/walkers/GenotypeGVCFs.java | 9 +-- .../gnarlyGenotyper/GnarlyGenotyper.java | 1 - .../writers/IntervalFilteringVcfWriter.java | 7 ++- .../engine/GatkToolIntegrationTest.java | 53 +++++++++++------- .../walkers/GenotypeGVCFsIntegrationTest.java | 7 +-- .../GnarlyGenotyperIntegrationTest.java | 8 +-- .../SelectVariantsIntegrationTest.java | 31 +++++++--- .../SelectVariants/complexExample1.vcf.idx | Bin 249 -> 433 bytes ...estSelectVariants_onlyOutputInInterval.vcf | 26 +++++++++ .../hellbender/testutils/BaseTest.java | 6 +- 12 files changed, 102 insertions(+), 58 
deletions(-) create mode 100644 src/test/resources/org/broadinstitute/hellbender/tools/walkers/variantutils/SelectVariants/expected/testSelectVariants_onlyOutputInInterval.vcf diff --git a/src/main/java/org/broadinstitute/hellbender/cmdline/StandardArgumentDefinitions.java b/src/main/java/org/broadinstitute/hellbender/cmdline/StandardArgumentDefinitions.java index 1c8596eb91b..9789c978c6b 100644 --- a/src/main/java/org/broadinstitute/hellbender/cmdline/StandardArgumentDefinitions.java +++ b/src/main/java/org/broadinstitute/hellbender/cmdline/StandardArgumentDefinitions.java @@ -46,7 +46,7 @@ private StandardArgumentDefinitions(){} public static final String INVALIDATE_PREVIOUS_FILTERS_LONG_NAME = "invalidate-previous-filters"; public static final String SORT_ORDER_LONG_NAME = "sort-order"; public static final String FLOW_ORDER_FOR_ANNOTATIONS = "flow-order-for-annotations"; - + public static final String VARIANT_OUTPUT_INTERVAL_FILTERING_MODE_LONG_NAME = "variant-output-interval-filtering-mode"; public static final String INPUT_SHORT_NAME = "I"; public static final String OUTPUT_SHORT_NAME = "O"; diff --git a/src/main/java/org/broadinstitute/hellbender/engine/GATKTool.java b/src/main/java/org/broadinstitute/hellbender/engine/GATKTool.java index 5607e56d33b..824522af0a1 100644 --- a/src/main/java/org/broadinstitute/hellbender/engine/GATKTool.java +++ b/src/main/java/org/broadinstitute/hellbender/engine/GATKTool.java @@ -21,7 +21,6 @@ import org.broadinstitute.barclay.argparser.Argument; import org.broadinstitute.barclay.argparser.ArgumentCollection; import org.broadinstitute.barclay.argparser.CommandLineException; -import org.broadinstitute.barclay.argparser.CommandLineException; import org.broadinstitute.barclay.argparser.CommandLinePluginDescriptor; import org.broadinstitute.hellbender.cmdline.CommandLineProgram; import org.broadinstitute.hellbender.cmdline.GATKPlugin.GATKAnnotationPluginDescriptor; @@ -51,10 +50,6 @@ import 
org.broadinstitute.hellbender.utils.variant.writers.ShardingVCFWriter; import org.broadinstitute.hellbender.utils.variant.writers.IntervalFilteringVcfWriter; -//TODO: -//UserException overloads -//VCF outs - /** * Base class for all GATK tools. Tool authors that want to write a "GATK" tool but not use one of * the pre-packaged Walker traversals should feel free to extend this class directly. All other @@ -136,8 +131,7 @@ public abstract class GATKTool extends CommandLineProgram { doc = "If true, don't emit genotype fields when writing vcf file output.", optional = true) public boolean outputSitesOnlyVCFs = false; - public static final String VARIANT_OUTPUT_INTERVAL_FILTERING_MODE = "variant-output-interval-filtering-mode"; - @Argument(fullName = VARIANT_OUTPUT_INTERVAL_FILTERING_MODE, + @Argument(fullName = StandardArgumentDefinitions.VARIANT_OUTPUT_INTERVAL_FILTERING_MODE_LONG_NAME, doc = "Restrict the output variants to ones that match the specified intervals according to the specified matching mode.", optional = true) @Advanced @@ -758,7 +752,7 @@ protected void onStartup() { checkToolRequirements(); if (outputVariantIntervalFilteringMode != null && userIntervals == null){ - throw new CommandLineException.MissingArgument("-L or -XL", "Intervals are required if --" + VARIANT_OUTPUT_INTERVAL_FILTERING_MODE + " was specified."); + throw new CommandLineException.MissingArgument("-L or -XL", "Intervals are required if --" + StandardArgumentDefinitions.VARIANT_OUTPUT_INTERVAL_FILTERING_MODE_LONG_NAME + " was specified."); } initializeProgressMeter(getProgressMeterRecordLabel()); diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/GenotypeGVCFs.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/GenotypeGVCFs.java index bdb98b48574..c58e7a32ad3 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/GenotypeGVCFs.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/GenotypeGVCFs.java @@ -10,6 +10,7 @@ 
import org.broadinstitute.barclay.argparser.Argument; import org.broadinstitute.barclay.argparser.ArgumentCollection; import org.broadinstitute.barclay.argparser.CommandLineException; +import org.broadinstitute.barclay.argparser.CommandLinePluginDescriptor; import org.broadinstitute.barclay.argparser.CommandLineProgramProperties; import org.broadinstitute.barclay.help.DocumentedFeature; import org.broadinstitute.hellbender.cmdline.GATKPlugin.GATKAnnotationPluginDescriptor; @@ -18,7 +19,6 @@ import org.broadinstitute.hellbender.cmdline.argumentcollections.DbsnpArgumentCollection; import org.broadinstitute.hellbender.cmdline.programgroups.ShortVariantDiscoveryProgramGroup; import org.broadinstitute.hellbender.engine.FeatureContext; -import org.broadinstitute.hellbender.engine.GATKTool; import org.broadinstitute.hellbender.engine.GATKPath; import org.broadinstitute.hellbender.engine.ReadsContext; import org.broadinstitute.hellbender.engine.ReferenceContext; @@ -41,13 +41,14 @@ import org.broadinstitute.hellbender.utils.IntervalUtils; import org.broadinstitute.hellbender.utils.SimpleInterval; import org.broadinstitute.hellbender.utils.variant.GATKVariantContextUtils; -import org.broadinstitute.hellbender.utils.variant.writers.IntervalFilteringVcfWriter; import org.broadinstitute.hellbender.tools.walkers.annotator.allelespecific.ReducibleAnnotation; import java.util.ArrayList; import java.util.Arrays; +import java.util.Collection; import java.util.Collections; import java.util.List; +import java.util.Map; import java.util.Set; import java.util.stream.Collectors; @@ -129,7 +130,7 @@ public final class GenotypeGVCFs extends VariantLocusWalker { /** * Import all data between specified intervals. Improves performance using large lists of intervals, as in exome * sequencing, especially if GVCF data only exists for specified intervals. Use with - * --{@value GATKTool#VARIANT_OUTPUT_INTERVAL_FILTERING_MODE} if input GVCFs contain calls outside the specified intervals. 
+ * --{@value StandardArgumentDefinitions#VARIANT_OUTPUT_INTERVAL_FILTERING_MODE_LONG_NAME} if input GVCFs contain calls outside the specified intervals. */ @Argument(fullName = GenomicsDBImport.MERGE_INPUT_INTERVALS_LONG_NAME, shortName = GenomicsDBImport.MERGE_INPUT_INTERVALS_LONG_NAME, @@ -271,7 +272,7 @@ public void onTraversalStart() { final VCFHeader inputVCFHeader = getHeaderForVariants(); - final Collection variantAnnotations = makeVariantAnnotations(); + final Collection variantAnnotations = makeVariantAnnotations(); final Set annotationsToKeep = getAnnotationsToKeep(); annotationEngine = new VariantAnnotatorEngine(variantAnnotations, dbsnp.dbsnp, Collections.emptyList(), false, keepCombined, annotationsToKeep); diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/gnarlyGenotyper/GnarlyGenotyper.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/gnarlyGenotyper/GnarlyGenotyper.java index f69fe38061a..9537cb28634 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/gnarlyGenotyper/GnarlyGenotyper.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/gnarlyGenotyper/GnarlyGenotyper.java @@ -243,7 +243,6 @@ public void apply(VariantContext variant, ReadsContext reads, ReferenceContext r //return early if there's no non-symbolic ALT since GDB already did the merging if ( !variant.isVariant() || !GATKVariantContextUtils.isProperlyPolymorphic(variant) || variant.getAttributeAsInt(VCFConstants.DEPTH_KEY,0) == 0 ) - // todo this changes is a slight de-optimization since we will now process some sites whihc were previously ignored { if (keepAllSites) { VariantContextBuilder builder = new VariantContextBuilder(mqCalculator.finalizeRawMQ(variant)); //don't fill in QUAL here because there's no alt data diff --git a/src/main/java/org/broadinstitute/hellbender/utils/variant/writers/IntervalFilteringVcfWriter.java 
b/src/main/java/org/broadinstitute/hellbender/utils/variant/writers/IntervalFilteringVcfWriter.java index a44370af501..2e979ebfa03 100644 --- a/src/main/java/org/broadinstitute/hellbender/utils/variant/writers/IntervalFilteringVcfWriter.java +++ b/src/main/java/org/broadinstitute/hellbender/utils/variant/writers/IntervalFilteringVcfWriter.java @@ -28,7 +28,7 @@ public enum Mode implements CommandLineParser.ClpEnum { STARTS_IN("starts within any of the given intervals"){ @Override - boolean test(OverlapDetector detector, final VariantContext query) { + boolean test(final OverlapDetector detector, final VariantContext query) { final SimpleInterval startPosition = new SimpleInterval(query.getContig(), query.getStart(), query.getStart()); return detector.overlapsAny(startPosition); } @@ -90,12 +90,13 @@ boolean test(final OverlapDetector detector, final VariantC * @param query The variant being tested * @return true iff the variant matches the given intervals */ - abstract boolean test(OverlapDetector detector, VariantContext query); + abstract boolean test(final OverlapDetector detector, final VariantContext query); private Mode(String doc){ this.doc = doc; } + @Override public String getHelpDoc() { return doc; @@ -111,7 +112,7 @@ public String getHelpDoc() { * @param intervals the intervals to compare against, note that these are not merged so if they should be merged than the input list should be preprocessed * @param mode the matching mode to use */ - public IntervalFilteringVcfWriter(final VariantContextWriter writer, List intervals, Mode mode) { + public IntervalFilteringVcfWriter(final VariantContextWriter writer, final List intervals, final Mode mode) { Utils.nonNull(writer); Utils.nonEmpty(intervals); Utils.nonNull(mode); diff --git a/src/test/java/org/broadinstitute/hellbender/engine/GatkToolIntegrationTest.java b/src/test/java/org/broadinstitute/hellbender/engine/GatkToolIntegrationTest.java index 6031808e4e0..04e3308575c 100644 --- 
a/src/test/java/org/broadinstitute/hellbender/engine/GatkToolIntegrationTest.java +++ b/src/test/java/org/broadinstitute/hellbender/engine/GatkToolIntegrationTest.java @@ -14,10 +14,10 @@ import org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions; import org.broadinstitute.hellbender.cmdline.TestProgramGroup; import org.broadinstitute.hellbender.testutils.ArgumentsBuilder; +import org.broadinstitute.hellbender.testutils.BaseTest; import org.broadinstitute.hellbender.testutils.VariantContextTestUtils; import org.broadinstitute.hellbender.tools.walkers.mutect.Mutect2; import org.broadinstitute.hellbender.tools.walkers.variantutils.SelectVariants; -import org.broadinstitute.hellbender.testutils.VariantContextTestUtils; import org.broadinstitute.hellbender.utils.SimpleInterval; import org.broadinstitute.hellbender.utils.io.IOUtils; import org.broadinstitute.hellbender.utils.variant.writers.IntervalFilteringVcfWriter; @@ -107,6 +107,17 @@ public void testSharding() { oneLineSummary = "Test tool", programGroup = TestProgramGroup.class) public static class VariantEmitter extends GATKTool{ + static final SimpleInterval INT1 = new SimpleInterval("1",10, 15); + static final SimpleInterval INT2 = new SimpleInterval("1",100, 105); + static final SimpleInterval INT3 = new SimpleInterval("1",1000, 1005); + static final SimpleInterval INT4 = new SimpleInterval("1",10000, 10005); + static final SimpleInterval INT5 = new SimpleInterval("2",20, 25); + static final SimpleInterval INT6 = new SimpleInterval("2",200, 205); + static final SimpleInterval INT7 = new SimpleInterval("2",2000, 2005); + static final SimpleInterval INT8 = new SimpleInterval("2",20000, 20005); + + static final List INTERVALS = List.of(INT1, INT2, INT3, INT4, INT5, INT6, INT7, INT8); + @Argument(fullName = StandardArgumentDefinitions.OUTPUT_LONG_NAME) File output; @@ -122,46 +133,48 @@ public void onTraversalStart() { final VariantContextBuilder vcb = new VariantContextBuilder(); 
vcb.alleles("AAAAAA", "A").chr("1"); - vcfWriter.add(vcb.start(10).stop(15).make()); - vcfWriter.add(vcb.start(100).stop(105).make()); - vcfWriter.add(vcb.start(1000).stop(1005).make()); - vcfWriter.add(vcb.start(10000).stop(10005).make()); - - vcb.chr("2"); - vcfWriter.add(vcb.start(20).stop(25).make()); - vcfWriter.add(vcb.start(200).stop(205).make()); - vcfWriter.add(vcb.start(2000).stop(2005).make()); - vcfWriter.add(vcb.start(20000).stop(20005).make()); + for(final Locatable interval : INTERVALS){ + vcfWriter.add(vcb.loc(interval.getContig(),interval.getStart(), interval.getEnd()).make()); + } } } } @DataProvider public Object[][] getIntervalsAndOverlapMode(){ + final SimpleInterval chr1Interval = new SimpleInterval("1", 101, 10001); + final SimpleInterval chr2Interval = new SimpleInterval("2", 201, 20001); return new Object[][]{ - {Arrays.asList(new SimpleInterval("1", 101, 10001), new SimpleInterval("2", 201, 20001)), IntervalFilteringVcfWriter.Mode.ANYWHERE, 8}, - {Arrays.asList(new SimpleInterval("1", 101, 10001), new SimpleInterval("2", 201, 20001)), IntervalFilteringVcfWriter.Mode.OVERLAPS, 6}, - {Arrays.asList(new SimpleInterval("1", 101, 10001), new SimpleInterval("2", 201, 20001)), IntervalFilteringVcfWriter.Mode.STARTS_IN, 4}, - {Arrays.asList(new SimpleInterval("1", 101, 10001), new SimpleInterval("2", 201, 20001)), IntervalFilteringVcfWriter.Mode.ENDS_IN, 4}, - {Arrays.asList(new SimpleInterval("1", 101, 10001), new SimpleInterval("2", 201, 20001)), IntervalFilteringVcfWriter.Mode.CONTAINED, 2}, - {Arrays.asList(new SimpleInterval("1", 101, 10001), new SimpleInterval("2", 201, 20001)), null, 8}, + {Arrays.asList(chr1Interval, chr2Interval), IntervalFilteringVcfWriter.Mode.ANYWHERE, VariantEmitter.INTERVALS }, + {Arrays.asList(chr1Interval, chr2Interval), IntervalFilteringVcfWriter.Mode.OVERLAPS, List.of(VariantEmitter.INT2, VariantEmitter.INT3, VariantEmitter.INT4, VariantEmitter.INT6, VariantEmitter.INT7, VariantEmitter.INT8)}, + 
{Arrays.asList(chr1Interval, chr2Interval), IntervalFilteringVcfWriter.Mode.STARTS_IN, List.of(VariantEmitter.INT3, VariantEmitter.INT4, VariantEmitter.INT7, VariantEmitter.INT8)}, + {Arrays.asList(chr1Interval, chr2Interval), IntervalFilteringVcfWriter.Mode.ENDS_IN, List.of(VariantEmitter.INT2, VariantEmitter.INT3, VariantEmitter.INT6, VariantEmitter.INT7)}, + {Arrays.asList(chr1Interval, chr2Interval), IntervalFilteringVcfWriter.Mode.CONTAINED, List.of(VariantEmitter.INT3, VariantEmitter.INT7)}, + {Arrays.asList(chr1Interval, chr2Interval), null, VariantEmitter.INTERVALS}, }; } @Test(dataProvider = "getIntervalsAndOverlapMode") - public void testVcfOutputFilterMode(List intervals, IntervalFilteringVcfWriter.Mode mode, int variantsIncluded){ + public void testVcfOutputFilterMode(List intervals, IntervalFilteringVcfWriter.Mode mode, List expected){ final ArgumentsBuilder args = new ArgumentsBuilder(); final File out = createTempFile("out", ".vcf"); args.addOutput(out); intervals.forEach(args::addInterval); args.addReference(b37Reference); if( mode != null) { - args.add(GATKTool.VARIANT_OUTPUT_INTERVAL_FILTERING_MODE, mode); + args.add(StandardArgumentDefinitions.VARIANT_OUTPUT_INTERVAL_FILTERING_MODE_LONG_NAME, mode); } runCommandLine(args, VariantEmitter.class.getSimpleName()); final Pair> vcfHeaderListPair = VariantContextTestUtils.readEntireVCFIntoMemory(out.toString()); - Assert.assertEquals(vcfHeaderListPair.getRight().size(), variantsIncluded); + final List actual = vcfHeaderListPair.getRight(); + Assert.assertEquals(actual.size(), expected.size()); + BaseTest.assertCondition(actual, expected, (left, right) -> { + Assert.assertEquals(left.getContig(), right.getContig()); + Assert.assertEquals(left.getStart(), right.getStart()); + Assert.assertEquals(left.getEnd(), right.getEnd()); + } ); + } } diff --git a/src/test/java/org/broadinstitute/hellbender/tools/walkers/GenotypeGVCFsIntegrationTest.java 
b/src/test/java/org/broadinstitute/hellbender/tools/walkers/GenotypeGVCFsIntegrationTest.java index bee52cf9aed..d55b9de6f7a 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/walkers/GenotypeGVCFsIntegrationTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/walkers/GenotypeGVCFsIntegrationTest.java @@ -15,7 +15,6 @@ import org.broadinstitute.barclay.argparser.CommandLineException; import org.broadinstitute.hellbender.CommandLineProgramTest; import org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions; -import org.broadinstitute.hellbender.engine.GATKTool; import org.broadinstitute.hellbender.exceptions.UserException; import org.broadinstitute.hellbender.testutils.ArgumentsBuilder; import org.broadinstitute.hellbender.testutils.GenomicsDBTestUtils; @@ -85,7 +84,7 @@ public Object[][] gvcfsToGenotype() { //combine not supported yet, see https://github.com/broadinstitute/gatk/issues/2429 and https://github.com/broadinstitute/gatk/issues/2584 //{"combine.single.sample.pipeline.1.vcf", null, Arrays.asList("-V", getTestFile("combine.single.sample.pipeline.2.vcf").toString() , "-V", getTestFile("combine.single.sample.pipeline.3.vcf").toString()), b37_reference_20_21}, - {getTestFile("leadingDeletion.g.vcf"), getTestFile("leadingDeletionRestrictToStartExpected.vcf"), Arrays.asList("-L", "20:69512-69513", "--"+GATKTool.VARIANT_OUTPUT_INTERVAL_FILTERING_MODE, IntervalFilteringVcfWriter.Mode.STARTS_IN.toString()), b37_reference_20_21}, + {getTestFile("leadingDeletion.g.vcf"), getTestFile("leadingDeletionRestrictToStartExpected.vcf"), Arrays.asList("-L", "20:69512-69513", "--"+ StandardArgumentDefinitions.VARIANT_OUTPUT_INTERVAL_FILTERING_MODE_LONG_NAME, IntervalFilteringVcfWriter.Mode.STARTS_IN.toString()), b37_reference_20_21}, {getTestFile("leadingDeletion.g.vcf"), getTestFile("leadingDeletionExpected.vcf"), Arrays.asList("-L", "20:69512-69513"), b37_reference_20_21}, {getTestFile(BASE_PAIR_GVCF), getTestFile( BASE_PAIR_EXPECTED), 
NO_EXTRA_ARGS, b37_reference_20_21}, //base pair level gvcf {getTestFile("testUpdatePGT.gvcf"), getTestFile( "testUpdatePGT.gatk3.7_30_ga4f720357.output.vcf"), NO_EXTRA_ARGS, b37_reference_20_21}, //testUpdatePGT @@ -435,7 +434,7 @@ public void testGenotypeGVCFsMultiIntervalGDBQuery(File input, File expected, Li args.addOutput(output); intervals.forEach(args::addInterval); args.add(GenomicsDBImport.MERGE_INPUT_INTERVALS_LONG_NAME, true); - args.add(GATKTool.VARIANT_OUTPUT_INTERVAL_FILTERING_MODE, IntervalFilteringVcfWriter.Mode.STARTS_IN); //note that this will restrict calls to just the specified intervals + args.add(StandardArgumentDefinitions.VARIANT_OUTPUT_INTERVAL_FILTERING_MODE_LONG_NAME, IntervalFilteringVcfWriter.Mode.STARTS_IN); //note that this will restrict calls to just the specified intervals runAndCheckGenomicsDBOutput(args, expected, output); @@ -572,7 +571,7 @@ public void testIntervalsAndOnlyOutputCallsStartingInIntervalsAreMutuallyRequire .addVCF(getTestFile("leadingDeletion.g.vcf")) .addReference(new File(b37_reference_20_21)) .addOutput( createTempFile("tmp",".vcf")) - .add(GATKTool.VARIANT_OUTPUT_INTERVAL_FILTERING_MODE, IntervalFilteringVcfWriter.Mode.STARTS_IN); + .add(StandardArgumentDefinitions.VARIANT_OUTPUT_INTERVAL_FILTERING_MODE_LONG_NAME, IntervalFilteringVcfWriter.Mode.STARTS_IN); Assert.assertThrows(CommandLineException.MissingArgument.class, () -> runCommandLine(args)); args.add("L", "20:69512-69513"); diff --git a/src/test/java/org/broadinstitute/hellbender/tools/walkers/GnarlyGenotyperIntegrationTest.java b/src/test/java/org/broadinstitute/hellbender/tools/walkers/GnarlyGenotyperIntegrationTest.java index 764467059a8..8e2c52c0b7f 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/walkers/GnarlyGenotyperIntegrationTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/walkers/GnarlyGenotyperIntegrationTest.java @@ -10,8 +10,6 @@ import org.broadinstitute.hellbender.GATKBaseTest; import 
org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions; import org.broadinstitute.hellbender.engine.FeatureDataSource; -import org.broadinstitute.hellbender.engine.GATKTool; -import org.broadinstitute.hellbender.tools.walkers.annotator.allelespecific.AS_RMSMappingQuality; import org.broadinstitute.hellbender.utils.IntervalUtils; import org.broadinstitute.hellbender.utils.SimpleInterval; import org.broadinstitute.hellbender.testutils.ArgumentsBuilder; @@ -71,10 +69,10 @@ public Object[][] getVCFdata() { // Simple Test, spanning deletions; standard calling confidence //No variants outside requested intervals; no SNPs with QUAL < 60, no INDELs with QUAL < 69?; has star alleles after deletion at chr20:263497; has AC, AF, AN, DP, ExcessHet, FS, MQ, (MQRankSum), (ReadPosRankSum), SOR, QD; has called genotypes {new File[]{getTestFile("sample1.vcf"), getTestFile("sample2.vcf"), getTestFile("sample3.vcf"), getTestFile("sample4.vcf"), getTestFile("sample5.vcf")}, - getTestFile("fiveSampleTest.vcf"), null, Arrays.asList(new SimpleInterval("chr20", 251370, 252000), new SimpleInterval("chr20", 263000, 265600)), Arrays.asList("--merge-input-intervals", "--"+GATKTool.VARIANT_OUTPUT_INTERVAL_FILTERING_MODE, IntervalFilteringVcfWriter.Mode.STARTS_IN.toString()), b38_reference_20_21}, + getTestFile("fiveSampleTest.vcf"), null, Arrays.asList(new SimpleInterval("chr20", 251370, 252000), new SimpleInterval("chr20", 263000, 265600)), Arrays.asList("--merge-input-intervals", "--"+ StandardArgumentDefinitions.VARIANT_OUTPUT_INTERVAL_FILTERING_MODE_LONG_NAME, IntervalFilteringVcfWriter.Mode.STARTS_IN.toString()), b38_reference_20_21}, // Same as above, but with GenomicsDB using BCF2Codec for interchange {new File[]{getTestFile("sample1.vcf"), getTestFile("sample2.vcf"), getTestFile("sample3.vcf"), getTestFile("sample4.vcf"), getTestFile("sample5.vcf")}, - getTestFile("fiveSampleTest.vcf"), null, Arrays.asList(new SimpleInterval("chr20", 251370, 252000), new 
SimpleInterval("chr20", 263000, 265600)), Arrays.asList("--merge-input-intervals", "--"+GATKTool.VARIANT_OUTPUT_INTERVAL_FILTERING_MODE, IntervalFilteringVcfWriter.Mode.STARTS_IN.toString(), "--genomicsdb-use-bcf-codec"), b38_reference_20_21}, + getTestFile("fiveSampleTest.vcf"), null, Arrays.asList(new SimpleInterval("chr20", 251370, 252000), new SimpleInterval("chr20", 263000, 265600)), Arrays.asList("--merge-input-intervals", "--"+ StandardArgumentDefinitions.VARIANT_OUTPUT_INTERVAL_FILTERING_MODE_LONG_NAME, IntervalFilteringVcfWriter.Mode.STARTS_IN.toString(), "--genomicsdb-use-bcf-codec"), b38_reference_20_21}, //lower calling confidence //same as above except (different intervals and) with SNPs with 40 < QUAL < 60 and INDELs with 49 < QUAL < 69 {new File[]{getTestFile("sample1.vcf"), getTestFile("sample2.vcf"), getTestFile("sample3.vcf"), getTestFile("sample4.vcf"), getTestFile("sample5.vcf")}, @@ -179,7 +177,7 @@ public void testOnHailOutput() { args.addReference(new File(hg38Reference)) .add("V", input) .add("L", "chr20:10000000-10030000") - .add(GATKTool.VARIANT_OUTPUT_INTERVAL_FILTERING_MODE, IntervalFilteringVcfWriter.Mode.STARTS_IN) + .add(StandardArgumentDefinitions.VARIANT_OUTPUT_INTERVAL_FILTERING_MODE_LONG_NAME, IntervalFilteringVcfWriter.Mode.STARTS_IN) .add("keep-all-sites", true) .addOutput(outputPath) .add(StandardArgumentDefinitions.ADD_OUTPUT_VCF_COMMANDLINE, "false"); diff --git a/src/test/java/org/broadinstitute/hellbender/tools/walkers/variantutils/SelectVariantsIntegrationTest.java b/src/test/java/org/broadinstitute/hellbender/tools/walkers/variantutils/SelectVariantsIntegrationTest.java index 3664e551a1b..cfe959e15e2 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/walkers/variantutils/SelectVariantsIntegrationTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/walkers/variantutils/SelectVariantsIntegrationTest.java @@ -17,6 +17,7 @@ import org.broadinstitute.hellbender.utils.MathUtils; import 
org.broadinstitute.hellbender.utils.gcs.BucketUtils; import org.broadinstitute.hellbender.utils.io.IOUtils; +import org.broadinstitute.hellbender.utils.variant.writers.IntervalFilteringVcfWriter; import org.testng.Assert; import org.testng.annotations.DataProvider; import org.testng.annotations.Test; @@ -101,17 +102,17 @@ public void testRepeatedLineSelectionAndExludeFiltered() throws IOException { } @Test - public void testComplexSelection() throws IOException { - final String testFile = getToolTestDataDir() + "vcfexample2.vcf"; - final String samplesFile = getToolTestDataDir() + "samples.args"; + public void testComplexSelection() throws IOException { + final String testFile = getToolTestDataDir() + "vcfexample2.vcf"; + final String samplesFile = getToolTestDataDir() + "samples.args"; - final IntegrationTestSpec spec = new IntegrationTestSpec( - baseTestString(" -sn NA11894 -se 'NA069*' -sn " + samplesFile + " -select 'RMSMAPQ < 170.0'", testFile), - Collections.singletonList(getToolTestDataDir() + "expected/" + "testSelectVariants_ComplexSelection.vcf") - ); + final IntegrationTestSpec spec = new IntegrationTestSpec( + baseTestString(" -sn NA11894 -se 'NA069*' -sn " + samplesFile + " -select 'RMSMAPQ < 170.0'", testFile), + Collections.singletonList(getToolTestDataDir() + "expected/" + "testSelectVariants_ComplexSelection.vcf") + ); - spec.executeTest("testComplexSelection--" + testFile, this); - } + spec.executeTest("testComplexSelection--" + testFile, this); + } /** * When input variants are untrimmed, they can be trimmed by select variants, which may change their order. 
@@ -1263,4 +1264,16 @@ public void testManyAllelicWithoutPLsDoesntFreeze() { .addOutput(output); runCommandLine(args); } + + @Test + public void testOutputOnlyStartingInInterval() throws IOException { + final String testFile = getToolTestDataDir() + "complexExample1.vcf"; + + final IntegrationTestSpec spec = new IntegrationTestSpec( + baseTestString("-L 1:10045604-10046982 " + + "--"+ StandardArgumentDefinitions.VARIANT_OUTPUT_INTERVAL_FILTERING_MODE_LONG_NAME + " " + IntervalFilteringVcfWriter.Mode.STARTS_IN, testFile) , + Collections.singletonList(getToolTestDataDir() + "expected/testSelectVariants_onlyOutputInInterval.vcf") + ); + spec.executeTest("test that variant interval output filter mode is applied", this); + } } diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/walkers/variantutils/SelectVariants/complexExample1.vcf.idx b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/variantutils/SelectVariants/complexExample1.vcf.idx index 94ed147bb2a32d711ddb94f9bae05523742d435c..57e11bb4defdbb99e4a75f2dbc981c9ee21c2e32 100644 GIT binary patch literal 433 zcmb`D!A`?442F$>3lic@>ZYaJ%B|W0Okzday1}hVJ8x7A&6H$gPr!kPK)eG_#G8-^ zwgYD@*}rAkzdpGb4jzvH;MnD&k#vQKQ$yOIRGY?B_@MO4tVK<@6n2G1*J#OD)MV5~ z*JM!XC01IAdD9x(*o`H8A}K3s=cLgpB@Mm^x$>r4p&QY=(=l Oc<9el-~FHNKk*HiHD+M| literal 249 zcmWIXbctYOU|?VdV*QN#+*JM2yvz#yqMX#S%)C_n^x{O_(!Ao5#H5^5{eq&*vcwXg zU}8zOZb^QAPO&absJJLuza+J|M87DtIKQ+gIki|nIX|}`C$+-0A`wIz>Xjv@F+Ak~ mS_1?RHb~C&0a*(JAhsa`!vcN=E+C5qDhyQ(Q$!US<^%u>@GKPo diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/walkers/variantutils/SelectVariants/expected/testSelectVariants_onlyOutputInInterval.vcf b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/variantutils/SelectVariants/expected/testSelectVariants_onlyOutputInInterval.vcf new file mode 100644 index 00000000000..e54b7e2a90c --- /dev/null +++ 
b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/variantutils/SelectVariants/expected/testSelectVariants_onlyOutputInInterval.vcf @@ -0,0 +1,26 @@ +##fileformat=VCFv4.2 +##FILTER= +##FILTER= +##FILTER= +##FORMAT= +##FORMAT= +##FORMAT= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##reference=human_b36_both.fasta +##source=ArbitrarySource +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA00001 NA00002 NA00003 +1 10045604 . A ACAT 41.26 PASS AF=0.50 GT:DP:GQ 1/1:14:7 0/0:15:8 1/0:15:8 +1 10046982 testid0;testid1 C T 32.01 PASS AF=0.40;AlleleBalance=0.37;DoC=21;HomopolymerRun=0;MAPQ0=8;NS=1;RMSMAPQ=12.31;SB=-4.2;SpanningDeletions=0 GT:DP:GQ 1/0:3:49 ./. ./. diff --git a/src/testUtils/java/org/broadinstitute/hellbender/testutils/BaseTest.java b/src/testUtils/java/org/broadinstitute/hellbender/testutils/BaseTest.java index c70660e9b2f..14f6366773b 100644 --- a/src/testUtils/java/org/broadinstitute/hellbender/testutils/BaseTest.java +++ b/src/testUtils/java/org/broadinstitute/hellbender/testutils/BaseTest.java @@ -514,9 +514,9 @@ public static void assertContains(String actual, String expectedSubstring){ Assert.assertTrue(actual.contains(expectedSubstring), expectedSubstring +" was not found in " + actual + "."); } - public static void assertCondition(Iterable actual, Iterable expected, BiConsumer assertion){ - final Iterator iterActual = actual.iterator(); - final Iterator iterExpected = expected.iterator(); + public static void assertCondition(Iterable actual, Iterable expected, BiConsumer assertion){ + final Iterator iterActual = actual.iterator(); + final Iterator iterExpected = expected.iterator(); while(iterActual.hasNext() && iterExpected.hasNext()){ assertion.accept(iterActual.next(), iterExpected.next()); } From 62e36c5f5fa752732d038b45f11b743969c0f61b Mon Sep 17 00:00:00 2001 From: Louis Bergelson Date: Mon, 19 Aug 2024 14:14:05 -0400 Subject: [PATCH 4/9] fix test 
--- .../tools/walkers/GnarlyGenotyperIntegrationTest.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/test/java/org/broadinstitute/hellbender/tools/walkers/GnarlyGenotyperIntegrationTest.java b/src/test/java/org/broadinstitute/hellbender/tools/walkers/GnarlyGenotyperIntegrationTest.java index 8e2c52c0b7f..b9cc98baf41 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/walkers/GnarlyGenotyperIntegrationTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/walkers/GnarlyGenotyperIntegrationTest.java @@ -57,14 +57,14 @@ public Object[][] getVCFdata() { //chrX haploid sample plus diploid sample -- expected results validated with vcf-validator (samtools?) {new File[]{getTestFile("NA12891.chrX.haploid.rb.g.vcf"), getTestFile("NA12892.chrX.diploid.rb.g.vcf")}, - getTestFile("haploidPlusDiploid.expected.vcf"), null, Arrays.asList(new SimpleInterval("chrX", 1000000, 5000000)), Arrays.asList("--merge-input-intervals", "--only-output-calls-starting-in-intervals"), b38_reference_20_21}, + getTestFile("haploidPlusDiploid.expected.vcf"), null, Arrays.asList(new SimpleInterval("chrX", 1000000, 5000000)), Arrays.asList("--merge-input-intervals", "--" + StandardArgumentDefinitions.VARIANT_OUTPUT_INTERVAL_FILTERING_MODE_LONG_NAME, IntervalFilteringVcfWriter.Mode.STARTS_IN), b38_reference_20_21}, //8 ALT alleles -- no PLs {new File[]{getTestFile("sample6.vcf"), getTestFile("sample7.vcf"), getTestFile("sample8.vcf"), getTestFile("sample9.vcf")}, - getTestFile("lotsOfAltsNoPLs.vcf"), null, Arrays.asList(new SimpleInterval("chr20", 257008, 257008)), Arrays.asList("--merge-input-intervals", "--only-output-calls-starting-in-intervals"), b38_reference_20_21}, + getTestFile("lotsOfAltsNoPLs.vcf"), null, Arrays.asList(new SimpleInterval("chr20", 257008, 257008)), Arrays.asList("--merge-input-intervals", "--" + StandardArgumentDefinitions.VARIANT_OUTPUT_INTERVAL_FILTERING_MODE_LONG_NAME, IntervalFilteringVcfWriter.Mode.STARTS_IN), 
b38_reference_20_21}, //6 ALT alleles -- yes PLs {new File[]{getTestFile("sample6.vcf"), getTestFile("sample7.vcf"), getTestFile("sample8.vcf")}, - getTestFile("lotsOfAltsYesPLs.vcf"), null, Arrays.asList(new SimpleInterval("chr20", 257008, 257008)), Arrays.asList("--merge-input-intervals", "--only-output-calls-starting-in-intervals"), b38_reference_20_21}, + getTestFile("lotsOfAltsYesPLs.vcf"), null, Arrays.asList(new SimpleInterval("chr20", 257008, 257008)), Arrays.asList("--merge-input-intervals","--" + StandardArgumentDefinitions.VARIANT_OUTPUT_INTERVAL_FILTERING_MODE_LONG_NAME, IntervalFilteringVcfWriter.Mode.STARTS_IN), b38_reference_20_21}, // Simple Test, spanning deletions; standard calling confidence //No variants outside requested intervals; no SNPs with QUAL < 60, no INDELs with QUAL < 69?; has star alleles after deletion at chr20:263497; has AC, AF, AN, DP, ExcessHet, FS, MQ, (MQRankSum), (ReadPosRankSum), SOR, QD; has called genotypes From cec1af17ad2ac702c350a57e8571d2cab673aa73 Mon Sep 17 00:00:00 2001 From: Louis Bergelson Date: Mon, 19 Aug 2024 14:15:25 -0400 Subject: [PATCH 5/9] fix fix test --- .../tools/walkers/GnarlyGenotyperIntegrationTest.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/test/java/org/broadinstitute/hellbender/tools/walkers/GnarlyGenotyperIntegrationTest.java b/src/test/java/org/broadinstitute/hellbender/tools/walkers/GnarlyGenotyperIntegrationTest.java index b9cc98baf41..b3942729084 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/walkers/GnarlyGenotyperIntegrationTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/walkers/GnarlyGenotyperIntegrationTest.java @@ -57,14 +57,14 @@ public Object[][] getVCFdata() { //chrX haploid sample plus diploid sample -- expected results validated with vcf-validator (samtools?) 
{new File[]{getTestFile("NA12891.chrX.haploid.rb.g.vcf"), getTestFile("NA12892.chrX.diploid.rb.g.vcf")}, - getTestFile("haploidPlusDiploid.expected.vcf"), null, Arrays.asList(new SimpleInterval("chrX", 1000000, 5000000)), Arrays.asList("--merge-input-intervals", "--" + StandardArgumentDefinitions.VARIANT_OUTPUT_INTERVAL_FILTERING_MODE_LONG_NAME, IntervalFilteringVcfWriter.Mode.STARTS_IN), b38_reference_20_21}, + getTestFile("haploidPlusDiploid.expected.vcf"), null, Arrays.asList(new SimpleInterval("chrX", 1000000, 5000000)), Arrays.asList("--merge-input-intervals", "--" + StandardArgumentDefinitions.VARIANT_OUTPUT_INTERVAL_FILTERING_MODE_LONG_NAME, IntervalFilteringVcfWriter.Mode.STARTS_IN.toString()), b38_reference_20_21}, //8 ALT alleles -- no PLs {new File[]{getTestFile("sample6.vcf"), getTestFile("sample7.vcf"), getTestFile("sample8.vcf"), getTestFile("sample9.vcf")}, - getTestFile("lotsOfAltsNoPLs.vcf"), null, Arrays.asList(new SimpleInterval("chr20", 257008, 257008)), Arrays.asList("--merge-input-intervals", "--" + StandardArgumentDefinitions.VARIANT_OUTPUT_INTERVAL_FILTERING_MODE_LONG_NAME, IntervalFilteringVcfWriter.Mode.STARTS_IN), b38_reference_20_21}, + getTestFile("lotsOfAltsNoPLs.vcf"), null, Arrays.asList(new SimpleInterval("chr20", 257008, 257008)), Arrays.asList("--merge-input-intervals", "--" + StandardArgumentDefinitions.VARIANT_OUTPUT_INTERVAL_FILTERING_MODE_LONG_NAME, IntervalFilteringVcfWriter.Mode.STARTS_IN.toString()), b38_reference_20_21}, //6 ALT alleles -- yes PLs {new File[]{getTestFile("sample6.vcf"), getTestFile("sample7.vcf"), getTestFile("sample8.vcf")}, - getTestFile("lotsOfAltsYesPLs.vcf"), null, Arrays.asList(new SimpleInterval("chr20", 257008, 257008)), Arrays.asList("--merge-input-intervals","--" + StandardArgumentDefinitions.VARIANT_OUTPUT_INTERVAL_FILTERING_MODE_LONG_NAME, IntervalFilteringVcfWriter.Mode.STARTS_IN), b38_reference_20_21}, + getTestFile("lotsOfAltsYesPLs.vcf"), null, Arrays.asList(new SimpleInterval("chr20", 
257008, 257008)), Arrays.asList("--merge-input-intervals","--" + StandardArgumentDefinitions.VARIANT_OUTPUT_INTERVAL_FILTERING_MODE_LONG_NAME, IntervalFilteringVcfWriter.Mode.STARTS_IN.toString()), b38_reference_20_21}, // Simple Test, spanning deletions; standard calling confidence //No variants outside requested intervals; no SNPs with QUAL < 60, no INDELs with QUAL < 69?; has star alleles after deletion at chr20:263497; has AC, AF, AN, DP, ExcessHet, FS, MQ, (MQRankSum), (ReadPosRankSum), SOR, QD; has called genotypes From 27beb5564ecc57e9173e64f75a0ced8fc8fb4027 Mon Sep 17 00:00:00 2001 From: James Date: Tue, 17 Sep 2024 14:35:18 -0400 Subject: [PATCH 6/9] Updating the branch to reviewer comments and added some slightly more useful tests --- .../cmdline/StandardArgumentDefinitions.java | 2 +- .../hellbender/engine/GATKTool.java | 19 ++--- .../hellbender/engine/VariantWalker.java | 24 ++++++ .../writers/IntervalFilteringVcfWriter.java | 37 ++++++--- .../engine/GatkToolIntegrationTest.java | 79 ++++++++++++++++--- .../HaplotypeCallerIntegrationTest.java | 2 +- 6 files changed, 127 insertions(+), 36 deletions(-) diff --git a/src/main/java/org/broadinstitute/hellbender/cmdline/StandardArgumentDefinitions.java b/src/main/java/org/broadinstitute/hellbender/cmdline/StandardArgumentDefinitions.java index 9789c978c6b..ad1bb00a10a 100644 --- a/src/main/java/org/broadinstitute/hellbender/cmdline/StandardArgumentDefinitions.java +++ b/src/main/java/org/broadinstitute/hellbender/cmdline/StandardArgumentDefinitions.java @@ -46,7 +46,7 @@ private StandardArgumentDefinitions(){} public static final String INVALIDATE_PREVIOUS_FILTERS_LONG_NAME = "invalidate-previous-filters"; public static final String SORT_ORDER_LONG_NAME = "sort-order"; public static final String FLOW_ORDER_FOR_ANNOTATIONS = "flow-order-for-annotations"; - public static final String VARIANT_OUTPUT_INTERVAL_FILTERING_MODE_LONG_NAME = "variant-output-interval-filtering-mode"; + public static final String 
VARIANT_OUTPUT_INTERVAL_FILTERING_MODE_LONG_NAME = "variant-output-filtering"; public static final String INPUT_SHORT_NAME = "I"; public static final String OUTPUT_SHORT_NAME = "O"; diff --git a/src/main/java/org/broadinstitute/hellbender/engine/GATKTool.java b/src/main/java/org/broadinstitute/hellbender/engine/GATKTool.java index 824522af0a1..24906a9504b 100644 --- a/src/main/java/org/broadinstitute/hellbender/engine/GATKTool.java +++ b/src/main/java/org/broadinstitute/hellbender/engine/GATKTool.java @@ -131,13 +131,6 @@ public abstract class GATKTool extends CommandLineProgram { doc = "If true, don't emit genotype fields when writing vcf file output.", optional = true) public boolean outputSitesOnlyVCFs = false; - @Argument(fullName = StandardArgumentDefinitions.VARIANT_OUTPUT_INTERVAL_FILTERING_MODE_LONG_NAME, - doc = "Restrict the output variants to ones that match the specified intervals according to the specified matching mode.", - optional = true) - @Advanced - public IntervalFilteringVcfWriter.Mode outputVariantIntervalFilteringMode = getDefaultVariantOutputFilterMode(); - - /** * Master sequence dictionary to be used instead of all other dictionaries (if provided). */ @@ -429,10 +422,10 @@ public int getDefaultCloudIndexPrefetchBufferSize() { public String getProgressMeterRecordLabel() { return ProgressMeter.DEFAULT_RECORD_LABEL; } /** - * @return Default interval filtering mode for variant output. Subclasses may override this to set a different default. + * @return null for no output filtering of variants to the variant writer. Subclasses may override this to enforce other filtering schemes. 
*/ - public IntervalFilteringVcfWriter.Mode getDefaultVariantOutputFilterMode(){ - return null; + public IntervalFilteringVcfWriter.Mode getVariantFilteringOutputModeIfApplicable(){ + return IntervalFilteringVcfWriter.Mode.ANYWHERE; } protected List transformTraversalIntervals(final List getIntervals, final SAMSequenceDictionary sequenceDictionary) { @@ -751,7 +744,7 @@ protected void onStartup() { checkToolRequirements(); - if (outputVariantIntervalFilteringMode != null && userIntervals == null){ + if ((getVariantFilteringOutputModeIfApplicable() != IntervalFilteringVcfWriter.Mode.ANYWHERE ) && userIntervals == null){ throw new CommandLineException.MissingArgument("-L or -XL", "Intervals are required if --" + StandardArgumentDefinitions.VARIANT_OUTPUT_INTERVAL_FILTERING_MODE_LONG_NAME + " was specified."); } @@ -949,11 +942,11 @@ public VariantContextWriter createVCFWriter(final Path outPath) { options.toArray(new Options[0])); } - return outputVariantIntervalFilteringMode== null ? + return getVariantFilteringOutputModeIfApplicable() == IntervalFilteringVcfWriter.Mode.ANYWHERE ? 
unfilteredWriter : new IntervalFilteringVcfWriter(unfilteredWriter, intervalArgumentCollection.getIntervals(getBestAvailableSequenceDictionary()), - outputVariantIntervalFilteringMode); + getVariantFilteringOutputModeIfApplicable()); } /** diff --git a/src/main/java/org/broadinstitute/hellbender/engine/VariantWalker.java b/src/main/java/org/broadinstitute/hellbender/engine/VariantWalker.java index 678136d7a68..9fe8369ab41 100644 --- a/src/main/java/org/broadinstitute/hellbender/engine/VariantWalker.java +++ b/src/main/java/org/broadinstitute/hellbender/engine/VariantWalker.java @@ -3,11 +3,13 @@ import htsjdk.samtools.SAMSequenceDictionary; import htsjdk.variant.variantcontext.VariantContext; import htsjdk.variant.vcf.VCFHeader; +import org.broadinstitute.barclay.argparser.Advanced; import org.broadinstitute.barclay.argparser.Argument; import org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions; import org.broadinstitute.hellbender.engine.filters.CountingReadFilter; import org.broadinstitute.hellbender.exceptions.GATKException; import org.broadinstitute.hellbender.utils.SimpleInterval; +import org.broadinstitute.hellbender.utils.variant.writers.IntervalFilteringVcfWriter; import java.util.Spliterator; @@ -31,6 +33,28 @@ public abstract class VariantWalker extends VariantWalkerBase { private FeatureDataSource drivingVariants; private FeatureInput drivingVariantsFeatureInput; + @Argument(fullName = StandardArgumentDefinitions.VARIANT_OUTPUT_INTERVAL_FILTERING_MODE_LONG_NAME, + doc = "Restrict the output variants to ones that match the specified intervals according to the specified matching mode.", + optional = true) + @Advanced + public IntervalFilteringVcfWriter.Mode outputVariantIntervalFilteringMode = getDefaultVariantOutputFilterMode(); + + /** + * @return Default interval filtering mode for variant output. Subclasses may override this to set a different default. 
+ */ + public IntervalFilteringVcfWriter.Mode getDefaultVariantOutputFilterMode(){ + return IntervalFilteringVcfWriter.Mode.ANYWHERE; + } + + @Override + public IntervalFilteringVcfWriter.Mode getVariantFilteringOutputModeIfApplicable() { + if (outputVariantIntervalFilteringMode != null) { + return outputVariantIntervalFilteringMode; + } else { + return super.getVariantFilteringOutputModeIfApplicable(); + } + } + @Override protected SAMSequenceDictionary getSequenceDictionaryForDrivingVariants() { return drivingVariants.getSequenceDictionary(); } diff --git a/src/main/java/org/broadinstitute/hellbender/utils/variant/writers/IntervalFilteringVcfWriter.java b/src/main/java/org/broadinstitute/hellbender/utils/variant/writers/IntervalFilteringVcfWriter.java index 2e979ebfa03..e0f0d1e2ac5 100644 --- a/src/main/java/org/broadinstitute/hellbender/utils/variant/writers/IntervalFilteringVcfWriter.java +++ b/src/main/java/org/broadinstitute/hellbender/utils/variant/writers/IntervalFilteringVcfWriter.java @@ -5,6 +5,8 @@ import htsjdk.variant.variantcontext.VariantContext; import htsjdk.variant.variantcontext.writer.VariantContextWriter; import htsjdk.variant.vcf.VCFHeader; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; import org.broadinstitute.barclay.argparser.CommandLineParser; import org.broadinstitute.hellbender.utils.SimpleInterval; import org.broadinstitute.hellbender.utils.Utils; @@ -32,6 +34,8 @@ boolean test(final OverlapDetector detector, final VariantC final SimpleInterval startPosition = new SimpleInterval(query.getContig(), query.getStart(), query.getStart()); return detector.overlapsAny(startPosition); } + @Override + String getName() {return "STARTS_IN";} }, /** @@ -43,6 +47,8 @@ boolean test(final OverlapDetector detector, final VariantC final SimpleInterval endPosition = new SimpleInterval(query.getContig(), query.getEnd(), query.getEnd()); return detector.overlapsAny(endPosition); } + @Override + String getName() 
{return "ENDS_IN";} }, /** @@ -53,12 +59,13 @@ boolean test(final OverlapDetector detector, final VariantC boolean test(final OverlapDetector detector, final VariantContext query) { return detector.overlapsAny(query); } + @Override + String getName() {return "OVERLAPS";} }, - // TODO finish this exception here... /** * Matches if the entirety of the query is contained within one of the intervals. Note that adjacent intervals - * may be merged into a single interval depending on the values in + * may be merged into a single interval depending specified "--interval-merging-rule". */ CONTAINED("contained completely within a contiguous block of intervals without overlap") { @Override @@ -71,6 +78,8 @@ boolean test(final OverlapDetector detector, final VariantC } return false; } + @Override + String getName() {return "CONTAINED";} }, /** @@ -81,6 +90,8 @@ boolean test(final OverlapDetector detector, final VariantC boolean test(final OverlapDetector detector, final VariantContext query) { return true; } + @Override + String getName() {return "ANYWHERE";} }; private final String doc; @@ -91,6 +102,7 @@ boolean test(final OverlapDetector detector, final VariantC * @return true iff the variant matches the given intervals */ abstract boolean test(final OverlapDetector detector, final VariantContext query); + abstract String getName(); private Mode(String doc){ this.doc = doc; @@ -103,9 +115,11 @@ public String getHelpDoc() { } } - private final VariantContextWriter writer; + private final VariantContextWriter underlyingWriter; private final OverlapDetector detector; private final Mode mode; + private static int filteredCount = 0; + protected final Logger logger = LogManager.getLogger(this.getClass()); /** * @param writer the writer to wrap @@ -117,29 +131,32 @@ public IntervalFilteringVcfWriter(final VariantContextWriter writer, final List< Utils.nonEmpty(intervals); Utils.nonNull(mode); - this.writer = writer; + this.underlyingWriter = writer; this.detector = 
OverlapDetector.create(intervals); this.mode = mode; } @Override public void writeHeader(final VCFHeader header) { - writer.writeHeader(header); + underlyingWriter.writeHeader(header); } @Override public void setHeader(final VCFHeader header) { - writer.setHeader(header); + underlyingWriter.setHeader(header); } @Override public void close() { - writer.close(); + underlyingWriter.close(); + if (filteredCount > 0) { + logger.info("Removed " + filteredCount + " variants from the output according to '"+mode.getName()+"' variant interval filtering rule."); + } } @Override public boolean checkError() { - return writer.checkError(); + return underlyingWriter.checkError(); } /** @@ -149,7 +166,9 @@ public boolean checkError() { @Override public void add(final VariantContext vc) { if(mode.test(detector, vc)) { - writer.add(vc); + underlyingWriter.add(vc); + } else { + filteredCount++; } } diff --git a/src/test/java/org/broadinstitute/hellbender/engine/GatkToolIntegrationTest.java b/src/test/java/org/broadinstitute/hellbender/engine/GatkToolIntegrationTest.java index 04e3308575c..ff453b89a28 100644 --- a/src/test/java/org/broadinstitute/hellbender/engine/GatkToolIntegrationTest.java +++ b/src/test/java/org/broadinstitute/hellbender/engine/GatkToolIntegrationTest.java @@ -1,13 +1,16 @@ package org.broadinstitute.hellbender.engine; +import com.google.errorprone.annotations.Var; import htsjdk.samtools.reference.ReferenceSequenceFileFactory; import htsjdk.samtools.util.Locatable; import htsjdk.samtools.util.FileExtensions; +import htsjdk.variant.variantcontext.Allele; import htsjdk.variant.variantcontext.VariantContext; import htsjdk.variant.variantcontext.VariantContextBuilder; import htsjdk.variant.variantcontext.writer.VariantContextWriter; import htsjdk.variant.vcf.VCFHeader; import org.apache.commons.lang3.tuple.Pair; +import org.broadinstitute.barclay.argparser.Advanced; import org.broadinstitute.barclay.argparser.Argument; import 
org.broadinstitute.barclay.argparser.CommandLineProgramProperties; import org.broadinstitute.hellbender.CommandLineProgramTest; @@ -32,6 +35,7 @@ import java.nio.file.Path; import java.nio.file.Paths; import java.nio.file.StandardCopyOption; +import java.util.ArrayList; import java.util.Arrays; import java.util.List; @@ -115,8 +119,9 @@ public static class VariantEmitter extends GATKTool{ static final SimpleInterval INT6 = new SimpleInterval("2",200, 205); static final SimpleInterval INT7 = new SimpleInterval("2",2000, 2005); static final SimpleInterval INT8 = new SimpleInterval("2",20000, 20005); + static final SimpleInterval INT9_INS = new SimpleInterval("3",20000, 20000); - static final List INTERVALS = List.of(INT1, INT2, INT3, INT4, INT5, INT6, INT7, INT8); + static final List INTERVALS = List.of(INT1, INT2, INT3, INT4, INT5, INT6, INT7, INT8, INT9_INS); @Argument(fullName = StandardArgumentDefinitions.OUTPUT_LONG_NAME) File output; @@ -125,16 +130,34 @@ public static class VariantEmitter extends GATKTool{ public void traverse() { //nope } + @Argument(fullName = StandardArgumentDefinitions.VARIANT_OUTPUT_INTERVAL_FILTERING_MODE_LONG_NAME, + doc = "Restrict the output variants to ones that match the specified intervals according to the specified matching mode.", + optional = true) + @Advanced + public IntervalFilteringVcfWriter.Mode outputVariantIntervalFilteringMode = IntervalFilteringVcfWriter.Mode.ANYWHERE; + + @Override + public IntervalFilteringVcfWriter.Mode getVariantFilteringOutputModeIfApplicable() { + if (outputVariantIntervalFilteringMode != null) { + return outputVariantIntervalFilteringMode; + } else { + return super.getVariantFilteringOutputModeIfApplicable(); + } + } @Override public void onTraversalStart() { try(final VariantContextWriter vcfWriter = createVCFWriter(output)){ vcfWriter.writeHeader(new VCFHeader()); - final VariantContextBuilder vcb = new VariantContextBuilder(); - vcb.alleles("AAAAAA", "A").chr("1"); - for(final Locatable 
interval : INTERVALS){ - vcfWriter.add(vcb.loc(interval.getContig(),interval.getStart(), interval.getEnd()).make()); + final VariantContextBuilder vcb = new VariantContextBuilder(); + if (interval.getEnd()==interval.getStart()) { + vcb.alleles("A", "AAAAAAAAAA"); + vcfWriter.add(vcb.chr(interval.getContig()).start(interval.getStart()).computeEndFromAlleles(Arrays.asList(Allele.create("A", true), Allele.create("AAAAAAAAAA")) ,interval.getStart()).make()); + } else { + vcb.alleles("AAAAAA", "A").chr("1"); + vcfWriter.add(vcb.loc(interval.getContig(),interval.getStart(), interval.getEnd()).make()); + } } } } @@ -144,23 +167,55 @@ public void onTraversalStart() { public Object[][] getIntervalsAndOverlapMode(){ final SimpleInterval chr1Interval = new SimpleInterval("1", 101, 10001); final SimpleInterval chr2Interval = new SimpleInterval("2", 201, 20001); + final SimpleInterval chr1IntervalLeft = new SimpleInterval("1", 99, 102); + final SimpleInterval chr1IntervalRight = new SimpleInterval("1", 103, 110); + final SimpleInterval chr1IntervalNonAbutting = new SimpleInterval("1", 104, 110); + + final SimpleInterval chr1Interval99 = new SimpleInterval("1", 99, 110); + final SimpleInterval chr1Interval100 = new SimpleInterval("1", 100, 110); + final SimpleInterval chr1Interval101 = new SimpleInterval("1", 101, 110); + + final SimpleInterval chr3Interval99 = new SimpleInterval("3", 19999, 19999); + final SimpleInterval chr3Interval100 = new SimpleInterval("3", 20000, 20000); + final SimpleInterval chr3Interval101 = new SimpleInterval("3", 20001, 20001); + return new Object[][]{ - {Arrays.asList(chr1Interval, chr2Interval), IntervalFilteringVcfWriter.Mode.ANYWHERE, VariantEmitter.INTERVALS }, - {Arrays.asList(chr1Interval, chr2Interval), IntervalFilteringVcfWriter.Mode.OVERLAPS, List.of(VariantEmitter.INT2, VariantEmitter.INT3, VariantEmitter.INT4, VariantEmitter.INT6, VariantEmitter.INT7, VariantEmitter.INT8)}, - {Arrays.asList(chr1Interval, chr2Interval), 
IntervalFilteringVcfWriter.Mode.STARTS_IN, List.of(VariantEmitter.INT3, VariantEmitter.INT4, VariantEmitter.INT7, VariantEmitter.INT8)}, - {Arrays.asList(chr1Interval, chr2Interval), IntervalFilteringVcfWriter.Mode.ENDS_IN, List.of(VariantEmitter.INT2, VariantEmitter.INT3, VariantEmitter.INT6, VariantEmitter.INT7)}, - {Arrays.asList(chr1Interval, chr2Interval), IntervalFilteringVcfWriter.Mode.CONTAINED, List.of(VariantEmitter.INT3, VariantEmitter.INT7)}, - {Arrays.asList(chr1Interval, chr2Interval), null, VariantEmitter.INTERVALS}, + {Arrays.asList(chr1Interval, chr2Interval), new ArrayList<>(), IntervalFilteringVcfWriter.Mode.ANYWHERE, VariantEmitter.INTERVALS }, + {Arrays.asList(chr1Interval, chr2Interval), new ArrayList<>(), IntervalFilteringVcfWriter.Mode.OVERLAPS, List.of(VariantEmitter.INT2, VariantEmitter.INT3, VariantEmitter.INT4, VariantEmitter.INT6, VariantEmitter.INT7, VariantEmitter.INT8)}, + {Arrays.asList(chr1Interval, chr2Interval), new ArrayList<>(), IntervalFilteringVcfWriter.Mode.STARTS_IN, List.of(VariantEmitter.INT3, VariantEmitter.INT4, VariantEmitter.INT7, VariantEmitter.INT8)}, + {Arrays.asList(chr1Interval, chr2Interval), new ArrayList<>(), IntervalFilteringVcfWriter.Mode.ENDS_IN, List.of(VariantEmitter.INT2, VariantEmitter.INT3, VariantEmitter.INT6, VariantEmitter.INT7)}, + {Arrays.asList(chr1Interval, chr2Interval), new ArrayList<>(), IntervalFilteringVcfWriter.Mode.CONTAINED, List.of(VariantEmitter.INT3, VariantEmitter.INT7)}, + + // Tests specifically aimed at documenting how the --interval-merging-rule argument works in conjunction with the interval filtering + {Arrays.asList(chr1IntervalLeft, chr1IntervalRight), new ArrayList<>(), IntervalFilteringVcfWriter.Mode.CONTAINED, List.of(VariantEmitter.INT2)}, // Default is to merge all + {Arrays.asList(chr1IntervalLeft, chr1IntervalRight), Arrays.asList("--interval-merging-rule", "ALL"), IntervalFilteringVcfWriter.Mode.CONTAINED, List.of(VariantEmitter.INT2)}, + 
{Arrays.asList(chr1IntervalLeft, chr1IntervalRight), Arrays.asList("--interval-merging-rule", "OVERLAPPING_ONLY"), IntervalFilteringVcfWriter.Mode.CONTAINED, new ArrayList<>()}, + {Arrays.asList(chr1IntervalLeft, chr1IntervalNonAbutting), Arrays.asList("--interval-merging-rule", "OVERLAPPING_ONLY"), IntervalFilteringVcfWriter.Mode.CONTAINED, new ArrayList<>()}, + {Arrays.asList(chr1IntervalLeft, chr1IntervalNonAbutting), Arrays.asList("--interval-merging-rule", "OVERLAPPING_ONLY", "--interval-padding", "10"), IntervalFilteringVcfWriter.Mode.CONTAINED, List.of(VariantEmitter.INT2)}, + + // Demonstrating the exact behavior of the starts_in/ends_in modes + {Arrays.asList(chr1Interval99), new ArrayList<>(), IntervalFilteringVcfWriter.Mode.STARTS_IN, List.of(VariantEmitter.INT2)}, + {Arrays.asList(chr1Interval100), new ArrayList<>(), IntervalFilteringVcfWriter.Mode.STARTS_IN, List.of(VariantEmitter.INT2)}, //deletion where left base is at 100 + {Arrays.asList(chr1Interval101), new ArrayList<>(), IntervalFilteringVcfWriter.Mode.STARTS_IN, new ArrayList<>()}, + + // Demonstrating the behavior for starts/ends_in with an insertion + {Arrays.asList(chr3Interval99), new ArrayList<>(), IntervalFilteringVcfWriter.Mode.STARTS_IN, new ArrayList<>()}, + {Arrays.asList(chr3Interval100), new ArrayList<>(), IntervalFilteringVcfWriter.Mode.STARTS_IN, List.of(VariantEmitter.INT9_INS)}, + {Arrays.asList(chr3Interval101), new ArrayList<>(), IntervalFilteringVcfWriter.Mode.STARTS_IN, new ArrayList<>()}, + {Arrays.asList(chr3Interval99), new ArrayList<>(), IntervalFilteringVcfWriter.Mode.ENDS_IN, new ArrayList<>()}, + {Arrays.asList(chr3Interval100), new ArrayList<>(), IntervalFilteringVcfWriter.Mode.ENDS_IN, List.of(VariantEmitter.INT9_INS)}, + {Arrays.asList(chr3Interval101), new ArrayList<>(), IntervalFilteringVcfWriter.Mode.ENDS_IN, new ArrayList<>()}, }; } @Test(dataProvider = "getIntervalsAndOverlapMode") - public void testVcfOutputFilterMode(List intervals, 
IntervalFilteringVcfWriter.Mode mode, List expected){ + public void testVcfOutputFilterMode(List intervals, List extraArguments, IntervalFilteringVcfWriter.Mode mode, List expected){ final ArgumentsBuilder args = new ArgumentsBuilder(); final File out = createTempFile("out", ".vcf"); args.addOutput(out); intervals.forEach(args::addInterval); args.addReference(b37Reference); + extraArguments.forEach(args::addRaw); if( mode != null) { args.add(StandardArgumentDefinitions.VARIANT_OUTPUT_INTERVAL_FILTERING_MODE_LONG_NAME, mode); } diff --git a/src/test/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java b/src/test/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java index 1e059b444b5..3a3f592bbcc 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java @@ -88,7 +88,7 @@ public void testVCFModeIsConsistentWithPastResults(final String inputFileName, f final String[] args = { "-I", inputFileName, "-R", referenceFileName, - "-L", "20:10000000-10100000", + "-L", "20:10097437-10097439","-L", "20:10097430-10097435", "-O", outputPath, "-pairHMM", "AVX_LOGLESS_CACHING", "--" + StandardArgumentDefinitions.ADD_OUTPUT_VCF_COMMANDLINE, "false" From edd6df29a622892f7317ebaf5b6e31069b355f68 Mon Sep 17 00:00:00 2001 From: James Date: Tue, 17 Sep 2024 16:22:56 -0400 Subject: [PATCH 7/9] fixing a hole where the rain gets in --- .../hellbender/engine/GATKTool.java | 11 ++++---- .../hellbender/engine/VariantWalker.java | 24 ------------------ .../hellbender/engine/VariantWalkerBase.java | 25 +++++++++++++++++++ .../writers/IntervalFilteringVcfWriter.java | 4 +-- .../engine/GatkToolIntegrationTest.java | 14 +++++------ 5 files changed, 39 insertions(+), 39 deletions(-) diff --git 
a/src/main/java/org/broadinstitute/hellbender/engine/GATKTool.java b/src/main/java/org/broadinstitute/hellbender/engine/GATKTool.java index 24906a9504b..3aff1d48b98 100644 --- a/src/main/java/org/broadinstitute/hellbender/engine/GATKTool.java +++ b/src/main/java/org/broadinstitute/hellbender/engine/GATKTool.java @@ -17,7 +17,6 @@ import java.util.stream.Stream; -import org.broadinstitute.barclay.argparser.Advanced; import org.broadinstitute.barclay.argparser.Argument; import org.broadinstitute.barclay.argparser.ArgumentCollection; import org.broadinstitute.barclay.argparser.CommandLineException; @@ -424,7 +423,7 @@ public int getDefaultCloudIndexPrefetchBufferSize() { /** * @return null for no output filtering of variants to the variant writer. Subclasses may override this to enforce other filtering schemes. */ - public IntervalFilteringVcfWriter.Mode getVariantFilteringOutputModeIfApplicable(){ + public IntervalFilteringVcfWriter.Mode getVariantOutputFilteringMode(){ return IntervalFilteringVcfWriter.Mode.ANYWHERE; } @@ -744,8 +743,8 @@ protected void onStartup() { checkToolRequirements(); - if ((getVariantFilteringOutputModeIfApplicable() != IntervalFilteringVcfWriter.Mode.ANYWHERE ) && userIntervals == null){ - throw new CommandLineException.MissingArgument("-L or -XL", "Intervals are required if --" + StandardArgumentDefinitions.VARIANT_OUTPUT_INTERVAL_FILTERING_MODE_LONG_NAME + " was specified."); + if ((getVariantOutputFilteringMode() != IntervalFilteringVcfWriter.Mode.ANYWHERE ) && userIntervals == null){ + throw new CommandLineException.MissingArgument("-L or -XL", "Intervals are required if --" + StandardArgumentDefinitions.VARIANT_OUTPUT_INTERVAL_FILTERING_MODE_LONG_NAME + " was specified or if the tool uses interval filtering."); } initializeProgressMeter(getProgressMeterRecordLabel()); @@ -942,11 +941,11 @@ public VariantContextWriter createVCFWriter(final Path outPath) { options.toArray(new Options[0])); } - return 
getVariantFilteringOutputModeIfApplicable() == IntervalFilteringVcfWriter.Mode.ANYWHERE ? + return getVariantOutputFilteringMode() == IntervalFilteringVcfWriter.Mode.ANYWHERE ? unfilteredWriter : new IntervalFilteringVcfWriter(unfilteredWriter, intervalArgumentCollection.getIntervals(getBestAvailableSequenceDictionary()), - getVariantFilteringOutputModeIfApplicable()); + getVariantOutputFilteringMode()); } /** diff --git a/src/main/java/org/broadinstitute/hellbender/engine/VariantWalker.java b/src/main/java/org/broadinstitute/hellbender/engine/VariantWalker.java index 9fe8369ab41..678136d7a68 100644 --- a/src/main/java/org/broadinstitute/hellbender/engine/VariantWalker.java +++ b/src/main/java/org/broadinstitute/hellbender/engine/VariantWalker.java @@ -3,13 +3,11 @@ import htsjdk.samtools.SAMSequenceDictionary; import htsjdk.variant.variantcontext.VariantContext; import htsjdk.variant.vcf.VCFHeader; -import org.broadinstitute.barclay.argparser.Advanced; import org.broadinstitute.barclay.argparser.Argument; import org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions; import org.broadinstitute.hellbender.engine.filters.CountingReadFilter; import org.broadinstitute.hellbender.exceptions.GATKException; import org.broadinstitute.hellbender.utils.SimpleInterval; -import org.broadinstitute.hellbender.utils.variant.writers.IntervalFilteringVcfWriter; import java.util.Spliterator; @@ -33,28 +31,6 @@ public abstract class VariantWalker extends VariantWalkerBase { private FeatureDataSource drivingVariants; private FeatureInput drivingVariantsFeatureInput; - @Argument(fullName = StandardArgumentDefinitions.VARIANT_OUTPUT_INTERVAL_FILTERING_MODE_LONG_NAME, - doc = "Restrict the output variants to ones that match the specified intervals according to the specified matching mode.", - optional = true) - @Advanced - public IntervalFilteringVcfWriter.Mode outputVariantIntervalFilteringMode = getDefaultVariantOutputFilterMode(); - - /** - * @return Default interval 
filtering mode for variant output. Subclasses may override this to set a different default. - */ - public IntervalFilteringVcfWriter.Mode getDefaultVariantOutputFilterMode(){ - return IntervalFilteringVcfWriter.Mode.ANYWHERE; - } - - @Override - public IntervalFilteringVcfWriter.Mode getVariantFilteringOutputModeIfApplicable() { - if (outputVariantIntervalFilteringMode != null) { - return outputVariantIntervalFilteringMode; - } else { - return super.getVariantFilteringOutputModeIfApplicable(); - } - } - @Override protected SAMSequenceDictionary getSequenceDictionaryForDrivingVariants() { return drivingVariants.getSequenceDictionary(); } diff --git a/src/main/java/org/broadinstitute/hellbender/engine/VariantWalkerBase.java b/src/main/java/org/broadinstitute/hellbender/engine/VariantWalkerBase.java index 08ae9ade223..78d1c2d4d7b 100644 --- a/src/main/java/org/broadinstitute/hellbender/engine/VariantWalkerBase.java +++ b/src/main/java/org/broadinstitute/hellbender/engine/VariantWalkerBase.java @@ -3,12 +3,16 @@ import htsjdk.samtools.SAMSequenceDictionary; import htsjdk.variant.variantcontext.VariantContext; import htsjdk.variant.vcf.VCFHeader; +import org.broadinstitute.barclay.argparser.Advanced; +import org.broadinstitute.barclay.argparser.Argument; +import org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions; import org.broadinstitute.hellbender.engine.filters.CountingVariantFilter; import org.broadinstitute.hellbender.engine.filters.VariantFilter; import org.broadinstitute.hellbender.engine.filters.VariantFilterLibrary; import org.broadinstitute.hellbender.tools.genomicsdb.GenomicsDBOptions; import org.broadinstitute.hellbender.transformers.VariantTransformer; import org.broadinstitute.hellbender.utils.IndexUtils; +import org.broadinstitute.hellbender.utils.variant.writers.IntervalFilteringVcfWriter; import java.util.Spliterator; import java.util.stream.Stream; @@ -33,6 +37,11 @@ public abstract class VariantWalkerBase extends WalkerBase { * queries 
on the driving variants). */ public static final int DEFAULT_DRIVING_VARIANTS_LOOKAHEAD_BASES = 100_000; + @Argument(fullName = StandardArgumentDefinitions.VARIANT_OUTPUT_INTERVAL_FILTERING_MODE_LONG_NAME, + doc = "Restrict the output variants to ones that match the specified intervals according to the specified matching mode.", + optional = true) + @Advanced + public IntervalFilteringVcfWriter.Mode userOutputVariantIntervalFilteringMode = null; //Various options for reading from a GenomicsDB protected GenomicsDBOptions genomicsDBOptions; @@ -103,6 +112,22 @@ public SAMSequenceDictionary getBestAvailableSequenceDictionary() { */ public abstract VCFHeader getHeaderForVariants(); + /** + * @return Default interval filtering mode for variant output. Subclasses may override this to set a different default. + */ + public IntervalFilteringVcfWriter.Mode getDefaultVariantOutputFilterMode(){ + return IntervalFilteringVcfWriter.Mode.ANYWHERE; + } + + @Override + public IntervalFilteringVcfWriter.Mode getVariantOutputFilteringMode() { + if (userOutputVariantIntervalFilteringMode != null) { + return userOutputVariantIntervalFilteringMode; + } else { + return super.getVariantOutputFilteringMode(); + } + } + /** * Return the primary sequence dictionary to be used for the driving variants for this tool. 
The value returned * will usually have been prepared in {@link #initializeDrivingVariants} diff --git a/src/main/java/org/broadinstitute/hellbender/utils/variant/writers/IntervalFilteringVcfWriter.java b/src/main/java/org/broadinstitute/hellbender/utils/variant/writers/IntervalFilteringVcfWriter.java index e0f0d1e2ac5..6f23dbc8f72 100644 --- a/src/main/java/org/broadinstitute/hellbender/utils/variant/writers/IntervalFilteringVcfWriter.java +++ b/src/main/java/org/broadinstitute/hellbender/utils/variant/writers/IntervalFilteringVcfWriter.java @@ -65,7 +65,7 @@ boolean test(final OverlapDetector detector, final VariantC /** * Matches if the entirety of the query is contained within one of the intervals. Note that adjacent intervals - * may be merged into a single interval depending specified "--interval-merging-rule". + * may be merged into a single interval depending on the specified "--interval-merging-rule". */ CONTAINED("contained completely within a contiguous block of intervals without overlap") { @Override @@ -148,10 +148,10 @@ public void setHeader(final VCFHeader header) { @Override public void close() { - underlyingWriter.close(); if (filteredCount > 0) { logger.info("Removed " + filteredCount + " variants from the output according to '"+mode.getName()+"' variant interval filtering rule."); } + underlyingWriter.close(); } @Override diff --git a/src/test/java/org/broadinstitute/hellbender/engine/GatkToolIntegrationTest.java b/src/test/java/org/broadinstitute/hellbender/engine/GatkToolIntegrationTest.java index ff453b89a28..5a3e90d2bf3 100644 --- a/src/test/java/org/broadinstitute/hellbender/engine/GatkToolIntegrationTest.java +++ b/src/test/java/org/broadinstitute/hellbender/engine/GatkToolIntegrationTest.java @@ -1,6 +1,5 @@ package org.broadinstitute.hellbender.engine; -import com.google.errorprone.annotations.Var; import htsjdk.samtools.reference.ReferenceSequenceFileFactory; import htsjdk.samtools.util.Locatable; import 
htsjdk.samtools.util.FileExtensions; @@ -16,6 +15,7 @@ import org.broadinstitute.hellbender.CommandLineProgramTest; import org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions; import org.broadinstitute.hellbender.cmdline.TestProgramGroup; +import org.broadinstitute.hellbender.cmdline.argumentcollections.IntervalArgumentCollection; import org.broadinstitute.hellbender.testutils.ArgumentsBuilder; import org.broadinstitute.hellbender.testutils.BaseTest; import org.broadinstitute.hellbender.testutils.VariantContextTestUtils; @@ -137,11 +137,11 @@ public void traverse() { public IntervalFilteringVcfWriter.Mode outputVariantIntervalFilteringMode = IntervalFilteringVcfWriter.Mode.ANYWHERE; @Override - public IntervalFilteringVcfWriter.Mode getVariantFilteringOutputModeIfApplicable() { + public IntervalFilteringVcfWriter.Mode getVariantOutputFilteringMode() { if (outputVariantIntervalFilteringMode != null) { return outputVariantIntervalFilteringMode; } else { - return super.getVariantFilteringOutputModeIfApplicable(); + return super.getVariantOutputFilteringMode(); } } @@ -188,10 +188,10 @@ public Object[][] getIntervalsAndOverlapMode(){ // Tests specifically aimed at documenting how the --interval-merging-rule argument works in conjunction with the interval filtering {Arrays.asList(chr1IntervalLeft, chr1IntervalRight), new ArrayList<>(), IntervalFilteringVcfWriter.Mode.CONTAINED, List.of(VariantEmitter.INT2)}, // Default is to merge all - {Arrays.asList(chr1IntervalLeft, chr1IntervalRight), Arrays.asList("--interval-merging-rule", "ALL"), IntervalFilteringVcfWriter.Mode.CONTAINED, List.of(VariantEmitter.INT2)}, - {Arrays.asList(chr1IntervalLeft, chr1IntervalRight), Arrays.asList("--interval-merging-rule", "OVERLAPPING_ONLY"), IntervalFilteringVcfWriter.Mode.CONTAINED, new ArrayList<>()}, - {Arrays.asList(chr1IntervalLeft, chr1IntervalNonAbutting), Arrays.asList("--interval-merging-rule", "OVERLAPPING_ONLY"), IntervalFilteringVcfWriter.Mode.CONTAINED, new 
ArrayList<>()}, - {Arrays.asList(chr1IntervalLeft, chr1IntervalNonAbutting), Arrays.asList("--interval-merging-rule", "OVERLAPPING_ONLY", "--interval-padding", "10"), IntervalFilteringVcfWriter.Mode.CONTAINED, List.of(VariantEmitter.INT2)}, + {Arrays.asList(chr1IntervalLeft, chr1IntervalRight), Arrays.asList("--"+IntervalArgumentCollection.INTERVAL_MERGING_RULE_LONG_NAME, "ALL"), IntervalFilteringVcfWriter.Mode.CONTAINED, List.of(VariantEmitter.INT2)}, + {Arrays.asList(chr1IntervalLeft, chr1IntervalRight), Arrays.asList("--"+IntervalArgumentCollection.INTERVAL_MERGING_RULE_LONG_NAME, "OVERLAPPING_ONLY"), IntervalFilteringVcfWriter.Mode.CONTAINED, new ArrayList<>()}, + {Arrays.asList(chr1IntervalLeft, chr1IntervalNonAbutting), Arrays.asList("--"+IntervalArgumentCollection.INTERVAL_MERGING_RULE_LONG_NAME, "OVERLAPPING_ONLY"), IntervalFilteringVcfWriter.Mode.CONTAINED, new ArrayList<>()}, + {Arrays.asList(chr1IntervalLeft, chr1IntervalNonAbutting), Arrays.asList("--"+IntervalArgumentCollection.INTERVAL_MERGING_RULE_LONG_NAME, "OVERLAPPING_ONLY", "--interval-padding", "10"), IntervalFilteringVcfWriter.Mode.CONTAINED, List.of(VariantEmitter.INT2)}, // Demonstrating the exact behavior of the starts_in/ends_in modes {Arrays.asList(chr1Interval99), new ArrayList<>(), IntervalFilteringVcfWriter.Mode.STARTS_IN, List.of(VariantEmitter.INT2)}, From 2104c0e3147621adf36613082893089afc86fd75 Mon Sep 17 00:00:00 2001 From: James Date: Wed, 18 Sep 2024 11:19:30 -0400 Subject: [PATCH 8/9] forgot to revert a HC test for testing --- .../walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/test/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java b/src/test/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java index 3a3f592bbcc..1e059b444b5 100644 --- 
a/src/test/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java @@ -88,7 +88,7 @@ public void testVCFModeIsConsistentWithPastResults(final String inputFileName, f final String[] args = { "-I", inputFileName, "-R", referenceFileName, - "-L", "20:10097437-10097439","-L", "20:10097430-10097435", + "-L", "20:10000000-10100000", "-O", outputPath, "-pairHMM", "AVX_LOGLESS_CACHING", "--" + StandardArgumentDefinitions.ADD_OUTPUT_VCF_COMMANDLINE, "false" From fa1f0ee7333a65efd324ec69cb10d383435ce21d Mon Sep 17 00:00:00 2001 From: James Date: Wed, 18 Sep 2024 14:34:38 -0400 Subject: [PATCH 9/9] quick fix changes --- .../org/broadinstitute/hellbender/engine/GATKTool.java | 2 +- .../hellbender/engine/VariantWalkerBase.java | 8 +------- 2 files changed, 2 insertions(+), 8 deletions(-) diff --git a/src/main/java/org/broadinstitute/hellbender/engine/GATKTool.java b/src/main/java/org/broadinstitute/hellbender/engine/GATKTool.java index 3aff1d48b98..f4d529ebf4b 100644 --- a/src/main/java/org/broadinstitute/hellbender/engine/GATKTool.java +++ b/src/main/java/org/broadinstitute/hellbender/engine/GATKTool.java @@ -421,7 +421,7 @@ public int getDefaultCloudIndexPrefetchBufferSize() { public String getProgressMeterRecordLabel() { return ProgressMeter.DEFAULT_RECORD_LABEL; } /** - * @return null for no output filtering of variants to the variant writer. Subclasses may override this to enforce other filtering schemes. + * @return default value does no filtering. Override to change how variants are filtered against the intervals for your tools. 
*/ public IntervalFilteringVcfWriter.Mode getVariantOutputFilteringMode(){ return IntervalFilteringVcfWriter.Mode.ANYWHERE; diff --git a/src/main/java/org/broadinstitute/hellbender/engine/VariantWalkerBase.java b/src/main/java/org/broadinstitute/hellbender/engine/VariantWalkerBase.java index 78d1c2d4d7b..a80760ee37c 100644 --- a/src/main/java/org/broadinstitute/hellbender/engine/VariantWalkerBase.java +++ b/src/main/java/org/broadinstitute/hellbender/engine/VariantWalkerBase.java @@ -112,18 +112,12 @@ public SAMSequenceDictionary getBestAvailableSequenceDictionary() { */ public abstract VCFHeader getHeaderForVariants(); - /** - * @return Default interval filtering mode for variant output. Subclasses may override this to set a different default. - */ - public IntervalFilteringVcfWriter.Mode getDefaultVariantOutputFilterMode(){ - return IntervalFilteringVcfWriter.Mode.ANYWHERE; - } - @Override public IntervalFilteringVcfWriter.Mode getVariantOutputFilteringMode() { if (userOutputVariantIntervalFilteringMode != null) { return userOutputVariantIntervalFilteringMode; } else { + // Use whatever is the default provided by GATKTool return super.getVariantOutputFilteringMode(); } }