-
Notifications
You must be signed in to change notification settings - Fork 590
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Adding new argument to control variant output interval filtering. #6388
Changes from 5 commits
231be5a
e96d439
578a951
62e36c5
cec1af1
27beb55
edd6df2
2104c0e
fa1f0ee
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -16,8 +16,11 @@ | |
import java.util.*; | ||
import java.util.stream.Stream; | ||
|
||
|
||
import org.broadinstitute.barclay.argparser.Advanced; | ||
import org.broadinstitute.barclay.argparser.Argument; | ||
import org.broadinstitute.barclay.argparser.ArgumentCollection; | ||
import org.broadinstitute.barclay.argparser.CommandLineException; | ||
import org.broadinstitute.barclay.argparser.CommandLinePluginDescriptor; | ||
import org.broadinstitute.hellbender.cmdline.CommandLineProgram; | ||
import org.broadinstitute.hellbender.cmdline.GATKPlugin.GATKAnnotationPluginDescriptor; | ||
|
@@ -45,6 +48,7 @@ | |
import org.broadinstitute.hellbender.utils.reference.ReferenceUtils; | ||
import org.broadinstitute.hellbender.utils.variant.GATKVariantContextUtils; | ||
import org.broadinstitute.hellbender.utils.variant.writers.ShardingVCFWriter; | ||
import org.broadinstitute.hellbender.utils.variant.writers.IntervalFilteringVcfWriter; | ||
|
||
/** | ||
* Base class for all GATK tools. Tool authors that want to write a "GATK" tool but not use one of | ||
|
@@ -127,6 +131,13 @@ public abstract class GATKTool extends CommandLineProgram { | |
doc = "If true, don't emit genotype fields when writing vcf file output.", optional = true) | ||
public boolean outputSitesOnlyVCFs = false; | ||
|
||
@Argument(fullName = StandardArgumentDefinitions.VARIANT_OUTPUT_INTERVAL_FILTERING_MODE_LONG_NAME, | ||
doc = "Restrict the output variants to ones that match the specified intervals according to the specified matching mode.", | ||
optional = true) | ||
@Advanced | ||
public IntervalFilteringVcfWriter.Mode outputVariantIntervalFilteringMode = getDefaultVariantOutputFilterMode(); | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Could this actually be moved into the VariantWalker codepaths so it's not getting applied to EVERY GATKTool, even those where its presence is going to cause confusion? |
||
|
||
|
||
/** | ||
* Master sequence dictionary to be used instead of all other dictionaries (if provided). | ||
*/ | ||
|
@@ -417,6 +428,13 @@ public int getDefaultCloudIndexPrefetchBufferSize() { | |
*/ | ||
public String getProgressMeterRecordLabel() { return ProgressMeter.DEFAULT_RECORD_LABEL; } | ||
|
||
/** | ||
* @return Default interval filtering mode for variant output. Subclasses may override this to set a different default. | ||
*/ | ||
public IntervalFilteringVcfWriter.Mode getDefaultVariantOutputFilterMode(){ | ||
return null; | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Should this default to "ANYWHERE" instead? |
||
} | ||
|
||
protected List<SimpleInterval> transformTraversalIntervals(final List<SimpleInterval> getIntervals, final SAMSequenceDictionary sequenceDictionary) { | ||
return getIntervals; | ||
} | ||
|
@@ -600,7 +618,7 @@ public boolean requiresIntervals() { | |
|
||
/** | ||
* Does this tool want to disable the progress meter? If so, override here to return true | ||
* | ||
* | ||
* @return true if this tool wants to disable progress meter output, otherwise false | ||
*/ | ||
public boolean disableProgressMeter() { | ||
|
@@ -727,12 +745,16 @@ protected void onStartup() { | |
|
||
initializeIntervals(); // Must be initialized after reference, reads and features, since intervals currently require a sequence dictionary from another data source | ||
|
||
if ( seqValidationArguments.performSequenceDictionaryValidation()) { | ||
if (seqValidationArguments.performSequenceDictionaryValidation()) { | ||
validateSequenceDictionaries(); | ||
} | ||
|
||
checkToolRequirements(); | ||
|
||
if (outputVariantIntervalFilteringMode != null && userIntervals == null){ | ||
throw new CommandLineException.MissingArgument("-L or -XL", "Intervals are required if --" + StandardArgumentDefinitions.VARIANT_OUTPUT_INTERVAL_FILTERING_MODE_LONG_NAME + " was specified."); | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. If a tool overrides the default here, it's going to emit this message when the user doesn't provide intervals at all, which I think makes the message confusing. We should probably disambiguate the default and the user-supplied value here. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This is a good point. I'm not sure it's addressed by your changes though? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Well, pulling this down into the variant walker base makes this a little more obnoxious to fix. I'm just going to add a comment clarifying, but this is a head-scratcher. |
||
} | ||
|
||
initializeProgressMeter(getProgressMeterRecordLabel()); | ||
} | ||
|
||
|
@@ -911,20 +933,27 @@ public VariantContextWriter createVCFWriter(final Path outPath) { | |
if (outputSitesOnlyVCFs) { | ||
options.add(Options.DO_NOT_WRITE_GENOTYPES); | ||
} | ||
|
||
final VariantContextWriter unfilteredWriter; | ||
if (maxVariantsPerShard > 0) { | ||
return new ShardingVCFWriter( | ||
unfilteredWriter = new ShardingVCFWriter( | ||
outPath, | ||
maxVariantsPerShard, | ||
sequenceDictionary, | ||
createOutputVariantMD5, | ||
options.toArray(new Options[options.size()])); | ||
options.toArray(new Options[0])); | ||
} else { | ||
unfilteredWriter = GATKVariantContextUtils.createVCFWriter( | ||
jamesemery marked this conversation as resolved.
Show resolved
Hide resolved
|
||
outPath, | ||
sequenceDictionary, | ||
createOutputVariantMD5, | ||
options.toArray(new Options[0])); | ||
} | ||
return GATKVariantContextUtils.createVCFWriter( | ||
outPath, | ||
sequenceDictionary, | ||
createOutputVariantMD5, | ||
options.toArray(new Options[options.size()])); | ||
|
||
return outputVariantIntervalFilteringMode== null ? | ||
unfilteredWriter : | ||
new IntervalFilteringVcfWriter(unfilteredWriter, | ||
intervalArgumentCollection.getIntervals(getBestAvailableSequenceDictionary()), | ||
outputVariantIntervalFilteringMode); | ||
} | ||
|
||
/** | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This name is a mess, does anyone have suggestions?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Perhaps just "variant-interval-filtering"?