Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 6 additions & 6 deletions build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -127,12 +127,12 @@ tasks.register('testWithDefaultReference', Test) {
}
}

tasks.register('testWithOptimisticVCF4_4', Test) {
tasks.register('testWithOptimisticVCF4_x', Test) {
description = "Run tests with optimistic VCF 4.4 reading"
jvmArgs += '-Dsamjdk.optimistic_vcf_4_4=true'
jvmArgs += '-Dsamjdk.optimistic_vcf_4_x=true'

useTestNG {
includeGroups "optimistic_vcf_4_4"
includeGroups "optimistic_vcf_4_x"
}
}

Expand All @@ -141,12 +141,12 @@ test {

useTestNG {
if (OperatingSystem.current().isUnix()) {
excludeGroups "slow", "broken", "defaultReference", "optimistic_vcf_4_4", "ftp", "http", "sra", "ena"
excludeGroups "slow", "broken", "defaultReference", "optimistic_vcf_4_x", "ftp", "http", "sra", "ena"
} else {
excludeGroups "slow", "broken", "defaultReference", "optimistic_vcf_4_4", "ftp", "http", "sra", "ena", "unix"
excludeGroups "slow", "broken", "defaultReference", "optimistic_vcf_4_x", "ftp", "http", "sra", "ena", "unix"
}
}
} dependsOn testWithDefaultReference, testWithOptimisticVCF4_4
} dependsOn testWithDefaultReference, testWithOptimisticVCF4_x


tasks.register('testFTP', Test) {
Expand Down
8 changes: 4 additions & 4 deletions src/main/java/htsjdk/samtools/Defaults.java
Original file line number Diff line number Diff line change
Expand Up @@ -59,8 +59,8 @@ public class Defaults {
*/
public static final String DEFAULT_VCF_EXTENSION;

// accept VCF 4.4 files for read, but treat them as VCF 4.3
public static final boolean OPTIMISTIC_VCF_4_4;
// accept VCF 4.* files for read, but treat them as VCF 4.3
public static final boolean OPTIMISTIC_VCF_4_x;

/**
* Even if BUFFER_SIZE is 0, this is guaranteed to be non-zero. If BUFFER_SIZE is non-zero,
Expand Down Expand Up @@ -108,7 +108,7 @@ public class Defaults {
*/
public static final String DISABLE_SNAPPY_PROPERTY_NAME = "snappy.disable";

public static final String OPTIMISTIC_VCF_4_4_PROPERTY = "optimistic_vcf_4_4";
public static final String OPTIMISTIC_VCF_4_x_PROPERTY = "optimistic_vcf_4_x";


/**
Expand Down Expand Up @@ -139,7 +139,7 @@ public class Defaults {
SAM_FLAG_FIELD_FORMAT = SamFlagField.valueOf(getStringProperty("sam_flag_field_format", SamFlagField.DECIMAL.name()));
SRA_LIBRARIES_DOWNLOAD = getBooleanProperty("sra_libraries_download", false);
DISABLE_SNAPPY_COMPRESSOR = getBooleanProperty(DISABLE_SNAPPY_PROPERTY_NAME, false);
OPTIMISTIC_VCF_4_4 = getBooleanProperty(OPTIMISTIC_VCF_4_4_PROPERTY, false);
OPTIMISTIC_VCF_4_x = getBooleanProperty(OPTIMISTIC_VCF_4_x_PROPERTY, false);
}

/**
Expand Down
7 changes: 4 additions & 3 deletions src/main/java/htsjdk/variant/vcf/VCFCodec.java
Original file line number Diff line number Diff line change
Expand Up @@ -100,12 +100,13 @@ public Object readActualHeader(final LineIterator lineIterator) {
throw new TribbleException.InvalidHeader(lineFields[1] + " is not a supported version");
foundHeaderVersion = true;
version = VCFHeaderVersion.toHeaderVersion(lineFields[1]);
if (Defaults.OPTIMISTIC_VCF_4_4 == true && version == VCFHeaderVersion.VCF4_4 ) {
if (Defaults.OPTIMISTIC_VCF_4_x == true && (version == VCFHeaderVersion.VCF4_4 || version == VCFHeaderVersion.VCF4_5)) {
// if optimistic VCFv4.4 is enabled, accept VCFv4.4 as input, but treat it as VCFv4.3, and hope for the best
log.warn("********** VCFv4.4 is not yet fully supported - processing VCFv4.4 input as VCFv4.3! **********");
log.warn("********** versions >=VCFv4.4 are not yet fully supported - processing >=VCFv4.4 input as VCFv4.3! **********");
version = VCFHeaderVersion.VCF4_3;
} else if ( version != VCFHeaderVersion.VCF4_0 && version != VCFHeaderVersion.VCF4_1 && version != VCFHeaderVersion.VCF4_2 && version != VCFHeaderVersion.VCF4_3)
throw new TribbleException.InvalidHeader("This codec is strictly for VCFv4 and does not support " + lineFields[1]);
throw new TribbleException.InvalidHeader("This codec is strictly for VCFv4 and does not support " + lineFields[1]+" . "
+ "For Versions >= VCF4.4 you can try to set the java property -D" + Defaults.SAMJDK_PREFIX + Defaults.OPTIMISTIC_VCF_4_x_PROPERTY + "=true");
}
headerStrings.add(lineIterator.next());
}
Expand Down
7 changes: 4 additions & 3 deletions src/main/java/htsjdk/variant/vcf/VCFHeaderVersion.java
Original file line number Diff line number Diff line change
Expand Up @@ -38,9 +38,10 @@ public enum VCFHeaderVersion {
VCF4_1("VCFv4.1", "fileformat"),
VCF4_2("VCFv4.2", "fileformat"),
VCF4_3("VCFv4.3", "fileformat"),
// VCFv4.4 is not fully supported yet, but we need this to support optimistic reading when
// the "optimistic_vcf_4_4" property is set
VCF4_4("VCFv4.4", "fileformat");
// VCFv4.4 and VCFv4.5 are not fully supported yet, but we need this to support optimistic reading when
// the "optimistic_vcf_4_x" property is set
VCF4_4("VCFv4.4", "fileformat"),
VCF4_5("VCFv4.5", "fileformat");

private final String versionString;
private final String formatString;
Expand Down
13 changes: 11 additions & 2 deletions src/test/java/htsjdk/tribble/AbstractFeatureReaderTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ public void testBlockCompressionExtension(final String testURIString, final bool
public void testBlockCompressionExtensionStringVersion(final String testURIString, final boolean expected) {
Assert.assertEquals(AbstractFeatureReader.hasBlockCompressedExtension(testURIString), expected);
}
@Test(groups = "optimistic_vcf_4_4")
@Test(groups = "optimistic_vcf_4_x")
public void testVCF4_4Optimistic() {
final AbstractFeatureReader<VariantContext, ?> fr = AbstractFeatureReader.getFeatureReader(
Paths.get("src/test/resources/htsjdk/variant/", "VCF4_4HeaderTest.vcf").toString(),
Expand All @@ -109,7 +109,16 @@ public void testVCF4_4Optimistic() {
final VCFHeader vcfHeader = (VCFHeader) fr.getHeader();
Assert.assertEquals(vcfHeader.getVCFHeaderVersion(), VCFHeaderVersion.VCF4_3);
}

@Test(groups = "optimistic_vcf_4_x")
public void testVCF4_5Optimistic() {
final AbstractFeatureReader<VariantContext, ?> fr = AbstractFeatureReader.getFeatureReader(
Paths.get("src/test/resources/htsjdk/variant/", "VCF4_5HeaderTest.vcf").toString(),
new VCFCodec(),
false);
final VCFHeader vcfHeader = (VCFHeader) fr.getHeader();
Assert.assertEquals(vcfHeader.getVCFHeaderVersion(), VCFHeaderVersion.VCF4_3);
}

@DataProvider(name = "vcfFileAndWrapperCombinations")
private static Object[][] vcfFileAndWrapperCombinations(){
return new Object[][] {
Expand Down
42 changes: 42 additions & 0 deletions src/test/resources/htsjdk/variant/VCF4_5HeaderTest.vcf
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
##fileformat=VCFv4.5
##FILTER=<ID=ABFilter,Description="AB 0.75 && DP 40">
##FILTER=<ID=DPFilter,Description="DP 120 || SB -0.10">
##FILTER=<ID=FDRtranche0.00to0.10,Description="FDR tranche level at qual 0.06">
##FILTER=<ID=FDRtranche0.10to1.00,Description="FDR tranche level at qual 0.03">
##FILTER=<ID=FDRtranche1.00to2.00,Description="FDR tranche level at qual 0.02">
##FILTER=<ID=FDRtranche2.00to10.00+,Description="FDR tranche level at qual > 0.06">
##FILTER=<ID=FDRtranche2.00to10.00,Description="FDR tranche level at qual unknown">
##FILTER=<ID=HARD_TO_VALIDATE,Description="MQ0 = 4 && ((MQ0 / (1.0 * DP)) 0.1)">
##FILTER=<ID=Indel,Description="Overlaps a user-input mask">
##FILTER=<ID=LowQual,Description="Low quality">
##FILTER=<ID=LowQual,Description="QUAL 50.0">
##FILTER=<ID=ANNOTATION,Description="ANNOTATION != \"NA\" || ANNOTATION <= 0.01">
##FILTER=<ID=ANNOTATION2,Description="ANNOTATION with quote \" that is unmatched but escaped">
##FILTER=<ID=SnpCluster,Description="SNPs found in clusters">
##FORMAT=<ID=AD,Number=.,Type=Integer,Description="Allelic depths for the ref and alt alleles in the order listed">
##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth (only filtered reads used for calling)">
##FORMAT=<ID=GL,Number=3,Type=Float,Description="Log-scaled likelihoods for AA,AB,BB genotypes where A=ref and B=alt; not applicable if site is not biallelic">
##FORMAT=<ID=GQ,Number=1,Type=Float,Description="Genotype Quality">
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
##INFO=<ID=AC,Number=.,Type=Integer,Description="Allele count in genotypes, for each ALT allele, in the same order as listed">
##INFO=<ID=AF,Number=.,Type=Float,Description="Allele Frequency, for each ALT allele, in the same order as listed">
##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes">
##INFO=<ID=DB,Number=0,Type=Flag,Description="dbSNP Membership">
##INFO=<ID=DP,Number=1,Type=Integer,Description="Total Depth">
##INFO=<ID=Dels,Number=1,Type=Float,Description="Fraction of Reads Containing Spanning Deletions">
##INFO=<ID=HRun,Number=1,Type=Integer,Description="Largest Contiguous Homopolymer Run of Variant Allele In Either Direction">
##INFO=<ID=HaplotypeScore,Number=1,Type=Float,Description="Consistency of the site with two (and only two) segregating haplotypes">
##INFO=<ID=EscapingQuote,Number=1,Type=Float,Description="This description has an escaped \" quote in it">
##INFO=<ID=EscapingBackslash,Number=1,Type=Float,Description="This description has an escaped \\ backslash in it">
##INFO=<ID=EscapingNonQuoteOrBackslash,Number=1,Type=Float,Description="This other value has a \n newline in it">
##INFO=<ID=MQ,Number=1,Type=Float,Description="RMS Mapping Quality">
##INFO=<ID=MQ0,Number=1,Type=Integer,Description="Total Mapping Quality Zero Reads">
##INFO=<ID=OQ,Number=1,Type=Float,Description="The original variant quality score">
##INFO=<ID=QD,Number=1,Type=Float,Description="Variant Confidence/Quality by Depth">
##INFO=<ID=SB,Number=1,Type=Float,Description="Strand Bias">
##UnifiedGenotyper="analysis_type=UnifiedGenotyper input_file=[/humgen/1kg/analysis/bamsForDataProcessingPapers/scriptsToMakeBams/Q-2970@gsa2-1-temp-23/NA12878.HiSeq.WGS.bwa.cleaned.recal.bam, /humgen/1kg/analysis/bamsForDataProcessingPapers/scriptsToMakeBams/Q-2970@gsa2-1-temp-24/NA12878.HiSeq.WGS.bwa.cleaned.recal.bam, /humgen/1kg/analysis/bamsForDataProcessingPapers/scriptsToMakeBams/Q-2970@gsa2-1-temp-5/NA12878.HiSeq.WGS.bwa.cleaned.recal.bam, /humgen/1kg/analysis/bamsForDataProcessingPapers/scriptsToMakeBams/Q-2970@gsa2-1-temp-9/NA12878.HiSeq.WGS.bwa.cleaned.recal.bam, /humgen/1kg/analysis/bamsForDataProcessingPapers/scriptsToMakeBams/Q-2970@gsa2-1-temp-6/NA12878.HiSeq.WGS.bwa.cleaned.recal.bam, /humgen/1kg/analysis/bamsForDataProcessingPapers/scriptsToMakeBams/Q-2970@gsa2-1-temp-19/NA12878.HiSeq.WGS.bwa.cleaned.recal.bam, /humgen/1kg/analysis/bamsForDataProcessingPapers/scriptsToMakeBams/Q-2970@gsa2-1-temp-25/NA12878.HiSeq.WGS.bwa.cleaned.recal.bam, /humgen/1kg/analysis/bamsForDataProcessingPapers/scriptsToMakeBams/Q-2970@gsa2-1-temp-4/NA12878.HiSeq.WGS.bwa.cleaned.recal.bam, /humgen/1kg/analysis/bamsForDataProcessingPapers/scriptsToMakeBams/Q-2970@gsa2-1-temp-14/NA12878.HiSeq.WGS.bwa.cleaned.recal.bam, /humgen/1kg/analysis/bamsForDataProcessingPapers/scriptsToMakeBams/Q-2970@gsa2-1-temp-22/NA12878.HiSeq.WGS.bwa.cleaned.recal.bam, /humgen/1kg/analysis/bamsForDataProcessingPapers/scriptsToMakeBams/Q-2970@gsa2-1-temp-2/NA12878.HiSeq.WGS.bwa.cleaned.recal.bam, /humgen/1kg/analysis/bamsForDataProcessingPapers/scriptsToMakeBams/Q-2970@gsa2-1-temp-3/NA12878.HiSeq.WGS.bwa.cleaned.recal.bam, /humgen/1kg/analysis/bamsForDataProcessingPapers/scriptsToMakeBams/Q-2970@gsa2-1-temp-7/NA12878.HiSeq.WGS.bwa.cleaned.recal.bam, /humgen/1kg/analysis/bamsForDataProcessingPapers/scriptsToMakeBams/Q-2970@gsa2-1-temp-16/NA12878.HiSeq.WGS.bwa.cleaned.recal.bam, /humgen/1kg/analysis/bamsForDataProcessingPapers/scriptsToMakeBams/Q-2970@gsa2-1-temp-1/NA12878.HiSeq.WGS.bwa.cleaned.recal.bam, /humgen/1kg/analysis/bamsForDataProcessingPapers/scriptsToMakeBams/Q-2970@gsa2-1-temp-17/NA12878.HiSeq.WGS.bwa.cleaned.recal.bam, /humgen/1kg/analysis/bamsForDataProcessingPapers/scriptsToMakeBams/Q-2970@gsa2-1-temp-8/NA12878.HiSeq.WGS.bwa.cleaned.recal.bam, /humgen/1kg/analysis/bamsForDataProcessingPapers/scriptsToMakeBams/Q-2970@gsa2-1-temp-10/NA12878.HiSeq.WGS.bwa.cleaned.recal.bam, /humgen/1kg/analysis/bamsForDataProcessingPapers/scriptsToMakeBams/Q-2970@gsa2-1-temp-18/NA12878.HiSeq.WGS.bwa.cleaned.recal.bam, /humgen/1kg/analysis/bamsForDataProcessingPapers/scriptsToMakeBams/Q-2970@gsa2-1-temp-20/NA12878.HiSeq.WGS.bwa.cleaned.recal.bam, /humgen/1kg/analysis/bamsForDataProcessingPapers/scriptsToMakeBams/Q-2970@gsa2-1-temp-11/NA12878.HiSeq.WGS.bwa.cleaned.recal.bam, /humgen/1kg/analysis/bamsForDataProcessingPapers/scriptsToMakeBams/Q-2970@gsa2-1-temp-15/NA12878.HiSeq.WGS.bwa.cleaned.recal.bam, /humgen/1kg/analysis/bamsForDataProcessingPapers/scriptsToMakeBams/Q-2970@gsa2-1-temp-21/NA12878.HiSeq.WGS.bwa.cleaned.recal.bam, /humgen/1kg/analysis/bamsForDataProcessingPapers/scriptsToMakeBams/Q-2970@gsa2-1-temp-12/NA12878.HiSeq.WGS.bwa.cleaned.recal.bam, /humgen/1kg/analysis/bamsForDataProcessingPapers/scriptsToMakeBams/Q-2970@gsa2-1-temp-13/NA12878.HiSeq.WGS.bwa.cleaned.recal.bam] read_buffer_size=null read_filter=[] intervals=[chrX] excludeIntervals=[chrM, chrY] reference_sequence=/seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta rodBind=[dbsnp,dbsnp,/humgen/gsa-scr1/GATK_Data/dbsnp_129_hg18.rod, interval,Intervals,chrX] rodToIntervalTrackName=null BTI_merge_rule=UNION DBSNP=/humgen/gsa-scr1/GATK_Data/dbsnp_129_hg18.rod hapmap=null hapmap_chip=null out=null err=null outerr=null filterZeroMappingQualityReads=false downsampling_type=NONE downsample_to_fraction=null downsample_to_coverage=null useOriginalQualities=false validation_strictness=SILENT unsafe=null max_reads_at_locus=10000 num_threads=1 interval_merging=ALL read_group_black_list=null genotype_model=JOINT_ESTIMATE base_model=EMPIRICAL heterozygosity=7.8E-4 genotype=false output_all_callable_bases=false standard_min_confidence_threshold_for_calling=30.0 standard_min_confidence_threshold_for_emitting=10.0 trigger_min_confidence_threshold_for_calling=30.0 trigger_min_confidence_threshold_for_emitting=30.0 noSLOD=false assume_single_sample_reads=null platform=null min_base_quality_score=20 min_mapping_quality_score=20 max_mismatches_in_40bp_window=3 use_reads_with_bad_mates=false max_deletion_fraction=0.05 cap_base_quality_by_mapping_quality=false"
##VariantFiltration="analysis_type=VariantFiltration input_file=[] read_buffer_size=null read_filter=[] intervals=null excludeIntervals=[chrM, chrY] reference_sequence=/seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta rodBind=[variant,VCF,wgs.v9/HiSeq.WGS.cleaned.ug.snpfiltered.vcf, mask,Bed,wgs.v9/HiSeq.WGS.cleaned.indels.10.mask] rodToIntervalTrackName=null BTI_merge_rule=UNION DBSNP=null hapmap=null hapmap_chip=null out=wgs.v9/HiSeq.WGS.cleaned.ug.snpfiltered.indelfiltered.vcf err=null outerr=null filterZeroMappingQualityReads=false downsampling_type=NONE downsample_to_fraction=null downsample_to_coverage=null useOriginalQualities=false validation_strictness=SILENT unsafe=null max_reads_at_locus=2147483647 num_threads=1 interval_merging=ALL read_group_black_list=null filterExpression=[] filterName=[] genotypeFilterExpression=[] genotypeFilterName=[] clusterSize=3 clusterWindowSize=0 maskName=Indel NO_HEADER=false"
##VariantFiltration="analysis_type=VariantFiltration input_file=[] read_buffer_size=null read_filter=[] intervals=null excludeIntervals=[chrM, chrY] reference_sequence=/seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta rodBind=[variant,VCF,wgs.v9/HiSeq.WGS.cleaned.ug.vcf] rodToIntervalTrackName=null BTI_merge_rule=UNION DBSNP=null hapmap=null hapmap_chip=null out=wgs.v9/HiSeq.WGS.cleaned.ug.snpfiltered.vcf err=null outerr=null filterZeroMappingQualityReads=false downsampling_type=NONE downsample_to_fraction=null downsample_to_coverage=null useOriginalQualities=false validation_strictness=SILENT unsafe=null max_reads_at_locus=2147483647 num_threads=1 interval_merging=ALL read_group_black_list=null filterExpression=[QUAL < 50.0, MQ0 >= 4 && ((MQ0 / (1.0 * DP)) > 0.1), AB > 0.75 && DP > 40, DP > 120 || SB > -0.10] filterName=[LowQual, HARD_TO_VALIDATE, ABFilter, DPFilter] genotypeFilterExpression=[] genotypeFilterName=[] clusterSize=3 clusterWindowSize=10 maskName=Mask NO_HEADER=false"
##source=VariantOptimizer
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA12878
chr1 109 . A T 0 FDRtranche2.00to10.00+ AC=1;AF=0.50;AN=2;DP=1019;Dels=0.00;HRun=0;HaplotypeScore=686.65;MQ=19.20;MQ0=288;OQ=2175.54;QD=2.13;SB=-1042.18 GT:AD:DP:GL:GQ 0/1:610,327:308:-316.30,-95.47,-803.03:99
Loading