From baa0dd0048e3a5afc426176e366d8fe526d84c01 Mon Sep 17 00:00:00 2001 From: meganshand Date: Wed, 26 Jun 2024 13:34:40 -0400 Subject: [PATCH] Remove header lines in ReblockGVCFs when we remove FORMAT annotations (#8895) * remove header lines when removing annotation in Reblocking * clean up --- .../tools/walkers/variantutils/ReblockGVCF.java | 14 ++++++++------ .../variantutils/ReblockGVCFIntegrationTest.java | 4 +++- 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/variantutils/ReblockGVCF.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/variantutils/ReblockGVCF.java index 62d417bc4ca..ef416e726b7 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/variantutils/ReblockGVCF.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/variantutils/ReblockGVCF.java @@ -159,7 +159,7 @@ public final class ReblockGVCF extends MultiVariantWalker { @Advanced @Argument(fullName=ANNOTATIONS_TO_REMOVE_LONG_NAME, doc="FORMAT level annotations to remove from all genotypes in final GVCF.", optional = true) - private List annotationsToRemove = new ArrayList<>(); + private List formatAnnotationsToRemove = new ArrayList<>(); @Advanced @Argument(fullName=QUAL_APPROX_LONG_NAME, shortName=QUAL_APPROX_SHORT_NAME, doc="Add necessary INFO field annotation to perform QUAL approximation downstream; required for GnarlyGenotyper", optional = true) @@ -238,9 +238,6 @@ public void onTraversalStart() { + ", but the " + GATKVCFConstants.TREE_SCORE + " annotation is not present in the input GVCF."); } - List missingAnnotationsToRemove = annotationsToRemove.stream().filter(a -> inputHeader.getFormatHeaderLine(a)==null).toList(); - missingAnnotationsToRemove.forEach(a -> logger.warn("FORMAT level annotation " + a + ", which was requested to be removed by --" + ANNOTATIONS_TO_REMOVE_LONG_NAME + ", not found in input GVCF header.")); - final Set inputHeaders = inputHeader.getMetaDataInSortedOrder(); final Set headerLines = new HashSet<>(inputHeaders); @@ -249,6 +246,11 @@ public void onTraversalStart() { (vcfHeaderLine.getKey().equals("INFO")) && ((VCFInfoHeaderLine)vcfHeaderLine).getID().equals(GATKVCFConstants.RAW_RMS_MAPPING_QUALITY_DEPRECATED) || //remove old (maybe wrong type) and add new with deprecated note (vcfHeaderLine.getKey().equals("INFO")) && infoFieldAnnotationKeyNamesToRemove.contains(((VCFInfoHeaderLine)vcfHeaderLine).getID())); + List missingAnnotationsToRemove = formatAnnotationsToRemove.stream().filter(a -> inputHeader.getFormatHeaderLine(a)==null).toList(); + missingAnnotationsToRemove.forEach(a -> logger.warn("FORMAT level annotation " + a + ", which was requested to be removed by --" + ANNOTATIONS_TO_REMOVE_LONG_NAME + ", not found in input GVCF header.")); + headerLines.removeIf(vcfHeaderLine -> vcfHeaderLine.getKey().equals("FORMAT") && + formatAnnotationsToRemove.contains(((VCFFormatHeaderLine) vcfHeaderLine).getID())); + headerLines.addAll(getDefaultToolVCFHeaderLines()); genotypingEngine = createGenotypingEngine(new IndexedSampleList(getSamplesForVariants())); @@ -338,7 +340,7 @@ public void apply(VariantContext variant, ReadsContext reads, ReferenceContext r if (!variant.hasAllele(Allele.NON_REF_ALLELE)) { throw new UserException("Variant Context at " + variant.getContig() + ":" + variant.getStart() + " does not contain a allele. This tool is only intended for use with GVCFs."); } - VariantContext newVC = annotationsToRemove.size() > 0 ? removeVCFFormatAnnotations(variant) : variant; + VariantContext newVC = formatAnnotationsToRemove.size() > 0 ? removeVCFFormatAnnotations(variant) : variant; regenotypeVC(newVC); } @@ -356,7 +358,7 @@ private VariantContext removeVCFFormatAnnotations(final VariantContext vc) { return vc; } - for (String annotation : annotationsToRemove) { + for (String annotation : formatAnnotationsToRemove) { extendedAttributes.remove(annotation); } final Genotype newGenotype = new GenotypeBuilder(genotype).noAttributes().attributes(extendedAttributes).make(); diff --git a/src/test/java/org/broadinstitute/hellbender/tools/walkers/variantutils/ReblockGVCFIntegrationTest.java b/src/test/java/org/broadinstitute/hellbender/tools/walkers/variantutils/ReblockGVCFIntegrationTest.java index 7d8885044b4..7d7164a86da 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/walkers/variantutils/ReblockGVCFIntegrationTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/walkers/variantutils/ReblockGVCFIntegrationTest.java @@ -619,7 +619,9 @@ public void testRemovingFormatAnnotations() { .addOutput(output); runCommandLine(args); - final List outVCs = VariantContextTestUtils.readEntireVCFIntoMemory(output.getAbsolutePath()).getRight(); + final Pair> outVCF = VariantContextTestUtils.readEntireVCFIntoMemory(output.getAbsolutePath()); + Assert.assertNull(outVCF.getLeft().getFormatHeaderLine(priKey)); + final List outVCs = outVCF.getRight(); for(VariantContext vc : outVCs){ Assert.assertNull(vc.getGenotype(0).getExtendedAttribute(priKey)); }