From 92dc4ae8c216b372fe9086fe9bfb129077890a35 Mon Sep 17 00:00:00 2001 From: jamesemery Date: Fri, 28 Jun 2024 10:17:19 -0400 Subject: [PATCH] Inverted SoftClippedReadFilter to conform to the standard filtering logic (#8888) --- .../ReadFilterArgumentDefinitions.java | 6 +- .../engine/filters/SoftClippedReadFilter.java | 30 +++---- .../SoftClippedReadFilterUnitTest.java | 78 +++++++++---------- 3 files changed, 48 insertions(+), 66 deletions(-) diff --git a/src/main/java/org/broadinstitute/hellbender/cmdline/ReadFilterArgumentDefinitions.java b/src/main/java/org/broadinstitute/hellbender/cmdline/ReadFilterArgumentDefinitions.java index 9b8d016e14d..d873e545e62 100644 --- a/src/main/java/org/broadinstitute/hellbender/cmdline/ReadFilterArgumentDefinitions.java +++ b/src/main/java/org/broadinstitute/hellbender/cmdline/ReadFilterArgumentDefinitions.java @@ -53,10 +53,8 @@ private ReadFilterArgumentDefinitions(){} public static final String KEEP_INTERVAL_NAME = "keep-intervals"; - public static final String SOFT_CLIPPED_RATIO_THRESHOLD = "soft-clipped-ratio-threshold"; - public static final String SOFT_CLIPPED_LEADING_TRAILING_RATIO_THRESHOLD = "soft-clipped-leading-trailing-ratio"; - - public static final String INVERT_SOFT_CLIP_RATIO_FILTER = "invert-soft-clip-ratio-filter"; + public static final String SOFT_CLIPPED_RATIO_THRESHOLD = "max-soft-clipped-ratio"; + public static final String SOFT_CLIPPED_LEADING_TRAILING_RATIO_THRESHOLD = "max-soft-clipped-leading-trailing-ratio"; public static final String READ_FILTER_TAG = "read-filter-tag"; public static final String READ_FILTER_TAG_COMP = "read-filter-tag-comp"; diff --git a/src/main/java/org/broadinstitute/hellbender/engine/filters/SoftClippedReadFilter.java b/src/main/java/org/broadinstitute/hellbender/engine/filters/SoftClippedReadFilter.java index fa0421255fa..bc63df8e711 100644 --- a/src/main/java/org/broadinstitute/hellbender/engine/filters/SoftClippedReadFilter.java +++ 
b/src/main/java/org/broadinstitute/hellbender/engine/filters/SoftClippedReadFilter.java @@ -25,20 +25,13 @@ public final class SoftClippedReadFilter extends ReadFilter { static final long serialVersionUID = 1L; private final Logger logger = LogManager.getLogger(this.getClass()); - @VisibleForTesting - @Argument(fullName = ReadFilterArgumentDefinitions.INVERT_SOFT_CLIP_RATIO_FILTER, - doc = "Inverts the results from this filter, causing all variants that would pass to fail and visa-versa.", - optional = true - ) - boolean doInvertFilter = false; - @VisibleForTesting @Argument(fullName = ReadFilterArgumentDefinitions.SOFT_CLIPPED_RATIO_THRESHOLD, doc = "Threshold ratio of soft clipped bases (anywhere in the cigar string) to total bases in read for read to be filtered.", optional = true, mutex = { ReadFilterArgumentDefinitions.SOFT_CLIPPED_LEADING_TRAILING_RATIO_THRESHOLD } ) - Double minimumSoftClippedRatio = null; + Double maximumSoftClippedRatio = null; @VisibleForTesting @Argument(fullName = ReadFilterArgumentDefinitions.SOFT_CLIPPED_LEADING_TRAILING_RATIO_THRESHOLD, @@ -46,7 +39,7 @@ public final class SoftClippedReadFilter extends ReadFilter { optional = true, mutex = {ReadFilterArgumentDefinitions.SOFT_CLIPPED_RATIO_THRESHOLD} ) - Double minimumLeadingTrailingSoftClippedRatio = null; + Double maximumLeadingTrailingSoftClippedRatio = null; // Command line parser requires a no-arg constructor public SoftClippedReadFilter() {} @@ -61,15 +54,15 @@ private boolean testMinSoftClippedRatio(final GATKRead read) { totalLength += element.getLength(); } - final double softClipRatio = ((double)numSoftClippedBases / (double)totalLength); + final double softClipRatio = totalLength != 0 ? 
((double)numSoftClippedBases / (double)totalLength) : 0.0; - return softClipRatio > minimumSoftClippedRatio; + return softClipRatio <= maximumSoftClippedRatio; } private boolean testMinLeadingTrailingSoftClippedRatio(final GATKRead read) { if ( read.getCigarElements().size() < 1 ) { - return false; + return true; // NOTE: in this edge case the read passes the filter, since with no cigar elements there can be no leading/trailing soft-clipping. } // Get the index of the last cigar element: @@ -90,12 +83,13 @@ private boolean testMinLeadingTrailingSoftClippedRatio(final GATKRead read) { .sum(); // Calculate the ratio: - final double softClipRatio = ((double)numLeadingTrailingSoftClippedBases / (double)totalLength); + final double softClipRatio = totalLength != 0 ? ((double)numLeadingTrailingSoftClippedBases / (double)totalLength) : 0.0; - return softClipRatio > minimumLeadingTrailingSoftClippedRatio; + return softClipRatio <= maximumLeadingTrailingSoftClippedRatio; } @Override + // NOTE: for read filters we always return true if the read passes the filter, and false if it doesn't. public boolean test(final GATKRead read) { final boolean result; @@ -103,11 +97,11 @@ public boolean test(final GATKRead read) { // NOTE: Since we have mutex'd the args for the clipping ratios, we only need to see if they // have been specified. If they have, that's the filter logic we're using. 
// If we specified the clipping ratio, we use the min sequence length test: - if ( minimumSoftClippedRatio != null ) { + if ( maximumSoftClippedRatio != null ) { result = testMinSoftClippedRatio(read); } // If we specified the leading/trailing clipping ratio, we use the min sequence length test: - else if ( minimumLeadingTrailingSoftClippedRatio != null ) { + else if ( maximumLeadingTrailingSoftClippedRatio != null ) { result = testMinLeadingTrailingSoftClippedRatio(read); } else { @@ -118,10 +112,6 @@ else if ( minimumLeadingTrailingSoftClippedRatio != null ) { ); } - // Check for if we want to invert our results: - if ( doInvertFilter ) { - return !result; - } return result; } } diff --git a/src/test/java/org/broadinstitute/hellbender/engine/filters/SoftClippedReadFilterUnitTest.java b/src/test/java/org/broadinstitute/hellbender/engine/filters/SoftClippedReadFilterUnitTest.java index 668a1c62a92..b87062e56e7 100644 --- a/src/test/java/org/broadinstitute/hellbender/engine/filters/SoftClippedReadFilterUnitTest.java +++ b/src/test/java/org/broadinstitute/hellbender/engine/filters/SoftClippedReadFilterUnitTest.java @@ -37,13 +37,10 @@ public void testOverclippedSoftClipRatioFilter(final String cigarString, final boolean expectedResult) { final SoftClippedReadFilter filter = new SoftClippedReadFilter(); - filter.minimumSoftClippedRatio = clipRatio; + filter.maximumSoftClippedRatio = clipRatio; final GATKRead read = buildSAMRead(cigarString); Assert.assertEquals(filter.test(read), expectedResult, cigarString); - - filter.doInvertFilter = true; - Assert.assertEquals(filter.test(read), !expectedResult, "Inverted case: " + cigarString); } @Test(dataProvider= "SoftClippedLeadingTrailingRatioDataProvider") @@ -52,13 +49,10 @@ public void testSoftClippedLeadingTrailingRatioFilter(final String cigarString, final boolean expectedResult) { final SoftClippedReadFilter filter = new SoftClippedReadFilter(); - filter.minimumLeadingTrailingSoftClippedRatio = clipRatio; + 
filter.maximumLeadingTrailingSoftClippedRatio = clipRatio; final GATKRead read = buildSAMRead(cigarString); Assert.assertEquals(filter.test(read), expectedResult, cigarString); - - filter.doInvertFilter = true; - Assert.assertEquals(filter.test(read), !expectedResult, "Inverted case: " + cigarString); } @DataProvider(name = "SoftClipRatioDataProvider") @@ -67,25 +61,25 @@ public Iterator softClipRatioDataProvider() { // --------------------------------------- // Null / trivial cases: - testData.add(new Object[] { "", 0.1, false }); - testData.add(new Object[] { "10H", 0.1, false }); + testData.add(new Object[] { "", 0.1, true }); + testData.add(new Object[] { "10H", 0.1, true }); // --------------------------------------- // Soft clip ratio test: - testData.add(new Object[] { "1S1M1S17M", 0.2, false }); // 2/20 = .100 - testData.add(new Object[] { "1S1M2S17M", 0.2, false }); // 3/21 = .143 - testData.add(new Object[] { "1S1M3S17M", 0.2, false }); // 4/22 = .182 - testData.add(new Object[] { "1S1M4S17M", 0.2, true }); // 5/23 = .217 - testData.add(new Object[] { "1S1M5S17M", 0.2, true }); // 6/24 = .250 - testData.add(new Object[] { "1S1M6S17M", 0.2, true }); // 7/25 = .280 + testData.add(new Object[] { "1S1M1S17M", 0.2, true }); // 2/20 = .100 + testData.add(new Object[] { "1S1M2S17M", 0.2, true }); // 3/21 = .143 + testData.add(new Object[] { "1S1M3S17M", 0.2, true }); // 4/22 = .182 + testData.add(new Object[] { "1S1M4S17M", 0.2, false }); // 5/23 = .217 + testData.add(new Object[] { "1S1M5S17M", 0.2, false }); // 6/24 = .250 + testData.add(new Object[] { "1S1M6S17M", 0.2, false }); // 7/25 = .280 // --------------------------------------- // Soft clip placement: - testData.add(new Object[] { "101S100M", 0.5, true }); - testData.add(new Object[] { "100M101S", 0.5, true }); - testData.add(new Object[] { "25H20S10M20S10M20S10M20S10M20S10M20S25H", 0.5, true }); + testData.add(new Object[] { "101S100M", 0.5, false }); + testData.add(new Object[] { "100M101S", 0.5, 
false }); + testData.add(new Object[] { "25H20S10M20S10M20S10M20S10M20S10M20S25H", 0.5, false }); return testData.iterator(); } @@ -96,42 +90,42 @@ public Iterator softClippedLeadingTrailingRatioDataProvider() { // --------------------------------------- // Null / trivial cases: - testData.add(new Object[] { "", 0.1, false }); - testData.add(new Object[] { "10H", 0.1, false }); + testData.add(new Object[] { "", 0.1, true }); + testData.add(new Object[] { "10H", 0.1, true }); // --------------------------------------- // Soft clip ratio test: // Non-leading/-trailing - testData.add(new Object[] { "1S1M1S17M", 0.2, false }); // 2/20 = .100 - testData.add(new Object[] { "1S1M2S17M", 0.2, false }); // 3/21 = .143 - testData.add(new Object[] { "1S1M3S17M", 0.2, false }); // 4/22 = .182 - testData.add(new Object[] { "1S1M4S17M", 0.2, false }); // 5/23 = .217 - testData.add(new Object[] { "1S1M5S17M", 0.2, false }); // 6/24 = .250 - testData.add(new Object[] { "1S1M6S17M", 0.2, false }); // 7/25 = .280 + testData.add(new Object[] { "1S1M1S17M", 0.2, true }); // 2/20 = .100 + testData.add(new Object[] { "1S1M2S17M", 0.2, true }); // 3/21 = .143 + testData.add(new Object[] { "1S1M3S17M", 0.2, true }); // 4/22 = .182 + testData.add(new Object[] { "1S1M4S17M", 0.2, true }); // 5/23 = .217 + testData.add(new Object[] { "1S1M5S17M", 0.2, true }); // 6/24 = .250 + testData.add(new Object[] { "1S1M6S17M", 0.2, true }); // 7/25 = .280 // Leading: - testData.add(new Object[] { "2S1S1S16M", 0.2, false }); // 2/20 = .100 - testData.add(new Object[] { "3S1S1S16M", 0.2, false }); // 3/21 = .143 - testData.add(new Object[] { "4S1S1S16M", 0.2, false }); // 4/22 = .182 - testData.add(new Object[] { "5S1S1S16M", 0.2, true }); // 5/23 = .217 - testData.add(new Object[] { "6S1S1S16M", 0.2, true }); // 6/24 = .250 - testData.add(new Object[] { "7S1S1S16M", 0.2, true }); // 7/25 = .280 + testData.add(new Object[] { "2S1S1S16M", 0.2, true }); // 2/20 = .100 + testData.add(new Object[] { 
"3S1S1S16M", 0.2, true }); // 3/21 = .143 + testData.add(new Object[] { "4S1S1S16M", 0.2, true }); // 4/22 = .182 + testData.add(new Object[] { "5S1S1S16M", 0.2, false }); // 5/23 = .217 + testData.add(new Object[] { "6S1S1S16M", 0.2, false }); // 6/24 = .250 + testData.add(new Object[] { "7S1S1S16M", 0.2, false }); // 7/25 = .280 // Trailing: - testData.add(new Object[] { "1M1S16M2S", 0.2, false }); // 2/20 = .100 - testData.add(new Object[] { "1M1S16M3S", 0.2, false }); // 3/21 = .143 - testData.add(new Object[] { "1M1S16M4S", 0.2, false }); // 4/22 = .182 - testData.add(new Object[] { "1M1S16M5S", 0.2, true }); // 5/23 = .217 - testData.add(new Object[] { "1M1S16M6S", 0.2, true }); // 6/24 = .250 - testData.add(new Object[] { "1M1S16M7S", 0.2, true }); // 7/25 = .280 + testData.add(new Object[] { "1M1S16M2S", 0.2, true }); // 2/20 = .100 + testData.add(new Object[] { "1M1S16M3S", 0.2, true }); // 3/21 = .143 + testData.add(new Object[] { "1M1S16M4S", 0.2, true }); // 4/22 = .182 + testData.add(new Object[] { "1M1S16M5S", 0.2, false }); // 5/23 = .217 + testData.add(new Object[] { "1M1S16M6S", 0.2, false }); // 6/24 = .250 + testData.add(new Object[] { "1M1S16M7S", 0.2, false }); // 7/25 = .280 // --------------------------------------- // Soft clip placement: - testData.add(new Object[] { "101S100M", 0.5, true }); - testData.add(new Object[] { "100M101S", 0.5, true }); - testData.add(new Object[] { "25H20S10M20S10M20S10M20S10M20S10M20S25H", 0.5, false }); + testData.add(new Object[] { "101S100M", 0.5, false }); + testData.add(new Object[] { "100M101S", 0.5, false }); + testData.add(new Object[] { "25H20S10M20S10M20S10M20S10M20S10M20S25H", 0.5, true }); return testData.iterator(); }