Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Require both overlap and breakend proximity for depth-only SV clustering #8962

Merged
merged 2 commits into from
Sep 11, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ public class CanonicalSVLinkage<T extends SVCallRecord> extends SVClusterLinkage

public static final double DEFAULT_RECIPROCAL_OVERLAP_DEPTH_ONLY = 0.8;
public static final double DEFAULT_SIZE_SIMILARITY_DEPTH_ONLY = 0;
public static final int DEFAULT_WINDOW_DEPTH_ONLY = 0;
public static final int DEFAULT_WINDOW_DEPTH_ONLY = 10000000;
public static final double DEFAULT_SAMPLE_OVERLAP_DEPTH_ONLY = 0;

public static final double DEFAULT_RECIPROCAL_OVERLAP_MIXED = 0.8;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ public class ClusteringParameters {

// if true, both reciprocal overlap and window criteria must be met
// if false, reciprocal overlap and/or window criteria must be met
// NOTE this is currently set to true in all tools but is being kept for possible future use
private final boolean requiresOverlapAndProximity;

// returns true if two given records are the correct type of pair for this parameter set
Expand Down Expand Up @@ -57,7 +58,7 @@ public boolean isValidPair(final SVCallRecord a, final SVCallRecord b) {
}

public static ClusteringParameters createDepthParameters(final double reciprocalOverlap, final double sizeSimilarity, final int window, final double sampleOverlap) {
return new ClusteringParameters(reciprocalOverlap, sizeSimilarity, window, sampleOverlap, false, (a,b) -> a.isDepthOnly() && b.isDepthOnly());
return new ClusteringParameters(reciprocalOverlap, sizeSimilarity, window, sampleOverlap, true, (a,b) -> a.isDepthOnly() && b.isDepthOnly());
}

public static ClusteringParameters createMixedParameters(final double reciprocalOverlap, final double sizeSimilarity, final int window, final double sampleOverlap) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ public static SVClusterEngine getNewDefaultMaxCliqueEngine() {
return new SVClusterEngine(SVClusterEngine.CLUSTERING_TYPE.MAX_CLIQUE, defaultCollapser::collapse, getNewDefaultLinkage(), hg38Dict);
}

public static final ClusteringParameters defaultDepthOnlyParameters = ClusteringParameters.createDepthParameters(0.8, 0, 0, 0);
public static final ClusteringParameters defaultDepthOnlyParameters = ClusteringParameters.createDepthParameters(0.8, 0, 10000000, 0);
public static final ClusteringParameters defaultMixedParameters = ClusteringParameters.createMixedParameters(0.8, 0, 1000, 0);
public static final ClusteringParameters defaultEvidenceParameters = ClusteringParameters.createPesrParameters(0.5, 0, 500, 0);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ public class SVClusterEngineTest {

private final SVClusterEngine engine = SVTestUtils.defaultSingleLinkageEngine;

private static final ClusteringParameters depthOnlyParametersSizeSimilarity = ClusteringParameters.createDepthParameters(0.1, 0.5, 0, 0);
private static final ClusteringParameters depthOnlyParametersSizeSimilarity = ClusteringParameters.createDepthParameters(0.1, 0.5, 10000000, 0);
private static final ClusteringParameters mixedParametersSizeSimilarity = ClusteringParameters.createMixedParameters(0.1, 0.5, 5000, 0);
private static final ClusteringParameters evidenceParametersSizeSimilarity = ClusteringParameters.createPesrParameters(0.1, 0.5, 5000, 0);
private final CanonicalSVLinkage<SVCallRecord> linkageSizeSimilarity = new CanonicalSVLinkage<>(SVTestUtils.hg38Dict, false);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -204,7 +204,7 @@ public void testOverlappingEvents(final List<File> inputVcfs) {
.add(JointGermlineCNVSegmentation.MODEL_CALL_INTERVALS_LONG_NAME, getToolTestDataDir() + "intervals.chr22.interval_list")
.addInterval("22:22,538,114-23,538,437")
.add(JointGermlineCNVSegmentation.CLUSTERING_INTERVAL_OVERLAP_LONG_NAME, 0.8)
.add(JointGermlineCNVSegmentation.CLUSTERING_BREAKEND_WINDOW_LONG_NAME, 0);
.add(JointGermlineCNVSegmentation.CLUSTERING_BREAKEND_WINDOW_LONG_NAME, 10000000);

inputVcfs.forEach(vcf -> args.addVCF(vcf));

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@ public void testMergeHelper(final boolean omitMembers) {
.add(SVCluster.ALGORITHM_LONG_NAME, SVCluster.CLUSTER_ALGORITHM.SINGLE_LINKAGE)
.add(SVClusterEngineArgumentsCollection.DEPTH_SAMPLE_OVERLAP_FRACTION_NAME, 0)
.add(SVClusterEngineArgumentsCollection.DEPTH_INTERVAL_OVERLAP_FRACTION_NAME, 1)
.add(SVClusterEngineArgumentsCollection.DEPTH_BREAKEND_WINDOW_NAME, 0)
.add(SVClusterEngineArgumentsCollection.DEPTH_BREAKEND_WINDOW_NAME, 10000000)
.add(SVClusterEngineArgumentsCollection.MIXED_SAMPLE_OVERLAP_FRACTION_NAME, 0)
.add(SVClusterEngineArgumentsCollection.MIXED_INTERVAL_OVERLAP_FRACTION_NAME, 1)
.add(SVClusterEngineArgumentsCollection.MIXED_BREAKEND_WINDOW_NAME, 0)
Expand Down Expand Up @@ -188,7 +188,7 @@ public void testClusterSingleLinkage() {
.add(StandardArgumentDefinitions.REFERENCE_LONG_NAME, REFERENCE_PATH)
.add(SVClusterEngineArgumentsCollection.DEPTH_SAMPLE_OVERLAP_FRACTION_NAME, 0)
.add(SVClusterEngineArgumentsCollection.DEPTH_INTERVAL_OVERLAP_FRACTION_NAME, 0.5)
.add(SVClusterEngineArgumentsCollection.DEPTH_BREAKEND_WINDOW_NAME, 2000)
.add(SVClusterEngineArgumentsCollection.DEPTH_BREAKEND_WINDOW_NAME, 10000000)
.add(SVClusterEngineArgumentsCollection.MIXED_SAMPLE_OVERLAP_FRACTION_NAME, 0)
.add(SVClusterEngineArgumentsCollection.MIXED_INTERVAL_OVERLAP_FRACTION_NAME, 0.1)
.add(SVClusterEngineArgumentsCollection.MIXED_BREAKEND_WINDOW_NAME, 2000)
Expand All @@ -204,7 +204,7 @@ public void testClusterSingleLinkage() {

Assert.assertEquals(header.getSampleNamesInOrder(), Arrays.asList("HG00096", "HG00129", "HG00140", "NA18945", "NA18956"));

Assert.assertEquals(records.size(), 1338);
Assert.assertEquals(records.size(), 1344);

// Check for one record
int expectedRecordsFound = 0;
Expand Down Expand Up @@ -255,7 +255,7 @@ public void testAgainstSimpleImplementation() {
.add(StandardArgumentDefinitions.REFERENCE_LONG_NAME, REFERENCE_PATH)
.add(SVClusterEngineArgumentsCollection.DEPTH_SAMPLE_OVERLAP_FRACTION_NAME, 0)
.add(SVClusterEngineArgumentsCollection.DEPTH_INTERVAL_OVERLAP_FRACTION_NAME, 0.5)
.add(SVClusterEngineArgumentsCollection.DEPTH_BREAKEND_WINDOW_NAME, 2000)
.add(SVClusterEngineArgumentsCollection.DEPTH_BREAKEND_WINDOW_NAME, 10000000)
.add(SVClusterEngineArgumentsCollection.MIXED_SAMPLE_OVERLAP_FRACTION_NAME, 0)
.add(SVClusterEngineArgumentsCollection.MIXED_INTERVAL_OVERLAP_FRACTION_NAME, 0.1)
.add(SVClusterEngineArgumentsCollection.MIXED_BREAKEND_WINDOW_NAME, 2000)
Expand All @@ -279,7 +279,7 @@ public void testAgainstSimpleImplementation() {
final Pair<VCFHeader, List<VariantContext>> testVcf = VariantContextTestUtils.readEntireVCFIntoMemory(output.getAbsolutePath());

final ReferenceSequenceFile referenceSequenceFile = ReferenceUtils.createReferenceReader(new GATKPath(REFERENCE_PATH));
final ClusteringParameters depthParameters = ClusteringParameters.createDepthParameters(0.5, 0, 2000, 0);
final ClusteringParameters depthParameters = ClusteringParameters.createDepthParameters(0.5, 0, 10000000, 0);
final ClusteringParameters mixedParameters = ClusteringParameters.createMixedParameters(0.1, 0, 2000, 0);
final ClusteringParameters pesrParameters = ClusteringParameters.createPesrParameters(0.1, 0, 500, 0);
final SVClusterEngine engine = SVClusterEngineFactory.createCanonical(
Expand Down Expand Up @@ -347,7 +347,7 @@ public void testClusterMaxClique(final boolean fastMode) {
.add(SVCluster.ALGORITHM_LONG_NAME, SVCluster.CLUSTER_ALGORITHM.MAX_CLIQUE)
.add(SVClusterEngineArgumentsCollection.DEPTH_SAMPLE_OVERLAP_FRACTION_NAME, 0)
.add(SVClusterEngineArgumentsCollection.DEPTH_INTERVAL_OVERLAP_FRACTION_NAME, 0.5)
.add(SVClusterEngineArgumentsCollection.DEPTH_BREAKEND_WINDOW_NAME, 2000)
.add(SVClusterEngineArgumentsCollection.DEPTH_BREAKEND_WINDOW_NAME, 10000000)
.add(SVClusterEngineArgumentsCollection.MIXED_SAMPLE_OVERLAP_FRACTION_NAME, 0)
.add(SVClusterEngineArgumentsCollection.MIXED_INTERVAL_OVERLAP_FRACTION_NAME, 0.1)
.add(SVClusterEngineArgumentsCollection.MIXED_BREAKEND_WINDOW_NAME, 2000)
Expand Down Expand Up @@ -414,7 +414,7 @@ public void testClusterSampleOverlap() {
.add(SVCluster.ALGORITHM_LONG_NAME, SVCluster.CLUSTER_ALGORITHM.SINGLE_LINKAGE)
.add(SVClusterEngineArgumentsCollection.DEPTH_SAMPLE_OVERLAP_FRACTION_NAME, 0.5)
.add(SVClusterEngineArgumentsCollection.DEPTH_INTERVAL_OVERLAP_FRACTION_NAME, 0.5)
.add(SVClusterEngineArgumentsCollection.DEPTH_BREAKEND_WINDOW_NAME, 2000)
.add(SVClusterEngineArgumentsCollection.DEPTH_BREAKEND_WINDOW_NAME, 10000000)
.add(SVClusterEngineArgumentsCollection.MIXED_SAMPLE_OVERLAP_FRACTION_NAME, 0.5)
.add(SVClusterEngineArgumentsCollection.MIXED_INTERVAL_OVERLAP_FRACTION_NAME, 0.1)
.add(SVClusterEngineArgumentsCollection.MIXED_BREAKEND_WINDOW_NAME, 2000)
Expand Down Expand Up @@ -478,7 +478,7 @@ public void testAllosome() {
.add(StandardArgumentDefinitions.REFERENCE_LONG_NAME, REFERENCE_PATH)
.add(SVClusterEngineArgumentsCollection.DEPTH_SAMPLE_OVERLAP_FRACTION_NAME, 0)
.add(SVClusterEngineArgumentsCollection.DEPTH_INTERVAL_OVERLAP_FRACTION_NAME, 0.5)
.add(SVClusterEngineArgumentsCollection.DEPTH_BREAKEND_WINDOW_NAME, 2000)
.add(SVClusterEngineArgumentsCollection.DEPTH_BREAKEND_WINDOW_NAME, 10000000)
.add(SVClusterEngineArgumentsCollection.MIXED_SAMPLE_OVERLAP_FRACTION_NAME, 0)
.add(SVClusterEngineArgumentsCollection.MIXED_INTERVAL_OVERLAP_FRACTION_NAME, 0.1)
.add(SVClusterEngineArgumentsCollection.MIXED_BREAKEND_WINDOW_NAME, 2000)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ public void testRefPanel() {
.add(StandardArgumentDefinitions.SEQUENCE_DICTIONARY_NAME, GATKBaseTest.FULL_HG38_DICT)
.add(SVClusterEngineArgumentsCollection.DEPTH_SAMPLE_OVERLAP_FRACTION_NAME, 0)
.add(SVClusterEngineArgumentsCollection.DEPTH_INTERVAL_OVERLAP_FRACTION_NAME, 0.5)
.add(SVClusterEngineArgumentsCollection.DEPTH_BREAKEND_WINDOW_NAME, 2000)
.add(SVClusterEngineArgumentsCollection.DEPTH_BREAKEND_WINDOW_NAME, 10000000)
.add(SVClusterEngineArgumentsCollection.MIXED_SAMPLE_OVERLAP_FRACTION_NAME, 0)
.add(SVClusterEngineArgumentsCollection.MIXED_INTERVAL_OVERLAP_FRACTION_NAME, 0.1)
.add(SVClusterEngineArgumentsCollection.MIXED_BREAKEND_WINDOW_NAME, 2000)
Expand All @@ -69,7 +69,7 @@ public void testRefPanel() {
final Pair<VCFHeader, List<VariantContext>> outputVcf = VariantContextTestUtils.readEntireVCFIntoMemory(output.getAbsolutePath());

final SAMSequenceDictionary dictionary = SVTestUtils.hg38Dict;
final ClusteringParameters depthParameters = ClusteringParameters.createDepthParameters(0.5, 0, 2000, 0);
final ClusteringParameters depthParameters = ClusteringParameters.createDepthParameters(0.5, 0, 10000000, 0);
final ClusteringParameters mixedParameters = ClusteringParameters.createMixedParameters(0.1, 0, 2000, 0);
final ClusteringParameters pesrParameters = ClusteringParameters.createPesrParameters(0.1, 0, 500, 0);
final SVConcordanceLinkage linkage = new SVConcordanceLinkage(dictionary);
Expand Down Expand Up @@ -180,7 +180,7 @@ public void testSelf() {
.add(SVClusterEngineArgumentsCollection.DEPTH_SAMPLE_OVERLAP_FRACTION_NAME, 0)
.add(SVClusterEngineArgumentsCollection.DEPTH_INTERVAL_OVERLAP_FRACTION_NAME, 0.5)
.add(SVClusterEngineArgumentsCollection.DEPTH_SIZE_SIMILARITY_NAME, 0)
.add(SVClusterEngineArgumentsCollection.DEPTH_BREAKEND_WINDOW_NAME, 2000)
.add(SVClusterEngineArgumentsCollection.DEPTH_BREAKEND_WINDOW_NAME, 10000000)
.add(SVClusterEngineArgumentsCollection.MIXED_SAMPLE_OVERLAP_FRACTION_NAME, 0)
.add(SVClusterEngineArgumentsCollection.MIXED_INTERVAL_OVERLAP_FRACTION_NAME, 0.1)
.add(SVClusterEngineArgumentsCollection.MIXED_SIZE_SIMILARITY_NAME, 0)
Expand All @@ -197,7 +197,7 @@ public void testSelf() {
final Pair<VCFHeader, List<VariantContext>> outputVcf = VariantContextTestUtils.readEntireVCFIntoMemory(output.getAbsolutePath());

final SAMSequenceDictionary dictionary = SVTestUtils.hg38Dict;
final ClusteringParameters depthParameters = ClusteringParameters.createDepthParameters(0.5, 0, 2000, 0);
final ClusteringParameters depthParameters = ClusteringParameters.createDepthParameters(0.5, 0, 10000000, 0);
final ClusteringParameters mixedParameters = ClusteringParameters.createMixedParameters(0.1, 0, 2000, 0);
final ClusteringParameters pesrParameters = ClusteringParameters.createPesrParameters(0.1, 0, 500, 0);
final SVConcordanceLinkage linkage = new SVConcordanceLinkage(dictionary);
Expand Down Expand Up @@ -279,7 +279,7 @@ private void assertPerfectConcordance(final File output, final String evalVcfPat
final Pair<VCFHeader, List<VariantContext>> outputVcf = VariantContextTestUtils.readEntireVCFIntoMemory(output.getAbsolutePath());

final SAMSequenceDictionary dictionary = SVTestUtils.hg38Dict;
final ClusteringParameters depthParameters = ClusteringParameters.createDepthParameters(0.5, 0, 2000, 0);
final ClusteringParameters depthParameters = ClusteringParameters.createDepthParameters(0.5, 0, 10000000, 0);
final ClusteringParameters mixedParameters = ClusteringParameters.createMixedParameters(0.1, 0, 2000, 0);
final ClusteringParameters pesrParameters = ClusteringParameters.createPesrParameters(0.1, 0, 500, 0);
final SVConcordanceLinkage linkage = new SVConcordanceLinkage(dictionary);
Expand Down Expand Up @@ -331,7 +331,7 @@ public void testSitesOnly() {
.add(StandardArgumentDefinitions.SEQUENCE_DICTIONARY_NAME, GATKBaseTest.FULL_HG38_DICT)
.add(SVClusterEngineArgumentsCollection.DEPTH_SAMPLE_OVERLAP_FRACTION_NAME, 0)
.add(SVClusterEngineArgumentsCollection.DEPTH_INTERVAL_OVERLAP_FRACTION_NAME, 0.5)
.add(SVClusterEngineArgumentsCollection.DEPTH_BREAKEND_WINDOW_NAME, 2000)
.add(SVClusterEngineArgumentsCollection.DEPTH_BREAKEND_WINDOW_NAME, 10000000)
.add(SVClusterEngineArgumentsCollection.MIXED_SAMPLE_OVERLAP_FRACTION_NAME, 0)
.add(SVClusterEngineArgumentsCollection.MIXED_INTERVAL_OVERLAP_FRACTION_NAME, 0.1)
.add(SVClusterEngineArgumentsCollection.MIXED_BREAKEND_WINDOW_NAME, 2000)
Expand Down
Loading