Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add reconstruct trios clps #1435

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 4 additions & 10 deletions src/main/java/picard/fingerprint/FingerprintChecker.java
Original file line number Diff line number Diff line change
Expand Up @@ -366,10 +366,7 @@ private void getFingerprintFromVc(final Map<String, Fingerprint> fingerprints, f
for (final String sample : fingerprints.keySet()) {
final Fingerprint fp = fingerprints.get(sample);

//PLs are preferred over GTs
//TODO: this code is replicated in various places (ReconstructTriosFromVCF for example). Needs refactoring.
//TODO: add a way to force using GTs when both are available (why?)

// PLs are preferred over GTs
// Get the genotype for the sample and check that it is useful
final Genotype genotype = usableSnp.getGenotype(sample);
if (genotype == null) {
Expand All @@ -390,7 +387,7 @@ private void getFingerprintFromVc(final Map<String, Fingerprint> fingerprints, f

if (genotype.isNoCall()) continue;

// TODO: when multiple genotypes are available for a Haplotype check that they
// TODO: when multiple genotypes are available for a Haplotype, check that they
// TODO: agree. Not urgent since DownloadGenotypes already does this.
// TODO: more urgent now as we convert vcfs to haplotypeProbabilities and
// TODO: there could be different VCs with information we'd like to use...
Expand Down Expand Up @@ -511,11 +508,8 @@ public Map<FingerprintIdDetails, Fingerprint> fingerprintSamFile(final Path samF
// Now go through the data at each locus and figure stuff out!
for (final SamLocusIterator.LocusInfo info : iterator) {

// if statement to avoid string building.
// TODO: replace with lambda version once htsjdk is rev'ed
if (Log.isEnabled(Log.LogLevel.DEBUG)) {
log.debug("At locus " + info.toString());
}
log.debug(() -> "At locus " + info.toString());

// TODO: Filter out the locus if the allele balance doesn't make sense for either a
// TODO: 50/50 het or a hom with some errors; in HS data with deep coverage any base
// TODO: with major strand bias could cause errors
Expand Down
76 changes: 76 additions & 0 deletions src/main/java/picard/fingerprint/InferSex.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
/*
* The MIT License
*
* Copyright (c) 2019 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/

package picard.fingerprint;

import htsjdk.samtools.util.CollectionUtil;
import htsjdk.samtools.util.IOUtil;
import htsjdk.samtools.util.Log;
import org.broadinstitute.barclay.argparser.Argument;
import picard.cmdline.CommandLineProgram;
import picard.cmdline.StandardOptionDefinitions;
import picard.pedigree.PedFile;
import picard.pedigree.Sex;

import java.io.File;
import java.util.Map;
import java.util.Set;

/**
 * Attempts to use a set of genome data samples to determine sex within the cohort provided.
 *
 * <p>This abstract class provides the outline of a sex-inferencing command line program: it
 * declares the arguments shared by all implementations and, in {@link #doWork()}, writes the
 * inferred sexes to {@link #OUTPUT} as a pedigree file. Implementing classes must supply the
 * inference engine via {@link #getSexInference()}.
 *
 * @author Tim Fennell
 * @author Jonathan Barlev
 * @author Yossi Farjoun
 */
public abstract class InferSex extends CommandLineProgram {

    @Argument(shortName = StandardOptionDefinitions.OUTPUT_SHORT_NAME, doc = "Output pedigree file name.")
    public File OUTPUT;

    @Argument(doc="List of possible names for male sex chromosome(s)")
    public Set<String> MALE_CHROMS = CollectionUtil.makeSet("Y", "chrY");

    @Argument(doc="List of possible names for female sex chromosome(s)")
    public Set<String> FEMALE_CHROMS = CollectionUtil.makeSet("X", "chrX");

    final Log log = Log.getInstance(InferSex.class);

    /**
     * Supplies the engine that performs the actual sex determination.
     *
     * @return the engine whose {@code determineSexes()} result is written to {@link #OUTPUT}
     */
    abstract SexInferenceEngine getSexInference();

    /**
     * Checks that {@link #OUTPUT} is writable, asks the engine for the sexes and writes them out
     * as a pedigree file.
     *
     * @return 0 on completion
     */
    @Override
    protected int doWork() {
        // Fail before doing any inference work if the result cannot be written.
        IOUtil.assertFileIsWritable(OUTPUT);

        log.info("Determining sample sexes");
        // NOTE(review): the map is presumably keyed by sample name - confirm against SexInferenceEngine.
        final Map<String, Sex> sexBySample = getSexInference().determineSexes();

        PedFile.fromSexMap(sexBySample)
                .write(OUTPUT);

        return 0;
    }
}
76 changes: 76 additions & 0 deletions src/main/java/picard/fingerprint/InferSexFromBAM.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
/*
* The MIT License
*
* Copyright (c) 2019 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/

package picard.fingerprint;

import htsjdk.samtools.util.IOUtil;
import org.broadinstitute.barclay.argparser.Argument;
import org.broadinstitute.barclay.argparser.CommandLineProgramProperties;
import org.broadinstitute.barclay.argparser.ExperimentalFeature;
import picard.cmdline.StandardOptionDefinitions;
import picard.cmdline.programgroups.DiagnosticsAndQCProgramGroup;

import java.io.File;
import java.util.List;

/**
 * Program to infer the sex of a cohort of samples from BAMs. Needs a large cohort with both sexes
 * to work since it uses a clustering-based algorithm.
 *
 * See InferSex for more details.
 *
 * This class looks at the index information to get average coverage over the sex chromosomes.
 * It provides that information to the base class.
 *
 * Created by jbarlev on 5/30/14.
 *
 * @author Jonathan Barlev
 * @author Yossi Farjoun
 */
@CommandLineProgramProperties(
        // Each fragment ends with a space so the concatenated help text keeps its word breaks.
        summary = "A program that can infer sample sex from a collection of BAM files. " +
                "It compares the coverage over the male and female chromosomes to that over the rest of the " +
                "genome, and performs clustering to find the answer. It uses the bam index to avoid having to " +
                "iterate over the whole BAM file.",
        oneLineSummary = "Infer sample sex from a collection of BAM files",
        programGroup = DiagnosticsAndQCProgramGroup.class
)
@ExperimentalFeature
public class InferSexFromBAM extends InferSex {

    @Argument(shortName = StandardOptionDefinitions.INPUT_SHORT_NAME, doc = "Input BAM (not SAM) file. Must be indexed.")
    public List<File> INPUT;

    /**
     * Builds the BAM-based engine from the configured chromosome names and input files.
     *
     * @return a new {@code SexInferenceEngineFromBAM} for {@link #INPUT}
     */
    @Override
    SexInferenceEngine getSexInference() {
        return new SexInferenceEngineFromBAM(MALE_CHROMS, FEMALE_CHROMS, INPUT);
    }

    /**
     * Verifies that every input file is readable, then delegates to the base-class workflow.
     *
     * @return the exit status returned by {@code InferSex.doWork()}
     */
    @Override
    protected int doWork() {
        IOUtil.assertFilesAreReadable(INPUT);

        return super.doWork();
    }
}
87 changes: 87 additions & 0 deletions src/main/java/picard/fingerprint/InferSexFromVCF.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
/*
* The MIT License
*
* Copyright (c) 2019 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/

package picard.fingerprint;

import htsjdk.samtools.util.IOUtil;
import org.broadinstitute.barclay.argparser.Argument;
import org.broadinstitute.barclay.argparser.CommandLineProgramProperties;
import org.broadinstitute.barclay.argparser.ExperimentalFeature;
import picard.cmdline.StandardOptionDefinitions;
import picard.cmdline.programgroups.DiagnosticsAndQCProgramGroup;

import java.io.File;

/**
 * Program to infer the sex of a cohort of samples from a VCF. Needs a large cohort with both sexes
 * to work since it uses a clustering-based algorithm.
 *
 * See InferSex for more details.
 *
 * This class looks at depth markers on spaced variants in the VCF and provides information from
 * that to the base class.
 *
 * Created by jbarlev on 5/30/14.
 *
 * @author Jonathan Barlev
 * @author Yossi Farjoun
 */
@CommandLineProgramProperties(
        // Each fragment ends with a space so the concatenated help text keeps its word breaks.
        summary = "A program that can infer sample sex from a VCF file. " +
                "It compares the coverage over the male and female chromosomes to that over the rest of the " +
                "genome, and performs clustering to find the answer.",
        oneLineSummary = "Infer sample sex from a VCF file",
        programGroup = DiagnosticsAndQCProgramGroup.class
)
@ExperimentalFeature
public class InferSexFromVCF extends InferSex {
    @Argument(shortName = StandardOptionDefinitions.INPUT_SHORT_NAME, doc = "Input VCF file.")
    public File INPUT;

    @Argument(shortName = "AUTO_V", doc = "Number determining how many variants are sampled for coverage on each non-sex chromosome. " +
            "Program will (attempt to) sample coverage at AUTOSOMAL_VARIANTS evenly spaced variants on each non-sex chromosome.")
    public int AUTOSOMAL_VARIANTS = 10;

    @Argument(shortName = "ALLO_V", doc = "Number determining how many variants are sampled for coverage on each sex chromosome. " +
            "Program will (attempt to) sample coverage at ALLOSOMAL_VARIANTS evenly spaced variants on each sex chromosome. ")
    public int ALLOSOMAL_VARIANTS = 100;

    @Argument(shortName = "PAR", doc = "An IntervalList containing the pseudoautosomal region. Used for masking out that region since the regions on one " +
            "chromosome may get mapped to the other and thus can change the apparent coverage.", optional = true)
    public File PSEUDOAUTOSOMAL_REGION = null;

    @Argument(doc="Some VCFs do not have variants (called) on Male chromosomes. Consequently, when determining sex, one may wish to give less weight to their Coverage than " +
            "the female chromosomes. Do this by dividing the male chromosome coverage values by a factor of Y_COVERAGE_SHRINK_FACTOR.")
    public double Y_COVERAGE_SHRINK_FACTOR = 2.0;

    /**
     * Builds the VCF-based engine from the configured chromosome names, input file and sampling
     * parameters.
     *
     * @return a new {@code SexInferenceEngineFromVCF} for {@link #INPUT}
     */
    @Override
    SexInferenceEngine getSexInference() {
        return new SexInferenceEngineFromVCF(MALE_CHROMS, FEMALE_CHROMS, INPUT, AUTOSOMAL_VARIANTS, ALLOSOMAL_VARIANTS, PSEUDOAUTOSOMAL_REGION, Y_COVERAGE_SHRINK_FACTOR);
    }

    /**
     * Verifies that the input VCF is readable, then delegates to the base-class workflow.
     *
     * @return the exit status returned by {@code InferSex.doWork()}
     */
    @Override
    protected int doWork() {
        IOUtil.assertFileIsReadable(INPUT);
        return super.doWork();
    }
}
Loading