Skip to content

Commit

Permalink
Merge pull request #49 from Illumina/develop
Browse files Browse the repository at this point in the history
Transfer code from internal repository
  • Loading branch information
egor-dolzhenko authored Mar 2, 2019
2 parents 6691bdb + 965a4b3 commit 9a0376d
Show file tree
Hide file tree
Showing 8 changed files with 225 additions and 114 deletions.
8 changes: 7 additions & 1 deletion .clang-format
Original file line number Diff line number Diff line change
@@ -1 +1,7 @@
BasedOnStyle: Google
BasedOnStyle: WebKit
ColumnLimit: 120
AlignAfterOpenBracket: AlwaysBreak
BreakBeforeBraces: Allman
BreakStringLiterals: true
ReflowComments: true
CompactNamespaces: true
58 changes: 52 additions & 6 deletions common/CountTable.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@

#include "common/CountTable.hh"

#include <stdexcept>

using std::string;
using std::to_string;
using std::vector;
Expand All @@ -31,35 +33,53 @@ namespace ehunter

int32_t CountTable::countOf(int32_t element) const
{
if (elements_to_counts_.find(element) == elements_to_counts_.end())
if (elementsToCounts_.find(element) == elementsToCounts_.end())
{
return 0;
}
return elements_to_counts_.at(element);
return elementsToCounts_.at(element);
}

void CountTable::setCountOf(int32_t element, int32_t count)
{
elements_to_counts_[element] = count;
elementsToCounts_[element] = count;
if (count == 0)
{
elements_to_counts_.erase(element);
elementsToCounts_.erase(element);
}
}

void CountTable::incrementCountOf(int32_t element) { ++elements_to_counts_[element]; }
void CountTable::incrementCountOf(int32_t element, int32_t increment)
{
if (increment <= 0)
{
throw std::logic_error("CountTables require positive increments");
}

elementsToCounts_[element] += increment;
}

vector<int32_t> CountTable::getElementsWithNonzeroCounts() const
{
vector<int32_t> elements;
for (const auto& element_count : elements_to_counts_)
for (const auto& element_count : elementsToCounts_)
{
elements.push_back(element_count.first);
}

return elements;
}

CountTable& CountTable::operator=(const CountTable& other)
{
if (this != &other)
{
elementsToCounts_ = other.elementsToCounts_;
}

return *this;
}

std::ostream& operator<<(std::ostream& out, const CountTable& count_table)
{
string encoding;
Expand All @@ -85,4 +105,30 @@ std::ostream& operator<<(std::ostream& out, const CountTable& count_table)
return out;
}

CountTable collapseTopElements(const CountTable& countTable, int upperBound)
{
if (upperBound < 0)
{
throw std::logic_error("CountTables cannot be truncated to negative values");
}

CountTable truncatedTable;

for (auto element : countTable.getElementsWithNonzeroCounts())
{
auto count = countTable.countOf(element);

if (element < upperBound)
{
truncatedTable.setCountOf(element, count);
}
else
{
truncatedTable.incrementCountOf(upperBound, count);
}
}

return truncatedTable;
}

}
35 changes: 24 additions & 11 deletions common/CountTable.hh
Original file line number Diff line number Diff line change
Expand Up @@ -25,35 +25,48 @@
#include <cstdint>
#include <iostream>
#include <map>
#include <memory>
#include <vector>

namespace ehunter {

namespace ehunter
{

class CountTable
{
public:
using const_iterator = std::map<int32_t, int32_t>::const_iterator;
const_iterator begin() const { return elements_to_counts_.begin(); }
const_iterator end() const { return elements_to_counts_.end(); }
const_iterator begin() const { return elementsToCounts_.begin(); }
const_iterator end() const { return elementsToCounts_.end(); }

CountTable(){};
explicit CountTable(const std::map<int32_t, int32_t>& elements_to_counts)
: elements_to_counts_(elements_to_counts){};
CountTable() = default;
explicit CountTable(std::map<int32_t, int32_t> elementsToCounts)
: elementsToCounts_(std::move(elementsToCounts)) {};
CountTable(const CountTable& other) = default;

void clear() { elements_to_counts_.clear(); }
void clear() { elementsToCounts_.clear(); }

int32_t countOf(int32_t element) const;
void incrementCountOf(int32_t element);
void incrementCountOf(int32_t element, int32_t increment = 1);
void setCountOf(int32_t element, int32_t count);
std::vector<int32_t> getElementsWithNonzeroCounts() const;

bool operator==(const CountTable& other) const { return elements_to_counts_ == other.elements_to_counts_; }
CountTable& operator=(const CountTable& other);
bool operator==(const CountTable& other) const { return elementsToCounts_ == other.elementsToCounts_; }

private:
std::map<int32_t, int32_t> elements_to_counts_;
std::map<int32_t, int32_t> elementsToCounts_;
};

std::ostream& operator<<(std::ostream& out, const CountTable& count_table);

/**
* Collapses the elements above a specified bound; the counts of collapsed elements are summed together.
*
* @param countTable: Any count table
* @param upperBound: Elements above this bound are collapsed
* @return Table where counts of all elements exceeding the bound are removed and their counts are added to bound's
* count
*/
CountTable collapseTopElements(const CountTable& countTable, int upperBound);

}
50 changes: 35 additions & 15 deletions common/tests/CountTableTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,30 +31,50 @@ using namespace ehunter;

TEST(InitializationOfCountTable, TypicalCountTable_Initialized)
{
const map<int32_t, int32_t> elements_and_counts = { { 1, 2 }, { 3, 5 } };
CountTable count_table(elements_and_counts);
EXPECT_EQ(2, count_table.countOf(1));
EXPECT_EQ(0, count_table.countOf(2));
EXPECT_EQ(5, count_table.countOf(3));
const map<int32_t, int32_t> elementsAndCounts = { { 1, 2 }, { 3, 5 } };
CountTable countTable(elementsAndCounts);
EXPECT_EQ(2, countTable.countOf(1));
EXPECT_EQ(0, countTable.countOf(2));
EXPECT_EQ(5, countTable.countOf(3));
}

TEST(ManipulatingCountTable, TypicalOperations_TableUpdated)
{
CountTable count_table;
count_table.incrementCountOf(4);
EXPECT_EQ(1, count_table.countOf(4));
CountTable countTable;
countTable.incrementCountOf(4);
EXPECT_EQ(1, countTable.countOf(4));

count_table.setCountOf(4, 3);
EXPECT_EQ(3, count_table.countOf(4));
countTable.setCountOf(4, 3);
EXPECT_EQ(3, countTable.countOf(4));
}

TEST(ObtainingElementsWithNonzeroCounts, TypicalCountTable_ElementsObtained)
{
const map<int32_t, int32_t> elements_and_counts = { { 1, 2 }, { 3, 5 }, { 7, 15 } };
CountTable count_table(elements_and_counts);
const map<int32_t, int32_t> elementsAndCounts = { { 1, 2 }, { 3, 5 }, { 7, 15 } };
CountTable countTable(elementsAndCounts);

count_table.setCountOf(3, 0);
countTable.setCountOf(3, 0);

vector<int32_t> expected_elements = { 1, 7 };
EXPECT_EQ(expected_elements, count_table.getElementsWithNonzeroCounts());
vector<int32_t> expectedElements = { 1, 7 };
EXPECT_EQ(expectedElements, countTable.getElementsWithNonzeroCounts());
}

TEST(TruncatingCounts, TypicalCountTables_CountsTruncated)
{
const map<int32_t, int32_t> elementsAndCounts = { { 1, 2 }, { 3, 5 }, { 7, 15 }, { 10, 2 } };
CountTable countTable(elementsAndCounts);

{
const map<int32_t, int32_t> expectedElementsAndCounts = { { 1, 2 }, { 3, 5 }, { 5, 17 } };
const CountTable expectedCountTable(expectedElementsAndCounts);

EXPECT_EQ(expectedCountTable, collapseTopElements(countTable, 5));
}

{
const map<int32_t, int32_t> expectedElementsAndCounts = { { 1, 2 }, { 3, 22 } };
const CountTable expectedCountTable(expectedElementsAndCounts);

EXPECT_EQ(expectedCountTable, collapseTopElements(countTable, 3));
}
}
114 changes: 69 additions & 45 deletions input/LocusSpecDecoding.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,9 @@

#include "input/LocusSpecDecoding.hh"

#include <algorithm>
#include <memory>
#include <stdexcept>

#include <boost/optional.hpp>

Expand Down Expand Up @@ -59,9 +61,24 @@ static string extendLocusStructure(
const Reference& reference, const vector<GenomicRegion>& referenceRegions, const string& flanklessLocusStructure)
{

const auto& leftFlank = referenceRegions.front();
const auto& rightFlank = referenceRegions.back();
return reference.getSequence(leftFlank) + flanklessLocusStructure + reference.getSequence(rightFlank);
const auto& leftFlankRegion = referenceRegions.front();
string leftFlank = reference.getSequence(leftFlankRegion);

const auto& rightFlankRegion = referenceRegions.back();
string rightFlank = reference.getSequence(rightFlankRegion);

const size_t maxNsAllowedInFlanks = 5;
const size_t numNsInLeftFlank = std::count(leftFlank.begin(), leftFlank.end(), 'N');
const size_t numNsInRightFlank = std::count(rightFlank.begin(), rightFlank.end(), 'N');

if (numNsInLeftFlank + numNsInRightFlank > maxNsAllowedInFlanks)
{
const string message = "Flanks can contain at most " + to_string(maxNsAllowedInFlanks)
+ " characters N but found " + to_string(numNsInLeftFlank + numNsInRightFlank) + " Ns";
throw std::runtime_error(message);
}

return leftFlank + flanklessLocusStructure + rightFlank;
}

static vector<GenomicRegion>
Expand Down Expand Up @@ -238,64 +255,71 @@ LocusSpecification decodeLocusSpecification(
const LocusDescriptionFromUser& userDescription, Sex sampleSex, const Reference& reference,
const HeuristicParameters& heuristicParams)
{
assertValidity(userDescription);
try
{
assertValidity(userDescription);

const int kExtensionLength = heuristicParams.regionExtensionLength();
auto referenceRegionsWithFlanks = addFlankingRegions(kExtensionLength, userDescription.referenceRegions);
auto completeLocusStructure
= extendLocusStructure(reference, referenceRegionsWithFlanks, userDescription.locusStructure);
const int kExtensionLength = heuristicParams.regionExtensionLength();
auto referenceRegionsWithFlanks = addFlankingRegions(kExtensionLength, userDescription.referenceRegions);
auto completeLocusStructure
= extendLocusStructure(reference, referenceRegionsWithFlanks, userDescription.locusStructure);

GraphBlueprint blueprint = decodeFeaturesFromRegex(completeLocusStructure);
graphtools::Graph locusGraph = makeRegionGraph(blueprint, userDescription.locusId);
auto completeReferenceRegions = addReferenceRegionsForInterruptions(blueprint, referenceRegionsWithFlanks);
GraphBlueprint blueprint = decodeFeaturesFromRegex(completeLocusStructure);
graphtools::Graph locusGraph = makeRegionGraph(blueprint, userDescription.locusId);
auto completeReferenceRegions = addReferenceRegionsForInterruptions(blueprint, referenceRegionsWithFlanks);

GenomicRegion referenceRegionForEntireLocus = mergeRegions(userDescription.referenceRegions);
GenomicRegion referenceRegionForEntireLocus = mergeRegions(userDescription.referenceRegions);

vector<GenomicRegion> targetReadExtractionRegions;
for (const GenomicRegion& region : userDescription.targetRegions)
{
targetReadExtractionRegions.push_back(region.extend(kExtensionLength));
}
if (targetReadExtractionRegions.empty())
{
targetReadExtractionRegions.push_back(referenceRegionForEntireLocus.extend(kExtensionLength));
}
vector<GenomicRegion> targetReadExtractionRegions;
for (const GenomicRegion& region : userDescription.targetRegions)
{
targetReadExtractionRegions.push_back(region.extend(kExtensionLength));
}
if (targetReadExtractionRegions.empty())
{
targetReadExtractionRegions.push_back(referenceRegionForEntireLocus.extend(kExtensionLength));
}

const auto& contigName = reference.contigInfo().getContigName(referenceRegionForEntireLocus.contigIndex());
AlleleCount expectedAlleleCount = determineExpectedAlleleCount(sampleSex, contigName);
const auto& contigName = reference.contigInfo().getContigName(referenceRegionForEntireLocus.contigIndex());
AlleleCount expectedAlleleCount = determineExpectedAlleleCount(sampleSex, contigName);

NodeToRegionAssociation referenceRegionsOfGraphNodes
= associateNodesWithReferenceRegions(blueprint, locusGraph, completeReferenceRegions);
NodeToRegionAssociation referenceRegionsOfGraphNodes
= associateNodesWithReferenceRegions(blueprint, locusGraph, completeReferenceRegions);

LocusSpecification locusSpec(
userDescription.locusId, targetReadExtractionRegions, expectedAlleleCount, locusGraph,
referenceRegionsOfGraphNodes);
locusSpec.setOfftargetReadExtractionRegions(userDescription.offtargetRegions);
LocusSpecification locusSpec(
userDescription.locusId, targetReadExtractionRegions, expectedAlleleCount, locusGraph,
referenceRegionsOfGraphNodes);
locusSpec.setOfftargetReadExtractionRegions(userDescription.offtargetRegions);

int variantIndex = 0;
for (const auto& feature : blueprint)
{
if (doesFeatureDefineVariant(feature.type))
int variantIndex = 0;
for (const auto& feature : blueprint)
{
const GenomicRegion& referenceRegion = userDescription.referenceRegions.at(variantIndex);
if (doesFeatureDefineVariant(feature.type))
{
const GenomicRegion& referenceRegion = userDescription.referenceRegions.at(variantIndex);

VariantTypeFromUser variantDescription = userDescription.variantTypesFromUser.at(variantIndex);
const string& variantId = userDescription.variantIds[variantIndex];
VariantType variantType = determineVariantType(feature.type);
VariantSubtype variantSubtype = determineVariantSubtype(feature.type, variantDescription, referenceRegion);
VariantTypeFromUser variantDescription = userDescription.variantTypesFromUser.at(variantIndex);
const string& variantId = userDescription.variantIds[variantIndex];
VariantType variantType = determineVariantType(feature.type);
VariantSubtype variantSubtype
= determineVariantSubtype(feature.type, variantDescription, referenceRegion);

optional<NodeId> optionalReferenceNode = determineReferenceNode(feature, reference, referenceRegion);
optional<NodeId> optionalReferenceNode = determineReferenceNode(feature, reference, referenceRegion);

VariantClassification classification(variantType, variantSubtype);
VariantClassification classification(variantType, variantSubtype);

locusSpec.addVariantSpecification(
variantId, classification, referenceRegion, feature.nodeIds, optionalReferenceNode);
locusSpec.addVariantSpecification(
variantId, classification, referenceRegion, feature.nodeIds, optionalReferenceNode);

++variantIndex;
++variantIndex;
}
}
return locusSpec;
}
catch (const std::exception& e)
{
throw std::runtime_error("Error loading locus " + userDescription.locusId + ": " + e.what());
}

return locusSpec;
}

void assertValidity(const LocusDescriptionFromUser& userDescription)
Expand Down
Loading

0 comments on commit 9a0376d

Please sign in to comment.