diff --git a/Makefile.am b/Makefile.am index 6f950100e..978900e8f 100644 --- a/Makefile.am +++ b/Makefile.am @@ -54,6 +54,7 @@ SUBDIRS = \ FilterGraph \ GapFiller \ Sealer \ + RResolver \ AdjList \ vendor \ $(GTest) \ diff --git a/RResolver/BloomFilters.cpp b/RResolver/BloomFilters.cpp index f3da98a22..d9cc679fa 100644 --- a/RResolver/BloomFilters.cpp +++ b/RResolver/BloomFilters.cpp @@ -1,352 +1,290 @@ #include "BloomFilters.h" - -#include "RUtils.h" #include "RAlgorithmsShort.h" +#include "RUtils.h" #include "Common/IOUtil.h" #include "Common/Options.h" #include "DataLayer/FastaReader.h" -#include "vendor/nthash/stHashIterator.hpp" -#include "vendor/nthash/ntHashIterator.hpp" -#include "btllib/include/btllib/seq_reader.hpp" +#include "btllib/seq_reader.hpp" #include -#include #include #include +#include +#include -static std::vector -generateSpacedSeedsPatterns(const int count, const int size, const int misses) { - assert(count < size); - assert(misses < count); - assert(misses > 0); - - std::vector seeds; - auto randomEngine = std::mt19937(); - std::vector seedsPermutation; - - for (int i = 0; i < count; i++) { - seeds.push_back(std::string(size, '1')); - seedsPermutation.push_back(i); - } - - for (int i = 0; i < (size + 1) / 2; i++) { - std::shuffle(seedsPermutation.begin(), seedsPermutation.end(), randomEngine); - for (int j = 0; j < count; j++) { - char c = j < misses ? '0' : '1'; - seeds[seedsPermutation[j]][i] = c; - } - if (i < size / 2) { - for (int j = 0; j < count; j++) { - seeds[count - j - 1][size - i - 1] = seeds[j][i]; - } - } - } - - return seeds; -} - -void QCSpacedSeedsPatterns(const std::vector &patterns) { - int k = patterns[0].size(); - - for (const auto &pattern : patterns) { - int patternBasesCovered = k; - bool hasZero = false; - for (char c : pattern) { - if (c == '0') { - hasZero = true; - patternBasesCovered--; - break; - } - } - if (opt::verbose && !hasZero) { - std::cerr << "A spaced seed has no zeros!\n"; - } - if (opt::verbose && (patternBasesCovered < k * SPACED_SEEDS_QC_MIN_BASES_PATTERN)) { - std::cerr << "A spaced seed pattern does not cover enough bases!\n"; - } - } - - auto combinations = genCombinations(SPACED_SEEDS_COUNT, SPACED_SEEDS_MIN_HITS); - std::string overallErrorCoverage(k, '1'); - int overallErrorsCovered = 0; - std::string overallBaseCoverage(k, '0'); - int overallBasesCovered = 0; - std::string worstCombinationCoverage(k, '1'); - int worstCombinationBasesCovered = k; - - for (auto combination : combinations) { - std::string combinationCoverage(k, '0'); - int combinationBasesCovered = 0; - for (auto index : combination) { - const auto &pattern = patterns[index]; - for (unsigned i = 0; i < pattern.size(); i++) { - if (pattern[i] == '1' && combinationCoverage[i] != '1') { - combinationCoverage[i] = '1'; - combinationBasesCovered++; - } - } - } - if (combinationBasesCovered < worstCombinationBasesCovered) { - worstCombinationCoverage = combinationCoverage; - worstCombinationBasesCovered = combinationBasesCovered; - } - if (opt::verbose && (combinationBasesCovered < combinationCoverage.size() * SPACED_SEEDS_QC_MIN_BASES_COMBINATION)) { - std::cerr << "A spaced seed combination does not cover enough bases!\n"; - } - - for (unsigned i = 0; i < combinationCoverage.size(); i++) { - if (combinationCoverage[i] == '0') { - if (overallErrorCoverage[i] != '0') { - overallErrorCoverage[i] = '0'; - overallErrorsCovered++; - } - } else { - if (overallBaseCoverage[i] != '1') { - overallBaseCoverage[i] = '1'; - overallBasesCovered++; - } - } - } - } - - if (opt::verbose) { - std::cerr << std::fixed; - std::cerr << "Worst combination coverage: " << worstCombinationCoverage << '\n'; - std::cerr << "Worst combination bases covered: " << worstCombinationBasesCovered / double(k) * 100.0 << "%\n"; - std::cerr << "Overall base coverage:\n" << overallBaseCoverage << '\n'; - std::cerr << "Bases covered: " << overallBasesCovered / double(k) * 100.0 << "%\n"; - std::cerr << "Overall error coverage:\n" << overallErrorCoverage << '\n'; - std::cerr << "Errors covered: " << overallErrorsCovered / double(k) * 100.0 << "%\n"; - std::cerr << std::defaultfloat << std::endl; - } - - if (opt::verbose && (overallErrorsCovered < k * SPACED_SEEDS_QC_MIN_ERRORS_OVERALL)) { - std::cerr << "Spaced seeds do not cover enough error positions!\n\n"; - } - if (opt::verbose && (overallBasesCovered < k * SPACED_SEEDS_QC_MIN_BASES_OVERALL)) { - std::cerr << "Spaced seeds do not cover enough base positions!\n\n"; - } - std::cerr << std::flush; -} - -size_t VanillaBloomFilter::getPop() const { - size_t i, popBF = 0; - #pragma omp parallel for reduction(+:popBF) - for (i = 0; i < (m_size + 7) / 8; i++) { - popBF = popBF + popCnt(m_filter[i]); - } - return popBF; -} - -void VanillaBloomFilter::loadSequence(const Sequence& sequence) { - if (sequence.size() >= m_kmerSize) { -#if RMER_LOAD_STEP > 1 - unsigned offset = 0; -#endif - for (ntHashIterator it(sequence, HASH_NUM, m_kmerSize); it != ntHashIterator::end();) - { - insert(*it); -#if RMER_LOAD_STEP > 1 - for (unsigned i = 0; (i < RMER_LOAD_STEP) && (it != ntHashIterator::end()); ++i, ++it, ++offset) { - if (offset == sequence.size() - m_kmerSize) { insert(*it); } - } -#else - ++it; -#endif - } - } -} +btllib::KmerBloomFilter *g_vanillaBloom = nullptr; +btllib::SeedBloomFilter *g_spacedSeedsBloom = nullptr; -SpacedSeedsBloomFilter::SpacedSeedsBloomFilter(size_t filterSize, int kmerSize): - BloomFilter(filterSize, SPACED_SEEDS_COUNT * SPACED_SEEDS_HASH_PER_SEED, kmerSize) +static std::vector +generateSpacedSeedsPatterns(const int count, const int size, const int misses) { - const auto patterns = generateSpacedSeedsPatterns(SPACED_SEEDS_COUNT, kmerSize, SPACED_SEEDS_MISSES); - for (const auto &pattern : patterns) { - assert(pattern.size() == getKmerSize()); - } - if (SPACED_SEEDS_QC) { - QCSpacedSeedsPatterns(patterns); - } - spacedSeeds = stHashIterator::parseSeed(patterns); + assert(count < size); + assert(misses < count); + assert(misses > 0); + + std::vector seeds; + auto randomEngine = std::mt19937(); + std::vector seedsPermutation; + + for (int i = 0; i < count; i++) { + seeds.push_back(std::string(size, '1')); + seedsPermutation.push_back(i); + } + + for (int i = 0; i < (size + 1) / 2; i++) { + std::shuffle(seedsPermutation.begin(), seedsPermutation.end(), randomEngine); + for (int j = 0; j < count; j++) { + char c = j < misses ? '0' : '1'; + seeds[seedsPermutation[j]][i] = c; + } + if (i < size / 2) { + for (int j = 0; j < count; j++) { + seeds[count - j - 1][size - i - 1] = seeds[j][i]; + } + } + } + + return seeds; } -size_t SpacedSeedsBloomFilter::getPop() const { - size_t i, popBF = 0; - #pragma omp parallel for reduction(+:popBF) - for (i = 0; i < (m_size + 7) / 8; i++) { - popBF = popBF + popCnt(m_filter[i]); - } - return popBF; -} - -double SpacedSeedsBloomFilter::getFPR() const { - const double occupancy = double(getPop()) / double(m_size); - const double singleSeedFpr = std::pow(occupancy, SPACED_SEEDS_HASH_PER_SEED); - const double totalFpr = 1 - std::pow(1 - singleSeedFpr, SPACED_SEEDS_COUNT); - return totalFpr; +void +QCSpacedSeedsPatterns(const std::vector& patterns) +{ + int k = patterns[0].size(); + + for (const auto& pattern : patterns) { + int patternBasesCovered = k; + bool hasZero = false; + for (const char c : pattern) { + if (c == '0') { + hasZero = true; + patternBasesCovered--; + break; + } + } + if (opt::verbose && !hasZero) { + std::cerr << "A spaced seed has no zeros!\n"; + } + if (opt::verbose && (patternBasesCovered < k * SPACED_SEEDS_QC_MIN_BASES_PATTERN)) { + std::cerr << "A spaced seed pattern does not cover enough bases!\n"; + } + } + + const auto combinations = genCombinations(SPACED_SEEDS_COUNT, SPACED_SEEDS_MIN_HITS); + std::string overallErrorCoverage(k, '1'); + int overallErrorsCovered = 0; + std::string overall_base_coverage(k, '0'); + int overallBasesCovered = 0; + std::string worstCombinationCoverage(k, '1'); + int worstCombinationBasesCovered = k; + + for (auto combination : combinations) { + std::string combinationCoverage(k, '0'); + int combinationBasesCovered = 0; + for (auto index : combination) { + const auto& pattern = patterns[index]; + for (unsigned i = 0; i < pattern.size(); i++) { + if (pattern[i] == '1' && combinationCoverage[i] != '1') { + combinationCoverage[i] = '1'; + combinationBasesCovered++; + } + } + } + if (combinationBasesCovered < worstCombinationBasesCovered) { + worstCombinationCoverage = combinationCoverage; + worstCombinationBasesCovered = combinationBasesCovered; + } + if (opt::verbose && (combinationBasesCovered < + combinationCoverage.size() * SPACED_SEEDS_QC_MIN_BASES_COMBINATION)) { + std::cerr << "A spaced seed combination does not cover enough bases!\n"; + } + + for (unsigned i = 0; i < combinationCoverage.size(); i++) { + if (combinationCoverage[i] == '0') { + if (overallErrorCoverage[i] != '0') { + overallErrorCoverage[i] = '0'; + overallErrorsCovered++; + } + } else { + if (overall_base_coverage[i] != '1') { + overall_base_coverage[i] = '1'; + overallBasesCovered++; + } + } + } + } + + if (opt::verbose) { + std::cerr << std::fixed; + std::cerr << "Worst combination coverage: " << worstCombinationCoverage << '\n'; + std::cerr << "Worst combination bases covered: " + << worstCombinationBasesCovered / double(k) * 100.0 << "%\n"; + std::cerr << "Overall base coverage:\n" << overall_base_coverage << '\n'; + std::cerr << "Bases covered: " << overallBasesCovered / double(k) * 100.0 << "%\n"; + std::cerr << "Overall error coverage:\n" << overallErrorCoverage << '\n'; + std::cerr << "Errors covered: " << overallErrorsCovered / double(k) * 100.0 << "%\n"; + std::cerr << std::defaultfloat << std::endl; + } + + if (opt::verbose && (overallErrorsCovered < k * SPACED_SEEDS_QC_MIN_ERRORS_OVERALL)) { + std::cerr << "Spaced seeds do not cover enough error positions!\n\n"; + } + if (opt::verbose && (overallBasesCovered < k * SPACED_SEEDS_QC_MIN_BASES_OVERALL)) { + std::cerr << "Spaced seeds do not cover enough base positions!\n\n"; + } + std::cerr << std::flush; } -void SpacedSeedsBloomFilter::loadSequence(const Sequence& sequence) { - if (sequence.size() >= m_kmerSize) { -#if RMER_LOAD_STEP > 1 - unsigned offset = 0; +static void +loadReads(const std::vector& readFilepaths, int r) +{ + const size_t LOAD_PROGRESS_STEP = 100000; + const size_t PARALLEL_IO_SIZE = 100; + +#if _OPENMP + size_t threads_per_task = omp_get_max_threads() / PARALLEL_IO_SIZE; + if (threads_per_task < 1) { + threads_per_task = 1; + } + omp_set_nested(1); #endif - for (stHashIterator it(sequence, spacedSeeds, SPACED_SEEDS_COUNT, SPACED_SEEDS_HASH_PER_SEED, m_kmerSize); - it != stHashIterator::end();) - { - insert(*it); -#if RMER_LOAD_STEP > 1 - for (unsigned i = 0; (i < RMER_LOAD_STEP) && (it != stHashIterator::end()); ++i, ++it, ++offset) { - if (offset == sequence.size() - m_kmerSize) { insert(*it); } - } +#pragma omp parallel +#pragma omp single + { + int counter = 0; +#if _OPENMP + for (const auto& p : readFilepaths) { + const auto path = p; // Clang complains that range based loop is copying without this #else - ++it; + for (const auto& path : readFilepaths) { #endif - } - } -} - -static void loadReads(const std::vector& readFilepaths, int r) { - const size_t LOAD_PROGRESS_STEP = 100000; - const size_t PARALLEL_IO_SIZE = 10; - - size_t kmersPerReadMode = 0; - size_t kmersPerReadModeCount = 0; - Histogram kmersPerReadHist; - - size_t threads_per_task = omp_get_max_threads() / PARALLEL_IO_SIZE; - if (threads_per_task < 1) { threads_per_task = 1; } - omp_set_nested(1); - #pragma omp parallel - #pragma omp single - { - int counter = 0; - for (const auto path : readFilepaths) - { - #pragma omp task firstprivate(path) - { - assert(!path.empty()); - if (opt::verbose) { - #pragma omp critical (cerr) - std::cerr << "Loading reads from `" << path << "'...\n"; - } - - btllib::SeqReader reader(path); - uint64_t readCount = 0, dropped = 0; - #pragma omp parallel num_threads(threads_per_task) - for (btllib::SeqReader::Record record; record = reader.read();) { - if (int(record.seq.size()) != ReadBatch::current.size) { continue; } - bool loaded = false; - size_t qual_threshold_position = 0; - for (int j = record.qual.size() - 1; j >= 0; j--) { - if (record.qual[j] >= RMER_QUALITY_THRESHOLD) { - qual_threshold_position = j; - break; - } - } - size_t substr_len = std::min(long(r + opt::threshold - 1), - std::max(long(r + opt::threshold - 1), long(qual_threshold_position + 1))); - std::string seq = record.seq.substr(0, substr_len); - if (seq.size() >= g_vanillaBloom->getKmerSize()) { - #pragma omp critical (kmersPerReadHist) - kmersPerReadHist.insert(seq.size() - r + 1); - - g_vanillaBloom->loadSequence(seq); - if (opt::error_correction) { - g_spacedSeedsBloom->loadSequence(seq); - } - loaded = true; - } - if (opt::verbose) - #pragma omp critical (cerr) - { - if (!loaded) { dropped++; } - readCount++; - if (readCount % LOAD_PROGRESS_STEP == 0) - std::cerr << "\rLoaded " << readCount - << " reads into Bloom filter."; - } - } - if (opt::verbose) { - std::cerr << std::endl; - } - } - counter++; - if (counter % PARALLEL_IO_SIZE == 0) { - #pragma omp taskwait - } - } - } - - for (const auto& entry : kmersPerReadHist) { - if (entry.second > kmersPerReadModeCount) { - kmersPerReadMode = entry.first; - kmersPerReadModeCount = entry.second; - } - } - if (opt::verbose) { - std::cerr << "Kmers per read mode: " << kmersPerReadMode << '\n'; - } +#pragma omp task firstprivate(path) + { + assert(!path.empty()); + if (opt::verbose) { +#pragma omp critical(cerr) + std::cerr << "Loading reads from `" << path << "'...\n"; + } + + btllib::SeqReader reader(path); + uint64_t readCount = 0; +#pragma omp parallel num_threads(threads_per_task) + for (btllib::SeqReader::Record record; (record = reader.read());) { + if (int(record.seq.size()) != ReadSize::current.size) { + continue; + } + std::string seq = record.seq.substr(0, r + opt::extract - 1); + if (seq.size() >= g_vanillaBloom->get_k()) { + g_vanillaBloom->insert(seq); + if (opt::errorCorrection) { + g_spacedSeedsBloom->insert(seq); + } + if (opt::verbose) +#pragma omp critical(cerr) + { + readCount++; + if (readCount % LOAD_PROGRESS_STEP == 0) { + std::cerr << "\rLoaded " << readCount << " reads into Bloom filter."; + } + } + } + } + if (opt::verbose) { + std::cerr << std::endl; + } + } + counter++; + if (counter % PARALLEL_IO_SIZE == 0) { +#pragma omp taskwait + } + } + } } -VanillaBloomFilter* g_vanillaBloom = nullptr; -SpacedSeedsBloomFilter* g_spacedSeedsBloom = nullptr; - -void buildFilters(const std::vector& readFilepaths, const int r, const size_t bloomBytesTotal) +void +buildFilters( + const std::vector& readFilepaths, + const int r, + const size_t bloomBytesTotal) { - assert(bloomBytesTotal > 0); - try { - if (opt::verbose) { std::cerr << "Building Bloom filter(s) for r value " << r << '\n'; } - - delete g_vanillaBloom; - delete g_spacedSeedsBloom; - - size_t bloomBitsVanilla = size_t(bloomBytesTotal) * 8; - size_t bloomBitsSpacedSeeds = 0; - - if (opt::error_correction) { - double vanillaRatio = VANILLA_TO_SEEDS_MEM_RATIO * double(HASH_NUM) / (double(HASH_NUM) + double(SPACED_SEEDS_COUNT * SPACED_SEEDS_HASH_PER_SEED)); - bloomBitsVanilla = size_t(vanillaRatio * bloomBytesTotal) * 8; - bloomBitsSpacedSeeds = bloomBytesTotal * 8 - bloomBitsVanilla; - } - - - if (opt::verbose > 1) { - if (opt::error_correction) { - std::cerr << "Total Bloom filter memory = " << bytesToSI(bloomBytesTotal) << '\n'; - std::cerr << "Vanilla Bloom filter memory = " << bytesToSI(bloomBitsVanilla / 8) << '\n'; - std::cerr << "Spaced seeds Bloom filter memory = " << bytesToSI(bloomBitsSpacedSeeds / 8) << '\n'; - } else { - std::cerr << "Vanilla Bloom filter memory = " << bytesToSI(bloomBitsVanilla / 8) << '\n'; - } - } - - g_vanillaBloom = new VanillaBloomFilter(bloomBitsVanilla, r); - if (opt::error_correction) { - g_spacedSeedsBloom = new SpacedSeedsBloomFilter(bloomBitsSpacedSeeds, r); - } - - loadReads(readFilepaths, r); - - if (opt::verbose > 1) { - const auto vanillaFPR = g_vanillaBloom->getFPR(); - - std::cerr << "Vanilla Bloom filter (k = " << std::to_string(g_vanillaBloom->getKmerSize()) << setprecision(3) - << ") occupancy = " << double(g_vanillaBloom->getPop()) / double(g_vanillaBloom->getFilterSize()) * 100.0 << "%" - << ", FPR = " << vanillaFPR * 100.0 << "%" << endl; - - if (opt::error_correction) { - std::cerr << "FPR for base substitution = " << - (1 - std::pow(1 - vanillaFPR, 3 * g_vanillaBloom->getKmerSize() / SPACED_SEEDS_COUNT * SPACED_SEEDS_SNP_FRACTION)) * 100.0 << "%" << std::endl; - - std::cerr << "Spaced seeds Bloom filter (k = " << std::to_string(g_spacedSeedsBloom->getKmerSize()) << setprecision(3) - << ") occupancy = " << double(g_spacedSeedsBloom->getPop()) / double(g_spacedSeedsBloom->getFilterSize()) * 100.0 << "%" - << ", FPR = " << g_spacedSeedsBloom->getFPR() * 100.0 << "%" << endl; - } - } - } catch (const std::bad_alloc& e) { - std::cerr << "Bloom filter allocation failed: " << e.what() << '\n'; - exit(EXIT_FAILURE); - } + assert(bloomBytesTotal > 0); + try { + if (opt::verbose) { + std::cerr << "Building Bloom filter(s) for r value " << r << '\n'; + } + + delete g_vanillaBloom; + delete g_spacedSeedsBloom; + + size_t bloomBytesVanilla = size_t(bloomBytesTotal); + size_t bloomBytesSpacedSeeds = 0; + + if (opt::errorCorrection) { + double vanillaRatio = + VANILLA_TO_SEEDS_MEM_RATIO * double(HASH_NUM) / + (double(HASH_NUM) + double(SPACED_SEEDS_COUNT * SPACED_SEEDS_HASHES_PER_SEED)); + bloomBytesVanilla = size_t(vanillaRatio * bloomBytesTotal); + bloomBytesSpacedSeeds = bloomBytesTotal - bloomBytesVanilla; + } + + if (opt::verbose > 1) { + if (opt::errorCorrection) { + std::cerr << "Total Bloom filter memory = " << bytesToSI(bloomBytesTotal) << '\n'; + std::cerr << "Vanilla Bloom filter memory = " << bytesToSI(bloomBytesVanilla) + << '\n'; + std::cerr << "Spaced seeds Bloom filter memory = " + << bytesToSI(bloomBytesSpacedSeeds) << '\n'; + } else { + std::cerr << "Vanilla Bloom filter memory = " << bytesToSI(bloomBytesVanilla) + << '\n'; + } + } + + g_vanillaBloom = new btllib::KmerBloomFilter(bloomBytesVanilla, HASH_NUM, r); + if (opt::errorCorrection) { + const auto patterns = + generateSpacedSeedsPatterns(SPACED_SEEDS_COUNT, r, SPACED_SEEDS_MISSES); + for (const auto& pattern : patterns) { + assert(pattern.size() == size_t(r)); + } + if (SPACED_SEEDS_QC) { + QCSpacedSeedsPatterns(patterns); + } + g_spacedSeedsBloom = new btllib::SeedBloomFilter(bloomBytesSpacedSeeds, r, patterns, SPACED_SEEDS_HASHES_PER_SEED); + } + + loadReads(readFilepaths, r); + + if (opt::verbose > 1) { + const auto vanillaFPR = g_vanillaBloom->get_fpr(); + + std::cerr << "Vanilla Bloom filter (k = " + << std::to_string(g_vanillaBloom->get_k()) << std::setprecision(3) + << ") occupancy = " + << g_vanillaBloom->get_occupancy() * 100.0 + << "%" + << ", FPR = " << vanillaFPR * 100.0 << "%" << std::endl; + + if (opt::errorCorrection) { + std::cerr << "FPR for base substitution = " + << (1 - std::pow( + 1 - vanillaFPR, + 3 * g_vanillaBloom->get_k() / SPACED_SEEDS_COUNT * + SPACED_SEEDS_SNP_FRACTION)) * + 100.0 + << "%" << std::endl; + + std::cerr << "Spaced seeds Bloom filter (k = " + << std::to_string(g_spacedSeedsBloom->get_k()) << std::setprecision(3) + << ") occupancy = " + << g_spacedSeedsBloom->get_occupancy() * 100.0 + << "%" + << ", FPR = " << g_spacedSeedsBloom->get_fpr() * 100.0 << "%" << std::endl; + } + } + } catch (const std::bad_alloc& e) { + std::cerr << "Bloom filter allocation failed: " << e.what() << '\n'; + exit(EXIT_FAILURE); + } } diff --git a/RResolver/BloomFilters.h b/RResolver/BloomFilters.h index 530e5ba14..5f702dd33 100644 --- a/RResolver/BloomFilters.h +++ b/RResolver/BloomFilters.h @@ -1,95 +1,34 @@ #ifndef RRESOLVER_BLOOMFILTERS_H #define RRESOLVER_BLOOMFILTERS_H 1 -#include "Common/Sequence.h" -#include "vendor/btl_bloomfilter/BloomFilter.hpp" +#include "btllib/bloom_filter.hpp" -#include -#include -#include #include #include +#include +#include +#include -const int HASH_NUM = 7; // For vanilla bloom filter -const int SPACED_SEEDS_HASH_PER_SEED = 5; -const int SPACED_SEEDS_COUNT = 6; -const int SPACED_SEEDS_MIN_HITS = 1; -const double SPACED_SEEDS_MISSES = 1; -const bool SPACED_SEEDS_QC = true; -const double SPACED_SEEDS_QC_MIN_BASES_PATTERN = 0.70; -const double SPACED_SEEDS_QC_MIN_BASES_COMBINATION = 0.70; -const double SPACED_SEEDS_QC_MIN_BASES_OVERALL = 0.65; -const double SPACED_SEEDS_QC_MIN_ERRORS_OVERALL = 0.65; +const int HASH_NUM = 7; // For vanilla bloom filter +const int SPACED_SEEDS_HASHES_PER_SEED = 5; +const int SPACED_SEEDS_COUNT = 6; +const int SPACED_SEEDS_MIN_HITS = 1; +const double SPACED_SEEDS_MISSES = 1; +const bool SPACED_SEEDS_QC = true; +const double SPACED_SEEDS_QC_MIN_BASES_PATTERN = 0.70; +const double SPACED_SEEDS_QC_MIN_BASES_COMBINATION = 0.70; +const double SPACED_SEEDS_QC_MIN_BASES_OVERALL = 0.65; +const double SPACED_SEEDS_QC_MIN_ERRORS_OVERALL = 0.65; const unsigned MAX_ERRORS_PER_RMER = 1; -const unsigned RMER_LOAD_STEP = 1; -const double VANILLA_TO_SEEDS_MEM_RATIO = 1.15; - -typedef std::vector SpacedSeed; -typedef std::vector SpacedSeeds; - -class VanillaBloomFilter : protected BloomFilter { - -public: - - VanillaBloomFilter(size_t filterSize, int kmerSize): BloomFilter(filterSize, HASH_NUM, kmerSize) {} - - size_t getPop() const; - double getFPR() const { return std::pow(double(getPop())/double(m_size), double(m_hashNum)); } - - unsigned getHashNum() const { return BloomFilter::getHashNum(); } - unsigned getKmerSize() const { return BloomFilter::getKmerSize(); } - size_t getFilterSize() const { return BloomFilter::getFilterSize(); } - - void loadSequence(const Sequence& sequence); - - bool contains(const size_t precomputed[]) const { return BloomFilter::contains(precomputed); } - bool contains(vector const &precomputed) const { return BloomFilter::contains(precomputed); } - -}; - -class SpacedSeedsBloomFilter : protected BloomFilter { - -public: - - SpacedSeedsBloomFilter(size_t filterSize, int kmerSize); - - size_t getPop() const; - double getFPR() const; - - unsigned getHashNum() const { return BloomFilter::getHashNum(); } - unsigned getKmerSize() const { return BloomFilter::getKmerSize(); } - size_t getFilterSize() const { return BloomFilter::getFilterSize(); } - - void loadSequence(const Sequence& sequence); - - SpacedSeeds getHitSeeds(const size_t precomputed[]) const { - SpacedSeeds hitSeeds; - size_t normHash; - unsigned seedHits; - for (unsigned i = 0; i < SPACED_SEEDS_COUNT; i++) { - seedHits = 0; - for (unsigned j = 0; j < SPACED_SEEDS_HASH_PER_SEED; j++) { - normHash = precomputed[i * SPACED_SEEDS_HASH_PER_SEED + j] % m_size; - if (m_filter[normHash / bitsPerChar] & bitMask[normHash % bitsPerChar]) { seedHits++; } - } - if (seedHits == SPACED_SEEDS_HASH_PER_SEED) { - hitSeeds.push_back(spacedSeeds[i]); - } - } - return hitSeeds; - } - - SpacedSeeds getSpacedSeeds() const { return spacedSeeds; } - -private: - - SpacedSeeds spacedSeeds; - -}; +const double VANILLA_TO_SEEDS_MEM_RATIO = 1.15; -extern VanillaBloomFilter* g_vanillaBloom; -extern SpacedSeedsBloomFilter* g_spacedSeedsBloom; +extern btllib::KmerBloomFilter *g_vanillaBloom; +extern btllib::SeedBloomFilter *g_spacedSeedsBloom; -void buildFilters(const std::vector& readFilepaths, const int r, const size_t bloomBytesTotal); +void +buildFilters( + const std::vector& read_filepaths, + const int r, + const size_t bloom_bytes_total); #endif diff --git a/RResolver/Contigs.cpp b/RResolver/Contigs.cpp index 25c2eb052..a525090d5 100644 --- a/RResolver/Contigs.cpp +++ b/RResolver/Contigs.cpp @@ -1,217 +1,258 @@ #include "Contigs.h" #include -#include #include +#include #include Graph g_contigGraph; std::vector g_contigComments; ContigSequences g_contigSequences; -int distanceBetween(const ContigNode& node1, const ContigNode& node2) { - return get(edge_bundle, g_contigGraph, node1, node2).distance; +int +distanceBetween(const ContigNode& node1, const ContigNode& node2) +{ + return get(edge_bundle, g_contigGraph, node1, node2).distance; } -const ContigSequence& getContigSequence(const ContigNode &node) { - const long idx = node.index(); - assert(idx >= 0); - assert(idx < long(g_contigSequences.size())); - return g_contigSequences[idx]; +const ContigSequence& +getContigSequence(const ContigNode& node) +{ + const unsigned int idx = node.index(); + assert(idx < g_contigSequences.size()); + return g_contigSequences[idx]; } -int getContigSize(const ContigNode& node) { - return getContigSequence(node).size(); +int +getContigSize(const ContigNode& node) +{ + return getContigSequence(node).size(); } -const std::string& getContigComment(const ContigNode &node) { - const long id = node.id(); - assert(id >= 0); - assert(id < long(g_contigComments.size())); - return g_contigComments[id]; +const std::string& +getContigComment(const ContigNode& node) +{ + const unsigned int id = node.id(); + assert(id < g_contigComments.size()); + return g_contigComments[id]; } -Sequence getPathSequence(const ContigPath &path) { - assert(path.size() >= 1); - Sequence sequence = Sequence(getContigSequence(path[0])); - for (size_t i = 1; i < path.size(); i++) { - const auto &node = path[i]; - const auto distance = distanceBetween(path[i - 1], path[i]); - const auto overlap = -distance; - assert(overlap >= 0); - Sequence newSequence = Sequence(getContigSequence(node)); - assert(int(sequence.size()) >= overlap); - assert(int(newSequence.size()) >= overlap); - assert(sequence.substr(sequence.size() - overlap) == newSequence.substr(0, overlap)); - sequence += newSequence.substr(overlap); - } - return sequence; +Sequence +getPathSequence(const ContigPath& path) +{ + assert(path.size() >= 1); + Sequence sequence = Sequence(getContigSequence(path[0])); + for (size_t i = 1; i < path.size(); i++) { + const auto& node = path[i]; + const auto distance = distanceBetween(path[i - 1], path[i]); + const auto overlap = -distance; + assert(overlap >= 0); + Sequence newSequence = Sequence(getContigSequence(node)); + assert(int(sequence.size()) >= overlap); + assert(int(newSequence.size()) >= overlap); + assert(sequence.substr(sequence.size() - overlap) == newSequence.substr(0, overlap)); + sequence += newSequence.substr(overlap); + } + return sequence; } -Sequence getPathSequence(const ImaginaryContigPath &path) { - assert(path.size() >= 1); - Sequence sequence = Sequence(getContigSequence(path[0].first)); - for (size_t i = 1; i < path.size(); i++) { - const auto &node = path[i].first; - const auto &distance = path[i].second; - const auto overlap = -distance; - assert(overlap >= 0); - Sequence newSequence = Sequence(getContigSequence(node)); - assert(int(sequence.size()) >= overlap); - assert(int(newSequence.size()) >= overlap); - assert(sequence.substr(sequence.size() - overlap) == newSequence.substr(0, overlap)); - sequence += newSequence.substr(overlap); - } - return sequence; +Sequence +getPathSequence(const ImaginaryContigPath& path) +{ + assert(path.size() >= 1); + Sequence sequence = Sequence(getContigSequence(path[0].first)); + for (size_t i = 1; i < path.size(); i++) { + const auto& node = path[i].first; + const auto& distance = path[i].second; + const auto overlap = -distance; + assert(overlap >= 0); + Sequence newSequence = Sequence(getContigSequence(node)); + assert(int(sequence.size()) >= overlap); + assert(int(newSequence.size()) >= overlap); + assert(sequence.substr(sequence.size() - overlap) == newSequence.substr(0, overlap)); + sequence += newSequence.substr(overlap); + } + return sequence; } -long getContigNumOfKmers(const ContigNode& node) { - return get(vertex_bundle, g_contigGraph, node).coverage; +long +getContigNumOfKmers(const ContigNode& node) +{ + return get(vertex_bundle, g_contigGraph, node).coverage; } -double getContigBaseCoverage(const ContigNode& node) { - return double(getContigNumOfKmers(node)) * opt::k / double(getContigSequence(node).size() - opt::k + 1); +double +getContigBaseCoverage(const ContigNode& node) +{ + return double(getContigNumOfKmers(node)) * opt::k / + double(getContigSequence(node).size() - opt::k + 1); } -void loadContigGraph(const std::string &contigGraphPath) { - if (opt::verbose) std::cerr << "Loading contig graph from `" << contigGraphPath << "'...\n"; - std::ifstream fin(contigGraphPath); - assert_good(fin, contigGraphPath); - fin >> g_contigGraph; - assert(fin.eof()); - g_contigNames.lock(); - if (opt::verbose) { - std::cerr << "Contig graph loaded.\n"; - printGraphStats(std::cerr, g_contigGraph); - } +void +loadContigGraph(const std::string& contigGraphPath) +{ + if (opt::verbose) + std::cerr << "Loading contig graph from `" << contigGraphPath << "'...\n"; + std::ifstream fin(contigGraphPath); + assert_good(fin, contigGraphPath); + fin >> g_contigGraph; + assert(fin.eof()); + g_contigNames.lock(); + if (opt::verbose) { + std::cerr << "Contig graph loaded.\n"; + printGraphStats(std::cerr, g_contigGraph); + } } -void loadContigs(const std::string &contigsPath) { - if (opt::verbose) std::cerr << "Loading contigs from `" << contigsPath << "'...\n"; - FastaReader in(contigsPath.c_str(), FastaReader::NO_FOLD_CASE); - for (FastaRecord rec; in >> rec;) { - if (g_contigNames.count(rec.id) == 0) continue; - assert(g_contigSequences.size() / 2 == get(g_contigNames, rec.id)); - g_contigComments.push_back(rec.comment); - g_contigSequences.push_back(rec.seq); - g_contigSequences.push_back(reverseComplement(rec.seq)); - } - assert(in.eof()); - assert(!g_contigSequences.empty()); - opt::colourSpace = isdigit(g_contigSequences.front()[0]); - if (opt::verbose) { - std::cerr << "Contigs loaded.\n"; - } +void +loadContigs(const std::string& contigsPath) +{ + if (opt::verbose) + std::cerr << "Loading contigs from `" << contigsPath << "'...\n"; + FastaReader in(contigsPath.c_str(), FastaReader::NO_FOLD_CASE); + for (FastaRecord rec; in >> rec;) { + if (g_contigNames.count(rec.id) == 0) + continue; + assert(g_contigSequences.size() / 2 == get(g_contigNames, rec.id)); + g_contigComments.push_back(rec.comment); + g_contigSequences.push_back(rec.seq); + g_contigSequences.push_back(reverseComplement(rec.seq)); + } + assert(in.eof()); + assert(!g_contigSequences.empty()); + opt::colourSpace = isdigit(g_contigSequences.front()[0]); + if (opt::verbose) { + std::cerr << "Contigs loaded.\n"; + } } -void storeContigGraph(const std::string &contigGraphPath, const std::string &program, const std::string& commandLine) { - if (opt::verbose) { std::cerr << "Storing contig graph to `" << contigGraphPath << "'...\n"; } - std::ofstream fout(contigGraphPath.c_str()); - assert_good(fout, contigGraphPath); - write_graph(fout, g_contigGraph, program, commandLine); - assert_good(fout, contigGraphPath); - if (opt::verbose) { std::cerr << "Contig graph stored.\n"; } +void +storeContigGraph( + const std::string& contigGraphPath, + const std::string& program, + const std::string& commandLine) +{ + if (opt::verbose) { + std::cerr << "Storing contig graph to `" << contigGraphPath << "'...\n"; + } + std::ofstream fout(contigGraphPath.c_str()); + assert_good(fout, contigGraphPath); + write_graph(fout, g_contigGraph, program, commandLine); + assert_good(fout, contigGraphPath); + if (opt::verbose) { + std::cerr << "Contig graph stored.\n"; + } } -void storeContigs(const std::string &contigsPath) { - if (opt::verbose) { std::cerr << "Storing contigs to `" << contigsPath << "'...\n"; } - std::ofstream fout(contigsPath.c_str()); - assert_good(fout, contigsPath); - - Graph::vertex_iterator vertexStart, vertexEnd; - boost::tie(vertexStart, vertexEnd) = vertices(g_contigGraph); - for (auto it = vertexStart; it != vertexEnd; ++it) { - auto node = *it; - if (!get(vertex_removed, g_contigGraph, node) && !node.sense()) { - FastaRecord rec; - - std::stringstream ss; - ss << get(vertex_name, g_contigGraph, node); - std::string name = ss.str(); - - rec.id = name.substr(0, name.size() - 1); - rec.comment = getContigComment(node); - rec.seq = getContigSequence(node); - - fout << rec; - } - } - assert_good(fout, contigsPath); - if (opt::verbose) { std::cerr << "Contigs stored.\n"; } +void +storeContigs(const std::string& contigsPath) +{ + if (opt::verbose) { + std::cerr << "Storing contigs to `" << contigsPath << "'...\n"; + } + std::ofstream fout(contigsPath.c_str()); + assert_good(fout, contigsPath); + + Graph::vertex_iterator vertexStart, vertexEnd; + boost::tie(vertexStart, vertexEnd) = vertices(g_contigGraph); + for (auto it = vertexStart; it != vertexEnd; ++it) { + auto node = *it; + if (!get(vertex_removed, g_contigGraph, node) && !node.sense()) { + FastaRecord rec; + + std::stringstream ss; + ss << get(vertex_name, g_contigGraph, node); + std::string name = ss.str(); + + rec.id = name.substr(0, name.size() - 1); + rec.comment = getContigComment(node); + rec.seq = getContigSequence(node); + + fout << rec; + } + } + assert_good(fout, contigsPath); + if (opt::verbose) { + std::cerr << "Contigs stored.\n"; + } } -unsigned num_vertices_removed(const Graph& graph) { - unsigned removed = 0; - Graph::vertex_iterator vertexStart, vertexEnd; - boost::tie(vertexStart, vertexEnd) = vertices(graph); - for (auto it = vertexStart; it != vertexEnd; ++it) { - auto node = *it; - if (get(vertex_removed, graph, node)) { - removed++; - } - } - return removed; +unsigned +numVerticesRemoved(const Graph& graph) +{ + unsigned removed = 0; + Graph::vertex_iterator vertexStart, vertexEnd; + boost::tie(vertexStart, vertexEnd) = vertices(graph); + for (auto it = vertexStart; it != vertexEnd; ++it) { + auto node = *it; + if (get(vertex_removed, graph, node)) { + removed++; + } + } + return removed; } -void assembleContigs() { - if (opt::verbose) { - std::cerr << "Assembling contigs... "; - } +void +assembleContigs() +{ + if (opt::verbose) { + std::cerr << "Assembling contigs... "; + } - std::vector pathIDs; - ContigPaths paths; + std::vector pathIDs; + ContigPaths paths; - Graph newContigGraph(g_contigGraph); - assemble(newContigGraph, back_inserter(paths)); - assert(num_vertices(newContigGraph) >= num_vertices(g_contigGraph)); + Graph newContigGraph(g_contigGraph); + assemble(newContigGraph, back_inserter(paths)); + assert(num_vertices(newContigGraph) >= num_vertices(g_contigGraph)); - // Record all the contigs that are in a path. + // Record all the contigs that are in a path. std::vector seen(g_contigGraph.num_vertices() / 2); - g_contigNames.unlock(); - int counter = 0; - for (const auto &path : paths) { - assert(!path.empty()); - ContigNode pathNode(g_contigGraph.num_vertices() / 2 + counter, path[0].sense()); - std::string name = createContigName(); - pathIDs.push_back(name); - put(vertex_name, newContigGraph, pathNode, name); - for (const auto &node : path) { - seen[node.id()] = true; - } - counter++; - } - g_contigNames.lock(); - - for (const auto &path : paths) { + g_contigNames.unlock(); + int counter = 0; + for (const auto& path : paths) { + assert(!path.empty()); + ContigNode pathNode(g_contigGraph.num_vertices() / 2 + counter, path[0].sense()); + std::string name = createContigName(); + pathIDs.push_back(name); + put(vertex_name, newContigGraph, pathNode, name); + for (const auto& node : path) { + seen[node.id()] = true; + } + counter++; + } + g_contigNames.lock(); + + for (const auto& path : paths) { assert(!path.empty()); - Sequence sequence = getPathSequence(path); - unsigned coverage = 0; - for (const auto &node : path) { - if (!node.ambiguous()) { - coverage += g_contigGraph[node].coverage; - } - } - - std::stringstream ss; - ss << sequence.size() << ' ' << coverage << ' '; - ss << get(vertex_name, g_contigGraph, path.front()); - if (path.size() == 1) - return; - else if (path.size() == 3) - ss << ',' << get(vertex_name, g_contigGraph, path[1]); - else if (path.size() > 3) - ss << ",..."; - ss << ',' << get(vertex_name, g_contigGraph, path.back()); - - g_contigSequences.push_back(sequence); - g_contigSequences.push_back(reverseComplement(sequence)); - g_contigComments.push_back(ss.str()); + Sequence sequence = getPathSequence(path); + unsigned coverage = 0; + for (const auto& node : path) { + if (!node.ambiguous()) { + coverage += g_contigGraph[node].coverage; + } + } + + std::stringstream ss; + ss << sequence.size() << ' ' << coverage << ' '; + ss << get(vertex_name, g_contigGraph, path.front()); + if (path.size() == 1) + return; + else if (path.size() == 3) + ss << ',' << get(vertex_name, g_contigGraph, path[1]); + else if (path.size() > 3) + ss << ",..."; + ss << ',' << get(vertex_name, g_contigGraph, path.back()); + + g_contigSequences.push_back(sequence); + g_contigSequences.push_back(reverseComplement(sequence)); + g_contigComments.push_back(ss.str()); } - - g_contigGraph.swap(newContigGraph); - if (opt::verbose) { - std::cerr << "Done!\n"; - } + g_contigGraph.swap(newContigGraph); + + if (opt::verbose) { + std::cerr << "Done!\n"; + } } diff --git a/RResolver/Contigs.h b/RResolver/Contigs.h index 712e6f915..d22496ced 100644 --- a/RResolver/Contigs.h +++ b/RResolver/Contigs.h @@ -1,15 +1,15 @@ #ifndef RRESOLVER_CONTIGS_H #define RRESOLVER_CONTIGS_H 1 -#include "Common/Options.h" -#include "Common/StringUtil.h" -#include "Common/IOUtil.h" -#include "Common/Sequence.h" #include "Common/ConstString.h" -#include "Common/ContigPath.h" #include "Common/ContigNode.h" +#include "Common/ContigPath.h" #include "Common/ContigProperties.h" #include "Common/Histogram.h" +#include "Common/IOUtil.h" +#include "Common/Options.h" +#include "Common/Sequence.h" +#include "Common/StringUtil.h" #include "DataLayer/FastaReader.h" #include "Graph/ContigGraph.h" #include "Graph/ContigGraphAlgorithms.h" @@ -17,60 +17,77 @@ #include "Graph/GraphIO.h" #include "Graph/GraphUtil.h" +#include #include #include -#include -typedef ContigGraph > Graph; +typedef ContigGraph> Graph; typedef Graph::vertex_descriptor vertex_descriptor; typedef Graph::edge_descriptor edge_descriptor; typedef Graph::adjacency_iterator adjacency_iterator; typedef Graph::in_edge_iterator in_edge_iterator; typedef Graph::out_edge_iterator out_edge_iterator; -class ContigSequence : public const_string { - +class ContigSequence : public const_string +{ public: + ContigSequence(const std::string& s) + : const_string(s) + { + strSize = const_string::size(); + } - ContigSequence(const std::string& s): const_string(s) { - strSize = const_string::size(); - } - - size_t size() const { return strSize; } + size_t size() const { return strSize; } private: - - size_t strSize; - + size_t strSize; }; typedef std::vector> ContigSequencesInfo; typedef std::vector ContigSequences; typedef std::vector ContigPath; typedef std::vector ContigPaths; -typedef std::vector> ImaginaryContigPath; // Each element is a pair of contig and distance to the next +typedef std::vector> + ImaginaryContigPath; // Each element is a pair of contig and distance to the next typedef std::set ImaginaryContigPaths; extern Graph g_contigGraph; extern std::vector g_contigComments; extern ContigSequences g_contigSequences; -int distanceBetween(const ContigNode& node1, const ContigNode& node2); -const ContigSequence& getContigSequence(const ContigNode &node); -int getContigSize(const ContigNode& node); -const std::string& getContigComment(const ContigNode &node); -Sequence getPathSequence(const ContigPath &path); -Sequence getPathSequence(const ImaginaryContigPath &path); -long getContigNumOfKmers(const ContigNode& node); -double getContigBaseCoverage(const ContigNode& node); - -void loadContigGraph(const std::string &contigGraphPath); -void loadContigs(const std::string &contigsPath); -void storeContigGraph(const std::string &contigGraphPath, const std::string &program, const std::string& commandLine); -void storeContigs(const std::string &contigsPath); - -unsigned num_vertices_removed(const Graph& contigGraph); - -void assembleContigs(); +int +distanceBetween(const ContigNode& node1, const ContigNode& node2); +const ContigSequence& +getContigSequence(const ContigNode& node); +int +getContigSize(const ContigNode& node); +const std::string& +getContigComment(const ContigNode& node); +Sequence +getPathSequence(const ContigPath& path); +Sequence +getPathSequence(const ImaginaryContigPath& path); +long +getContigNumOfKmers(const ContigNode& node); +double +getContigBaseCoverage(const ContigNode& node); + +void +loadContigGraph(const std::string& contigGraphPath); +void +loadContigs(const std::string& contigsPath); +void +storeContigGraph( + const std::string& contigGraphPath, + const std::string& program, + const std::string& commandLine); +void +storeContigs(const std::string& contigsPath); + +unsigned +numVerticesRemoved(const Graph& contigGraph); + +void +assembleContigs(); #endif diff --git a/RResolver/Makefile.am b/RResolver/Makefile.am index a5758aa9d..3ef6e32e0 100644 --- a/RResolver/Makefile.am +++ b/RResolver/Makefile.am @@ -1,19 +1,35 @@ bin_PROGRAMS = abyss-rresolver-short -abyss_rresolver_short_CPPFLAGS = -I$(top_srcdir) \ - -I$(top_srcdir)/Common \ - -I$(top_srcdir)/DataLayer +abyss_rresolver_short_CXXFLAGS = \ + $(AM_CXXFLAGS) $(OPENMP_CXXFLAGS) \ + -I$(top_srcdir) -I$(top_srcdir)/Common -I$(top_srcdir)/DataLayer -I$(top_srcdir)/RResolver/btllib/include -abyss_rresolver_short_CXXFLAGS = $(AM_CXXFLAGS) $(OPENMP_CXXFLAGS) +abyss_rresolver_short_SOURCES = \ + RResolverShort.cpp \ + Contigs.cpp Contigs.h + +LDFLAGS += -pthread abyss_rresolver_short_LDADD = \ + $(top_builddir)/DataLayer/libdatalayer.a \ + $(top_builddir)/Common/libcommon.a \ + libralgorithmsshort.a + +noinst_LIBRARIES = libralgorithmsshort.a + +libralgorithmsshort_a_CXXFLAGS = \ + $(AM_CXXFLAGS) $(OPENMP_CXXFLAGS) \ + -I$(top_srcdir) -I$(top_srcdir)/Common -I$(top_srcdir)/DataLayer -I$(top_srcdir)/RResolver/btllib/include + +libralgorithmsshort_a_SOURCES = \ + RAlgorithmsShort.cpp RAlgorithmsShort.h \ + BloomFilters.cpp BloomFilters.h \ + Contigs.cpp Contigs.h \ + SequenceTree.cpp SequenceTree.h \ + RUtils.cpp RUtils.h + +libralgorithmsshort_a_LIBADD = \ $(top_builddir)/DataLayer/libdatalayer.a \ $(top_builddir)/Common/libcommon.a -abyss_rresolver_short_SOURCES = \ - RAlgorithmsShort.cpp \ - BloomFilters.cpp \ - Contigs.cpp \ - SequenceTree.cpp \ - RUtils.cpp \ - RResolverShort.cpp +EXTRA_DIST = btllib diff --git a/RResolver/RAlgorithmsShort.cpp b/RResolver/RAlgorithmsShort.cpp index accca3220..c01f91e91 100644 --- a/RResolver/RAlgorithmsShort.cpp +++ b/RResolver/RAlgorithmsShort.cpp @@ -1,1029 +1,1245 @@ #include "RAlgorithmsShort.h" -#include "vendor/nthash/stHashIterator.hpp" -#include "vendor/nthash/ntHashIterator.hpp" +#include "RUtils.h" +#include "SequenceTree.h" #include "btllib/include/btllib/seq_reader.hpp" +#if _OPENMP #include -#include +#endif + +#include #include +#include +#include #include #include -#include -#include #include -#include - -static unsigned char BASES[] = { 'A', 'C', 'T', 'G' }; +#include -typedef std::map> SupportMap; -typedef std::map RepeatSupportMap; +template +constexpr std::size_t countof(T(&)[N]) { return N; } -long ReadBatch::readsSampleSize = 0; -std::vector ReadBatch::batches; -ReadBatch ReadBatch::current(0); +typedef std::map> SupportMap; +typedef std::map RepeatSupportMap; -class FractionHistogram : public Histogram { +long ReadSize::readsSampleSize = 0; +std::vector ReadSize::readSizes; +ReadSize ReadSize::current(0); -public: +class FractionHistogram : public Histogram +{ - void insert(double fraction) { - assert(fraction >= 0); - assert(fraction <= 1); - Histogram::insert(int(fraction * 100)); - } + public: + void insert(double fraction) + { + assert(fraction >= 0); + assert(fraction <= 1); + Histogram::insert(int(fraction * 100)); + } - friend std::ostream& operator<<(std::ostream& o, const FractionHistogram& h) + friend std::ostream& operator<<(std::ostream& o, const FractionHistogram& h) { o << (Histogram&)h; - if (h.size() == 0 || (--h.end())->first != 100) { - o << 100 << "\t0\n"; - } + if (h.size() == 0 || (--h.end())->first != 100) { + o << 100 << "\t0\n"; + } return o; } - }; -class Resolution { - -public: - - Resolution(const ReadBatch& batch, int r): batch(batch), r(r) {} - - RepeatSupportMap repeatSupportMap; - const ReadBatch& batch; - int r; - Histogram findsHistogram; - FractionHistogram fractionFindsHistogram; - Histogram calculatedTestsHistogram; - bool failed = false; +class Resolution +{ + public: + Resolution(const ReadSize& batch, int r) + : batch(batch) + , r(r) + {} + + RepeatSupportMap repeatSupportMap; + const ReadSize& batch; + int r; + Histogram findsHistogram; + FractionHistogram fractionFindsHistogram; + Histogram calculatedTestsHistogram; + bool failed = false; }; -static int getMinWindowLength(const int tests, const int repeatSize, const int minMargin) { - return tests - 1 + minMargin + repeatSize + minMargin; +static int +getMinWindowLength(const int tests, const int repeatSize, const int minMargin) +{ + return tests - 1 + minMargin + repeatSize + minMargin; } -static bool windowLongEnough(const int windowSize, const int tests, const int repeatSize, const int minMargin) { - return windowSize >= getMinWindowLength(tests, repeatSize, minMargin); +static bool +windowLongEnough(const int windowSize, const int tests, const int repeatSize, const int minMargin) +{ + return windowSize >= getMinWindowLength(tests, repeatSize, minMargin); } -//static int numOfTests(const int repeatSize, const int windowSize, const int minMargin) { +// static int numOfTests(const int repeatSize, const int windowSize, const int minMargin) { // return windowSize - minMargin - repeatSize - minMargin + 1; //} -static int getMargin(const int windowSize, const int tests, const int repeatSize, const int minMargin) { - assert(windowLongEnough(windowSize, tests, repeatSize, minMargin)); - const int requiredSeqSize = windowSize + tests - 1; - const int margin = (requiredSeqSize - repeatSize + 1) / 2; - assert(margin >= minMargin); - return margin; +static int +getMargin(const int windowSize, const int tests, const int repeatSize, const int minMargin) +{ + assert(windowLongEnough(windowSize, tests, repeatSize, minMargin)); + const int requiredSeqSize = windowSize + tests - 1; + const int margin = (requiredSeqSize - repeatSize + 1) / 2; + assert(margin >= minMargin); + return margin; } -static bool determineShortReadStats(const std::vector& readFilenames) { - if (opt::verbose) { - std::cerr << "Determining read stats..." << std::endl; - } - ReadBatch::batches.clear(); - #pragma omp parallel - #pragma omp single - { - for (const auto filename : readFilenames) - #pragma omp task firstprivate(filename) - { - Histogram hist; - std::map qualThresholdPositionsHists; - - btllib::SeqReader reader(filename); - for (btllib::SeqReader::Record record; (record = reader.read()) && (record.num < READ_STATS_SAMPLE_SIZE);) { - if (record.seq.size() > MAX_READ_SIZE) { continue; } - hist.insert(record.seq.size()); - for (int j = record.qual.size() - 1; j >= 0; j--) { - if (record.qual[j] >= RMER_QUALITY_THRESHOLD) { - qualThresholdPositionsHists[record.seq.size()].insert(j); - break; - } - } - } - - #pragma omp critical (ReadBatches) - { - for (const auto& i : hist) { - ReadBatch* batch = nullptr; - bool found = false; - for (auto& b : ReadBatch::batches) { - if (b.size == i.first) { - found = true; - batch = &b; - break; - } - } - if (!found) { - ReadBatch::batches.push_back(ReadBatch(i.first)); - batch = &(ReadBatch::batches.back()); - } - batch->sampleCount += i.second; - auto& qualHist = batch->qualThresholdPositions; - for (const auto& q: qualThresholdPositionsHists[i.first]) { - for (size_t n = 0; n < q.second; n++) { - qualHist.insert(q.first); - } - } - } - } - } - #pragma omp taskwait - } - - ReadBatch::readsSampleSize = 0; - for (const auto& batch : ReadBatch::batches) { - ReadBatch::readsSampleSize += batch.sampleCount; - } - - std::sort(ReadBatch::batches.begin(), ReadBatch::batches.end(), [] (ReadBatch a, ReadBatch b) { return a.sampleCount > b.sampleCount; }); - - if (ReadBatch::batches.size() == 0) { - std::cerr << "Insufficient number of short reads. Finishing..." << std::endl; - return false; - } - - if (ReadBatch::batches[0].getFractionOfTotal() < READ_BATCH_FRACTION_THRESHOLD) { - std::cerr << "Insufficient reads of same size. Finishing..." << std::endl; - return false; - } - - std::vector batchesFiltered; - for (const auto& b: ReadBatch::batches) { - if (b.getFractionOfTotal() >= READ_BATCH_FRACTION_THRESHOLD) { - batchesFiltered.push_back(b); - } - } - ReadBatch::batches = batchesFiltered; - - std::sort(ReadBatch::batches.begin(), ReadBatch::batches.end(), [] (ReadBatch a, ReadBatch b) { return a.size < b.size; }); - if (opt::verbose) { - std::cerr << "Read lengths determined to be: " << std::fixed; - std::cerr << ReadBatch::batches[0].size << " (" << (ReadBatch::batches[0].getFractionOfTotal() * 100.0) << "%)"; - for (size_t i = 1; i < ReadBatch::batches.size(); i++) { - std::cerr << ", " << ReadBatch::batches[i].size << " (" << (ReadBatch::batches[i].getFractionOfTotal() * 100.0) << "%)"; - } - std::cerr << std::defaultfloat << std::endl; - } - - for (size_t i = 0; i < ReadBatch::batches.size(); i++) { - auto& batch = ReadBatch::batches[i]; - int r = batch.qualThresholdPositions.median() - opt::threshold + 1; - int prevR = opt::k; - if (i > 0) { - prevR = ReadBatch::batches[i - 1].rValues.back(); - } - int steps = 0; - while ((r - prevR > R_VALUES_STEP) && (steps < R_STEPS_MAX)) { - if (r - opt::k > R_MAX_K_DIFF) { r = opt::k + R_MAX_K_DIFF; } - batch.rValues.push_back(r); - r -= R_VALUES_STEP; - steps++; - } - std::reverse(batch.rValues.begin(), batch.rValues.end()); - } - - if (opt::verbose) { - std::cerr << "Using r values: "; - for (size_t i = 0, j = 0; i < ReadBatch::batches.size(); i++) { - j = 0; - for (auto r : ReadBatch::batches[i].rValues) { - std::cerr << r << " (" << ReadBatch::batches[i].size << + ")"; - if ((i < ReadBatch::batches.size() - 1) || (j < ReadBatch::batches[i].rValues.size() - 1)) { std::cerr << ", "; } - j++; - } - } - std::cerr << '\n'; - } - - return true; -} +static bool +determineShortReadStats(const std::vector& readFilenames) +{ + if (opt::verbose) { + std::cerr << "Determining read stats..." << std::endl; + } + ReadSize::readSizes.clear(); +#pragma omp parallel +#pragma omp single + { +#if _OPENMP + for (const auto& f : readFilenames) { + const auto filename = + f; // Clang complains that range based loop is copying without this +#else + for (const auto& filename : readFilenames) { +#endif +#pragma omp task firstprivate(filename) + { + Histogram hist; + std::map qualThresholdPositionsHists; + + btllib::SeqReader reader(filename); + for (btllib::SeqReader::Record record; + (record = reader.read()) && (record.num < READ_STATS_SAMPLE_SIZE);) { + if (record.seq.size() > MAX_READ_SIZE) { + continue; + } + hist.insert(record.seq.size()); + for (int j = record.qual.size() - 1; j >= 0; j--) { + if (record.qual[j] >= opt::readQualityThreshold) { + qualThresholdPositionsHists[record.seq.size()].insert(j); + break; + } + } + } + +#pragma omp critical(ReadSizes) + { + for (const auto& i : hist) { + ReadSize* batch = nullptr; + bool found = false; + for (auto& b : ReadSize::readSizes) { + if (b.size == i.first) { + found = true; + batch = &b; + break; + } + } + if (!found) { + ReadSize::readSizes.push_back(ReadSize(i.first)); + batch = &(ReadSize::readSizes.back()); + } + batch->sampleCount += i.second; + auto& qualHist = batch->qualThresholdPositions; + for (const auto& q : qualThresholdPositionsHists[i.first]) { + for (size_t n = 0; n < q.second; n++) { + qualHist.insert(q.first); + } + } + } + } + } + } +#pragma omp taskwait + } + + ReadSize::readsSampleSize = 0; + for (const auto& batch : ReadSize::readSizes) { + ReadSize::readsSampleSize += batch.sampleCount; + } + + std::sort(ReadSize::readSizes.begin(), ReadSize::readSizes.end(), [](ReadSize a, ReadSize b) { + return a.sampleCount > b.sampleCount; + }); -static Support testSequence(const Sequence& sequence) { - int found = 0; - int tests = 0; - unsigned r = g_vanillaBloom->getKmerSize(); - if (sequence.size() >= r) { - tests = sequence.size() - r + 1; - - int offset = 0; - - if (opt::error_correction) { - stHashIterator it(sequence, g_spacedSeedsBloom->getSpacedSeeds(), SPACED_SEEDS_COUNT, SPACED_SEEDS_HASH_PER_SEED, r); - for (; it != stHashIterator::end(); ++it, ++offset) - { - auto hitSeeds = g_spacedSeedsBloom->getHitSeeds(*it); - if (hitSeeds.size() > 0) { - it.snp({}, {}, g_vanillaBloom->getHashNum()); - if (g_vanillaBloom->contains(*it)) { - found++; - } else { - bool success = false; - for (auto hitSeed : hitSeeds) { - for (auto seedIt = (hitSeed.begin() + std::round(hitSeed.size() * (1.00 - SPACED_SEEDS_SNP_FRACTION))); seedIt != hitSeed.end(); ++seedIt) { - const auto pos = *seedIt; - for (auto base : BASES) { - if (base == (unsigned char)(sequence[offset + pos])) { continue; } - it.snp({ pos }, { base }, g_vanillaBloom->getHashNum()); - if (g_vanillaBloom->contains(*it)) { - success = true; - found++; - break; - } - } - if (success) { break; } - } - if (success) { break; } - } - } - } - } - } else { - for (ntHashIterator it(sequence, HASH_NUM, r); it != ntHashIterator::end(); ++it, ++offset) - { - if (g_vanillaBloom->contains(*it)) { - found++; - } - } - } - } - return Support(found, tests); + if (ReadSize::readSizes.size() == 0) { + std::cerr << "Insufficient number of short reads. Finishing..." << std::endl; + return false; + } + + if (ReadSize::readSizes[0].getFractionOfTotal() < READ_BATCH_FRACTION_THRESHOLD) { + std::cerr << "Insufficient reads of same size. Finishing..." << std::endl; + return false; + } + + std::vector readSizesFiltered; + for (const auto& b : ReadSize::readSizes) { + if (b.getFractionOfTotal() >= READ_BATCH_FRACTION_THRESHOLD) { + readSizesFiltered.push_back(b); + } + } + ReadSize::readSizes = readSizesFiltered; + + std::sort(ReadSize::readSizes.begin(), ReadSize::readSizes.end(), [](ReadSize a, ReadSize b) { + return a.size < b.size; + }); + if (opt::verbose) { + std::cerr << "Read lengths determined to be: " << std::fixed; + std::cerr << ReadSize::readSizes[0].size << " (" + << (ReadSize::readSizes[0].getFractionOfTotal() * 100.0) << "%)"; + for (size_t i = 1; i < ReadSize::readSizes.size(); i++) { + std::cerr << ", " << ReadSize::readSizes[i].size << " (" + << (ReadSize::readSizes[i].getFractionOfTotal() * 100.0) << "%)"; + } + std::cerr << std::defaultfloat << std::endl; + } + + std::sort(opt::rValues.begin(), opt::rValues.end()); + for (size_t i = 0; i < ReadSize::readSizes.size(); i++) { + auto& batch = ReadSize::readSizes[i]; + if (opt::rValues.size() > 0) { + if (int(i) >= int(opt::rValues.size())) { continue; } + const int r = opt::rValues[i - (ReadSize::readSizes.size() - opt::rValues.size())]; + if (r <= int(opt::k)) { + std::cerr << "r size (" << r << ") must be larger than assembly k (" << opt::k << ")." << std::endl; + std::exit(-1); + } + if (r > batch.size - opt::extract + 1) { + std::cerr << "r size (" << r << ") must be smaller than or equal to read size - extract + 1 (" << batch.size - opt::extract + 1 << ")." << std::endl; + std::exit(-1); + } + batch.rValues.push_back(r); + } else { + const int r = std::min({ int(opt::k + R_HEURISTIC), int(batch.size * R_HEURISTIC_A + R_HEURISTIC_B), int(batch.size - opt::extract + 1) }); + if (r > int(opt::k)) { + batch.rValues.push_back(r); + } + } + } + + if (opt::verbose) { + std::cerr << "Using r values: "; + for (size_t i = 0, j = 0; i < ReadSize::readSizes.size(); i++) { + j = 0; + for (auto r : ReadSize::readSizes[i].rValues) { + std::cerr << r << " (" << ReadSize::readSizes[i].size << + ")"; + if ((i < ReadSize::readSizes.size() - 1) || + (j < ReadSize::readSizes[i].rValues.size() - 1)) { + std::cerr << ", "; + } + j++; + } + } + std::cerr << '\n'; + } + + std::sort(opt::covApproxFactors.begin(), opt::covApproxFactors.end()); + for (size_t i = 0; i < ReadSize::readSizes.size(); i++) { + auto& batch = ReadSize::readSizes[i]; + if (int(i) < int(opt::covApproxFactors.size())) { + batch.covApproxFactor = opt::covApproxFactors[i]; + } + } + if (opt::verbose) { + std::cerr << "Using coverage approximation factors: "; + for (size_t i = 0; i < ReadSize::readSizes.size(); i++) { + std::cerr << ReadSize::readSizes[i].covApproxFactor << " (" << ReadSize::readSizes[i].size << + ")"; + if (i < ReadSize::readSizes.size() - 1) { + std::cerr << ", "; + } + } + std::cerr << '\n'; + } + + return true; } -static Support testCombination(const std::string &head, const std::string &repeat, const std::string &tail, const int requestedTests) +static Support +testSequence(const Sequence& sequence) { - const auto windowSize = g_vanillaBloom->getKmerSize(); - - auto plannedTests = requestedTests; - if (plannedTests < opt::min_tests) { plannedTests = opt::min_tests; } - if (plannedTests > opt::min_tests + MAX_TESTS_OFFSET) { return Support(); } - - const auto margin = getMargin(windowSize, plannedTests, repeat.size(), MIN_MARGIN); - - int possibleTests = head.size() + repeat.size() + tail.size() - windowSize + 1; - if (possibleTests < plannedTests || long(head.size()) < margin || long(tail.size()) < margin) { - return Support(); - } - - Sequence sequence; - if (possibleTests > plannedTests + 1) { - assert(long(head.size()) > margin || long(tail.size()) > margin); - sequence = head.substr(head.size() - margin) + repeat + tail.substr(0, margin); - } else { - sequence = head + repeat + tail; - } - possibleTests = sequence.size() - windowSize + 1; - - assert(plannedTests <= possibleTests); - assert(possibleTests <= plannedTests + 1); - assert(int(sequence.size()) >= MIN_MARGIN + int(repeat.size()) + MIN_MARGIN); - assert(int(sequence.size()) < int(windowSize) * 2); - - return testSequence(sequence); + static unsigned char BASES[] = { 'A', 'C', 'T', 'G' }; + int found = 0; + int tests = 0; + unsigned r = g_vanillaBloom->get_k(); + if (sequence.size() >= r) { + tests = sequence.size() - r + 1; + int offset = 0; + if (opt::errorCorrection) { + btllib::NtHash nthash(sequence, r, g_vanillaBloom->get_hash_num()); + for (const auto hitSeeds : g_spacedSeedsBloom->contains(sequence)) { + nthash.roll(); + if (hitSeeds.size() > 0) { + nthash.sub({}, {}); + if (g_vanillaBloom->contains(nthash.hashes())) { + found++; + } else { + bool success = false; + for (const auto hitSeed : hitSeeds) { + const auto seed = g_spacedSeedsBloom->get_parsed_seeds()[hitSeed]; + for (auto seedIt = + (seed.begin() + + std::round( + seed.size() * (1.00 - SPACED_SEEDS_SNP_FRACTION))); + seedIt != seed.end(); + ++seedIt) { + const auto pos = *seedIt; + for (auto base : BASES) { + if (base == (unsigned char)(sequence[offset + pos])) { + continue; + } + nthash.sub({ pos }, { base }); + if (g_vanillaBloom->contains(nthash.hashes())) { + success = true; + found++; + break; + } + } + if (success) { + break; + } + } + if (success) { + break; + } + } + } + } + offset++; + } + } else { + found = g_vanillaBloom->contains(sequence); + } + } + return Support(found, tests); } -static double expectedSpacingBetweenReads(const ContigPath& path) { - const long pathLength = 100000; - const double leftContigBaseCoverage = getContigBaseCoverage(path[0]); - const double rightContigBaseCoverage = getContigBaseCoverage(path[path.size() - 1]); - const double pathBaseCoverage = std::min(leftContigBaseCoverage, rightContigBaseCoverage); - const double pathBases = pathBaseCoverage * (pathLength - opt::k + 1); +static Support +testCombination( + const std::string& head, + const std::string& repeat, + const std::string& tail, + const int requestedTests) +{ + const auto windowSize = g_vanillaBloom->get_k(); + + auto plannedTests = requestedTests; + if (plannedTests < opt::minTests) { + plannedTests = opt::minTests; + } + + int possibleTests = head.size() + repeat.size() + tail.size() - windowSize + 1; + if (possibleTests < plannedTests) { + return Support(Support::UnknownReason::POSSIBLE_TESTS_LT_PLANNED); + } + + if (plannedTests > opt::maxTests) { + return Support(Support::UnknownReason::OVER_MAX_TESTS); + } + + const auto margin = getMargin(windowSize, plannedTests, repeat.size(), MIN_MARGIN); + + if (long(head.size()) < margin) { + return Support(Support::UnknownReason::HEAD_SHORTER_THAN_MARGIN); + } + + if (long(tail.size()) < margin) { + return Support(Support::UnknownReason::TAIL_SHORTER_THAN_MARGIN); + } - double meanReadKmerContribution = 0; - for (const auto& batch : ReadBatch::batches) { - meanReadKmerContribution += batch.getFractionOfTotal() * (batch.size - opt::k + 1); - } - const double baseContributionRatio = ReadBatch::current.getFractionOfTotal() * (ReadBatch::current.size - opt::k + 1) / meanReadKmerContribution; + Sequence sequence; + if (possibleTests > plannedTests + 1) { + assert(long(head.size()) > margin || long(tail.size()) > margin); + sequence = head.substr(head.size() - margin) + repeat + tail.substr(0, margin); + } else { + sequence = head + repeat + tail; + } + possibleTests = sequence.size() - windowSize + 1; - const double approxNumOfReads = double(pathBases * baseContributionRatio) / double(opt::k * (ReadBatch::current.size - opt::k + 1)); - assert(approxNumOfReads > 2); + assert(plannedTests <= possibleTests); + assert(possibleTests <= plannedTests + 1); + assert(int(sequence.size()) >= MIN_MARGIN + int(repeat.size()) + MIN_MARGIN); + assert(int(sequence.size()) < int(windowSize) * 2); - return double(pathLength - ReadBatch::current.size) / double(approxNumOfReads - 1); + return testSequence(sequence); } -static Support determinePathSupport(const ContigPath& path) +static double +expectedSpacingBetweenReads(const ContigPath& path) { - assert(path.size() >= 3); - const Sequence repeat = getPathSequence(ContigPath(path.begin() + 1, path.end() - 1)); - const int repeatSize = repeat.size(); - assert(repeatSize >= 2); - - const long calculatedTests = std::round(expectedSpacingBetweenReads(path) * COV_APPROX_FORMULA_FACTOR); - assert(calculatedTests >= 0); - - long requiredTests = calculatedTests; - if (requiredTests < opt::min_tests) { requiredTests = opt::min_tests; } - - const int windowSize = g_vanillaBloom->getKmerSize(); - assert(windowSize >= 4); - - if (!windowLongEnough(windowSize, requiredTests, repeatSize, MIN_MARGIN)) { - return Support(calculatedTests); - } - - const auto &leftContig = path[0]; - const auto &rightContig = path[path.size() - 1]; - assert(windowSize >= MIN_MARGIN + repeatSize + MIN_MARGIN); - - const int leftDistance = distanceBetween(leftContig, path[1]); - const int rightDistance = distanceBetween(path[path.size() - 2], rightContig); - - const int margin = getMargin(windowSize, requiredTests, repeat.size(), MIN_MARGIN); - - const auto heads = getTreeSequences(leftContig, -leftDistance, margin, false, opt::branching); - const auto tails = getTreeSequences(rightContig, -rightDistance, margin, true, opt::branching); - assert(heads.size() > 0); - assert(tails.size() > 0); - - Support maxSupport(calculatedTests); - bool unknown = false; - - const long combinations = heads.size() * tails.size(); - if (combinations > opt::branching * opt::branching) { - unknown = true; - } else { - if (combinations >= PATH_COMBINATIONS_MULTITHREAD_THRESHOLD) { - bool end = false; - for (const auto head : heads) { - #pragma omp critical (maxSupport) - { - if (unknown) { end = true; } - } - if (end) { break; } - - #pragma omp task firstprivate(head) shared(maxSupport, unknown) - { - bool end = false; - for (const auto& tail : tails) { - #pragma omp critical (maxSupport) - { - if (unknown) { end = true; } - } - if (end) { break; } - - auto support = testCombination(head, repeat, tail, requiredTests); - - #pragma omp critical (maxSupport) - { - if (support.unknown()) { - unknown = true; - end = true; - } else if (support > maxSupport) { - maxSupport = support; - } else if (maxSupport.found == 0 && support.tests > maxSupport.tests) { - maxSupport.tests = support.tests; - } - } - if (end) { break; } - } - } - } - } else { - for (const auto& head : heads) { - if (unknown) { break; } - - for (const auto& tail : tails) { - if (unknown) { break; } - - auto support = testCombination(head, repeat, tail, requiredTests); - - if (support.unknown()) { - unknown = true; - break; - } else if (support > maxSupport) { - maxSupport = support; - } else if (maxSupport.found == 0 && support.tests > maxSupport.tests) { - maxSupport.tests = support.tests; - } - } - } - } - } - - if (combinations >= PATH_COMBINATIONS_MULTITHREAD_THRESHOLD) { - #pragma omp taskwait - } - - if (unknown) { - return Support(calculatedTests); - } - - maxSupport.calculatedTests = calculatedTests; - return maxSupport; + const long pathLength = 100000; + const double leftContigBaseCoverage = getContigBaseCoverage(path[0]); + const double rightContigBaseCoverage = getContigBaseCoverage(path[path.size() - 1]); + const double pathBaseCoverage = std::min(leftContigBaseCoverage, rightContigBaseCoverage); + const double pathBases = pathBaseCoverage * (pathLength - opt::k + 1); + + double meanReadKmerContribution = 0; + for (const auto& batch : ReadSize::readSizes) { + meanReadKmerContribution += batch.getFractionOfTotal() * (batch.size - opt::k + 1); + } + const double baseContributionRatio = ReadSize::current.getFractionOfTotal() * + (ReadSize::current.size - opt::k + 1) / + meanReadKmerContribution; + + const double approxNumOfReads = double(pathBases * baseContributionRatio) / + double(opt::k * (ReadSize::current.size - opt::k + 1)); + assert(approxNumOfReads > 2); + + return double(pathLength - ReadSize::current.size) / double(approxNumOfReads - 1); +} + +static Support +determinePathSupport(const ContigPath& path) +{ + assert(path.size() >= 3); + const Sequence repeat = getPathSequence(ContigPath(path.begin() + 1, path.end() - 1)); + const int repeatSize = repeat.size(); + assert(repeatSize >= 2); + + const long calculatedTests = + std::round(expectedSpacingBetweenReads(path) * ReadSize::current.covApproxFactor + opt::threshold); + assert(calculatedTests >= 0); + + long requiredTests = calculatedTests; + if (requiredTests < opt::minTests) { + requiredTests = opt::minTests; + } + if (requiredTests > opt::maxTests) { + return Support(calculatedTests, Support::UnknownReason::OVER_MAX_TESTS); + } + + const int windowSize = g_vanillaBloom->get_k(); + assert(windowSize >= 4); + + if (!windowLongEnough(windowSize, requiredTests, repeatSize, MIN_MARGIN)) { + return Support(calculatedTests, Support::UnknownReason::WINDOW_NOT_LONG_ENOUGH); + } + + const auto& leftContig = path[0]; + const auto& rightContig = path[path.size() - 1]; + assert(windowSize >= MIN_MARGIN + repeatSize + MIN_MARGIN); + + const int leftDistance = distanceBetween(leftContig, path[1]); + const int rightDistance = distanceBetween(path[path.size() - 2], rightContig); + + const int margin = getMargin(windowSize, requiredTests, repeat.size(), MIN_MARGIN); + + auto heads = getTreeSequences(leftContig, -leftDistance, margin, false, 2 * opt::branching); + auto tails = getTreeSequences(rightContig, -rightDistance, margin, true, 2 * opt::branching); + long combinations = heads.size() * tails.size(); + assert(combinations > 0); + if (combinations > opt::branching * opt::branching) { + std::random_shuffle(heads.begin(), heads.end()); + std::random_shuffle(tails.begin(), tails.end()); + if (heads.size() > size_t(opt::branching) && tails.size() > size_t(opt::branching)) { + heads.resize(opt::branching); + tails.resize(opt::branching); + } else if (tails.size() <= size_t(opt::branching)) { + size_t newsize = size_t(opt::branching * opt::branching) / tails.size(); + if (newsize < heads.size()) { heads.resize(newsize); } + } else if (heads.size() <= size_t(opt::branching)) { + size_t newsize = size_t(opt::branching * opt::branching) / heads.size(); + if (newsize < tails.size()) { tails.resize(newsize); } + } else { + assert(false); + } + combinations = heads.size() * tails.size(); + assert(combinations > 0); + } + + for (const auto& head : heads) { + if (long(head.size()) < margin) { + return Support(calculatedTests, Support::UnknownReason::HEAD_SHORTER_THAN_MARGIN); + } + } + for (const auto& tail : tails) { + if (long(tail.size()) < margin) { + return Support(calculatedTests, Support::UnknownReason::TAIL_SHORTER_THAN_MARGIN); + } + } + + Support maxSupport(calculatedTests, Support::UnknownReason::UNDETERMINED); + bool unknown = false; + + if (combinations >= PATH_COMBINATIONS_MULTITHREAD_THRESHOLD) { + bool end = false; +#if _OPENMP + for (const auto& h : heads) { + const auto head = h; +#else + for (const auto& head : heads) { +#endif +#pragma omp critical(maxSupport) + { + if (unknown) { + end = true; + } + } + if (end) { + break; + } + +#pragma omp task firstprivate(head) shared(maxSupport, unknown) + { + bool end = false; + for (const auto& tail : tails) { +#pragma omp critical(maxSupport) + { + if (unknown) { + end = true; + } + } + if (end) { + break; + } + + auto support = testCombination(head, repeat, tail, requiredTests); + +#pragma omp critical(maxSupport) + { + if (support.unknown()) { + unknown = true; + end = true; + maxSupport = support; + } else if (support > maxSupport) { + maxSupport = support; + } else if (maxSupport.found == 0 && support.tests > maxSupport.tests) { + maxSupport.tests = support.tests; + } + } + if (end) { + break; + } + } + } + } + } else { + for (const auto& head : heads) { + if (unknown) { + break; + } + + for (const auto& tail : tails) { + if (unknown) { + break; + } + + auto support = testCombination(head, repeat, tail, requiredTests); + + if (support.unknown()) { + unknown = true; + maxSupport = support; + break; + } else if (support > maxSupport) { + maxSupport = support; + } else if (maxSupport.found == 0 && support.tests > maxSupport.tests) { + maxSupport.tests = support.tests; + } + } + } + } + + if (combinations >= PATH_COMBINATIONS_MULTITHREAD_THRESHOLD) { +#pragma omp taskwait + } + + maxSupport.calculatedTests = calculatedTests; + return maxSupport; } static SupportMap -buildRepeatSupportMap(const ContigNode &repeat) +buildRepeatSupportMap(const ContigNode& repeat) { - SupportMap supportMap; - bool unknown = false; - in_edge_iterator inIt, inLast; - for (std::tie(inIt, inLast) = in_edges(repeat, g_contigGraph); - inIt != inLast; ++inIt) - { - const auto intig = source(*inIt, g_contigGraph); - out_edge_iterator outIt, outLast; - for (std::tie(outIt, outLast) = out_edges(repeat, g_contigGraph); - outIt != outLast; ++outIt) - { - const auto outig = target(*outIt, g_contigGraph); - auto support = determinePathSupport({ intig, repeat, outig }); - - supportMap[intig.index()][outig.index()] = support; - if (support.unknown()) { - unknown = true; - } - } - } - - if (unknown) { - for (std::tie(inIt, inLast) = in_edges(repeat, g_contigGraph); - inIt != inLast; ++inIt) - { - const auto intig = source(*inIt, g_contigGraph); - out_edge_iterator outIt, outLast; - for (std::tie(outIt, outLast) = out_edges(repeat, g_contigGraph); - outIt != outLast; ++outIt) - { - const auto outig = target(*outIt, g_contigGraph); - supportMap[intig.index()][outig.index()].reset(); - } - } - } - - return supportMap; + SupportMap supportMap; + bool unknown = false; + in_edge_iterator inIt, inLast; + for (std::tie(inIt, inLast) = in_edges(repeat, g_contigGraph); inIt != inLast; ++inIt) { + const auto intig = source(*inIt, g_contigGraph); + out_edge_iterator outIt, outLast; + for (std::tie(outIt, outLast) = out_edges(repeat, g_contigGraph); outIt != outLast; + ++outIt) { + const auto outig = target(*outIt, g_contigGraph); + auto support = determinePathSupport({ intig, repeat, outig }); + + supportMap[intig.index()][outig.index()] = support; + if (support.unknown()) { + unknown = true; + } + } + } + + if (unknown) { + for (std::tie(inIt, inLast) = in_edges(repeat, g_contigGraph); inIt != inLast; ++inIt) { + const auto intig = source(*inIt, g_contigGraph); + out_edge_iterator outIt, outLast; + for (std::tie(outIt, outLast) = out_edges(repeat, g_contigGraph); outIt != outLast; + ++outIt) { + const auto outig = target(*outIt, g_contigGraph); + auto& support = supportMap[intig.index()][outig.index()]; + if (!support.unknown()) { + support.reset(); + support.unknownReason = Support::UnknownReason::DIFFERENT_CULPRIT; + } + } + } + } + + return supportMap; } -static void updateStats(Resolution& resolution, long &pathsKnown, long& pathsUnknown, - const SupportMap& repeatSupportMap, - bool inHistSample) +static void +updateStats( + Resolution& resolution, + std::vector& supports, + const SupportMap& repeatSupportMap, + bool inHistSample) { - for (const auto &intigIdxAndOutigsSupp : repeatSupportMap) - { - for (const auto &outigIdxAndSupp : intigIdxAndOutigsSupp.second) - { - const auto &support = outigIdxAndSupp.second; - - if (support.unknown()) { - pathsUnknown++; - } else { - assert(support.found >= 0); - assert(support.tests >= 0); - pathsKnown++; - if (inHistSample) { - resolution.findsHistogram.insert(support.found); - resolution.fractionFindsHistogram.insert(double(support.found) / double(support.tests)); - } - } - - assert(support.calculatedTests >= 0); - if (inHistSample) { - resolution.calculatedTestsHistogram.insert(support.calculatedTests); - } - } - } + for (const auto& intigIdxAndOutigsSupp : repeatSupportMap) { + for (const auto& outigIdxAndSupp : intigIdxAndOutigsSupp.second) { + const auto& support = outigIdxAndSupp.second; + + supports.push_back(support); + + if (!support.unknown()) { + assert(support.found >= 0); + assert(support.tests >= 0); + if (inHistSample) { + resolution.findsHistogram.insert(support.found); + resolution.fractionFindsHistogram.insert( + double(support.found) / double(support.tests)); + } + } + + assert(support.calculatedTests >= 0); + if (inHistSample) { + resolution.calculatedTestsHistogram.insert(support.calculatedTests); + } + } + } } -static bool isSmallRepeat(const ContigNode& node) +static bool +isSmallRepeat(const ContigNode& node) { - unsigned r = g_vanillaBloom->getKmerSize(); - return (!get(vertex_removed, g_contigGraph, node) && !node.sense() && - windowLongEnough(r, opt::min_tests, getContigSize(node), MIN_MARGIN) && - (in_degree(node, g_contigGraph) > 0 && out_degree(node, g_contigGraph) > 0) && - (in_degree(node, g_contigGraph) > 1 || out_degree(node, g_contigGraph) > 1)); + unsigned r = g_vanillaBloom->get_k(); + return ( + !get(vertex_removed, g_contigGraph, node) && !node.sense() && + windowLongEnough(r, opt::minTests, getContigSize(node), MIN_MARGIN) && + (in_degree(node, g_contigGraph) > 0 && out_degree(node, g_contigGraph) > 0) && + (in_degree(node, g_contigGraph) > 1 || out_degree(node, g_contigGraph) > 1)); } -static Resolution resolveRepeats() +static Resolution +resolveRepeats() { - long total = (num_vertices(g_contigGraph) - num_vertices_removed(g_contigGraph)) / 2; - long repeats = 0, pathsKnown = 0, pathsUnknown = 0; - long pathsSupported = 0, pathsUnsupported = 0; - - progressStart("Path resolution (r = " + - std::to_string(g_vanillaBloom->getKmerSize()) + - ")", total * 2); - - Resolution resolution(ReadBatch::current, g_vanillaBloom->getKmerSize()); - - Graph::vertex_iterator vertexStart, vertexEnd; - boost::tie(vertexStart, vertexEnd) = vertices(g_contigGraph); - - iteratorMultithreading(vertexStart, vertexEnd, - [&] (const ContigNode &node) { - if (!get(vertex_removed, g_contigGraph, node)) { - if (isSmallRepeat(node)) { - return true; - } else { - #pragma omp critical (cerr) - progressUpdate(); - return false; - } - } - return false; - }, - [&] (const ContigNode &node) { - bool inHistSample; - bool skip = false; - #pragma omp critical(resolution) - { - repeats++; - inHistSample = (repeats <= HIST_SAMPLE_SIZE); - skip = (repeats > REPEAT_CASES_LIMIT); - } - - if (!skip) { - auto supportMap = buildRepeatSupportMap(node); - - #pragma omp critical(resolution) - { - resolution.repeatSupportMap[node.index()] = supportMap; - updateStats(resolution, pathsKnown, pathsUnknown, - supportMap, inHistSample); - } - } - - #pragma omp critical (cerr) - progressUpdate(); - }); - - if (repeats > 0 && pathsKnown > 0) { - for (const auto &findsAndCount : resolution.findsHistogram) { - const auto &finds = findsAndCount.first; - const auto &count = findsAndCount.second; - if (finds >= opt::threshold) { - pathsSupported += count; - } else { - pathsUnsupported += count; - } - } - - double sampleFactor = double(pathsKnown) / double(pathsSupported + pathsUnsupported); - pathsSupported *= sampleFactor; - pathsUnsupported *= sampleFactor; - - if (opt::verbose) { - std::cerr << std::fixed; - std::cerr << "Small repeats = " << repeats << "/" << total - << " (" << double(repeats) / total * 100.0 << "%)\n"; - std::cerr << "Known support paths = " << pathsKnown << " / " << (pathsKnown + pathsUnknown) - << " (" << double(pathsKnown) / (pathsKnown + pathsUnknown) * 100.0 << "%)\n"; - std::cerr << "Unknown support paths = " << pathsUnknown << " / " << (pathsKnown + pathsUnknown) - << " (" << double(pathsUnknown) / (pathsKnown + pathsUnknown) * 100.0 << "%)\n"; - std::cerr << "Supported paths ~= " << pathsSupported << "/" << pathsKnown - << " (" << double(pathsSupported) / pathsKnown * 100.0 << "%)\n"; - std::cerr << "Unsupported paths ~= " << pathsUnsupported << "/" << pathsKnown - << " (" << double(pathsUnsupported) / pathsKnown * 100.0 << "%)\n"; - std::cerr << std::defaultfloat << std::flush; - } - - if (double(pathsSupported) / double(pathsKnown) < SUPPORTED_PATHS_MIN) { - std::cerr << "Insufficient support found. Is something wrong with the data?\n"; - resolution.failed = true; - } - } else { - std::cerr << "No small junctions were found!\n"; - resolution.failed = true; - } - - return resolution; + long total = (num_vertices(g_contigGraph) - num_vertices_removed(g_contigGraph)) / 2; + long repeats = 0; + long pathsSupported = 0, pathsUnsupported = 0; + std::vector supports; + + progressStart( + "Path resolution (r = " + std::to_string(g_vanillaBloom->get_k()) + ")", total * 2); + + Resolution resolution(ReadSize::current, g_vanillaBloom->get_k()); + + Graph::vertex_iterator vertexStart, vertexEnd; + boost::tie(vertexStart, vertexEnd) = vertices(g_contigGraph); + + iteratorMultithreading( + vertexStart, + vertexEnd, + [&](const ContigNode& node) { + if (!get(vertex_removed, g_contigGraph, node)) { + if (isSmallRepeat(node)) { + return true; + } else { +#pragma omp critical(cerr) + progressUpdate(); + return false; + } + } + return false; + }, + [&](const ContigNode& node) { + bool inHistSample; + bool skip = false; +#pragma omp critical(resolution) + { + repeats++; + inHistSample = (repeats <= HIST_SAMPLE_SIZE); + skip = (repeats > REPEAT_CASES_LIMIT); + } + + if (!skip) { + auto supportMap = buildRepeatSupportMap(node); + +#pragma omp critical(resolution) + { + resolution.repeatSupportMap[node.index()] = supportMap; + updateStats(resolution, supports, supportMap, inHistSample); + } + } + +#pragma omp critical(cerr) + progressUpdate(); + }); + + long pathsKnown = 0, pathsUnknown = 0, pathsTotal = 0; + static const std::string unknownReasonLabels[] = { "Undetermined", "Too many combinations", "Over max tests", "Possible tests < planned tests", "Window not long enough", "Head shorter than margin", "Tail shorter than margin", "Different culprit" }; + static const size_t unknownReasons = countof(unknownReasonLabels); + int unknownReasonCounts[unknownReasons]; + for (size_t i = 0; i < unknownReasons; i++) { + unknownReasonCounts[i] = 0; + } + for (const auto& s : supports) { + if (s.unknown()) { + pathsUnknown++; + unknownReasonCounts[unsigned(s.unknownReason)]++; + } else { + pathsKnown++; + } + } + pathsTotal = pathsKnown + pathsUnknown; + + if (repeats > 0 && pathsKnown > 0) { + for (const auto& findsAndCount : resolution.findsHistogram) { + const auto& finds = findsAndCount.first; + const auto& count = findsAndCount.second; + if (finds >= opt::threshold) { + pathsSupported += count; + } else { + pathsUnsupported += count; + } + } + + double sampleFactor = double(pathsKnown) / double(pathsSupported + pathsUnsupported); + pathsSupported *= sampleFactor; + pathsUnsupported *= sampleFactor; + + if (opt::verbose) { + std::cerr << std::fixed; + std::cerr << "Small repeats = " << repeats << "/" << total << " (" + << double(repeats) / total * 100.0 << "%)\n"; + std::cerr << "Known support paths = " << pathsKnown << " / " + << pathsTotal << " (" + << double(pathsKnown) / pathsTotal * 100.0 << "%)\n"; + std::cerr << "Unknown support paths = " << pathsUnknown << " / " + << pathsTotal << " (" + << double(pathsUnknown) / pathsTotal * 100.0 << "%)\n"; + for (size_t i = 0; i < unknownReasons; i++) { + if (i > 0) { std::cerr << ", "; } + std::cerr << unknownReasonLabels[i] << ": " << double(unknownReasonCounts[i]) / pathsUnknown * 100.0 << "%"; + } + std::cerr << "\n"; + std::cerr << "Supported paths ~= " << pathsSupported << "/" << pathsKnown << " (" + << double(pathsSupported) / pathsKnown * 100.0 << "%)\n"; + std::cerr << "Unsupported paths ~= " << pathsUnsupported << "/" << pathsKnown << " (" + << double(pathsUnsupported) / pathsKnown * 100.0 << "%)\n"; + std::cerr << std::defaultfloat << std::flush; + } + + if (double(pathsSupported) / double(pathsKnown) < SUPPORTED_PATHS_MIN) { + std::cerr << "Insufficient support found. Is something wrong with the data?\n"; + resolution.failed = true; + } + } else { + std::cerr << "No small junctions were found!\n"; + resolution.failed = true; + } + + return resolution; } -struct OldEdge { +struct OldEdge +{ - OldEdge(ContigNode u, ContigNode v): - u(u), v(v) {} + OldEdge(ContigNode u, ContigNode v) + : u(u) + , v(v) + {} - ContigNode u, v; + ContigNode u, v; }; -struct NewEdge { +struct NewEdge +{ - NewEdge(ContigNode u, ContigNode v, Distance distance): - u(u), v(v), distance(distance) {} + NewEdge(ContigNode u, ContigNode v, Distance distance) + : u(u) + , v(v) + , distance(distance) + {} - ContigNode u, v; - Distance distance; + ContigNode u, v; + Distance distance; }; -struct NewVertex { +struct NewVertex +{ - NewVertex(ContigNode original, ContigNode node): - original(original), node(node) {} + NewVertex(ContigNode original, ContigNode node) + : original(original) + , node(node) + {} - ContigNode original, node; + ContigNode original, node; }; -class RepeatInstance { - -public: - - RepeatInstance(const ContigNode instance, - const ContigNode original, - const std::vector originalIntigs, - const std::vector originalOutigs): - instance(instance), - original(original), - originalIntigs(originalIntigs), - originalOutigs(originalOutigs) {} - - bool inOriginalIntigs(const ContigNode &node) const { - return std::find(originalIntigs.begin(), originalIntigs.end(), node) - != originalIntigs.end(); - } - - bool inOriginalOutigs(const ContigNode &node) const { - return std::find(originalOutigs.begin(), originalOutigs.end(), node) - != originalOutigs.end(); - } - - RepeatInstance getReverse() const { - std::vector originalIntigsReverse; - for (auto originalOutig : originalOutigs) { - originalIntigsReverse.push_back(originalOutig ^ true); - } - - std::vector originalOutigsReverse; - for (auto originalIntig : originalIntigs) { - originalOutigsReverse.push_back(originalIntig ^ true); - } - - return RepeatInstance(instance ^ true, original ^ true, - originalIntigsReverse, originalOutigsReverse); - } - - const ContigNode instance; - const ContigNode original; - std::vector originalIntigs; - std::vector originalOutigs; - std::vector> intigsInstances; - std::vector> outigsInstances; +class RepeatInstance +{ + + public: + RepeatInstance( + const ContigNode instance, + const ContigNode original, + const std::vector originalIntigs, + const std::vector originalOutigs) + : instance(instance) + , original(original) + , originalIntigs(originalIntigs) + , originalOutigs(originalOutigs) + {} + + bool inOriginalIntigs(const ContigNode& node) const + { + return std::find(originalIntigs.begin(), originalIntigs.end(), node) != + originalIntigs.end(); + } + + bool inOriginalOutigs(const ContigNode& node) const + { + return std::find(originalOutigs.begin(), originalOutigs.end(), node) != + originalOutigs.end(); + } + + RepeatInstance getReverse() const + { + std::vector originalIntigsReverse; + for (auto originalOutig : originalOutigs) { + originalIntigsReverse.push_back(originalOutig ^ true); + } + + std::vector originalOutigsReverse; + for (auto originalIntig : originalIntigs) { + originalOutigsReverse.push_back(originalIntig ^ true); + } + + return RepeatInstance( + instance ^ true, original ^ true, originalIntigsReverse, originalOutigsReverse); + } + const ContigNode instance; + const ContigNode original; + std::vector originalIntigs; + std::vector originalOutigs; + std::vector> intigsInstances; + std::vector> outigsInstances; }; -static void processGraph(const Resolution &resolution, - ImaginaryContigPaths& supportedPaths, - ImaginaryContigPaths& unsupportedPaths) +static void +processGraph( + const Resolution& resolution, + ImaginaryContigPaths& supportedPaths, + ImaginaryContigPaths& unsupportedPaths) { - progressStart("New paths and vertices setup", resolution.repeatSupportMap.size() * 3); + progressStart("New paths and vertices setup", resolution.repeatSupportMap.size() * 3); - assert(!resolution.failed); + assert(!resolution.failed); - std::vector edges2remove; - std::vector edges2add; - std::vector vertices2add; + std::vector edges2remove; + std::vector edges2add; + std::vector vertices2add; - std::map> repeatInstancesMap; + std::map> repeatInstancesMap; - size_t lastId = num_vertices(g_contigGraph) / 2; + size_t lastId = num_vertices(g_contigGraph) / 2; - const auto start = resolution.repeatSupportMap.begin(); - const auto end = resolution.repeatSupportMap.end(); + const auto start = resolution.repeatSupportMap.begin(); + const auto end = resolution.repeatSupportMap.end(); #ifdef _OPENMP - int threads = omp_get_num_threads(); + int threads = omp_get_num_threads(); #else - int threads = 1; + int threads = 1; #endif - // 1 - iteratorMultithreading(start, end, - [&] (const std::pair &repeatSupport) { - (void)repeatSupport; - return true; - }, - [&] (const std::pair &repeatSupport) { - const auto repeat = ContigNode(repeatSupport.first); - const auto &supportMap = repeatSupport.second; - - #pragma omp critical (repeatInstancesMap) - { - repeatInstancesMap.emplace(repeat.index(), std::vector()); - repeatInstancesMap.emplace((repeat ^ true).index(), std::vector()); - } - - for (const auto &intigIdxAndOutigsSupp : supportMap) { - const auto intig = ContigNode(intigIdxAndOutigsSupp.first); - for (const auto &outigIdxAndSupp : intigIdxAndOutigsSupp.second) { - const auto outig = ContigNode(outigIdxAndSupp.first); - const auto &support = outigIdxAndSupp.second; - int dist1 = distanceBetween(intig, repeat); - int dist2 = distanceBetween(repeat, outig); - - ImaginaryContigPath path = { { intig, 0 }, { repeat, dist1 }, { outig, dist2 } }; - - if (support.good()) { - #pragma omp critical (supportedPaths) - supportedPaths.insert(path); - } else { - #pragma omp critical (unsupportedPaths) - unsupportedPaths.insert(path); - - #pragma omp critical (supportedPaths) - { - if (supportedPaths.find(path) != supportedPaths.end()) { - supportedPaths.erase(path); - } - } - } - } - } - - #pragma omp critical (cerr) - progressUpdate(); - }, std::min(4, threads)); - - // 2 - for (auto it = start; it != end; it++) { - const std::pair &repeatSupport = *it; - const auto repeat = ContigNode(repeatSupport.first); - const auto &supportMap = repeatSupport.second; - - assert(repeatInstancesMap.find(repeat.index()) != repeatInstancesMap.end()); - assert(repeatInstancesMap.find((repeat ^ true).index()) != repeatInstancesMap.end()); - - auto &repeatInstances = repeatInstancesMap.at(repeat.index()); - auto &repeatInstancesReverse = repeatInstancesMap.at((repeat ^ true).index()); - - assert(repeatInstances.size() == 0); - assert(repeatInstancesReverse.size() == 0); - - for (const auto &intigIdxAndOutigsSupp : supportMap) { - const auto intig = ContigNode(intigIdxAndOutigsSupp.first); - const auto &outigsSupp = intigIdxAndOutigsSupp.second; - - std::vector supportedOutigs; - for(const auto &outigIdxAndSupp : outigsSupp) { - const auto &outig = ContigNode(outigIdxAndSupp.first); - const auto &support = outigIdxAndSupp.second; - if (support.good()) { - supportedOutigs.push_back(outig); - } - } - - bool matched = false; - for (auto &instance : repeatInstances) { - if (instance.originalOutigs.size() == supportedOutigs.size()) { - matched = true; - for (const auto &outig: supportedOutigs) { - bool found = false; - for (const auto &instanceOutig : instance.originalOutigs) { - if (outig == instanceOutig) { - found = true; - break; - } - } - if (!found) { - matched = false; - break; - } - } - } - if (matched) { - instance.originalIntigs.push_back(intig); - break; - } - } - - if (!matched) { - if (supportedOutigs.size() > 0) { - std::vector intigs = { intig }; - if (repeatInstances.size() == 0) { - repeatInstances.push_back(RepeatInstance(repeat, repeat, intigs, supportedOutigs)); - } else { - ContigNode repeatCopy = ContigNode(lastId++, repeat.sense()); - repeatInstances.push_back(RepeatInstance(repeatCopy, repeat, intigs, supportedOutigs)); - } - } - } - } - - if (repeatInstances.size() > 0) { - std::set intigIdxs; - - for (const auto &instance : repeatInstances) { - for (const auto &intig : instance.originalIntigs) { - assert(intigIdxs.find(intig.index()) == intigIdxs.end()); - intigIdxs.insert(intig.index()); - } - assert(instance.originalOutigs.size() > 0); - repeatInstancesReverse.push_back(instance.getReverse()); - } - } else { - auto instance = RepeatInstance(repeat, repeat, {}, {}); - repeatInstances.push_back(instance); - repeatInstancesReverse.push_back(instance.getReverse()); - } - - progressUpdate(); - } - - // 3 - iteratorMultithreading(start, end, - [&] (const std::pair &repeatSupport) { - (void)repeatSupport; - return true; - }, - [&] (const std::pair &repeatSupport) { - const auto repeat = ContigNode(repeatSupport.first); - - auto &repeatInstances = repeatInstancesMap.at(repeat.index()); - - std::list tempInstances; - - for (auto &instance : repeatInstances) { - for (const auto &intig : instance.originalIntigs) { - if (repeatInstancesMap.find(intig.index()) != repeatInstancesMap.end()) { - const auto &intigInstances = repeatInstancesMap.at(intig.index()); - for (const auto &intigInstance : intigInstances) { - if (intigInstance.inOriginalOutigs(repeat)) { - instance.intigsInstances.push_back(intigInstance); - } - } - } else { - tempInstances.push_back(RepeatInstance(intig, intig, {}, {})); - instance.intigsInstances.push_back(tempInstances.back()); - } - } - - for (const auto &outig : instance.originalOutigs) { - if (repeatInstancesMap.find(outig.index()) != repeatInstancesMap.end()) { - const auto &outigInstances = repeatInstancesMap.at(outig.index()); - for (const auto &outigInstance : outigInstances) { - if (outigInstance.inOriginalIntigs(repeat)) { - instance.outigsInstances.push_back(outigInstance); - } - } - } else { - tempInstances.push_back(RepeatInstance(outig, outig, {}, {})); - instance.outigsInstances.push_back(tempInstances.back()); - } - } - - if (instance.instance == instance.original) { - in_edge_iterator inIt, inLast; - for (std::tie(inIt, inLast) = in_edges(instance.original, g_contigGraph); - inIt != inLast; ++inIt) - { - #pragma omp critical (edges2remove) - edges2remove.push_back(OldEdge(source(*inIt, g_contigGraph), instance.original)); - } - - out_edge_iterator outIt, outLast; - for (std::tie(outIt, outLast) = out_edges(instance.original, g_contigGraph); - outIt != outLast; ++outIt) - { - #pragma omp critical (edges2remove) - edges2remove.push_back(OldEdge(instance.original, target(*outIt, g_contigGraph))); - } - } else { - #pragma omp critical (vertices2add) - vertices2add.push_back(NewVertex(instance.original, instance.instance)); - } - - for (const RepeatInstance &intigInstance : instance.intigsInstances) { - #pragma omp critical (edges2add) - edges2add.push_back(NewEdge(intigInstance.instance, instance.instance, - get(edge_bundle, g_contigGraph, edge(intigInstance.original, instance.original, g_contigGraph).first))); - } - - for (const RepeatInstance &outigInstance : instance.outigsInstances) { - #pragma omp critical (edges2add) - edges2add.push_back(NewEdge(instance.instance, outigInstance.instance, - get(edge_bundle, g_contigGraph, edge(instance.original, outigInstance.original, g_contigGraph).first))); - } - } - - #pragma omp critical (cerr) - progressUpdate(); - }); - - std::sort(vertices2add.begin(), vertices2add.end(), - [] (const NewVertex& v1, const NewVertex& v2) { - return v1.node.index() < v2.node.index(); - }); - std::sort(edges2add.begin(), edges2add.end(), - [] (const NewEdge &e1, const NewEdge &e2) { - return (e1.u.index() < e2.u.index()) || (e1.u.index() == e2.u.index() && e1.v.index() < e2.v.index()); - }); - - int modifications = edges2remove.size() + vertices2add.size() + edges2add.size(); - progressStart("Graph modification", modifications); - - g_contigNames.unlock(); - for (const auto &oldEdge : edges2remove) { - if (g_contigGraph.edge(oldEdge.u, oldEdge.v).second) { - remove_edge(oldEdge.u, oldEdge.v, g_contigGraph); - } - - progressUpdate(); - } - for (const auto &newVertex : vertices2add) { - assert(in_degree(newVertex.original, g_contigGraph) == 0); - assert(out_degree(newVertex.original, g_contigGraph) == 0); - - assert(g_contigSequences.size() == newVertex.node.index()); - assert(g_contigComments.size() == newVertex.node.id()); - - g_contigSequences.push_back(getContigSequence(newVertex.original)); - g_contigSequences.push_back(getContigSequence(newVertex.original ^ true)); - - std::string name = createContigName(); - put(vertex_name, g_contigGraph, newVertex.node, name); - add_vertex(get(vertex_bundle, g_contigGraph, newVertex.original), g_contigGraph); - - g_contigComments.push_back(getContigComment(newVertex.original)); - - assert(in_degree(newVertex.node, g_contigGraph) == 0); - assert(out_degree(newVertex.node, g_contigGraph) == 0); - - progressUpdate(); - } - for (const auto &newEdge : edges2add) { - if (!g_contigGraph.edge(newEdge.u, newEdge.v).second) { - g_contigGraph.add_edge(newEdge.u, newEdge.v, newEdge.distance); - } - - progressUpdate(); - } - g_contigNames.lock(); -} + // 1 + iteratorMultithreading( + start, + end, + [&](const std::pair& repeatSupport) { + (void)repeatSupport; + return true; + }, + [&](const std::pair& repeatSupport) { + const auto repeat = ContigNode(repeatSupport.first); + const auto& supportMap = repeatSupport.second; + +#pragma omp critical(repeatInstancesMap) + { + repeatInstancesMap.emplace(repeat.index(), std::vector()); + repeatInstancesMap.emplace((repeat ^ true).index(), std::vector()); + } + + for (const auto& intigIdxAndOutigsSupp : supportMap) { + const auto intig = ContigNode(intigIdxAndOutigsSupp.first); + for (const auto& outigIdxAndSupp : intigIdxAndOutigsSupp.second) { + const auto outig = ContigNode(outigIdxAndSupp.first); + const auto& support = outigIdxAndSupp.second; + int dist1 = distanceBetween(intig, repeat); + int dist2 = distanceBetween(repeat, outig); + + ImaginaryContigPath path = { { intig, 0 }, + { repeat, dist1 }, + { outig, dist2 } }; + + if (support.good()) { +#pragma omp critical(supportedPaths) + supportedPaths.insert(path); + } else { +#pragma omp critical(unsupportedPaths) + unsupportedPaths.insert(path); + +#pragma omp critical(supportedPaths) + { + if (supportedPaths.find(path) != supportedPaths.end()) { + supportedPaths.erase(path); + } + } + } + } + } + +#pragma omp critical(cerr) + progressUpdate(); + }, + std::min(4, threads)); + + // 2 + for (auto it = start; it != end; it++) { + const std::pair& repeatSupport = *it; + const auto repeat = ContigNode(repeatSupport.first); + const auto& supportMap = repeatSupport.second; + + assert(repeatInstancesMap.find(repeat.index()) != repeatInstancesMap.end()); + assert(repeatInstancesMap.find((repeat ^ true).index()) != repeatInstancesMap.end()); + + auto& repeatInstances = repeatInstancesMap.at(repeat.index()); + auto& repeatInstancesReverse = repeatInstancesMap.at((repeat ^ true).index()); + + assert(repeatInstances.size() == 0); + assert(repeatInstancesReverse.size() == 0); + + for (const auto& intigIdxAndOutigsSupp : supportMap) { + const auto intig = ContigNode(intigIdxAndOutigsSupp.first); + const auto& outigsSupp = intigIdxAndOutigsSupp.second; + + std::vector supportedOutigs; + for (const auto& outigIdxAndSupp : outigsSupp) { + const auto& outig = ContigNode(outigIdxAndSupp.first); + const auto& support = outigIdxAndSupp.second; + if (support.good()) { + supportedOutigs.push_back(outig); + } + } + + bool matched = false; + for (auto& instance : repeatInstances) { + if (instance.originalOutigs.size() == supportedOutigs.size()) { + matched = true; + for (const auto& outig : supportedOutigs) { + bool found = false; + for (const auto& instanceOutig : instance.originalOutigs) { + if (outig == instanceOutig) { + found = true; + break; + } + } + if (!found) { + matched = false; + break; + } + } + } + if (matched) { + instance.originalIntigs.push_back(intig); + break; + } + } + + if (!matched) { + if (supportedOutigs.size() > 0) { + std::vector intigs = { intig }; + if (repeatInstances.size() == 0) { + repeatInstances.push_back( + RepeatInstance(repeat, repeat, intigs, supportedOutigs)); + } else { + ContigNode repeatCopy = ContigNode(lastId++, repeat.sense()); + repeatInstances.push_back( + RepeatInstance(repeatCopy, repeat, intigs, supportedOutigs)); + } + } + } + } + + if (repeatInstances.size() > 0) { + std::set intigIdxs; + + for (const auto& instance : repeatInstances) { + for (const auto& intig : instance.originalIntigs) { + assert(intigIdxs.find(intig.index()) == intigIdxs.end()); + intigIdxs.insert(intig.index()); + } + assert(instance.originalOutigs.size() > 0); + repeatInstancesReverse.push_back(instance.getReverse()); + } + } else { + auto instance = RepeatInstance(repeat, repeat, {}, {}); + repeatInstances.push_back(instance); + repeatInstancesReverse.push_back(instance.getReverse()); + } + + progressUpdate(); + } -void writeHistograms(const Resolution& resolution, const std::string& prefix, int subiteration) { - if (opt::verbose) { - std::cerr << "Writing algorithm histograms..." << std::flush; - } + // 3 + iteratorMultithreading( + start, + end, + [&](const std::pair& repeatSupport) { + (void)repeatSupport; + return true; + }, + [&](const std::pair& repeatSupport) { + const auto repeat = ContigNode(repeatSupport.first); + + auto& repeatInstances = repeatInstancesMap.at(repeat.index()); + + std::list tempInstances; + + for (auto& instance : repeatInstances) { + for (const auto& intig : instance.originalIntigs) { + if (repeatInstancesMap.find(intig.index()) != repeatInstancesMap.end()) { + const auto& intigInstances = repeatInstancesMap.at(intig.index()); + for (const auto& intigInstance : intigInstances) { + if (intigInstance.inOriginalOutigs(repeat)) { + instance.intigsInstances.push_back(intigInstance); + } + } + } else { + tempInstances.push_back(RepeatInstance(intig, intig, {}, {})); + instance.intigsInstances.push_back(tempInstances.back()); + } + } + + for (const auto& outig : instance.originalOutigs) { + if (repeatInstancesMap.find(outig.index()) != repeatInstancesMap.end()) { + const auto& outigInstances = repeatInstancesMap.at(outig.index()); + for (const auto& outigInstance : outigInstances) { + if (outigInstance.inOriginalIntigs(repeat)) { + instance.outigsInstances.push_back(outigInstance); + } + } + } else { + tempInstances.push_back(RepeatInstance(outig, outig, {}, {})); + instance.outigsInstances.push_back(tempInstances.back()); + } + } + + if (instance.instance == instance.original) { + in_edge_iterator inIt, inLast; + for (std::tie(inIt, inLast) = in_edges(instance.original, g_contigGraph); + inIt != inLast; + ++inIt) { +#pragma omp critical(edges2remove) + edges2remove.push_back( + OldEdge(source(*inIt, g_contigGraph), instance.original)); + } + + out_edge_iterator outIt, outLast; + for (std::tie(outIt, outLast) = out_edges(instance.original, g_contigGraph); + outIt != outLast; + ++outIt) { +#pragma omp critical(edges2remove) + edges2remove.push_back( + OldEdge(instance.original, target(*outIt, g_contigGraph))); + } + } else { +#pragma omp critical(vertices2add) + vertices2add.push_back(NewVertex(instance.original, instance.instance)); + } + + for (const RepeatInstance& intigInstance : instance.intigsInstances) { +#pragma omp critical(edges2add) + edges2add.push_back(NewEdge( + intigInstance.instance, + instance.instance, + get(edge_bundle, + g_contigGraph, + edge(intigInstance.original, instance.original, g_contigGraph).first))); + } + + for (const RepeatInstance& outigInstance : instance.outigsInstances) { +#pragma omp critical(edges2add) + edges2add.push_back(NewEdge( + instance.instance, + outigInstance.instance, + get(edge_bundle, + g_contigGraph, + edge(instance.original, outigInstance.original, g_contigGraph).first))); + } + } + +#pragma omp critical(cerr) + progressUpdate(); + }); + + std::sort( + vertices2add.begin(), vertices2add.end(), [](const NewVertex& v1, const NewVertex& v2) { + return v1.node.index() < v2.node.index(); + }); + std::sort(edges2add.begin(), edges2add.end(), [](const NewEdge& e1, const NewEdge& e2) { + return (e1.u.index() < e2.u.index()) || + (e1.u.index() == e2.u.index() && e1.v.index() < e2.v.index()); + }); + + int modifications = edges2remove.size() + vertices2add.size() + edges2add.size(); + progressStart("Graph modification", modifications); + + g_contigNames.unlock(); + for (const auto& oldEdge : edges2remove) { + if (g_contigGraph.edge(oldEdge.u, oldEdge.v).second) { + remove_edge(oldEdge.u, oldEdge.v, g_contigGraph); + } + + progressUpdate(); + } + for (const auto& newVertex : vertices2add) { + assert(in_degree(newVertex.original, g_contigGraph) == 0); + assert(out_degree(newVertex.original, g_contigGraph) == 0); - std::string findsFilename = prefix + "-r" + std::to_string(resolution.r) + "-" + std::to_string(subiteration + 1) + "-finds.tsv"; - std::ofstream findsFile(findsFilename.c_str()); - findsFile << resolution.findsHistogram; + assert(g_contigSequences.size() == newVertex.node.index()); + assert(g_contigComments.size() == newVertex.node.id()); - std::string fractionFindsFilename = prefix + "-r" + std::to_string(resolution.r) + "-" + std::to_string(subiteration + 1) + "-percent-finds.tsv"; - std::ofstream fractionFindsFile(fractionFindsFilename.c_str()); - fractionFindsFile << resolution.fractionFindsHistogram; + g_contigSequences.push_back(getContigSequence(newVertex.original)); + g_contigSequences.push_back(getContigSequence(newVertex.original ^ true)); - std::string calculatedTestsFilename = prefix + "-r" + std::to_string(resolution.r) + "-" + std::to_string(subiteration + 1) + "-calculated-tests.tsv"; - std::ofstream calculatedTestsFile(calculatedTestsFilename.c_str()); - calculatedTestsFile << resolution.calculatedTestsHistogram; + std::string name = createContigName(); + put(vertex_name, g_contigGraph, newVertex.node, name); + add_vertex(get(vertex_bundle, g_contigGraph, newVertex.original), g_contigGraph); - if (opt::verbose) { - std::cerr << " Done!" << std::endl; - } -} + g_contigComments.push_back(getContigComment(newVertex.original)); -void -resolveShort(const std::vector& readFilepaths, - ImaginaryContigPaths& supportedPaths, - ImaginaryContigPaths& unsupportedPaths) -{ - if (!determineShortReadStats(readFilepaths)) { - return; - } + assert(in_degree(newVertex.node, g_contigGraph) == 0); + assert(out_degree(newVertex.node, g_contigGraph) == 0); - if (opt::verbose) { std::cerr << "\nRunning resolution algorithm...\n"; } - - assert(g_contigSequences.size() > 0); - assert(g_contigSequences.size() / 2 == g_contigComments.size()); - assert(ReadBatch::batches.size() > 0); + progressUpdate(); + } + for (const auto& newEdge : edges2add) { + if (!g_contigGraph.edge(newEdge.u, newEdge.v).second) { + g_contigGraph.add_edge(newEdge.u, newEdge.v, newEdge.distance); + } - std::vector> histograms; - for (auto batch : ReadBatch::batches) { - ReadBatch::current = batch; + progressUpdate(); + } + g_contigNames.lock(); +} - for (int r : ReadBatch::current.rValues) { - if (int(r) < int(opt::k)) { - std::cerr << "r value " << r << "(" << ReadBatch::current.size << ") is too short - skipping." << std::endl; - continue; - } +void +writeHistograms(const Resolution& resolution, const std::string& prefix, int subiteration) +{ + if (opt::verbose) { + std::cerr << "Writing algorithm histograms..." << std::flush; + } - if (opt::verbose) { std::cerr << "\nRead size = " << batch.size << ", r = " << r << " ...\n\n"; } + std::string findsFilename = prefix + "-r" + std::to_string(resolution.r) + "-" + + std::to_string(subiteration + 1) + "-finds.tsv"; + std::ofstream findsFile(findsFilename.c_str()); + findsFile << resolution.findsHistogram; - buildFilters(readFilepaths, r, opt::bloomSize); + std::string fractionFindsFilename = prefix + "-r" + std::to_string(resolution.r) + "-" + + std::to_string(subiteration + 1) + "-percent-finds.tsv"; + std::ofstream fractionFindsFile(fractionFindsFilename.c_str()); + fractionFindsFile << resolution.fractionFindsHistogram; - for (size_t j = 0; j < MAX_SUBITERATIONS; j++) { - if (opt::verbose) { std::cerr << "\nSubiteration " << j + 1 << "...\n"; } + std::string calculatedTestsFilename = prefix + "-r" + std::to_string(resolution.r) + "-" + + std::to_string(subiteration + 1) + + "-calculated-tests.tsv"; + std::ofstream calculatedTestsFile(calculatedTestsFilename.c_str()); + calculatedTestsFile << resolution.calculatedTestsHistogram; - int unsupportedCountPrev = unsupportedPaths.size(); + if (opt::verbose) { + std::cerr << " Done!" << std::endl; + } +} - Resolution resolution = resolveRepeats(); +void +resolveShort( + const std::vector& readFilepaths, + ImaginaryContigPaths& supportedPaths, + ImaginaryContigPaths& unsupportedPaths) +{ + if (!determineShortReadStats(readFilepaths)) { + return; + } - if (!resolution.failed) { - processGraph(resolution, supportedPaths, unsupportedPaths); - assembleContigs(); - if (!opt::histPrefix.empty()) { - writeHistograms(resolution, opt::histPrefix, j); - } - } + if (opt::verbose) { + std::cerr << "\nRunning resolution algorithm...\n"; + } - int newUnsupportedCount = unsupportedPaths.size() - unsupportedCountPrev; - assert(newUnsupportedCount >= 0); - if (newUnsupportedCount == 0) { break; } - } - } - } + assert(g_contigSequences.size() > 0); + assert(g_contigSequences.size() / 2 == g_contigComments.size()); + assert(ReadSize::readSizes.size() > 0); + + std::vector> histograms; + for (auto batch : ReadSize::readSizes) { + ReadSize::current = batch; + + for (int r : ReadSize::current.rValues) { + if (int(r) < int(opt::k)) { + std::cerr << "r value " << r << "(" << ReadSize::current.size + << ") is too short - skipping." << std::endl; + continue; + } + + if (opt::verbose) { + std::cerr << "\nRead size = " << batch.size << ", r = " << r << " ...\n\n"; + } + + buildFilters(readFilepaths, r, opt::bloomSize); + + for (size_t j = 0; j < MAX_SUBITERATIONS; j++) { + if (opt::verbose) { + std::cerr << "\nSubiteration " << j + 1 << "...\n"; + } + + int unsupportedCountPrev = unsupportedPaths.size(); + + Resolution resolution = resolveRepeats(); + + if (!resolution.failed) { + processGraph(resolution, supportedPaths, unsupportedPaths); + assembleContigs(); + if (!opt::histPrefix.empty()) { + writeHistograms(resolution, opt::histPrefix, j); + } + } + + int newUnsupportedCount = unsupportedPaths.size() - unsupportedCountPrev; + assert(newUnsupportedCount >= 0); + if (newUnsupportedCount == 0) { + break; + } + } + } + } - if (opt::verbose) { std::cerr << "Resolution algorithm done.\n\n"; } + if (opt::verbose) { + std::cerr << "Resolution algorithm done.\n\n"; + } } diff --git a/RResolver/RAlgorithmsShort.h b/RResolver/RAlgorithmsShort.h index 44abfaa80..96886cb4d 100644 --- a/RResolver/RAlgorithmsShort.h +++ b/RResolver/RAlgorithmsShort.h @@ -1,152 +1,186 @@ #ifndef RRESOLVER_RALGORITHMS_SHORT_H #define RRESOLVER_RALGORITHMS_SHORT_H 1 -#include "RUtils.h" #include "BloomFilters.h" -#include "Contigs.h" -#include "SequenceTree.h" #include "Common/Histogram.h" +#include "Contigs.h" -#include -#include #include -#include #include +#include #include +#include +#include +#include -const int MIN_MARGIN = 2; -const int MAX_TESTS_OFFSET = 16; -const int R_VALUES_STEP = 20; -const int R_STEPS_MAX = 1; -const int R_MAX_K_DIFF = 80; -const int MAX_SUBITERATIONS = 2; -const long HIST_SAMPLE_SIZE = LONG_MAX; -const long REPEAT_CASES_LIMIT = LONG_MAX; -const int RMER_QUALITY_THRESHOLD = 30; -const long READ_STATS_SAMPLE_SIZE = 100000; -const double READ_BATCH_FRACTION_THRESHOLD = 0.30; -const long PATH_COMBINATIONS_MULTITHREAD_THRESHOLD = 5000; +const int MIN_MARGIN = 2; +const int R_HEURISTIC = 45; +const double R_HEURISTIC_A = 0.49; +const double R_HEURISTIC_B = 63.5; +const int MAX_SUBITERATIONS = 2; +const long HIST_SAMPLE_SIZE = LONG_MAX; +const long REPEAT_CASES_LIMIT = LONG_MAX; +const long READ_STATS_SAMPLE_SIZE = 100000; +const double READ_BATCH_FRACTION_THRESHOLD = 0.15; +const long PATH_COMBINATIONS_MULTITHREAD_THRESHOLD = 5000; const double SUPPORTED_PATHS_MIN = 0.15; -const double COV_APPROX_FORMULA_FACTOR = 5.00; +const double COV_APPROX_FORMULA_FACTOR = 4.00; const double SPACED_SEEDS_SNP_FRACTION = 1.00; -const int MAX_READ_SIZE = 300; +const int MAX_READ_SIZE = 300; namespace opt { - /** Read Bloom filter size in bytes. */ - extern size_t bloomSize; +/** Read Bloom filter size in bytes. */ +extern size_t bloomSize; - /** The number of parallel threads */ - extern int threads; +/** The number of parallel threads */ +extern int threads; - /** Prefix for the histogram files */ - extern std::string histPrefix; +/** Prefix for the histogram files */ +extern std::string histPrefix; - /** Name of the file to write resulting graph to */ - extern std::string outputGraphPath; +/** Name of the file to write resulting graph to */ +extern std::string outputGraphPath; - /** Name of the file to write resulting graph to */ - extern std::string outputContigsPath; +/** Name of the file to write resulting graph to */ +extern std::string outputContigsPath; - /** Number of kmers required to be found for a path to be supported */ - extern int threshold; +/** Number of kmers required to be found for a path to be supported */ +extern int threshold; - /** Minimum number of sliding window moves */ - extern int min_tests; +/** Number of Rmers to extract per read */ +extern int extract; - /** Maximum number of branching paths */ - extern int branching; +/** Minimum number of sliding window moves */ +extern int minTests; - /** Flag indicating whether error correction is enabled */ - extern int error_correction; +/** Maximum number of sliding window moves */ +extern int maxTests; - /** Name of the file to write supported paths to */ - extern std::string outputSupportedPathsPath; +/** Maximum number of branching paths */ +extern int branching; - /** Name of the file to write unsupported paths to */ - extern std::string outputUnsupportedPathsPath; +/** Explicitly specified r values. */ +extern std::vector rValues; - extern unsigned k; // used by ContigProperties +/** Explicitly set coverage approximation factors. */ +extern std::vector covApproxFactors; - extern int format; // used by ContigProperties - -} +/** Base pair quality threshold that large k-mers bases should have at minimum. */ +extern int readQualityThreshold; -class Support { +/** Flag indicating whether error correction is enabled */ +extern int errorCorrection; -public: +/** Name of the file to write supported paths to */ +extern std::string outputSupportedPathsPath; - Support() = default; +/** Name of the file to write unsupported paths to */ +extern std::string outputUnsupportedPathsPath; - Support(int calculatedTests): found(-1), tests(-1), calculatedTests(calculatedTests) { - assert(calculatedTests >= 0); - } +extern unsigned k; // used by ContigProperties - Support(int found, int tests): - found(found), tests(tests) - { - assert(found >= 0); - assert(tests > 0); - assert(calculatedTests == -1); - } +extern int format; // used by ContigProperties - Support(int found, int tests, int calculatedTests): - found(found), tests(tests), calculatedTests(calculatedTests) - { - assert(found >= 0); - assert(tests > 0); - assert(calculatedTests >= 0); - } - - bool unknown() const { - return tests == -1; - } - - void reset() { - found = -1; - tests = -1; - } - - bool good() const { - return unknown() || found >= opt::threshold; - } - - bool operator>(const Support& s) { - return found > s.found; - } - - bool operator<(const Support& s) { - return found < s.found; - } - - int found = -1; - int tests = -1; - int calculatedTests = -1; +} +class Support +{ + public: + + enum class UnknownReason : uint8_t { + UNDETERMINED = 0, // Not yet processed + TOO_MANY_COMBINATIONS, // Branching out exploded beyond a threshold + OVER_MAX_TESTS, // Planned tests was above the threshold + POSSIBLE_TESTS_LT_PLANNED, // Planned tests could not be carried out due to low coverage + WINDOW_NOT_LONG_ENOUGH, // Window too small / repeat too large to all the planned tests + HEAD_SHORTER_THAN_MARGIN, // One of the branches to the left was too short for planned tests + TAIL_SHORTER_THAN_MARGIN, // One of the branches to the right was too short for planned tests + DIFFERENT_CULPRIT // The path was fine, but another path in this repeat was unknown so all the paths + // for this repeat became unknown + }; + + Support() = default; + + Support(UnknownReason unknownReason): unknownReason(unknownReason) {}; + + Support(int calculatedTests, UnknownReason unknownReason) + : found(-1) + , tests(-1) + , unknownReason(unknownReason) + { + assert(calculatedTests >= 0); + if (calculatedTests > std::numeric_limitscalculatedTests)>::max()) { this->calculatedTests = std::numeric_limitscalculatedTests)>::max(); } else { + this->calculatedTests = calculatedTests; + } + } + + Support(int8_t found, int8_t tests) + : found(found) + , tests(tests) + { + assert(found >= 0); + assert(tests > 0); + assert(calculatedTests == -1); + } + + Support(int8_t found, int8_t tests, int calculatedTests) + : found(found) + , tests(tests) + { + assert(found >= 0); + assert(tests > 0); + assert(calculatedTests >= 0); + if (calculatedTests > std::numeric_limitscalculatedTests)>::max()) { this->calculatedTests = std::numeric_limitscalculatedTests)>::max(); } else { + this->calculatedTests = calculatedTests; + } + } + + bool unknown() const { return tests == -1; } + + void reset() + { + found = -1; + tests = -1; + } + + bool good() const { return unknown() || found >= opt::threshold; } + + bool operator>(const Support& s) { return found > s.found; } + + bool operator<(const Support& s) { return found < s.found; } + + int8_t found = -1; + int8_t tests = -1; + int8_t calculatedTests = -1; + UnknownReason unknownReason = UnknownReason::UNDETERMINED; }; -class ReadBatch { - -public: - - ReadBatch(int size): size(size) {} +class ReadSize +{ - double getFractionOfTotal() const { return double(sampleCount) / double(readsSampleSize); } + public: + ReadSize(int size) + : size(size) + {} - int size; - std::vector rValues; - Histogram qualThresholdPositions; - long sampleCount = 0; + double getFractionOfTotal() const { return double(sampleCount) / double(readsSampleSize); } - static long readsSampleSize; - static std::vector batches; - static ReadBatch current; + int size; + std::vector rValues; + Histogram qualThresholdPositions; + long sampleCount = 0; + double covApproxFactor = COV_APPROX_FORMULA_FACTOR; + static long readsSampleSize; + static std::vector readSizes; + static ReadSize current; }; void -resolveShort(const std::vector& readFilepaths, - ImaginaryContigPaths& supportedPaths, - ImaginaryContigPaths& unsupportedPaths); +resolveShort( + const std::vector& readFilepaths, + ImaginaryContigPaths& supportedPaths, + ImaginaryContigPaths& unsupportedPaths); #endif diff --git a/RResolver/README.md b/RResolver/README.md index 6be53f98c..73fa0b3fa 100644 --- a/RResolver/README.md +++ b/RResolver/README.md @@ -1,28 +1,27 @@ RResolver =================== - RResolver, which stands for repeat or read resolver, whichever you prefer, improves genome assemblies at unitig stage by using a sliding window at read size level across repeats to determine which paths are correct. Synopsis =================== - `abyss-rresolver-short " [OPTION]... [ ...]` Options =================== - - * `b`: read Bloom filter size. - Unit suffixes 'K' (kilobytes), 'M' (megabytes), - or 'G' (gigabytes) may be used. [`required`] + * `b`: read Bloom filter size. Unit suffixes 'K' (kilobytes), 'M' (megabytes), or 'G' (gigabytes) may be used. [`required`] * `k`: assembly k-mer size [`required`] - * `g`: write the contig adjacency graph to FILE [`required`] + * `g`: write the contig adjacency graph to FILE. [`required`] * `c`: write the contigs to FILE [`required`] * `j`: use N parallel threads [`1`] - * `h`: write the algorithm histograms with the given prefix. - Histograms are omitted if no prefix is given. - * `t`: set path support threshold to N. [`5`] - * `m`: set minimum number of sliding window moves to N. [`20`] - * `r`: set maximum number of branching paths to N. [`75`] + * `h`: write the algorithm histograms with the given prefix. Histograms are omitted if no prefix is given. + * `t`: set path support threshold to N. [`4`] + * `x`: extract N r-mers per read. [`4`] + * `m`: set minimum number of sliding window moves to N. Cannot be higher than 127. [`20`] + * `M`: set maximum number of sliding window moves to N. Cannot be higher than 127. [`36`] + * `n`: set maximum number of branching paths to N. [`75`] + * `r`: explicitly set r value (k value used by rresolver). The number of set r values should be equal to the number of read sizes. + * `a`: explicitly set coverage approximation factor. + * `e`: enable correction of a 1bp error in kmers. [`false`] * `S`: write supported paths to FILE. * `U`: write unsupported paths to FILE. @@ -31,4 +30,4 @@ Authors + [**Vladimir Nikolic**](https://github.com/schutzekatze) + Supervised by [**Dr. Inanc Birol**](https://www.bcgsc.ca/people/inanc-birol). -Copyright 2020 Canada's Michael Smith Genome Sciences Centre \ No newline at end of file +Copyright 2021 Canada's Michael Smith Genome Sciences Centre \ No newline at end of file diff --git a/RResolver/RResolverShort.cpp b/RResolver/RResolverShort.cpp index 2849aea87..c2a8a0c64 100644 --- a/RResolver/RResolverShort.cpp +++ b/RResolver/RResolverShort.cpp @@ -6,302 +6,381 @@ #include "RAlgorithmsShort.h" +#if _OPENMP +#include +#endif + +#include #include #include -#include #include #include #define PROGRAM "rresolver-short" static const char VERSION_MESSAGE[] = -PROGRAM " (" PACKAGE_NAME ") " VERSION "\n" -"Written by Vladimir Nikolic.\n" -"\n" -"Copyright 2020 Canada's Michael Smith Genome Sciences Centre\n"; + PROGRAM " (" PACKAGE_NAME ") " VERSION "\n" + "Written by Vladimir Nikolic.\n" + "\n" + "Copyright 2020 Canada's Michael Smith Genome Sciences Centre\n"; static const char USAGE_MESSAGE[] = -"Usage: " PROGRAM " [OPTION]... [ ...]\n" -"Resolve unitig repeats using a sliding window and\n" -"and short read information.\n" -"\n" -" Arguments:\n" -"\n" -" contigs in FASTA format\n" -" contig adjacency graph\n" -" reads in FASTA format\n" -"\n" -" Options:\n" -"\n" -" -b, --bloom-size=N read Bloom filter size.\n" -" Unit suffixes 'K' (kilobytes), 'M' (megabytes),\n" -" or 'G' (gigabytes) may be used. [required]\n" -" -g, --graph=FILE write the contig adjacency graph to FILE. [required]\n" -" -c, --contigs=FILE write the contigs to FILE. [required]\n" -" -j, --threads=N use N parallel threads [1]\n" -" -k, --kmer=N assembly k-mer size\n" -" -h, --hist=PREFIX write the algorithm histograms with the given prefix.\n" -" Histograms are omitted if no prefix is given." -" -t, --threshold=N set path support threshold to N. [4]" -" -m, --min-tests=N set minimum number of sliding window moves to N. [20]" -" -r, --branching=N set maximum number of branching paths to N. [75]" -" -e, --error-correction enable correction of a 1bp error in kmers. [false]" -" -S, --supported=FILE write supported paths to FILE.\n" -" -U, --unsupported=FILE write unsupported paths to FILE.\n" -" Used for path sequence quality check.\n" -" --adj output the graph in ADJ format [default]\n" -" --asqg output the graph in ASQG format\n" -" --dot output the graph in GraphViz format\n" -" --gfa output the graph in GFA1 format\n" -" --gfa1 output the graph in GFA1 format\n" -" --gfa2 output the graph in GFA2 format\n" -" --gv output the graph in GraphViz format\n" -" --sam output the graph in SAM format\n" -" -v, --verbose display verbose output\n" -" --help display this help and exit\n" -" --version output version information and exit\n" -"\n" -"Report bugs to <" PACKAGE_BUGREPORT ">.\n"; + "Usage: " PROGRAM " [OPTION]... [ ...]\n" + "Resolve unitig repeats using a sliding window and\n" + "and short read information.\n" + "\n" + " Arguments:\n" + "\n" + " contigs in FASTA format\n" + " contig adjacency graph\n" + " reads in FASTA format\n" + "\n" + " Options:\n" + "\n" + " -b, --bloom-size=N read Bloom filter size. Unit suffixes 'K' (kilobytes), 'M' (megabytes), or 'G' (gigabytes) may be used. [required]\n" + " -g, --graph=FILE write the contig adjacency graph to FILE. [required]\n" + " -c, --contigs=FILE write the contigs to FILE. [required]\n" + " -j, --threads=N use N parallel threads [1]\n" + " -k, --kmer=N assembly k-mer size\n" + " -h, --hist=PREFIX write the algorithm histograms with the given prefix. Histograms are omitted if no prefix is given.\n" + " -t, --threshold=N set path support threshold to N. [4]\n" + " -x, --extract=N extract N r-mers per read. [4]\n" + " -m, --min-tests=N set minimum number of sliding window moves to N. Cannot be higher than 127. [20]\n" + " -M, --max-tests=N set maximum number of sliding window moves to N. Cannot be higher than 127. [36]\n" + " -n, --branching=N set maximum number of branching paths to N. [75]\n" + " -r, --rmer=N explicitly set r value (k value used by rresolver). The number of set r values should be equal to the number of read sizes.\n" + " -a, --approx-factor explicitly set coverage approximation factor.\n" + " -q, --quality--threshold=N minimum quality all bases in rmers should have, on average. [35] (UNUSED)\n" + " -e, --error-correction enable correction of a 1bp error in kmers. [false]\n" + " -S, --supported=FILE write supported paths to FILE.\n" + " -U, --unsupported=FILE write unsupported paths to FILE.\n" + " Used for path sequence quality check.\n" + " --adj output the graph in ADJ format [default]\n" + " --asqg output the graph in ASQG format\n" + " --dot output the graph in GraphViz format\n" + " --gfa output the graph in GFA1 format\n" + " --gfa1 output the graph in GFA1 format\n" + " --gfa2 output the graph in GFA2 format\n" + " --gv output the graph in GraphViz format\n" + " --sam output the graph in SAM format\n" + " -v, --verbose display verbose output\n" + " --help display this help and exit\n" + " --version output version information and exit\n" + "\n" + "Report bugs to <" PACKAGE_BUGREPORT ">.\n"; namespace opt { - /** Read Bloom filter size in bytes. */ - size_t bloomSize = 0; +/** Read Bloom filter size in bytes. */ +size_t bloomSize = 0; + +/** The number of parallel threads */ +int threads = 1; + +/** Prefix for the histogram files */ +std::string histPrefix; + +/** Name of the file to write resulting graph to */ +std::string outputGraphPath; + +/** Name of the file to write resulting graph to */ +std::string outputContigsPath; + +/** Number of kmers required to be found for a path to be supported */ +int threshold = 4; - /** The number of parallel threads */ - int threads = 1; +/** Number of Rmers to extract per read */ +int extract = 4; - /** Prefix for the histogram files */ - std::string histPrefix; +/** Minimum number of sliding window moves */ +int minTests = 20; - /** Name of the file to write resulting graph to */ - std::string outputGraphPath; +/** Maximum number of sliding window moves */ +int maxTests = 36; - /** Name of the file to write resulting graph to */ - std::string outputContigsPath; +/** Maximum number of branching paths */ +int branching = 75; - /** Number of kmers required to be found for a path to be supported */ - int threshold = 4; +/** Explicitly specified r values. */ +std::vector rValues; - /** Minimum number of sliding window moves */ - int min_tests = 20; +/** Explicitly set coverage approximation factors. */ +std::vector covApproxFactors; - /** Maximum number of branching paths */ - int branching = 75; +/** Base pair quality threshold that large k-mers bases should have at minimum. */ +int readQualityThreshold = 35; - /** Flag indicating whether error correction is enabled */ - int error_correction = 0; +/** Flag indicating whether error correction is enabled */ +int errorCorrection = 0; - /** Name of the file to write supported paths to */ - std::string outputSupportedPathsPath; +/** Name of the file to write supported paths to */ +std::string outputSupportedPathsPath; - /** Name of the file to write unsupported paths to */ - std::string outputUnsupportedPathsPath; +/** Name of the file to write unsupported paths to */ +std::string outputUnsupportedPathsPath; - unsigned k = 0; // used by ContigProperties +unsigned k = 0; // used by ContigProperties - int format; // used by ContigProperties +int format; // used by ContigProperties } -static const char shortopts[] = "b:j:g:c:k:h:t:m:r:eS:U:v"; +static const char shortopts[] = "b:j:g:c:k:h:t:x:m:M:n:r:a:q:eS:U:v"; -enum { OPT_HELP = 1, OPT_VERSION }; +enum +{ + OPT_HELP = 1, + OPT_VERSION +}; static const struct option longopts[] = { - { "bloom-size", required_argument, NULL, 'b' }, - { "threads", required_argument, NULL, 'j' }, - { "graph", required_argument, NULL, 'g' }, - { "contigs", required_argument, NULL, 'c' }, - { "kmer", required_argument, NULL, 'k' }, - { "hist", required_argument, NULL, 'h' }, - { "threshold", required_argument, NULL, 't' }, - { "min-tests", required_argument, NULL, 'm' }, - { "branching", required_argument, NULL, 'r' }, - { "error-correction", no_argument, &opt::error_correction, 1}, - { "supported", required_argument, NULL, 'S' }, - { "unsupported", required_argument, NULL, 'U' }, - { "adj", no_argument, &opt::format, ADJ }, - { "asqg", no_argument, &opt::format, ASQG }, - { "dot", no_argument, &opt::format, DOT }, - { "gfa", no_argument, &opt::format, GFA1 }, - { "gfa1", no_argument, &opt::format, GFA1 }, - { "gfa2", no_argument, &opt::format, GFA2 }, - { "gv", no_argument, &opt::format, DOT }, - { "sam", no_argument, &opt::format, SAM }, - { "verbose", no_argument, NULL, 'v' }, - { "help", no_argument, NULL, OPT_HELP }, - { "version", no_argument, NULL, OPT_VERSION }, - { NULL, 0, NULL, 0 } + { "bloom-size", required_argument, NULL, 'b' }, + { "threads", required_argument, NULL, 'j' }, + { "graph", required_argument, NULL, 'g' }, + { "contigs", required_argument, NULL, 'c' }, + { "kmer", required_argument, NULL, 'k' }, + { "hist", required_argument, NULL, 'h' }, + { "threshold", required_argument, NULL, 't' }, + { "extract", required_argument, NULL, 'x' }, + { "min-tests", required_argument, NULL, 'm' }, + { "max-tests", required_argument, NULL, 'M' }, + { "branching", required_argument, NULL, 'n' }, + { "rmer", required_argument, NULL, 'r' }, + { "approx-factor", required_argument, NULL, 'a' }, + { "quality-threshold", required_argument, NULL, 'q' }, + { "error-correction", no_argument, &opt::errorCorrection, 1 }, + { "supported", required_argument, NULL, 'S' }, + { "unsupported", required_argument, NULL, 'U' }, + { "adj", no_argument, &opt::format, ADJ }, + { "asqg", no_argument, &opt::format, ASQG }, + { "dot", no_argument, &opt::format, DOT }, + { "gfa", no_argument, &opt::format, GFA1 }, + { "gfa1", no_argument, &opt::format, GFA1 }, + { "gfa2", no_argument, &opt::format, GFA2 }, + { "gv", no_argument, &opt::format, DOT }, + { "sam", no_argument, &opt::format, SAM }, + { "verbose", no_argument, NULL, 'v' }, + { "help", no_argument, NULL, OPT_HELP }, + { "version", no_argument, NULL, OPT_VERSION }, + { NULL, 0, NULL, 0 } }; -void check_options(int argc, bool& die) { - if (opt::bloomSize == 0) { - std::cerr << PROGRAM ": missing or invalid value for mandatory option `-b'" << std::endl; - die = true; - } - - if (argc - optind < 3) { - std::cerr << PROGRAM ": missing input file arguments" << std::endl; - die = true; - } - - if (opt::k <= 0) { - std::cerr << PROGRAM ": missing or invalid value for mandatory option `-k'" << std::endl; - die = true; - } - - if (opt::outputGraphPath.empty()) { - std::cerr << PROGRAM ": missing or invalid value for mandatory option `-g`" << std::endl; - die = true; - } - - if (opt::outputContigsPath.empty()) { - std::cerr << PROGRAM ": missing or invalid value for mandatory option `-c`" << std::endl; - die = true; - } - - if (opt::threads <= 0) { - std::cerr << PROGRAM ": invalid number of threads `-j`" << std::endl; - die = true; - } +void +check_options(int argc, bool& die) +{ + if (opt::bloomSize == 0) { + std::cerr << PROGRAM ": missing or invalid value for mandatory option `-b'" << std::endl; + die = true; + } + + if (argc - optind < 3) { + std::cerr << PROGRAM ": missing input file arguments" << std::endl; + die = true; + } + + if (opt::k <= 0) { + std::cerr << PROGRAM ": missing or invalid value for mandatory option `-k'" << std::endl; + die = true; + } + + if (opt::readQualityThreshold <= 0) { + std::cerr << PROGRAM ": invalid value for option `-q'" << std::endl; + die = true; + } + + if (opt::outputGraphPath.empty()) { + std::cerr << PROGRAM ": missing or invalid value for mandatory option `-g`" << std::endl; + die = true; + } + + if (opt::outputContigsPath.empty()) { + std::cerr << PROGRAM ": missing or invalid value for mandatory option `-c`" << std::endl; + die = true; + } + + if (opt::threads <= 0) { + std::cerr << PROGRAM ": invalid number of threads `-j`" << std::endl; + die = true; + } + + if (opt::minTests > opt::maxTests) { + std::cerr << PROGRAM ": --min-tests cannot be higher than --max-tests" << std::endl; + die = true; + } } -void writePaths(std::ostream& stream, const ImaginaryContigPaths& paths) +void +writePaths(std::ostream& stream, const ImaginaryContigPaths& paths) { - int counter = 0; - for (const auto &path : paths) { - stream << '>' << counter++ << '\n' << getPathSequence(path) << '\n'; - } + int counter = 0; + for (const auto& path : paths) { + stream << '>' << counter++ << '\n' << getPathSequence(path) << '\n'; + } } -void writeResults(const ImaginaryContigPaths &supportedPaths, - const ImaginaryContigPaths &unsupportedPaths, - const std::string &commandLine) +void +writeResults( + const ImaginaryContigPaths& supportedPaths, + const ImaginaryContigPaths& unsupportedPaths, + const std::string& commandLine) { - if (!opt::outputContigsPath.empty()) { - storeContigs(opt::outputContigsPath); + if (!opt::outputContigsPath.empty()) { + storeContigs(opt::outputContigsPath); } - if (!opt::outputGraphPath.empty()) { - storeContigGraph(opt::outputGraphPath, PROGRAM, commandLine); + if (!opt::outputGraphPath.empty()) { + storeContigGraph(opt::outputGraphPath, PROGRAM, commandLine); } - if (!opt::outputSupportedPathsPath.empty()) { - if (opt::verbose) { - std::cerr << "Writing supported paths to `" << opt::outputSupportedPathsPath << "'..." << std::endl; - } - std::ofstream outputSupportedPaths(opt::outputSupportedPathsPath); - writePaths(outputSupportedPaths, supportedPaths); - assert(outputSupportedPaths.good()); - outputSupportedPaths.close(); - if (opt::verbose) { - std::cerr << "Supported paths written." << std::endl; - } - } - - if (!opt::outputUnsupportedPathsPath.empty()) { - if (opt::verbose) { - std::cerr << "Writing unsupported paths to `" << opt::outputUnsupportedPathsPath << "'..." << std::endl; - } - std::ofstream outputUnsupportedPaths(opt::outputUnsupportedPathsPath); - writePaths(outputUnsupportedPaths, unsupportedPaths); - assert(outputUnsupportedPaths.good()); - outputUnsupportedPaths.close(); - if (opt::verbose) { - std::cerr << "Unsupported paths written." << std::endl; - } - } + if (!opt::outputSupportedPathsPath.empty()) { + if (opt::verbose) { + std::cerr << "Writing supported paths to `" << opt::outputSupportedPathsPath << "'..." + << std::endl; + } + std::ofstream outputSupportedPaths(opt::outputSupportedPathsPath); + writePaths(outputSupportedPaths, supportedPaths); + assert(outputSupportedPaths.good()); + outputSupportedPaths.close(); + if (opt::verbose) { + std::cerr << "Supported paths written." << std::endl; + } + } + + if (!opt::outputUnsupportedPathsPath.empty()) { + if (opt::verbose) { + std::cerr << "Writing unsupported paths to `" << opt::outputUnsupportedPathsPath + << "'..." << std::endl; + } + std::ofstream outputUnsupportedPaths(opt::outputUnsupportedPathsPath); + writePaths(outputUnsupportedPaths, unsupportedPaths); + assert(outputUnsupportedPaths.good()); + outputUnsupportedPaths.close(); + if (opt::verbose) { + std::cerr << "Unsupported paths written." << std::endl; + } + } } -int main(int argc, char** argv) +int +main(int argc, char** argv) { std::string commandLine; { std::ostringstream ss; char** last = argv + argc - 1; - std::copy(argv, last, std::ostream_iterator(ss, " ")); + std::copy(argv, last, std::ostream_iterator(ss, " ")); ss << *last; commandLine = ss.str(); } - bool die = false; //scary, and spooky - for (int c; (c = getopt_long(argc, argv, - shortopts, longopts, NULL)) != -1;) { - std::istringstream arg(optarg != NULL ? optarg : ""); - switch (c) { - case '?': - die = true; break; - case 'b': - opt::bloomSize = SIToBytes(arg); break; - case 'j': - arg >> opt::threads; break; - case 'k': - arg >> opt::k; break; - case 'h': - arg >> opt::histPrefix; break; - case 'g': - arg >> opt::outputGraphPath; break; - case 'c': - arg >> opt::outputContigsPath; break; - case 't': - arg >> opt::threshold; break; - case 'm': - arg >> opt::min_tests; break; - case 'r': - arg >> opt::branching; break; - case 'S': - arg >> opt::outputSupportedPathsPath; break; - case 'U': - arg >> opt::outputUnsupportedPathsPath; break; - case 'v': - ++opt::verbose; break; - case OPT_HELP: - std::cout << USAGE_MESSAGE; exit(EXIT_SUCCESS); - case OPT_VERSION: - std::cout << VERSION_MESSAGE; exit(EXIT_SUCCESS); - } - - if (optarg != NULL && (!arg.eof() || arg.fail())) { - std::cerr << PROGRAM ": invalid option: `-" - << char(c) << optarg << "'" << std::endl; - exit(EXIT_FAILURE); - } - } - - check_options(argc, die); - - if (die) { - std::cerr << "Try `" << PROGRAM - << " --help' for more information." << std::endl; - exit(EXIT_FAILURE); - } + bool die = false; // scary, and spooky + for (int c; (c = getopt_long(argc, argv, shortopts, longopts, NULL)) != -1;) { + std::istringstream arg(optarg != NULL ? optarg : ""); + switch (c) { + case '?': + die = true; + break; + case 'b': + opt::bloomSize = SIToBytes(arg); + break; + case 'j': + arg >> opt::threads; + break; + case 'k': + arg >> opt::k; + break; + case 'h': + arg >> opt::histPrefix; + break; + case 'g': + arg >> opt::outputGraphPath; + break; + case 'c': + arg >> opt::outputContigsPath; + break; + case 't': + arg >> opt::threshold; + break; + case 'x': + arg >> opt::extract; + break; + case 'm': + arg >> opt::minTests; + break; + case 'M': + arg >> opt::maxTests; + break; + case 'n': + arg >> opt::branching; + break; + case 'r': + int r; + arg >> r; + opt::rValues.push_back(r); + break; + case 'a': + double a; + arg >> a; + opt::covApproxFactors.push_back(a); + break; + case 'q': + arg >> opt::readQualityThreshold; + break; + case 'S': + arg >> opt::outputSupportedPathsPath; + break; + case 'U': + arg >> opt::outputUnsupportedPathsPath; + break; + case 'v': + ++opt::verbose; + break; + case OPT_HELP: + std::cout << USAGE_MESSAGE; + exit(EXIT_SUCCESS); + case OPT_VERSION: + std::cout << VERSION_MESSAGE; + exit(EXIT_SUCCESS); + } + + if (optarg != NULL && (!arg.eof() || arg.fail())) { + std::cerr << PROGRAM ": invalid option: `-" << char(c) << optarg << "'" << std::endl; + exit(EXIT_FAILURE); + } + } + + check_options(argc, die); + + if (die) { + std::cerr << "Try `" << PROGRAM << " --help' for more information." << std::endl; + exit(EXIT_FAILURE); + } #if _OPENMP - if (opt::threads > 0) - omp_set_num_threads(opt::threads); + if (opt::threads > 0) + omp_set_num_threads(opt::threads); #endif - std::string contigsPath(argv[optind++]); - std::string contigGraphPath(argv[optind++]); + std::string contigsPath(argv[optind++]); + std::string contigGraphPath(argv[optind++]); + + loadContigGraph(contigGraphPath); + loadContigs(contigsPath); - loadContigGraph(contigGraphPath); - loadContigs(contigsPath); + std::vector readFilepaths; + for (int i = optind; i < argc; i++) { + readFilepaths.push_back(argv[i]); + } - std::vector readFilepaths; - for (int i = optind; i < argc; i++) { - readFilepaths.push_back(argv[i]); - } + ImaginaryContigPaths supportedPaths, unsupportedPaths; + resolveShort(readFilepaths, supportedPaths, unsupportedPaths); - ImaginaryContigPaths supportedPaths, unsupportedPaths; - resolveShort(readFilepaths, supportedPaths, unsupportedPaths); - - if (opt::verbose) { - std::cerr << "Stats after resolution:" << std::endl; - printGraphStats(std::cerr, g_contigGraph); - } + if (opt::verbose) { + std::cerr << "Stats after resolution:" << std::endl; + printGraphStats(std::cerr, g_contigGraph); + } - writeResults(supportedPaths, unsupportedPaths, commandLine); + writeResults(supportedPaths, unsupportedPaths, commandLine); - return EXIT_SUCCESS; + return EXIT_SUCCESS; } diff --git a/RResolver/RUtils.cpp b/RResolver/RUtils.cpp index 94f2e22a6..c7b2215cf 100644 --- a/RResolver/RUtils.cpp +++ b/RResolver/RUtils.cpp @@ -2,9 +2,9 @@ #include "Common/Options.h" -#include #include #include +#include static std::string progressName; static unsigned progressNumber = 0; @@ -13,78 +13,92 @@ static unsigned progressUpdates; static unsigned progressLastPrinted; static bool progressRunning = false; -void progressStart(const std::string& name, unsigned total) { - if (opt::verbose) { - progressName = name; - std::string tempName = name; - tempName[0] = std::tolower(tempName[0]); - std::cerr << '\n' << ++progressNumber << ". Starting " + tempName << "...\n"; - std::cerr << "Progress: 0%" << std::flush; - progressTotal = total; - progressUpdates = 0; - progressLastPrinted = 0; - assert(!progressRunning); - progressRunning = true; - } +void +progressStart(const std::string& name, unsigned total) +{ + if (opt::verbose) { + progressName = name; + std::string tempName = name; + tempName[0] = std::tolower(tempName[0]); + std::cerr << '\n' << ++progressNumber << ". Starting " + tempName << "...\n"; + std::cerr << "Progress: 0%" << std::flush; + progressTotal = total; + progressUpdates = 0; + progressLastPrinted = 0; + assert(!progressRunning); + progressRunning = true; + } } -void progressUpdate() { - if (opt::verbose) { - assert(progressRunning); - progressUpdates++; - assert(progressUpdates <= progressTotal); - double fraction = double(progressUpdates) / progressTotal; +void +progressUpdate() +{ + if (opt::verbose) { + assert(progressRunning); + progressUpdates++; + assert(progressUpdates <= progressTotal); + double fraction = double(progressUpdates) / progressTotal; - if (progressUpdates == progressTotal) { - std::cerr << "\rProgress: 100%\n" << progressName << " done." << std::endl; - progressRunning = false; - } else if (double(progressUpdates - progressLastPrinted) / progressTotal - >= PROGRESS_PRINT_FRACTION && progressUpdates < progressTotal) - { - std::cerr << "\rProgress: " << int(fraction * 100.0) << "%" << std::flush; - progressLastPrinted = progressUpdates; - } - } + if (progressUpdates == progressTotal) { + std::cerr << "\rProgress: 100%\n" << progressName << " done." << std::endl; + progressRunning = false; + } else if ( + double(progressUpdates - progressLastPrinted) / progressTotal >= + PROGRESS_PRINT_FRACTION && + progressUpdates < progressTotal) { + std::cerr << "\rProgress: " << int(fraction * 100.0) << "%" << std::flush; + progressLastPrinted = progressUpdates; + } + } } -unsigned nChoosek(unsigned n, unsigned k) { - if (k > n) return 0; - if (k * 2 > n) k = n - k; - if (k == 0) return 1; +unsigned +nChoosek(unsigned n, unsigned k) +{ + if (k > n) + return 0; + if (k * 2 > n) + k = n - k; + if (k == 0) + return 1; - long result = n; - for(unsigned i = 2; i <= k; ++i) { - result *= (n - i + 1); - result /= i; - } - return result; + long result = n; + for (unsigned i = 2; i <= k; ++i) { + result *= (n - i + 1); + result /= i; + } + return result; } -std::vector> genPermutations(unsigned n) { - std::vector> permutations; - std::vector permutation; - for (unsigned i = 0; i < n; i++) { - permutation.push_back(i); - } - for (unsigned i = 0; i < n; i++) { - permutations.push_back(permutation); - std::next_permutation(permutation.begin(), permutation.end()); - } - return permutations; +std::vector> +genPermutations(unsigned n) +{ + std::vector> permutations; + std::vector permutation; + for (unsigned i = 0; i < n; i++) { + permutation.push_back(i); + } + for (unsigned i = 0; i < n; i++) { + permutations.push_back(permutation); + std::next_permutation(permutation.begin(), permutation.end()); + } + return permutations; } -std::vector> genCombinations(unsigned n, unsigned k) { - std::vector v(n); - std::fill(v.begin(), v.begin() + k, true); - std::vector> combinations; - do { - std::vector combination; - for (unsigned i = 0; i < n; ++i) { - if (v[i]) { - combination.push_back(i); - } - } - combinations.push_back(combination); - } while (std::prev_permutation(v.begin(), v.end())); - return combinations; +std::vector> +genCombinations(unsigned n, unsigned k) +{ + std::vector v(n); + std::fill(v.begin(), v.begin() + k, true); + std::vector> combinations; + do { + std::vector combination; + for (unsigned i = 0; i < n; ++i) { + if (v[i]) { + combination.push_back(i); + } + } + combinations.push_back(combination); + } while (std::prev_permutation(v.begin(), v.end())); + return combinations; } diff --git a/RResolver/RUtils.h b/RResolver/RUtils.h index e0ca43edb..4a14487af 100644 --- a/RResolver/RUtils.h +++ b/RResolver/RUtils.h @@ -1,63 +1,75 @@ #ifndef RUTILS_H #define RUTILS_H 1 -#include -#include #include +#include +#include #if _OPENMP const unsigned MAX_SIMULTANEOUS_TASKS = 60000; -# include +#include #endif const double PROGRESS_PRINT_FRACTION = 0.01; -template -void iteratorMultithreading(const IteratorT& start, const IteratorT &end, - const FilterT &filter, const ActionT &action, - const int threads = 0) +template +void +iteratorMultithreading( + const IteratorT& start, + const IteratorT& end, + const FilterT& filter, + const ActionT& action, + const int threads = 0) { #if _OPENMP - int threadsBefore = 1; - if (threads > 0) { - threadsBefore = omp_get_max_threads(); - omp_set_num_threads(threads); - } + int threadsBefore = 1; + if (threads > 0) { + threadsBefore = omp_get_max_threads(); + omp_set_num_threads(threads); + } +#else + (void)threads; #endif - IteratorT it = start; - while (it != end) { - #pragma omp parallel - #pragma omp single - { + IteratorT it = start; + while (it != end) { +#pragma omp parallel +#pragma omp single + { #if _OPENMP - for (unsigned i = 0; i < MAX_SIMULTANEOUS_TASKS; i++) + for (unsigned i = 0; i < MAX_SIMULTANEOUS_TASKS; i++) #endif - { - if (filter(*it)) { - #pragma omp task firstprivate (it) - { - action(*it); - } - } - ++it; + { + if (filter(*it)) { +#pragma omp task firstprivate(it) + { + action(*it); + } + } + ++it; #if _OPENMP - if (it == end) break; + if (it == end) + break; #endif - } - } - } + } + } + } #if _OPENMP - if (threads > 0) { - omp_set_num_threads(threadsBefore); - } + if (threads > 0) { + omp_set_num_threads(threadsBefore); + } #endif } -void progressStart(const std::string& name, unsigned total); -void progressUpdate(); +void +progressStart(const std::string& name, unsigned total); +void +progressUpdate(); -unsigned nChoosek(unsigned n, unsigned k); -std::vector> genPermutations(unsigned n); -std::vector> genCombinations(unsigned n, unsigned k); +unsigned +nChoosek(unsigned n, unsigned k); +std::vector> +genPermutations(unsigned n); +std::vector> +genCombinations(unsigned n, unsigned k); #endif diff --git a/RResolver/SequenceTree.cpp b/RResolver/SequenceTree.cpp index 87f8c0c95..19cb2694e 100644 --- a/RResolver/SequenceTree.cpp +++ b/RResolver/SequenceTree.cpp @@ -1,180 +1,204 @@ #include "SequenceTree.h" -#include -#include -#include #include +#include #include +#include +#include #include -#include - -class SequenceTreeNode: public ContigNode { - -public: - - SequenceTreeNode(const ContigNode& contigNode, - const int overlap, const int maxLength, const bool forward); - - std::vector getChildren(const int maxChildren = std::numeric_limits::max()) const; - const Sequence treeSequence() const; +#include - int treeSequenceStart; - int treeSequenceLength; - int maxLength; - bool forward; +class SequenceTreeNode : public ContigNode +{ + public: + SequenceTreeNode( + const ContigNode& contigNode, + const int overlap, + const int maxLength, + const bool forward); + + std::vector getChildren( + const int maxChildren = std::numeric_limits::max()) const; + const Sequence treeSequence() const; + + int treeSequenceStart; + int treeSequenceLength; + int maxLength; + bool forward; }; const int EXPECTED_BASES_PER_NODE = 4; -SequenceTreeNode::SequenceTreeNode(const ContigNode& contigNode, - const int overlap, const int maxLength, const bool forward): - ContigNode(contigNode), maxLength(maxLength), forward(forward) +SequenceTreeNode::SequenceTreeNode( + const ContigNode& contigNode, + const int overlap, + const int maxLength, + const bool forward) + : ContigNode(contigNode) + , maxLength(maxLength) + , forward(forward) { - assert(overlap >= 0); - assert(maxLength > 0); - treeSequenceStart = overlap; - const auto &sequence = getContigSequence(*this); - const int size = sequence.size(); - int treeSequenceEnd = std::min(overlap + maxLength, size); - assert(treeSequenceStart >= 0); - assert(treeSequenceEnd > 0); - assert(treeSequenceEnd > treeSequenceStart); - treeSequenceLength = treeSequenceEnd - treeSequenceStart; - if (!forward) { - treeSequenceStart = size - treeSequenceEnd; - assert(treeSequenceStart >= 0); - } - assert(treeSequenceStart + treeSequenceLength <= size); + assert(overlap >= 0); + assert(maxLength > 0); + treeSequenceStart = overlap; + const auto& sequence = getContigSequence(*this); + const int size = sequence.size(); + int treeSequenceEnd = std::min(overlap + maxLength, size); + assert(treeSequenceStart >= 0); + assert(treeSequenceEnd > 0); + assert(treeSequenceEnd > treeSequenceStart); + treeSequenceLength = treeSequenceEnd - treeSequenceStart; + if (!forward) { + treeSequenceStart = size - treeSequenceEnd; + assert(treeSequenceStart >= 0); + } + assert(treeSequenceStart + treeSequenceLength <= size); } -std::vector SequenceTreeNode::getChildren(const int maxChildren) const { - std::vector children; - if (maxLength > treeSequenceLength) { - if (forward) { - int step = out_degree(*this, g_contigGraph) / maxChildren; - if (step <= 0) { step = 1; } - out_edge_iterator it, itLast; - for (std::tie(it, itLast) = out_edges(*this, g_contigGraph); - it != itLast; std::advance(it, step)) - { - ContigNode outig = target(*it, g_contigGraph); - - children.push_back(SequenceTreeNode( - outig, -distanceBetween(*this, outig), - maxLength - treeSequenceLength, forward)); - if (int(children.size()) >= maxChildren) { - break; - } - } - } else { - int step = in_degree(*this, g_contigGraph) / maxChildren; - if (step <= 0) { step = 1; } - in_edge_iterator it, itLast; - for (std::tie(it, itLast) = in_edges(*this, g_contigGraph); - it != itLast; std::advance(it, step)) - { - ContigNode intig = source(*it, g_contigGraph); - - children.push_back(SequenceTreeNode( - intig, -distanceBetween(intig, *this), - maxLength - treeSequenceLength, forward)); - if (int(children.size()) >= maxChildren) { - break; - } - } - } - } - return children; +std::vector +SequenceTreeNode::getChildren(const int maxChildren) const +{ + std::vector children; + if (maxLength > treeSequenceLength) { + if (forward) { + int step = out_degree(*this, g_contigGraph) / maxChildren; + if (step <= 0) { + step = 1; + } + out_edge_iterator it, itLast; + for (std::tie(it, itLast) = out_edges(*this, g_contigGraph); it != itLast; + std::advance(it, step)) { + ContigNode outig = target(*it, g_contigGraph); + + children.push_back(SequenceTreeNode( + outig, + -distanceBetween(*this, outig), + maxLength - treeSequenceLength, + forward)); + if (int(children.size()) >= maxChildren) { + break; + } + } + } else { + int step = in_degree(*this, g_contigGraph) / maxChildren; + if (step <= 0) { + step = 1; + } + in_edge_iterator it, itLast; + for (std::tie(it, itLast) = in_edges(*this, g_contigGraph); it != itLast; + std::advance(it, step)) { + ContigNode intig = source(*it, g_contigGraph); + + children.push_back(SequenceTreeNode( + intig, + -distanceBetween(intig, *this), + maxLength - treeSequenceLength, + forward)); + if (int(children.size()) >= maxChildren) { + break; + } + } + } + } + return children; } -const Sequence SequenceTreeNode::treeSequence() const { - assert(treeSequenceStart >= 0); - assert(treeSequenceLength > 0); - return Sequence(getContigSequence(*this).c_str(), treeSequenceStart, treeSequenceLength); +const Sequence +SequenceTreeNode::treeSequence() const +{ + assert(treeSequenceStart >= 0); + assert(treeSequenceLength > 0); + return Sequence(getContigSequence(*this).c_str(), treeSequenceStart, treeSequenceLength); } typedef std::list Trace; typedef std::list Traces; static Traces -getTreeTraces(const ContigNode& start, - const int overlap, const int maxLength, - const bool forward, const int maxPaths) +getTreeTraces( + const ContigNode& start, + const int overlap, + const int maxLength, + const bool forward, + const int maxPaths) { - assert(maxLength > 0); - - Traces traces; - std::queue> queue; - - SequenceTreeNode root(start, overlap, maxLength, forward); - - int level = 1; - int leaves = 1; - traces.push_back({ root }); - queue.push(traces.back()); - while (!queue.empty()) { - Trace &trace = queue.front(); - queue.pop(); - - const SequenceTreeNode &node = trace.back(); - assert(node.maxLength > 0 && node.maxLength <= maxLength); - - assert(int(trace.size()) >= level); - level = trace.size(); - - auto children = node.getChildren(); - if ((children.size() > 0) && (leaves + int(children.size()) - 1 <= maxPaths)) { - for (size_t i = 0; i < children.size(); i++) { - const auto &child = children[i]; - assert(child.maxLength > 0); - assert(child.maxLength < node.maxLength); - if (i < children.size() - 1) { - Trace childTrace = trace; - childTrace.push_back(child); - assert(childTrace.size() == trace.size() + 1); - traces.push_back(childTrace); - queue.push(traces.back()); - } else { - trace.push_back(child); - queue.push(trace); - } - } - leaves += children.size() - 1; - } - } - - return traces; + assert(maxLength > 0); + + Traces traces; + std::queue> queue; + + SequenceTreeNode root(start, overlap, maxLength, forward); + + int level = 1; + int leaves = 1; + traces.push_back({ root }); + queue.push(traces.back()); + while (!queue.empty()) { + Trace& trace = queue.front(); + queue.pop(); + + const SequenceTreeNode& node = trace.back(); + assert(node.maxLength > 0 && node.maxLength <= maxLength); + + assert(int(trace.size()) >= level); + level = trace.size(); + + auto children = node.getChildren(); + if ((children.size() > 0) && (leaves + int(children.size()) - 1 <= maxPaths)) { + for (size_t i = 0; i < children.size(); i++) { + const auto& child = children[i]; + assert(child.maxLength > 0); + assert(child.maxLength < node.maxLength); + if (i < children.size() - 1) { + Trace childTrace = trace; + childTrace.push_back(child); + assert(childTrace.size() == trace.size() + 1); + traces.push_back(childTrace); + queue.push(traces.back()); + } else { + trace.push_back(child); + queue.push(trace); + } + } + leaves += children.size() - 1; + } + } + + return traces; } -std::list -getTreeSequences(const ContigNode& start, - const int overlap, const int maxLength, - const bool forward, const int maxPaths) +std::vector +getTreeSequences( + const ContigNode& start, + const int overlap, + const int maxLength, + const bool forward, + const int maxPaths) { - assert(overlap >= 0); - assert(maxLength > 0); - assert(maxPaths >= 1); - - auto traces = getTreeTraces(start, overlap, maxLength, - forward, maxPaths); - - std::list sequences; - - for (const auto &trace : traces) { - Sequence sequence; - sequence.reserve(trace.size() * EXPECTED_BASES_PER_NODE); - if (forward) { - for (auto it = trace.begin(); it != trace.end(); it++) { - sequence += it->treeSequence(); - } - } else { - for (auto it = trace.rbegin(); it != trace.rend(); it++) { - sequence += it->treeSequence(); - } - } - sequences.push_back(sequence); - } - - return sequences; + assert(overlap >= 0); + assert(maxLength > 0); + assert(maxPaths >= 1); + + auto traces = getTreeTraces(start, overlap, maxLength, forward, maxPaths); + + std::vector sequences; + sequences.reserve(traces.size()); + + for (const auto& trace : traces) { + Sequence sequence; + sequence.reserve(trace.size() * EXPECTED_BASES_PER_NODE); + if (forward) { + for (Trace::const_iterator it = trace.begin(); !(it == trace.end()); it++) { + sequence += it->treeSequence(); + } + } else { + for (Trace::const_reverse_iterator it = trace.rbegin(); !(it == trace.rend()); it++) { + sequence += it->treeSequence(); + } + } + sequences.push_back(sequence); + } + + return sequences; } diff --git a/RResolver/SequenceTree.h b/RResolver/SequenceTree.h index 58f171ad9..e9a24708e 100644 --- a/RResolver/SequenceTree.h +++ b/RResolver/SequenceTree.h @@ -1,14 +1,18 @@ #ifndef RRESOLVER_SEQUENCETREE_H #define RRESOLVER_SEQUENCETREE_H 1 -#include "Contigs.h" #include "Common/Sequence.h" +#include "Contigs.h" #include +#include -std::list -getTreeSequences(const ContigNode& start, - const int overlap, const int maxLength, - const bool forward, const int maxPaths); +std::vector +getTreeSequences( + const ContigNode& start, + const int overlap, + const int maxLength, + const bool forward, + const int maxPaths); #endif diff --git a/RResolver/btllib/.gitignore b/RResolver/btllib/.gitignore deleted file mode 100644 index ab6853b6d..000000000 --- a/RResolver/btllib/.gitignore +++ /dev/null @@ -1,7 +0,0 @@ -build/ -*.class -*.jar -python/ -!extras/python/ -java/ -!extras/java/ diff --git a/RResolver/btllib/.gitmodules b/RResolver/btllib/.gitmodules new file mode 100644 index 000000000..bddc06c92 --- /dev/null +++ b/RResolver/btllib/.gitmodules @@ -0,0 +1,3 @@ +[submodule "external/sdsl-lite"] + path = external/sdsl-lite + url = https://github.com/simongog/sdsl-lite diff --git a/RResolver/btllib/Doxyfile b/RResolver/btllib/Doxyfile deleted file mode 100644 index 2b4270e7d..000000000 --- a/RResolver/btllib/Doxyfile +++ /dev/null @@ -1,2549 +0,0 @@ -# Doxyfile 1.8.17 - -# This file describes the settings to be used by the documentation system -# doxygen (www.doxygen.org) for a project. -# -# All text after a double hash (##) is considered a comment and is placed in -# front of the TAG it is preceding. -# -# All text after a single hash (#) is considered a comment and will be ignored. -# The format is: -# TAG = value [value, ...] -# For lists, items can also be appended using: -# TAG += value [value, ...] -# Values that contain spaces should be placed between quotes (\" \"). - -#--------------------------------------------------------------------------- -# Project related configuration options -#--------------------------------------------------------------------------- - -# This tag specifies the encoding used for all characters in the configuration -# file that follow. The default is UTF-8 which is also the encoding used for all -# text before the first occurrence of this tag. Doxygen uses libiconv (or the -# iconv built into libc) for the transcoding. See -# https://www.gnu.org/software/libiconv/ for the list of possible encodings. -# The default value is: UTF-8. - -DOXYFILE_ENCODING = UTF-8 - -# The PROJECT_NAME tag is a single word (or a sequence of words surrounded by -# double-quotes, unless you are using Doxywizard) that should identify the -# project for which the documentation is generated. This name is used in the -# title of most generated pages and in a few other places. -# The default value is: My Project. - -PROJECT_NAME = "btllib" - -# The PROJECT_NUMBER tag can be used to enter a project or revision number. This -# could be handy for archiving the generated documentation or if some version -# control system is used. - -PROJECT_NUMBER = - -# Using the PROJECT_BRIEF tag one can provide an optional one line description -# for a project that appears at the top of each page and should give viewer a -# quick idea about the purpose of the project. Keep the description short. - -PROJECT_BRIEF = - -# With the PROJECT_LOGO tag one can specify a logo or an icon that is included -# in the documentation. The maximum height of the logo should not exceed 55 -# pixels and the maximum width should not exceed 200 pixels. Doxygen will copy -# the logo to the output directory. - -PROJECT_LOGO = - -# The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute) path -# into which the generated documentation will be written. If a relative path is -# entered, it will be relative to the location where doxygen was started. If -# left blank the current directory will be used. - -OUTPUT_DIRECTORY = - -# If the CREATE_SUBDIRS tag is set to YES then doxygen will create 4096 sub- -# directories (in 2 levels) under the output directory of each output format and -# will distribute the generated files over these directories. Enabling this -# option can be useful when feeding doxygen a huge amount of source files, where -# putting all generated files in the same directory would otherwise causes -# performance problems for the file system. -# The default value is: NO. - -CREATE_SUBDIRS = NO - -# If the ALLOW_UNICODE_NAMES tag is set to YES, doxygen will allow non-ASCII -# characters to appear in the names of generated files. If set to NO, non-ASCII -# characters will be escaped, for example _xE3_x81_x84 will be used for Unicode -# U+3044. -# The default value is: NO. - -ALLOW_UNICODE_NAMES = NO - -# The OUTPUT_LANGUAGE tag is used to specify the language in which all -# documentation generated by doxygen is written. Doxygen will use this -# information to generate all constant output in the proper language. -# Possible values are: Afrikaans, Arabic, Armenian, Brazilian, Catalan, Chinese, -# Chinese-Traditional, Croatian, Czech, Danish, Dutch, English (United States), -# Esperanto, Farsi (Persian), Finnish, French, German, Greek, Hungarian, -# Indonesian, Italian, Japanese, Japanese-en (Japanese with English messages), -# Korean, Korean-en (Korean with English messages), Latvian, Lithuanian, -# Macedonian, Norwegian, Persian (Farsi), Polish, Portuguese, Romanian, Russian, -# Serbian, Serbian-Cyrillic, Slovak, Slovene, Spanish, Swedish, Turkish, -# Ukrainian and Vietnamese. -# The default value is: English. - -OUTPUT_LANGUAGE = English - -# The OUTPUT_TEXT_DIRECTION tag is used to specify the direction in which all -# documentation generated by doxygen is written. Doxygen will use this -# information to generate all generated output in the proper direction. -# Possible values are: None, LTR, RTL and Context. -# The default value is: None. - -OUTPUT_TEXT_DIRECTION = None - -# If the BRIEF_MEMBER_DESC tag is set to YES, doxygen will include brief member -# descriptions after the members that are listed in the file and class -# documentation (similar to Javadoc). Set to NO to disable this. -# The default value is: YES. - -BRIEF_MEMBER_DESC = YES - -# If the REPEAT_BRIEF tag is set to YES, doxygen will prepend the brief -# description of a member or function before the detailed description -# -# Note: If both HIDE_UNDOC_MEMBERS and BRIEF_MEMBER_DESC are set to NO, the -# brief descriptions will be completely suppressed. -# The default value is: YES. - -REPEAT_BRIEF = YES - -# This tag implements a quasi-intelligent brief description abbreviator that is -# used to form the text in various listings. Each string in this list, if found -# as the leading text of the brief description, will be stripped from the text -# and the result, after processing the whole list, is used as the annotated -# text. Otherwise, the brief description is used as-is. If left blank, the -# following values are used ($name is automatically replaced with the name of -# the entity):The $name class, The $name widget, The $name file, is, provides, -# specifies, contains, represents, a, an and the. - -ABBREVIATE_BRIEF = "The $name class" \ - "The $name widget" \ - "The $name file" \ - is \ - provides \ - specifies \ - contains \ - represents \ - a \ - an \ - the - -# If the ALWAYS_DETAILED_SEC and REPEAT_BRIEF tags are both set to YES then -# doxygen will generate a detailed section even if there is only a brief -# description. -# The default value is: NO. - -ALWAYS_DETAILED_SEC = NO - -# If the INLINE_INHERITED_MEMB tag is set to YES, doxygen will show all -# inherited members of a class in the documentation of that class as if those -# members were ordinary class members. Constructors, destructors and assignment -# operators of the base classes will not be shown. -# The default value is: NO. - -INLINE_INHERITED_MEMB = NO - -# If the FULL_PATH_NAMES tag is set to YES, doxygen will prepend the full path -# before files name in the file list and in the header files. If set to NO the -# shortest path that makes the file name unique will be used -# The default value is: YES. - -FULL_PATH_NAMES = YES - -# The STRIP_FROM_PATH tag can be used to strip a user-defined part of the path. -# Stripping is only done if one of the specified strings matches the left-hand -# part of the path. The tag can be used to show relative paths in the file list. -# If left blank the directory from which doxygen is run is used as the path to -# strip. -# -# Note that you can specify absolute paths here, but also relative paths, which -# will be relative from the directory where doxygen is started. -# This tag requires that the tag FULL_PATH_NAMES is set to YES. - -STRIP_FROM_PATH = - -# The STRIP_FROM_INC_PATH tag can be used to strip a user-defined part of the -# path mentioned in the documentation of a class, which tells the reader which -# header file to include in order to use a class. If left blank only the name of -# the header file containing the class definition is used. Otherwise one should -# specify the list of include paths that are normally passed to the compiler -# using the -I flag. - -STRIP_FROM_INC_PATH = - -# If the SHORT_NAMES tag is set to YES, doxygen will generate much shorter (but -# less readable) file names. This can be useful is your file systems doesn't -# support long names like on DOS, Mac, or CD-ROM. -# The default value is: NO. - -SHORT_NAMES = NO - -# If the JAVADOC_AUTOBRIEF tag is set to YES then doxygen will interpret the -# first line (until the first dot) of a Javadoc-style comment as the brief -# description. If set to NO, the Javadoc-style will behave just like regular Qt- -# style comments (thus requiring an explicit @brief command for a brief -# description.) -# The default value is: NO. - -JAVADOC_AUTOBRIEF = NO - -# If the JAVADOC_BANNER tag is set to YES then doxygen will interpret a line -# such as -# /*************** -# as being the beginning of a Javadoc-style comment "banner". If set to NO, the -# Javadoc-style will behave just like regular comments and it will not be -# interpreted by doxygen. -# The default value is: NO. - -JAVADOC_BANNER = NO - -# If the QT_AUTOBRIEF tag is set to YES then doxygen will interpret the first -# line (until the first dot) of a Qt-style comment as the brief description. If -# set to NO, the Qt-style will behave just like regular Qt-style comments (thus -# requiring an explicit \brief command for a brief description.) -# The default value is: NO. - -QT_AUTOBRIEF = NO - -# The MULTILINE_CPP_IS_BRIEF tag can be set to YES to make doxygen treat a -# multi-line C++ special comment block (i.e. a block of //! or /// comments) as -# a brief description. This used to be the default behavior. The new default is -# to treat a multi-line C++ comment block as a detailed description. Set this -# tag to YES if you prefer the old behavior instead. -# -# Note that setting this tag to YES also means that rational rose comments are -# not recognized any more. -# The default value is: NO. - -MULTILINE_CPP_IS_BRIEF = NO - -# If the INHERIT_DOCS tag is set to YES then an undocumented member inherits the -# documentation from any documented member that it re-implements. -# The default value is: YES. - -INHERIT_DOCS = YES - -# If the SEPARATE_MEMBER_PAGES tag is set to YES then doxygen will produce a new -# page for each member. If set to NO, the documentation of a member will be part -# of the file/class/namespace that contains it. -# The default value is: NO. - -SEPARATE_MEMBER_PAGES = NO - -# The TAB_SIZE tag can be used to set the number of spaces in a tab. Doxygen -# uses this value to replace tabs by spaces in code fragments. -# Minimum value: 1, maximum value: 16, default value: 4. - -TAB_SIZE = 2 - -# This tag can be used to specify a number of aliases that act as commands in -# the documentation. An alias has the form: -# name=value -# For example adding -# "sideeffect=@par Side Effects:\n" -# will allow you to put the command \sideeffect (or @sideeffect) in the -# documentation, which will result in a user-defined paragraph with heading -# "Side Effects:". You can put \n's in the value part of an alias to insert -# newlines (in the resulting output). You can put ^^ in the value part of an -# alias to insert a newline as if a physical newline was in the original file. -# When you need a literal { or } or , in the value part of an alias you have to -# escape them by means of a backslash (\), this can lead to conflicts with the -# commands \{ and \} for these it is advised to use the version @{ and @} or use -# a double escape (\\{ and \\}) - -ALIASES = - -# This tag can be used to specify a number of word-keyword mappings (TCL only). -# A mapping has the form "name=value". For example adding "class=itcl::class" -# will allow you to use the command class in the itcl::class meaning. - -TCL_SUBST = - -# Set the OPTIMIZE_OUTPUT_FOR_C tag to YES if your project consists of C sources -# only. Doxygen will then generate output that is more tailored for C. For -# instance, some of the names that are used will be different. The list of all -# members will be omitted, etc. -# The default value is: NO. - -OPTIMIZE_OUTPUT_FOR_C = NO - -# Set the OPTIMIZE_OUTPUT_JAVA tag to YES if your project consists of Java or -# Python sources only. Doxygen will then generate output that is more tailored -# for that language. For instance, namespaces will be presented as packages, -# qualified scopes will look different, etc. -# The default value is: NO. - -OPTIMIZE_OUTPUT_JAVA = NO - -# Set the OPTIMIZE_FOR_FORTRAN tag to YES if your project consists of Fortran -# sources. Doxygen will then generate output that is tailored for Fortran. -# The default value is: NO. - -OPTIMIZE_FOR_FORTRAN = NO - -# Set the OPTIMIZE_OUTPUT_VHDL tag to YES if your project consists of VHDL -# sources. Doxygen will then generate output that is tailored for VHDL. -# The default value is: NO. - -OPTIMIZE_OUTPUT_VHDL = NO - -# Set the OPTIMIZE_OUTPUT_SLICE tag to YES if your project consists of Slice -# sources only. Doxygen will then generate output that is more tailored for that -# language. For instance, namespaces will be presented as modules, types will be -# separated into more groups, etc. -# The default value is: NO. - -OPTIMIZE_OUTPUT_SLICE = NO - -# Doxygen selects the parser to use depending on the extension of the files it -# parses. With this tag you can assign which parser to use for a given -# extension. Doxygen has a built-in mapping, but you can override or extend it -# using this tag. The format is ext=language, where ext is a file extension, and -# language is one of the parsers supported by doxygen: IDL, Java, JavaScript, -# Csharp (C#), C, C++, D, PHP, md (Markdown), Objective-C, Python, Slice, -# Fortran (fixed format Fortran: FortranFixed, free formatted Fortran: -# FortranFree, unknown formatted Fortran: Fortran. In the later case the parser -# tries to guess whether the code is fixed or free formatted code, this is the -# default for Fortran type files), VHDL, tcl. For instance to make doxygen treat -# .inc files as Fortran files (default is PHP), and .f files as C (default is -# Fortran), use: inc=Fortran f=C. -# -# Note: For files without extension you can use no_extension as a placeholder. -# -# Note that for custom extensions you also need to set FILE_PATTERNS otherwise -# the files are not read by doxygen. - -EXTENSION_MAPPING = - -# If the MARKDOWN_SUPPORT tag is enabled then doxygen pre-processes all comments -# according to the Markdown format, which allows for more readable -# documentation. See https://daringfireball.net/projects/markdown/ for details. -# The output of markdown processing is further processed by doxygen, so you can -# mix doxygen, HTML, and XML commands with Markdown formatting. Disable only in -# case of backward compatibilities issues. -# The default value is: YES. - -MARKDOWN_SUPPORT = YES - -# When the TOC_INCLUDE_HEADINGS tag is set to a non-zero value, all headings up -# to that level are automatically included in the table of contents, even if -# they do not have an id attribute. -# Note: This feature currently applies only to Markdown headings. -# Minimum value: 0, maximum value: 99, default value: 5. -# This tag requires that the tag MARKDOWN_SUPPORT is set to YES. - -TOC_INCLUDE_HEADINGS = 5 - -# When enabled doxygen tries to link words that correspond to documented -# classes, or namespaces to their corresponding documentation. Such a link can -# be prevented in individual cases by putting a % sign in front of the word or -# globally by setting AUTOLINK_SUPPORT to NO. -# The default value is: YES. - -AUTOLINK_SUPPORT = YES - -# If you use STL classes (i.e. std::string, std::vector, etc.) but do not want -# to include (a tag file for) the STL sources as input, then you should set this -# tag to YES in order to let doxygen match functions declarations and -# definitions whose arguments contain STL classes (e.g. func(std::string); -# versus func(std::string) {}). This also make the inheritance and collaboration -# diagrams that involve STL classes more complete and accurate. -# The default value is: NO. - -BUILTIN_STL_SUPPORT = NO - -# If you use Microsoft's C++/CLI language, you should set this option to YES to -# enable parsing support. -# The default value is: NO. - -CPP_CLI_SUPPORT = NO - -# Set the SIP_SUPPORT tag to YES if your project consists of sip (see: -# https://www.riverbankcomputing.com/software/sip/intro) sources only. Doxygen -# will parse them like normal C++ but will assume all classes use public instead -# of private inheritance when no explicit protection keyword is present. -# The default value is: NO. - -SIP_SUPPORT = NO - -# For Microsoft's IDL there are propget and propput attributes to indicate -# getter and setter methods for a property. Setting this option to YES will make -# doxygen to replace the get and set methods by a property in the documentation. -# This will only work if the methods are indeed getting or setting a simple -# type. If this is not the case, or you want to show the methods anyway, you -# should set this option to NO. -# The default value is: YES. - -IDL_PROPERTY_SUPPORT = YES - -# If member grouping is used in the documentation and the DISTRIBUTE_GROUP_DOC -# tag is set to YES then doxygen will reuse the documentation of the first -# member in the group (if any) for the other members of the group. By default -# all members of a group must be documented explicitly. -# The default value is: NO. - -DISTRIBUTE_GROUP_DOC = NO - -# If one adds a struct or class to a group and this option is enabled, then also -# any nested class or struct is added to the same group. By default this option -# is disabled and one has to add nested compounds explicitly via \ingroup. -# The default value is: NO. - -GROUP_NESTED_COMPOUNDS = NO - -# Set the SUBGROUPING tag to YES to allow class member groups of the same type -# (for instance a group of public functions) to be put as a subgroup of that -# type (e.g. under the Public Functions section). Set it to NO to prevent -# subgrouping. Alternatively, this can be done per class using the -# \nosubgrouping command. -# The default value is: YES. - -SUBGROUPING = YES - -# When the INLINE_GROUPED_CLASSES tag is set to YES, classes, structs and unions -# are shown inside the group in which they are included (e.g. using \ingroup) -# instead of on a separate page (for HTML and Man pages) or section (for LaTeX -# and RTF). -# -# Note that this feature does not work in combination with -# SEPARATE_MEMBER_PAGES. -# The default value is: NO. - -INLINE_GROUPED_CLASSES = NO - -# When the INLINE_SIMPLE_STRUCTS tag is set to YES, structs, classes, and unions -# with only public data fields or simple typedef fields will be shown inline in -# the documentation of the scope in which they are defined (i.e. file, -# namespace, or group documentation), provided this scope is documented. If set -# to NO, structs, classes, and unions are shown on a separate page (for HTML and -# Man pages) or section (for LaTeX and RTF). -# The default value is: NO. - -INLINE_SIMPLE_STRUCTS = NO - -# When TYPEDEF_HIDES_STRUCT tag is enabled, a typedef of a struct, union, or -# enum is documented as struct, union, or enum with the name of the typedef. So -# typedef struct TypeS {} TypeT, will appear in the documentation as a struct -# with name TypeT. When disabled the typedef will appear as a member of a file, -# namespace, or class. And the struct will be named TypeS. This can typically be -# useful for C code in case the coding convention dictates that all compound -# types are typedef'ed and only the typedef is referenced, never the tag name. -# The default value is: NO. - -TYPEDEF_HIDES_STRUCT = NO - -# The size of the symbol lookup cache can be set using LOOKUP_CACHE_SIZE. This -# cache is used to resolve symbols given their name and scope. Since this can be -# an expensive process and often the same symbol appears multiple times in the -# code, doxygen keeps a cache of pre-resolved symbols. If the cache is too small -# doxygen will become slower. If the cache is too large, memory is wasted. The -# cache size is given by this formula: 2^(16+LOOKUP_CACHE_SIZE). The valid range -# is 0..9, the default is 0, corresponding to a cache size of 2^16=65536 -# symbols. At the end of a run doxygen will report the cache usage and suggest -# the optimal cache size from a speed point of view. -# Minimum value: 0, maximum value: 9, default value: 0. - -LOOKUP_CACHE_SIZE = 0 - -#--------------------------------------------------------------------------- -# Build related configuration options -#--------------------------------------------------------------------------- - -# If the EXTRACT_ALL tag is set to YES, doxygen will assume all entities in -# documentation are documented, even if no documentation was available. Private -# class members and static file members will be hidden unless the -# EXTRACT_PRIVATE respectively EXTRACT_STATIC tags are set to YES. -# Note: This will also disable the warnings about undocumented members that are -# normally produced when WARNINGS is set to YES. -# The default value is: NO. - -EXTRACT_ALL = NO - -# If the EXTRACT_PRIVATE tag is set to YES, all private members of a class will -# be included in the documentation. -# The default value is: NO. - -EXTRACT_PRIVATE = NO - -# If the EXTRACT_PRIV_VIRTUAL tag is set to YES, documented private virtual -# methods of a class will be included in the documentation. -# The default value is: NO. - -EXTRACT_PRIV_VIRTUAL = NO - -# If the EXTRACT_PACKAGE tag is set to YES, all members with package or internal -# scope will be included in the documentation. -# The default value is: NO. - -EXTRACT_PACKAGE = NO - -# If the EXTRACT_STATIC tag is set to YES, all static members of a file will be -# included in the documentation. -# The default value is: NO. - -EXTRACT_STATIC = NO - -# If the EXTRACT_LOCAL_CLASSES tag is set to YES, classes (and structs) defined -# locally in source files will be included in the documentation. If set to NO, -# only classes defined in header files are included. Does not have any effect -# for Java sources. -# The default value is: YES. - -EXTRACT_LOCAL_CLASSES = YES - -# This flag is only useful for Objective-C code. If set to YES, local methods, -# which are defined in the implementation section but not in the interface are -# included in the documentation. If set to NO, only methods in the interface are -# included. -# The default value is: NO. - -EXTRACT_LOCAL_METHODS = NO - -# If this flag is set to YES, the members of anonymous namespaces will be -# extracted and appear in the documentation as a namespace called -# 'anonymous_namespace{file}', where file will be replaced with the base name of -# the file that contains the anonymous namespace. By default anonymous namespace -# are hidden. -# The default value is: NO. - -EXTRACT_ANON_NSPACES = NO - -# If the HIDE_UNDOC_MEMBERS tag is set to YES, doxygen will hide all -# undocumented members inside documented classes or files. If set to NO these -# members will be included in the various overviews, but no documentation -# section is generated. This option has no effect if EXTRACT_ALL is enabled. -# The default value is: NO. - -HIDE_UNDOC_MEMBERS = NO - -# If the HIDE_UNDOC_CLASSES tag is set to YES, doxygen will hide all -# undocumented classes that are normally visible in the class hierarchy. If set -# to NO, these classes will be included in the various overviews. This option -# has no effect if EXTRACT_ALL is enabled. -# The default value is: NO. - -HIDE_UNDOC_CLASSES = NO - -# If the HIDE_FRIEND_COMPOUNDS tag is set to YES, doxygen will hide all friend -# declarations. If set to NO, these declarations will be included in the -# documentation. -# The default value is: NO. - -HIDE_FRIEND_COMPOUNDS = NO - -# If the HIDE_IN_BODY_DOCS tag is set to YES, doxygen will hide any -# documentation blocks found inside the body of a function. If set to NO, these -# blocks will be appended to the function's detailed documentation block. -# The default value is: NO. - -HIDE_IN_BODY_DOCS = NO - -# The INTERNAL_DOCS tag determines if documentation that is typed after a -# \internal command is included. If the tag is set to NO then the documentation -# will be excluded. Set it to YES to include the internal documentation. -# The default value is: NO. - -INTERNAL_DOCS = NO - -# If the CASE_SENSE_NAMES tag is set to NO then doxygen will only generate file -# names in lower-case letters. If set to YES, upper-case letters are also -# allowed. This is useful if you have classes or files whose names only differ -# in case and if your file system supports case sensitive file names. Windows -# (including Cygwin) ands Mac users are advised to set this option to NO. -# The default value is: system dependent. - -CASE_SENSE_NAMES = YES - -# If the HIDE_SCOPE_NAMES tag is set to NO then doxygen will show members with -# their full class and namespace scopes in the documentation. If set to YES, the -# scope will be hidden. -# The default value is: NO. - -HIDE_SCOPE_NAMES = NO - -# If the HIDE_COMPOUND_REFERENCE tag is set to NO (default) then doxygen will -# append additional text to a page's title, such as Class Reference. If set to -# YES the compound reference will be hidden. -# The default value is: NO. - -HIDE_COMPOUND_REFERENCE= NO - -# If the SHOW_INCLUDE_FILES tag is set to YES then doxygen will put a list of -# the files that are included by a file in the documentation of that file. -# The default value is: YES. - -SHOW_INCLUDE_FILES = YES - -# If the SHOW_GROUPED_MEMB_INC tag is set to YES then Doxygen will add for each -# grouped member an include statement to the documentation, telling the reader -# which file to include in order to use the member. -# The default value is: NO. - -SHOW_GROUPED_MEMB_INC = NO - -# If the FORCE_LOCAL_INCLUDES tag is set to YES then doxygen will list include -# files with double quotes in the documentation rather than with sharp brackets. -# The default value is: NO. - -FORCE_LOCAL_INCLUDES = NO - -# If the INLINE_INFO tag is set to YES then a tag [inline] is inserted in the -# documentation for inline members. -# The default value is: YES. - -INLINE_INFO = YES - -# If the SORT_MEMBER_DOCS tag is set to YES then doxygen will sort the -# (detailed) documentation of file and class members alphabetically by member -# name. If set to NO, the members will appear in declaration order. -# The default value is: YES. - -SORT_MEMBER_DOCS = YES - -# If the SORT_BRIEF_DOCS tag is set to YES then doxygen will sort the brief -# descriptions of file, namespace and class members alphabetically by member -# name. If set to NO, the members will appear in declaration order. Note that -# this will also influence the order of the classes in the class list. -# The default value is: NO. - -SORT_BRIEF_DOCS = NO - -# If the SORT_MEMBERS_CTORS_1ST tag is set to YES then doxygen will sort the -# (brief and detailed) documentation of class members so that constructors and -# destructors are listed first. If set to NO the constructors will appear in the -# respective orders defined by SORT_BRIEF_DOCS and SORT_MEMBER_DOCS. -# Note: If SORT_BRIEF_DOCS is set to NO this option is ignored for sorting brief -# member documentation. -# Note: If SORT_MEMBER_DOCS is set to NO this option is ignored for sorting -# detailed member documentation. -# The default value is: NO. - -SORT_MEMBERS_CTORS_1ST = NO - -# If the SORT_GROUP_NAMES tag is set to YES then doxygen will sort the hierarchy -# of group names into alphabetical order. If set to NO the group names will -# appear in their defined order. -# The default value is: NO. - -SORT_GROUP_NAMES = NO - -# If the SORT_BY_SCOPE_NAME tag is set to YES, the class list will be sorted by -# fully-qualified names, including namespaces. If set to NO, the class list will -# be sorted only by class name, not including the namespace part. -# Note: This option is not very useful if HIDE_SCOPE_NAMES is set to YES. -# Note: This option applies only to the class list, not to the alphabetical -# list. -# The default value is: NO. - -SORT_BY_SCOPE_NAME = NO - -# If the STRICT_PROTO_MATCHING option is enabled and doxygen fails to do proper -# type resolution of all parameters of a function it will reject a match between -# the prototype and the implementation of a member function even if there is -# only one candidate or it is obvious which candidate to choose by doing a -# simple string match. By disabling STRICT_PROTO_MATCHING doxygen will still -# accept a match between prototype and implementation in such cases. -# The default value is: NO. - -STRICT_PROTO_MATCHING = NO - -# The GENERATE_TODOLIST tag can be used to enable (YES) or disable (NO) the todo -# list. This list is created by putting \todo commands in the documentation. -# The default value is: YES. - -GENERATE_TODOLIST = YES - -# The GENERATE_TESTLIST tag can be used to enable (YES) or disable (NO) the test -# list. This list is created by putting \test commands in the documentation. -# The default value is: YES. - -GENERATE_TESTLIST = YES - -# The GENERATE_BUGLIST tag can be used to enable (YES) or disable (NO) the bug -# list. This list is created by putting \bug commands in the documentation. -# The default value is: YES. - -GENERATE_BUGLIST = YES - -# The GENERATE_DEPRECATEDLIST tag can be used to enable (YES) or disable (NO) -# the deprecated list. This list is created by putting \deprecated commands in -# the documentation. -# The default value is: YES. - -GENERATE_DEPRECATEDLIST= YES - -# The ENABLED_SECTIONS tag can be used to enable conditional documentation -# sections, marked by \if ... \endif and \cond -# ... \endcond blocks. - -ENABLED_SECTIONS = - -# The MAX_INITIALIZER_LINES tag determines the maximum number of lines that the -# initial value of a variable or macro / define can have for it to appear in the -# documentation. If the initializer consists of more lines than specified here -# it will be hidden. Use a value of 0 to hide initializers completely. The -# appearance of the value of individual variables and macros / defines can be -# controlled using \showinitializer or \hideinitializer command in the -# documentation regardless of this setting. -# Minimum value: 0, maximum value: 10000, default value: 30. - -MAX_INITIALIZER_LINES = 30 - -# Set the SHOW_USED_FILES tag to NO to disable the list of files generated at -# the bottom of the documentation of classes and structs. If set to YES, the -# list will mention the files that were used to generate the documentation. -# The default value is: YES. - -SHOW_USED_FILES = YES - -# Set the SHOW_FILES tag to NO to disable the generation of the Files page. This -# will remove the Files entry from the Quick Index and from the Folder Tree View -# (if specified). -# The default value is: YES. - -SHOW_FILES = YES - -# Set the SHOW_NAMESPACES tag to NO to disable the generation of the Namespaces -# page. This will remove the Namespaces entry from the Quick Index and from the -# Folder Tree View (if specified). -# The default value is: YES. - -SHOW_NAMESPACES = YES - -# The FILE_VERSION_FILTER tag can be used to specify a program or script that -# doxygen should invoke to get the current version for each file (typically from -# the version control system). Doxygen will invoke the program by executing (via -# popen()) the command command input-file, where command is the value of the -# FILE_VERSION_FILTER tag, and input-file is the name of an input file provided -# by doxygen. Whatever the program writes to standard output is used as the file -# version. For an example see the documentation. - -FILE_VERSION_FILTER = - -# The LAYOUT_FILE tag can be used to specify a layout file which will be parsed -# by doxygen. The layout file controls the global structure of the generated -# output files in an output format independent way. To create the layout file -# that represents doxygen's defaults, run doxygen with the -l option. You can -# optionally specify a file name after the option, if omitted DoxygenLayout.xml -# will be used as the name of the layout file. -# -# Note that if you run doxygen from a directory containing a file called -# DoxygenLayout.xml, doxygen will parse it automatically even if the LAYOUT_FILE -# tag is left empty. - -LAYOUT_FILE = - -# The CITE_BIB_FILES tag can be used to specify one or more bib files containing -# the reference definitions. This must be a list of .bib files. The .bib -# extension is automatically appended if omitted. This requires the bibtex tool -# to be installed. See also https://en.wikipedia.org/wiki/BibTeX for more info. -# For LaTeX the style of the bibliography can be controlled using -# LATEX_BIB_STYLE. To use this feature you need bibtex and perl available in the -# search path. See also \cite for info how to create references. - -CITE_BIB_FILES = - -#--------------------------------------------------------------------------- -# Configuration options related to warning and progress messages -#--------------------------------------------------------------------------- - -# The QUIET tag can be used to turn on/off the messages that are generated to -# standard output by doxygen. If QUIET is set to YES this implies that the -# messages are off. -# The default value is: NO. - -QUIET = NO - -# The WARNINGS tag can be used to turn on/off the warning messages that are -# generated to standard error (stderr) by doxygen. If WARNINGS is set to YES -# this implies that the warnings are on. -# -# Tip: Turn warnings on while writing the documentation. -# The default value is: YES. - -WARNINGS = YES - -# If the WARN_IF_UNDOCUMENTED tag is set to YES then doxygen will generate -# warnings for undocumented members. If EXTRACT_ALL is set to YES then this flag -# will automatically be disabled. -# The default value is: YES. - -WARN_IF_UNDOCUMENTED = YES - -# If the WARN_IF_DOC_ERROR tag is set to YES, doxygen will generate warnings for -# potential errors in the documentation, such as not documenting some parameters -# in a documented function, or documenting parameters that don't exist or using -# markup commands wrongly. -# The default value is: YES. - -WARN_IF_DOC_ERROR = YES - -# This WARN_NO_PARAMDOC option can be enabled to get warnings for functions that -# are documented, but have no documentation for their parameters or return -# value. If set to NO, doxygen will only warn about wrong or incomplete -# parameter documentation, but not about the absence of documentation. If -# EXTRACT_ALL is set to YES then this flag will automatically be disabled. -# The default value is: NO. - -WARN_NO_PARAMDOC = NO - -# If the WARN_AS_ERROR tag is set to YES then doxygen will immediately stop when -# a warning is encountered. -# The default value is: NO. - -WARN_AS_ERROR = NO - -# The WARN_FORMAT tag determines the format of the warning messages that doxygen -# can produce. The string should contain the $file, $line, and $text tags, which -# will be replaced by the file and line number from which the warning originated -# and the warning text. Optionally the format may contain $version, which will -# be replaced by the version of the file (if it could be obtained via -# FILE_VERSION_FILTER) -# The default value is: $file:$line: $text. - -WARN_FORMAT = "$file:$line: $text" - -# The WARN_LOGFILE tag can be used to specify a file to which warning and error -# messages should be written. If left blank the output is written to standard -# error (stderr). - -WARN_LOGFILE = - -#--------------------------------------------------------------------------- -# Configuration options related to the input files -#--------------------------------------------------------------------------- - -# The INPUT tag is used to specify the files and/or directories that contain -# documented source files. You may enter file names like myfile.cpp or -# directories like /usr/src/myproject. Separate the files or directories with -# spaces. See also FILE_PATTERNS and EXTENSION_MAPPING -# Note: If this tag is empty the current directory is searched. - -INPUT = include/btllib README.md - -# This tag can be used to specify the character encoding of the source files -# that doxygen parses. Internally doxygen uses the UTF-8 encoding. Doxygen uses -# libiconv (or the iconv built into libc) for the transcoding. See the libiconv -# documentation (see: https://www.gnu.org/software/libiconv/) for the list of -# possible encodings. -# The default value is: UTF-8. - -INPUT_ENCODING = UTF-8 - -# If the value of the INPUT tag contains directories, you can use the -# FILE_PATTERNS tag to specify one or more wildcard patterns (like *.cpp and -# *.h) to filter out the source-files in the directories. -# -# Note that for custom extensions or not directly supported extensions you also -# need to set EXTENSION_MAPPING for the extension otherwise the files are not -# read by doxygen. -# -# If left blank the following patterns are tested:*.c, *.cc, *.cxx, *.cpp, -# *.c++, *.java, *.ii, *.ixx, *.ipp, *.i++, *.inl, *.idl, *.ddl, *.odl, *.h, -# *.hh, *.hxx, *.hpp, *.h++, *.cs, *.d, *.php, *.php4, *.php5, *.phtml, *.inc, -# *.m, *.markdown, *.md, *.mm, *.dox (to be provided as doxygen C comment), -# *.doc (to be provided as doxygen C comment), *.txt (to be provided as doxygen -# C comment), *.py, *.pyw, *.f90, *.f95, *.f03, *.f08, *.f, *.for, *.tcl, *.vhd, -# *.vhdl, *.ucf, *.qsf and *.ice. - -FILE_PATTERNS = *.c \ - *.cc \ - *.cxx \ - *.cpp \ - *.c++ \ - *.java \ - *.ii \ - *.ixx \ - *.ipp \ - *.i++ \ - *.inl \ - *.idl \ - *.ddl \ - *.odl \ - *.h \ - *.hh \ - *.hxx \ - *.hpp \ - *.h++ \ - *.cs \ - *.d \ - *.php \ - *.php4 \ - *.php5 \ - *.phtml \ - *.inc \ - *.m \ - *.markdown \ - *.md \ - *.mm \ - *.dox \ - *.doc \ - *.txt \ - *.py \ - *.pyw \ - *.f90 \ - *.f95 \ - *.f03 \ - *.f08 \ - *.f \ - *.for \ - *.tcl \ - *.vhd \ - *.vhdl \ - *.ucf \ - *.qsf \ - *.ice - -# The RECURSIVE tag can be used to specify whether or not subdirectories should -# be searched for input files as well. -# The default value is: NO. - -RECURSIVE = YES - -# The EXCLUDE tag can be used to specify files and/or directories that should be -# excluded from the INPUT source files. This way you can easily exclude a -# subdirectory from a directory tree whose root is specified with the INPUT tag. -# -# Note that relative paths are relative to the directory from which doxygen is -# run. - -EXCLUDE = include/btllib/index_queue.hpp \ - include/btllib/data_stream.hpp - -# The EXCLUDE_SYMLINKS tag can be used to select whether or not files or -# directories that are symbolic links (a Unix file system feature) are excluded -# from the input. -# The default value is: NO. - -EXCLUDE_SYMLINKS = NO - -# If the value of the INPUT tag contains directories, you can use the -# EXCLUDE_PATTERNS tag to specify one or more wildcard patterns to exclude -# certain files from those directories. -# -# Note that the wildcards are matched against the file with absolute path, so to -# exclude all test directories for example use the pattern */test/* - -EXCLUDE_PATTERNS = - -# The EXCLUDE_SYMBOLS tag can be used to specify one or more symbol names -# (namespaces, classes, functions, etc.) that should be excluded from the -# output. The symbol name can be a fully qualified name, a word, or if the -# wildcard * is used, a substring. Examples: ANamespace, AClass, -# AClass::ANamespace, ANamespace::*Test -# -# Note that the wildcards are matched against the file with absolute path, so to -# exclude all test directories use the pattern */test/* - -EXCLUDE_SYMBOLS = btllib::SeqReader::read_* - -# The EXAMPLE_PATH tag can be used to specify one or more files or directories -# that contain example code fragments that are included (see the \include -# command). - -EXAMPLE_PATH = - -# If the value of the EXAMPLE_PATH tag contains directories, you can use the -# EXAMPLE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp and -# *.h) to filter out the source-files in the directories. If left blank all -# files are included. - -EXAMPLE_PATTERNS = * - -# If the EXAMPLE_RECURSIVE tag is set to YES then subdirectories will be -# searched for input files to be used with the \include or \dontinclude commands -# irrespective of the value of the RECURSIVE tag. -# The default value is: NO. - -EXAMPLE_RECURSIVE = NO - -# The IMAGE_PATH tag can be used to specify one or more files or directories -# that contain images that are to be included in the documentation (see the -# \image command). - -IMAGE_PATH = - -# The INPUT_FILTER tag can be used to specify a program that doxygen should -# invoke to filter for each input file. Doxygen will invoke the filter program -# by executing (via popen()) the command: -# -# -# -# where is the value of the INPUT_FILTER tag, and is the -# name of an input file. Doxygen will then use the output that the filter -# program writes to standard output. If FILTER_PATTERNS is specified, this tag -# will be ignored. -# -# Note that the filter must not add or remove lines; it is applied before the -# code is scanned, but not when the output code is generated. If lines are added -# or removed, the anchors will not be placed correctly. -# -# Note that for custom extensions or not directly supported extensions you also -# need to set EXTENSION_MAPPING for the extension otherwise the files are not -# properly processed by doxygen. - -INPUT_FILTER = - -# The FILTER_PATTERNS tag can be used to specify filters on a per file pattern -# basis. Doxygen will compare the file name with each pattern and apply the -# filter if there is a match. The filters are a list of the form: pattern=filter -# (like *.cpp=my_cpp_filter). See INPUT_FILTER for further information on how -# filters are used. If the FILTER_PATTERNS tag is empty or if none of the -# patterns match the file name, INPUT_FILTER is applied. -# -# Note that for custom extensions or not directly supported extensions you also -# need to set EXTENSION_MAPPING for the extension otherwise the files are not -# properly processed by doxygen. - -FILTER_PATTERNS = - -# If the FILTER_SOURCE_FILES tag is set to YES, the input filter (if set using -# INPUT_FILTER) will also be used to filter the input files that are used for -# producing the source files to browse (i.e. when SOURCE_BROWSER is set to YES). -# The default value is: NO. - -FILTER_SOURCE_FILES = NO - -# The FILTER_SOURCE_PATTERNS tag can be used to specify source filters per file -# pattern. A pattern will override the setting for FILTER_PATTERN (if any) and -# it is also possible to disable source filtering for a specific pattern using -# *.ext= (so without naming a filter). -# This tag requires that the tag FILTER_SOURCE_FILES is set to YES. - -FILTER_SOURCE_PATTERNS = - -# If the USE_MDFILE_AS_MAINPAGE tag refers to the name of a markdown file that -# is part of the input, its contents will be placed on the main page -# (index.html). This can be useful if you have a project on for instance GitHub -# and want to reuse the introduction page also for the doxygen output. - -USE_MDFILE_AS_MAINPAGE = README.md - -#--------------------------------------------------------------------------- -# Configuration options related to source browsing -#--------------------------------------------------------------------------- - -# If the SOURCE_BROWSER tag is set to YES then a list of source files will be -# generated. Documented entities will be cross-referenced with these sources. -# -# Note: To get rid of all source code in the generated output, make sure that -# also VERBATIM_HEADERS is set to NO. -# The default value is: NO. - -SOURCE_BROWSER = NO - -# Setting the INLINE_SOURCES tag to YES will include the body of functions, -# classes and enums directly into the documentation. -# The default value is: NO. - -INLINE_SOURCES = NO - -# Setting the STRIP_CODE_COMMENTS tag to YES will instruct doxygen to hide any -# special comment blocks from generated source code fragments. Normal C, C++ and -# Fortran comments will always remain visible. -# The default value is: YES. - -STRIP_CODE_COMMENTS = YES - -# If the REFERENCED_BY_RELATION tag is set to YES then for each documented -# entity all documented functions referencing it will be listed. -# The default value is: NO. - -REFERENCED_BY_RELATION = NO - -# If the REFERENCES_RELATION tag is set to YES then for each documented function -# all documented entities called/used by that function will be listed. -# The default value is: NO. - -REFERENCES_RELATION = NO - -# If the REFERENCES_LINK_SOURCE tag is set to YES and SOURCE_BROWSER tag is set -# to YES then the hyperlinks from functions in REFERENCES_RELATION and -# REFERENCED_BY_RELATION lists will link to the source code. Otherwise they will -# link to the documentation. -# The default value is: YES. - -REFERENCES_LINK_SOURCE = YES - -# If SOURCE_TOOLTIPS is enabled (the default) then hovering a hyperlink in the -# source code will show a tooltip with additional information such as prototype, -# brief description and links to the definition and documentation. Since this -# will make the HTML file larger and loading of large files a bit slower, you -# can opt to disable this feature. -# The default value is: YES. -# This tag requires that the tag SOURCE_BROWSER is set to YES. - -SOURCE_TOOLTIPS = YES - -# If the USE_HTAGS tag is set to YES then the references to source code will -# point to the HTML generated by the htags(1) tool instead of doxygen built-in -# source browser. The htags tool is part of GNU's global source tagging system -# (see https://www.gnu.org/software/global/global.html). You will need version -# 4.8.6 or higher. -# -# To use it do the following: -# - Install the latest version of global -# - Enable SOURCE_BROWSER and USE_HTAGS in the configuration file -# - Make sure the INPUT points to the root of the source tree -# - Run doxygen as normal -# -# Doxygen will invoke htags (and that will in turn invoke gtags), so these -# tools must be available from the command line (i.e. in the search path). -# -# The result: instead of the source browser generated by doxygen, the links to -# source code will now point to the output of htags. -# The default value is: NO. -# This tag requires that the tag SOURCE_BROWSER is set to YES. - -USE_HTAGS = NO - -# If the VERBATIM_HEADERS tag is set the YES then doxygen will generate a -# verbatim copy of the header file for each class for which an include is -# specified. Set to NO to disable this. -# See also: Section \class. -# The default value is: YES. - -VERBATIM_HEADERS = YES - -#--------------------------------------------------------------------------- -# Configuration options related to the alphabetical class index -#--------------------------------------------------------------------------- - -# If the ALPHABETICAL_INDEX tag is set to YES, an alphabetical index of all -# compounds will be generated. Enable this if the project contains a lot of -# classes, structs, unions or interfaces. -# The default value is: YES. - -ALPHABETICAL_INDEX = YES - -# The COLS_IN_ALPHA_INDEX tag can be used to specify the number of columns in -# which the alphabetical index list will be split. -# Minimum value: 1, maximum value: 20, default value: 5. -# This tag requires that the tag ALPHABETICAL_INDEX is set to YES. - -COLS_IN_ALPHA_INDEX = 5 - -# In case all classes in a project start with a common prefix, all classes will -# be put under the same header in the alphabetical index. The IGNORE_PREFIX tag -# can be used to specify a prefix (or a list of prefixes) that should be ignored -# while generating the index headers. -# This tag requires that the tag ALPHABETICAL_INDEX is set to YES. - -IGNORE_PREFIX = - -#--------------------------------------------------------------------------- -# Configuration options related to the HTML output -#--------------------------------------------------------------------------- - -# If the GENERATE_HTML tag is set to YES, doxygen will generate HTML output -# The default value is: YES. - -GENERATE_HTML = YES - -# The HTML_OUTPUT tag is used to specify where the HTML docs will be put. If a -# relative path is entered the value of OUTPUT_DIRECTORY will be put in front of -# it. -# The default directory is: html. -# This tag requires that the tag GENERATE_HTML is set to YES. - -HTML_OUTPUT = docs - -# The HTML_FILE_EXTENSION tag can be used to specify the file extension for each -# generated HTML page (for example: .htm, .php, .asp). -# The default value is: .html. -# This tag requires that the tag GENERATE_HTML is set to YES. - -HTML_FILE_EXTENSION = .html - -# The HTML_HEADER tag can be used to specify a user-defined HTML header file for -# each generated HTML page. If the tag is left blank doxygen will generate a -# standard header. -# -# To get valid HTML the header file that includes any scripts and style sheets -# that doxygen needs, which is dependent on the configuration options used (e.g. -# the setting GENERATE_TREEVIEW). It is highly recommended to start with a -# default header using -# doxygen -w html new_header.html new_footer.html new_stylesheet.css -# YourConfigFile -# and then modify the file new_header.html. See also section "Doxygen usage" -# for information on how to generate the default header that doxygen normally -# uses. -# Note: The header is subject to change so you typically have to regenerate the -# default header when upgrading to a newer version of doxygen. For a description -# of the possible markers and block names see the documentation. -# This tag requires that the tag GENERATE_HTML is set to YES. - -HTML_HEADER = - -# The HTML_FOOTER tag can be used to specify a user-defined HTML footer for each -# generated HTML page. If the tag is left blank doxygen will generate a standard -# footer. See HTML_HEADER for more information on how to generate a default -# footer and what special commands can be used inside the footer. See also -# section "Doxygen usage" for information on how to generate the default footer -# that doxygen normally uses. -# This tag requires that the tag GENERATE_HTML is set to YES. - -HTML_FOOTER = - -# The HTML_STYLESHEET tag can be used to specify a user-defined cascading style -# sheet that is used by each HTML page. It can be used to fine-tune the look of -# the HTML output. If left blank doxygen will generate a default style sheet. -# See also section "Doxygen usage" for information on how to generate the style -# sheet that doxygen normally uses. -# Note: It is recommended to use HTML_EXTRA_STYLESHEET instead of this tag, as -# it is more robust and this tag (HTML_STYLESHEET) will in the future become -# obsolete. -# This tag requires that the tag GENERATE_HTML is set to YES. - -HTML_STYLESHEET = - -# The HTML_EXTRA_STYLESHEET tag can be used to specify additional user-defined -# cascading style sheets that are included after the standard style sheets -# created by doxygen. Using this option one can overrule certain style aspects. -# This is preferred over using HTML_STYLESHEET since it does not replace the -# standard style sheet and is therefore more robust against future updates. -# Doxygen will copy the style sheet files to the output directory. -# Note: The order of the extra style sheet files is of importance (e.g. the last -# style sheet in the list overrules the setting of the previous ones in the -# list). For an example see the documentation. -# This tag requires that the tag GENERATE_HTML is set to YES. - -HTML_EXTRA_STYLESHEET = - -# The HTML_EXTRA_FILES tag can be used to specify one or more extra images or -# other source files which should be copied to the HTML output directory. Note -# that these files will be copied to the base HTML output directory. Use the -# $relpath^ marker in the HTML_HEADER and/or HTML_FOOTER files to load these -# files. In the HTML_STYLESHEET file, use the file name only. Also note that the -# files will be copied as-is; there are no commands or markers available. -# This tag requires that the tag GENERATE_HTML is set to YES. - -HTML_EXTRA_FILES = - -# The HTML_COLORSTYLE_HUE tag controls the color of the HTML output. Doxygen -# will adjust the colors in the style sheet and background images according to -# this color. Hue is specified as an angle on a colorwheel, see -# https://en.wikipedia.org/wiki/Hue for more information. For instance the value -# 0 represents red, 60 is yellow, 120 is green, 180 is cyan, 240 is blue, 300 -# purple, and 360 is red again. -# Minimum value: 0, maximum value: 359, default value: 220. -# This tag requires that the tag GENERATE_HTML is set to YES. - -HTML_COLORSTYLE_HUE = 220 - -# The HTML_COLORSTYLE_SAT tag controls the purity (or saturation) of the colors -# in the HTML output. For a value of 0 the output will use grayscales only. A -# value of 255 will produce the most vivid colors. -# Minimum value: 0, maximum value: 255, default value: 100. -# This tag requires that the tag GENERATE_HTML is set to YES. - -HTML_COLORSTYLE_SAT = 100 - -# The HTML_COLORSTYLE_GAMMA tag controls the gamma correction applied to the -# luminance component of the colors in the HTML output. Values below 100 -# gradually make the output lighter, whereas values above 100 make the output -# darker. The value divided by 100 is the actual gamma applied, so 80 represents -# a gamma of 0.8, The value 220 represents a gamma of 2.2, and 100 does not -# change the gamma. -# Minimum value: 40, maximum value: 240, default value: 80. -# This tag requires that the tag GENERATE_HTML is set to YES. - -HTML_COLORSTYLE_GAMMA = 80 - -# If the HTML_TIMESTAMP tag is set to YES then the footer of each generated HTML -# page will contain the date and time when the page was generated. Setting this -# to YES can help to show when doxygen was last run and thus if the -# documentation is up to date. -# The default value is: NO. -# This tag requires that the tag GENERATE_HTML is set to YES. - -HTML_TIMESTAMP = NO - -# If the HTML_DYNAMIC_MENUS tag is set to YES then the generated HTML -# documentation will contain a main index with vertical navigation menus that -# are dynamically created via JavaScript. If disabled, the navigation index will -# consists of multiple levels of tabs that are statically embedded in every HTML -# page. Disable this option to support browsers that do not have JavaScript, -# like the Qt help browser. -# The default value is: YES. -# This tag requires that the tag GENERATE_HTML is set to YES. - -HTML_DYNAMIC_MENUS = YES - -# If the HTML_DYNAMIC_SECTIONS tag is set to YES then the generated HTML -# documentation will contain sections that can be hidden and shown after the -# page has loaded. -# The default value is: NO. -# This tag requires that the tag GENERATE_HTML is set to YES. - -HTML_DYNAMIC_SECTIONS = NO - -# With HTML_INDEX_NUM_ENTRIES one can control the preferred number of entries -# shown in the various tree structured indices initially; the user can expand -# and collapse entries dynamically later on. Doxygen will expand the tree to -# such a level that at most the specified number of entries are visible (unless -# a fully collapsed tree already exceeds this amount). So setting the number of -# entries 1 will produce a full collapsed tree by default. 0 is a special value -# representing an infinite number of entries and will result in a full expanded -# tree by default. -# Minimum value: 0, maximum value: 9999, default value: 100. -# This tag requires that the tag GENERATE_HTML is set to YES. - -HTML_INDEX_NUM_ENTRIES = 100 - -# If the GENERATE_DOCSET tag is set to YES, additional index files will be -# generated that can be used as input for Apple's Xcode 3 integrated development -# environment (see: https://developer.apple.com/xcode/), introduced with OSX -# 10.5 (Leopard). To create a documentation set, doxygen will generate a -# Makefile in the HTML output directory. Running make will produce the docset in -# that directory and running make install will install the docset in -# ~/Library/Developer/Shared/Documentation/DocSets so that Xcode will find it at -# startup. See https://developer.apple.com/library/archive/featuredarticles/Doxy -# genXcode/_index.html for more information. -# The default value is: NO. -# This tag requires that the tag GENERATE_HTML is set to YES. - -GENERATE_DOCSET = NO - -# This tag determines the name of the docset feed. A documentation feed provides -# an umbrella under which multiple documentation sets from a single provider -# (such as a company or product suite) can be grouped. -# The default value is: Doxygen generated docs. -# This tag requires that the tag GENERATE_DOCSET is set to YES. - -DOCSET_FEEDNAME = "Doxygen generated docs" - -# This tag specifies a string that should uniquely identify the documentation -# set bundle. This should be a reverse domain-name style string, e.g. -# com.mycompany.MyDocSet. Doxygen will append .docset to the name. -# The default value is: org.doxygen.Project. -# This tag requires that the tag GENERATE_DOCSET is set to YES. - -DOCSET_BUNDLE_ID = org.doxygen.Project - -# The DOCSET_PUBLISHER_ID tag specifies a string that should uniquely identify -# the documentation publisher. This should be a reverse domain-name style -# string, e.g. com.mycompany.MyDocSet.documentation. -# The default value is: org.doxygen.Publisher. -# This tag requires that the tag GENERATE_DOCSET is set to YES. - -DOCSET_PUBLISHER_ID = org.doxygen.Publisher - -# The DOCSET_PUBLISHER_NAME tag identifies the documentation publisher. -# The default value is: Publisher. -# This tag requires that the tag GENERATE_DOCSET is set to YES. - -DOCSET_PUBLISHER_NAME = Publisher - -# If the GENERATE_HTMLHELP tag is set to YES then doxygen generates three -# additional HTML index files: index.hhp, index.hhc, and index.hhk. The -# index.hhp is a project file that can be read by Microsoft's HTML Help Workshop -# (see: https://www.microsoft.com/en-us/download/details.aspx?id=21138) on -# Windows. -# -# The HTML Help Workshop contains a compiler that can convert all HTML output -# generated by doxygen into a single compiled HTML file (.chm). Compiled HTML -# files are now used as the Windows 98 help format, and will replace the old -# Windows help format (.hlp) on all Windows platforms in the future. Compressed -# HTML files also contain an index, a table of contents, and you can search for -# words in the documentation. The HTML workshop also contains a viewer for -# compressed HTML files. -# The default value is: NO. -# This tag requires that the tag GENERATE_HTML is set to YES. - -GENERATE_HTMLHELP = NO - -# The CHM_FILE tag can be used to specify the file name of the resulting .chm -# file. You can add a path in front of the file if the result should not be -# written to the html output directory. -# This tag requires that the tag GENERATE_HTMLHELP is set to YES. - -CHM_FILE = - -# The HHC_LOCATION tag can be used to specify the location (absolute path -# including file name) of the HTML help compiler (hhc.exe). If non-empty, -# doxygen will try to run the HTML help compiler on the generated index.hhp. -# The file has to be specified with full path. -# This tag requires that the tag GENERATE_HTMLHELP is set to YES. - -HHC_LOCATION = - -# The GENERATE_CHI flag controls if a separate .chi index file is generated -# (YES) or that it should be included in the master .chm file (NO). -# The default value is: NO. -# This tag requires that the tag GENERATE_HTMLHELP is set to YES. - -GENERATE_CHI = NO - -# The CHM_INDEX_ENCODING is used to encode HtmlHelp index (hhk), content (hhc) -# and project file content. -# This tag requires that the tag GENERATE_HTMLHELP is set to YES. - -CHM_INDEX_ENCODING = - -# The BINARY_TOC flag controls whether a binary table of contents is generated -# (YES) or a normal table of contents (NO) in the .chm file. Furthermore it -# enables the Previous and Next buttons. -# The default value is: NO. -# This tag requires that the tag GENERATE_HTMLHELP is set to YES. - -BINARY_TOC = NO - -# The TOC_EXPAND flag can be set to YES to add extra items for group members to -# the table of contents of the HTML help documentation and to the tree view. -# The default value is: NO. -# This tag requires that the tag GENERATE_HTMLHELP is set to YES. - -TOC_EXPAND = NO - -# If the GENERATE_QHP tag is set to YES and both QHP_NAMESPACE and -# QHP_VIRTUAL_FOLDER are set, an additional index file will be generated that -# can be used as input for Qt's qhelpgenerator to generate a Qt Compressed Help -# (.qch) of the generated HTML documentation. -# The default value is: NO. -# This tag requires that the tag GENERATE_HTML is set to YES. - -GENERATE_QHP = NO - -# If the QHG_LOCATION tag is specified, the QCH_FILE tag can be used to specify -# the file name of the resulting .qch file. The path specified is relative to -# the HTML output folder. -# This tag requires that the tag GENERATE_QHP is set to YES. - -QCH_FILE = - -# The QHP_NAMESPACE tag specifies the namespace to use when generating Qt Help -# Project output. For more information please see Qt Help Project / Namespace -# (see: https://doc.qt.io/archives/qt-4.8/qthelpproject.html#namespace). -# The default value is: org.doxygen.Project. -# This tag requires that the tag GENERATE_QHP is set to YES. - -QHP_NAMESPACE = org.doxygen.Project - -# The QHP_VIRTUAL_FOLDER tag specifies the namespace to use when generating Qt -# Help Project output. For more information please see Qt Help Project / Virtual -# Folders (see: https://doc.qt.io/archives/qt-4.8/qthelpproject.html#virtual- -# folders). -# The default value is: doc. -# This tag requires that the tag GENERATE_QHP is set to YES. - -QHP_VIRTUAL_FOLDER = doc - -# If the QHP_CUST_FILTER_NAME tag is set, it specifies the name of a custom -# filter to add. For more information please see Qt Help Project / Custom -# Filters (see: https://doc.qt.io/archives/qt-4.8/qthelpproject.html#custom- -# filters). -# This tag requires that the tag GENERATE_QHP is set to YES. - -QHP_CUST_FILTER_NAME = - -# The QHP_CUST_FILTER_ATTRS tag specifies the list of the attributes of the -# custom filter to add. For more information please see Qt Help Project / Custom -# Filters (see: https://doc.qt.io/archives/qt-4.8/qthelpproject.html#custom- -# filters). -# This tag requires that the tag GENERATE_QHP is set to YES. - -QHP_CUST_FILTER_ATTRS = - -# The QHP_SECT_FILTER_ATTRS tag specifies the list of the attributes this -# project's filter section matches. Qt Help Project / Filter Attributes (see: -# https://doc.qt.io/archives/qt-4.8/qthelpproject.html#filter-attributes). -# This tag requires that the tag GENERATE_QHP is set to YES. - -QHP_SECT_FILTER_ATTRS = - -# The QHG_LOCATION tag can be used to specify the location of Qt's -# qhelpgenerator. If non-empty doxygen will try to run qhelpgenerator on the -# generated .qhp file. -# This tag requires that the tag GENERATE_QHP is set to YES. - -QHG_LOCATION = - -# If the GENERATE_ECLIPSEHELP tag is set to YES, additional index files will be -# generated, together with the HTML files, they form an Eclipse help plugin. To -# install this plugin and make it available under the help contents menu in -# Eclipse, the contents of the directory containing the HTML and XML files needs -# to be copied into the plugins directory of eclipse. The name of the directory -# within the plugins directory should be the same as the ECLIPSE_DOC_ID value. -# After copying Eclipse needs to be restarted before the help appears. -# The default value is: NO. -# This tag requires that the tag GENERATE_HTML is set to YES. - -GENERATE_ECLIPSEHELP = NO - -# A unique identifier for the Eclipse help plugin. When installing the plugin -# the directory name containing the HTML and XML files should also have this -# name. Each documentation set should have its own identifier. -# The default value is: org.doxygen.Project. -# This tag requires that the tag GENERATE_ECLIPSEHELP is set to YES. - -ECLIPSE_DOC_ID = org.doxygen.Project - -# If you want full control over the layout of the generated HTML pages it might -# be necessary to disable the index and replace it with your own. The -# DISABLE_INDEX tag can be used to turn on/off the condensed index (tabs) at top -# of each HTML page. A value of NO enables the index and the value YES disables -# it. Since the tabs in the index contain the same information as the navigation -# tree, you can set this option to YES if you also set GENERATE_TREEVIEW to YES. -# The default value is: NO. -# This tag requires that the tag GENERATE_HTML is set to YES. - -DISABLE_INDEX = NO - -# The GENERATE_TREEVIEW tag is used to specify whether a tree-like index -# structure should be generated to display hierarchical information. If the tag -# value is set to YES, a side panel will be generated containing a tree-like -# index structure (just like the one that is generated for HTML Help). For this -# to work a browser that supports JavaScript, DHTML, CSS and frames is required -# (i.e. any modern browser). Windows users are probably better off using the -# HTML help feature. Via custom style sheets (see HTML_EXTRA_STYLESHEET) one can -# further fine-tune the look of the index. As an example, the default style -# sheet generated by doxygen has an example that shows how to put an image at -# the root of the tree instead of the PROJECT_NAME. Since the tree basically has -# the same information as the tab index, you could consider setting -# DISABLE_INDEX to YES when enabling this option. -# The default value is: NO. -# This tag requires that the tag GENERATE_HTML is set to YES. - -GENERATE_TREEVIEW = NO - -# The ENUM_VALUES_PER_LINE tag can be used to set the number of enum values that -# doxygen will group on one line in the generated HTML documentation. -# -# Note that a value of 0 will completely suppress the enum values from appearing -# in the overview section. -# Minimum value: 0, maximum value: 20, default value: 4. -# This tag requires that the tag GENERATE_HTML is set to YES. - -ENUM_VALUES_PER_LINE = 4 - -# If the treeview is enabled (see GENERATE_TREEVIEW) then this tag can be used -# to set the initial width (in pixels) of the frame in which the tree is shown. -# Minimum value: 0, maximum value: 1500, default value: 250. -# This tag requires that the tag GENERATE_HTML is set to YES. - -TREEVIEW_WIDTH = 250 - -# If the EXT_LINKS_IN_WINDOW option is set to YES, doxygen will open links to -# external symbols imported via tag files in a separate window. -# The default value is: NO. -# This tag requires that the tag GENERATE_HTML is set to YES. - -EXT_LINKS_IN_WINDOW = NO - -# Use this tag to change the font size of LaTeX formulas included as images in -# the HTML documentation. When you change the font size after a successful -# doxygen run you need to manually remove any form_*.png images from the HTML -# output directory to force them to be regenerated. -# Minimum value: 8, maximum value: 50, default value: 10. -# This tag requires that the tag GENERATE_HTML is set to YES. - -FORMULA_FONTSIZE = 10 - -# Use the FORMULA_TRANSPARENT tag to determine whether or not the images -# generated for formulas are transparent PNGs. Transparent PNGs are not -# supported properly for IE 6.0, but are supported on all modern browsers. -# -# Note that when changing this option you need to delete any form_*.png files in -# the HTML output directory before the changes have effect. -# The default value is: YES. -# This tag requires that the tag GENERATE_HTML is set to YES. - -FORMULA_TRANSPARENT = YES - -# The FORMULA_MACROFILE can contain LaTeX \newcommand and \renewcommand commands -# to create new LaTeX commands to be used in formulas as building blocks. See -# the section "Including formulas" for details. - -FORMULA_MACROFILE = - -# Enable the USE_MATHJAX option to render LaTeX formulas using MathJax (see -# https://www.mathjax.org) which uses client side JavaScript for the rendering -# instead of using pre-rendered bitmaps. Use this if you do not have LaTeX -# installed or if you want to formulas look prettier in the HTML output. When -# enabled you may also need to install MathJax separately and configure the path -# to it using the MATHJAX_RELPATH option. -# The default value is: NO. -# This tag requires that the tag GENERATE_HTML is set to YES. - -USE_MATHJAX = NO - -# When MathJax is enabled you can set the default output format to be used for -# the MathJax output. See the MathJax site (see: -# http://docs.mathjax.org/en/latest/output.html) for more details. -# Possible values are: HTML-CSS (which is slower, but has the best -# compatibility), NativeMML (i.e. MathML) and SVG. -# The default value is: HTML-CSS. -# This tag requires that the tag USE_MATHJAX is set to YES. - -MATHJAX_FORMAT = HTML-CSS - -# When MathJax is enabled you need to specify the location relative to the HTML -# output directory using the MATHJAX_RELPATH option. The destination directory -# should contain the MathJax.js script. For instance, if the mathjax directory -# is located at the same level as the HTML output directory, then -# MATHJAX_RELPATH should be ../mathjax. The default value points to the MathJax -# Content Delivery Network so you can quickly see the result without installing -# MathJax. However, it is strongly recommended to install a local copy of -# MathJax from https://www.mathjax.org before deployment. -# The default value is: https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/. -# This tag requires that the tag USE_MATHJAX is set to YES. - -MATHJAX_RELPATH = https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/ - -# The MATHJAX_EXTENSIONS tag can be used to specify one or more MathJax -# extension names that should be enabled during MathJax rendering. For example -# MATHJAX_EXTENSIONS = TeX/AMSmath TeX/AMSsymbols -# This tag requires that the tag USE_MATHJAX is set to YES. - -MATHJAX_EXTENSIONS = - -# The MATHJAX_CODEFILE tag can be used to specify a file with javascript pieces -# of code that will be used on startup of the MathJax code. See the MathJax site -# (see: http://docs.mathjax.org/en/latest/output.html) for more details. For an -# example see the documentation. -# This tag requires that the tag USE_MATHJAX is set to YES. - -MATHJAX_CODEFILE = - -# When the SEARCHENGINE tag is enabled doxygen will generate a search box for -# the HTML output. The underlying search engine uses javascript and DHTML and -# should work on any modern browser. Note that when using HTML help -# (GENERATE_HTMLHELP), Qt help (GENERATE_QHP), or docsets (GENERATE_DOCSET) -# there is already a search function so this one should typically be disabled. -# For large projects the javascript based search engine can be slow, then -# enabling SERVER_BASED_SEARCH may provide a better solution. It is possible to -# search using the keyboard; to jump to the search box use + S -# (what the is depends on the OS and browser, but it is typically -# , /