-
Notifications
You must be signed in to change notification settings - Fork 5
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Draft PhyloFingerprints and write rudimentary test
- Loading branch information
Showing
5 changed files
with
305 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,134 @@ | ||
#pragma once | ||
#ifndef DISH2_GENOME_PHYLOFINGERPRINTS_HPP_INCLUDE | ||
#define DISH2_GENOME_PHYLOFINGERPRINTS_HPP_INCLUDE | ||
|
||
#include <algorithm> | ||
#include <cmath> | ||
|
||
#include "../polyfill/bit_floor.hpp" | ||
#include "../polyfill/bit_width.hpp" | ||
#include "../polyfill/countr_zero.hpp" | ||
|
||
// #include "../../../third-party/Empirical/include/emp/base/vector.hpp" | ||
#include "../../../third-party/signalgp-lite/include/sgpl/utility/ThreadLocalRandom.hpp" | ||
|
||
namespace dish2 { | ||
|
||
class PhyloFingerprints { | ||
|
||
uint64_t generation_counter{}; | ||
|
||
// ordered from most ancient (index 0 / fingerprints.front()) | ||
// to most recent (index n - 1 / fingerprints.back()) | ||
std::vector<uint64_t> fingerprints; | ||
// absolute generation corresponding to each fingerprint | ||
std::vector<uint64_t> generations; | ||
|
||
// todo remove | ||
public: | ||
|
||
// msb == most significant bit | ||
static uint64_t clear_msb(const uint64_t v) { | ||
return v - std::bit_floor(v); | ||
} | ||
|
||
// lsb == lowest set bit | ||
// aka (x & -x).set_width() | ||
// not defined for x == 0 | ||
// == https://oeis.org/A001511 - 1 | ||
static uint64_t get_lsb_index(const uint64_t v) { | ||
emp_assert(v); | ||
return std::countr_zero(v); | ||
} | ||
|
||
// 0 == no drop | ||
// should generate sequence | ||
// 0, 0, 1, 0, 1, 2, 1, 0, 1, 2, 1, 3, 1, 2, 1, 0, ... | ||
// non-zero interludes are from https://oeis.org/A001511 | ||
// zeros occur when index is an even power of two | ||
static uint64_t calc_drop_idx(const uint64_t gen) { | ||
const uint64_t leftover = clear_msb(gen); | ||
if ( leftover ) return get_lsb_index(leftover) + 1; | ||
else return 0; | ||
} | ||
|
||
void append_fingerprint() { | ||
fingerprints.push_back( | ||
sgpl::tlrand.Get().GetUInt64() | ||
); | ||
generations.push_back( | ||
generation_counter | ||
); | ||
} | ||
|
||
void maybe_drop_fingerprint(const size_t drop_idx) { | ||
if ( drop_idx ) { | ||
fingerprints.erase( | ||
std::next( std::rbegin(fingerprints), drop_idx ).base() | ||
); | ||
generations.erase( | ||
std::next( std::rbegin(generations), drop_idx ).base() | ||
); | ||
} | ||
} | ||
|
||
void print() { | ||
for ( auto& g : generations ) std::cout << generation_counter - g << " "; | ||
std::cout << '\n'; | ||
} | ||
|
||
public: | ||
|
||
PhyloFingerprints() { append_fingerprint(); ++generation_counter; } | ||
|
||
void ElapseGeneration() { | ||
++generation_counter; | ||
|
||
maybe_drop_fingerprint( calc_drop_idx(generation_counter) ); | ||
|
||
append_fingerprint(); | ||
} | ||
|
||
size_t DivergedAtLeast( const PhyloFingerprints& other ) const { | ||
const auto [our_mismatch, others_mismatch] = std::mismatch( | ||
std::begin( fingerprints ), std::end( fingerprints ), | ||
std::begin( other.fingerprints ) | ||
); | ||
|
||
const size_t num_divergent_fingerprints = std::distance( | ||
our_mismatch, std::end( fingerprints ) | ||
); | ||
|
||
if ( num_divergent_fingerprints ) { | ||
const size_t first_divergent_generation = generations[ | ||
generations.size() - num_divergent_fingerprints | ||
]; | ||
return generation_counter - first_divergent_generation; | ||
} else return 0; | ||
|
||
} | ||
|
||
size_t DivergedLessThan( const PhyloFingerprints& other ) const { | ||
const auto [our_mismatch, others_mismatch] = std::mismatch( | ||
std::begin( fingerprints ), std::end( fingerprints ), | ||
std::begin( other.fingerprints ) | ||
); | ||
|
||
const size_t num_common_fingerprints = std::distance( | ||
std::begin( fingerprints ), our_mismatch | ||
); | ||
|
||
if ( num_common_fingerprints ) { | ||
const size_t last_common_generation = generations[ | ||
num_common_fingerprints - 1 | ||
]; | ||
return generation_counter - last_common_generation; | ||
} else return -1; | ||
|
||
} | ||
|
||
}; | ||
|
||
} // namespace dish2 | ||
|
||
#endif // #ifndef DISH2_GENOME_PHYLOFINGERPRINTS_HPP_INCLUDE |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
#pragma once | ||
#ifndef DISH2_POLYFILL_BIT_FLOOR_HPP_INCLUDE | ||
#define DISH2_POLYFILL_BIT_FLOOR_HPP_INCLUDE | ||
|
||
#include "bit_width.hpp" | ||
|
||
namespace std { | ||
|
||
template< typename T > | ||
T bit_floor(T x) { | ||
if (x != 0) return T{1} << (std::bit_width(x) - 1); | ||
else return 0; | ||
} | ||
|
||
} // namespace std | ||
|
||
#endif // #ifndef DISH2_POLYFILL_BIT_FLOOR_HPP_INCLUDE |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
#pragma once | ||
#ifndef DISH2_POLYFILL_BIT_WIDTH_HPP_INCLUDE | ||
#define DISH2_POLYFILL_BIT_WIDTH_HPP_INCLUDE | ||
|
||
namespace std { | ||
|
||
// Stand-in for C++20 std::bit_width: the number of bits needed to represent
// x, i.e. one plus the position of its highest set bit, or 0 when x is 0.
template< typename T >
size_t bit_width(T x) {
  size_t num_bits{};
  // shift right until nothing remains, counting one bit per shift
  for (; x; x >>= 1) ++num_bits;
  return num_bits;
}
|
||
} // namespace std | ||
|
||
#endif // #ifndef DISH2_POLYFILL_BIT_WIDTH_HPP_INCLUDE |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
#pragma once | ||
#ifndef DISH2_POLYFILL_COUNTR_ZERO_HPP_INCLUDE | ||
#define DISH2_POLYFILL_COUNTR_ZERO_HPP_INCLUDE | ||
|
||
namespace std { | ||
|
||
// adapted from https://johnmcfarlane.github.io/cnl/bit_8h_source.html | ||
// Stand-in for C++20 std::countr_zero: the number of consecutive zero bits
// in x, counted upward from the least significant bit.
//
// For x == 0 there is no set bit to stop at, so the full bit width of T is
// returned (matching std::countr_zero); the earlier recursive form never
// terminated on that input.
template< typename T >
size_t countr_zero(T x) {
  // assumes 8-bit bytes
  if (x == 0) return sizeof(T) * 8;

  size_t num_trailing_zeros{};
  // x is non-zero here, so a set bit is always reached
  while ( !(x & 1) ) {
    x >>= 1;
    ++num_trailing_zeros;
  }
  return num_trailing_zeros;
}
|
||
} // namespace std | ||
|
||
#endif // #ifndef DISH2_POLYFILL_COUNTR_ZERO_HPP_INCLUDE |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,117 @@ | ||
#include <sstream> | ||
|
||
#define CATCH_CONFIG_MAIN | ||
|
||
#include "Catch/single_include/catch2/catch.hpp" | ||
#include "cereal/include/cereal/archives/binary.hpp" | ||
#include "cereal/include/cereal/archives/json.hpp" | ||
#include "conduit/include/uitsl/mpi/MpiGuard.hpp" | ||
|
||
#include "dish2/genome/PhyloFingerprints.hpp" | ||
#include "dish2/spec/Spec.hpp" | ||
|
||
// Spec alias; within this file it is only referenced by the commented-out | ||
// serialization tests below. | ||
using Spec = dish2::Spec_default; | ||
|
||
// File-scope MpiGuard instance shared by every test case in this file. | ||
// NOTE(review): presumably sets up and tears down MPI for the lifetime of | ||
// the test binary -- confirm against the uitsl documentation. | ||
const uitsl::MpiGuard guard; | ||
|
||
// TEST_CASE("Test Binary Serialization") { | ||
// | ||
// dish2::EventTags<Spec> original{}; | ||
// | ||
// std::stringstream ss; | ||
// | ||
// { | ||
// cereal::JSONOutputArchive oarchive(ss); // Create an output archive | ||
// oarchive(original); // Write the data to the archive | ||
// } // archive goes out of scope, ensuring all contents are flushed | ||
// | ||
// dish2::EventTags<Spec> dup; | ||
// { | ||
// cereal::JSONInputArchive iarchive(ss); // Create an input archive | ||
// iarchive(dup); | ||
// } | ||
// | ||
// REQUIRE( original == dup ); | ||
// | ||
// } | ||
// | ||
// TEST_CASE("Test JSON Serialization") { | ||
// | ||
// dish2::EventTags<Spec> original{}; | ||
// | ||
// std::stringstream ss; | ||
// | ||
// { | ||
// cereal::JSONOutputArchive oarchive(ss); // Create an output archive | ||
// oarchive(original); // Write the data to the archive | ||
// } // archive goes out of scope, ensuring all contents are flushed | ||
// | ||
// dish2::EventTags<Spec> dup; | ||
// { | ||
// cereal::JSONInputArchive iarchive(ss); // Create an input archive | ||
// iarchive(dup); | ||
// } | ||
// | ||
// REQUIRE( original == dup ); | ||
// | ||
// } | ||
|
||
// get_lsb_index must report the position of the lowest set bit: the bit at
// the reported position is set and every bit beneath it is clear.
TEST_CASE("get_lsb_index") {

  // starts at 1 because get_lsb_index is not defined for 0
  for (size_t gen{1}; gen < 100; ++gen) {
    const size_t idx = dish2::PhyloFingerprints::get_lsb_index(gen);

    const bool bit_at_idx_is_set = (gen >> idx) & 1;
    REQUIRE( bit_at_idx_is_set );

    const size_t bits_below_idx = gen & ((size_t{1} << idx) - 1);
    REQUIRE( bits_below_idx == 0 );
  }

}
|
||
// clear_msb must strip exactly the highest set bit: the removed portion is a
// single bit (or nothing, for 0) and it outweighs everything left behind.
TEST_CASE("clear_msb") {

  for (size_t gen{}; gen < 100; ++gen) {
    const size_t leftover = dish2::PhyloFingerprints::clear_msb(gen);
    REQUIRE( leftover <= gen );

    // what was removed has at most one bit set
    const size_t removed = gen - leftover;
    const size_t removed_minus_lowest_bit = removed & (removed - 1);
    REQUIRE( removed_minus_lowest_bit == 0 );

    // the removed bit was the most significant one
    const bool removed_bit_was_highest = (gen == 0) || (leftover < removed);
    REQUIRE( removed_bit_was_highest );
  }

}
|
||
// calc_drop_idx must reproduce the sequence documented alongside its
// definition, and must request no drop for 0 and for powers of two.
TEST_CASE("calc_drop_idx") {

  // documented drop indices for gen = 1, 2, ..., 16
  const size_t expected[]{ 0, 0, 1, 0, 1, 2, 1, 0, 1, 2, 1, 3, 1, 2, 1, 0 };

  size_t gen{1};
  for (const size_t expected_drop_idx : expected) {
    REQUIRE(
      dish2::PhyloFingerprints::calc_drop_idx(gen) == expected_drop_idx
    );
    ++gen;
  }

  REQUIRE( dish2::PhyloFingerprints::calc_drop_idx(0) == 0 );

  // nothing remains after clearing the only set bit of a power of two
  for (size_t pow2{1}; pow2 < 100; pow2 *= 2) {
    REQUIRE( dish2::PhyloFingerprints::calc_drop_idx(pow2) == 0 );
  }

}
|
||
TEST_CASE("logarithmic growth") {
  // TODO: not yet implemented. Presumably meant to check that the number of
  // fingerprints retained by dish2::PhyloFingerprints grows logarithmically
  // with the number of elapsed generations; the class currently exposes no
  // accessor for that count.
}
|
||
// Splits a lineage in two and checks, generation by generation, that the
// bounds deduced from the fingerprints bracket the true divergence time.
TEST_CASE("Deduction") {

  dish2::PhyloFingerprints parent;

  // accumulate some shared history before the split
  for (size_t gen{}; gen < 100; ++gen) parent.ElapseGeneration();

  // the copy carries the parent's full record; the lineages diverge from here
  dish2::PhyloFingerprints offspring = parent;

  for (size_t gen{}; gen < 100; ++gen) {
    const size_t diverged_at_least = parent.DivergedAtLeast( offspring );
    const size_t diverged_less_than = parent.DivergedLessThan( offspring );

    std::cout
      << "actually diverged " << gen << " generations ago, "
      << "deduced between "
      << diverged_at_least
      << " and "
      << diverged_less_than
      << " generations\n";

    // gen is the true number of generations since the split; these hold
    // unless two independently drawn 64-bit fingerprints happen to collide
    REQUIRE( diverged_at_least <= gen );
    REQUIRE( gen <= diverged_less_than );

    parent.ElapseGeneration();
    offspring.ElapseGeneration();
  }

}