From 0607c2dd4af4eb326db3754fe846aaff4f4186f1 Mon Sep 17 00:00:00 2001
From: "mmore500.login+git@gmail.com"
Date: Mon, 31 May 2021 19:58:22 -0400
Subject: [PATCH] Draft PhyloFingerprints and write rudimentary test

---
 include/dish2/genome/PhyloFingerprints.hpp | 156 +++++++++++++++++++++
 include/dish2/polyfill/bit_floor.hpp       |  27 ++++
 include/dish2/polyfill/bit_width.hpp       |  34 +++++
 include/dish2/polyfill/countr_zero.hpp     |  37 +++++
 tests/dish2/genome/PhyloFingerprints.cpp   | 143 +++++++++++++++++++
 5 files changed, 397 insertions(+)
 create mode 100644 include/dish2/genome/PhyloFingerprints.hpp
 create mode 100644 include/dish2/polyfill/bit_floor.hpp
 create mode 100644 include/dish2/polyfill/bit_width.hpp
 create mode 100644 include/dish2/polyfill/countr_zero.hpp
 create mode 100644 tests/dish2/genome/PhyloFingerprints.cpp

diff --git a/include/dish2/genome/PhyloFingerprints.hpp b/include/dish2/genome/PhyloFingerprints.hpp
new file mode 100644
index 0000000000..83841e8c7b
--- /dev/null
+++ b/include/dish2/genome/PhyloFingerprints.hpp
@@ -0,0 +1,156 @@
+#pragma once
+#ifndef DISH2_GENOME_PHYLOFINGERPRINTS_HPP_INCLUDE
+#define DISH2_GENOME_PHYLOFINGERPRINTS_HPP_INCLUDE
+
+#include <algorithm>
+#include <cstddef>
+#include <cstdint>
+#include <iostream>
+#include <iterator>
+#include <vector>
+
+#include "../polyfill/bit_floor.hpp"
+#include "../polyfill/bit_width.hpp"
+#include "../polyfill/countr_zero.hpp"
+
+// #include "../../../third-party/Empirical/include/emp/base/vector.hpp"
+// NOTE(review): emp_assert presumably arrives via the include below; confirm
+#include "../../../third-party/signalgp-lite/include/sgpl/utility/ThreadLocalRandom.hpp"
+
+namespace dish2 {
+
+// trail of per-generation random fingerprints, thinned as generations elapse,
+// that two instances can compare to estimate how long ago they diverged
+class PhyloFingerprints {
+
+  // bumped once by the constructor and once per ElapseGeneration() call
+  uint64_t generation_counter{};
+
+  // ordered from most ancient (index 0 / fingerprints.front())
+  // to most recent (index n - 1 / fingerprints.back())
+  std::vector<uint64_t> fingerprints;
+  // absolute generation corresponding to each fingerprint
+  std::vector<uint64_t> generations;
+
+// todo remove
+public:
+
+  // msb == most significant bit
+  // returns v with its highest set bit cleared (0 stays 0)
+  static uint64_t clear_msb(const uint64_t v) {
+    return v - std::bit_floor(v);
+  }
+
+  // lsb == lowest set bit
+  // aka (x & -x).set_width()
+  // not defined for x == 0
+  // == https://oeis.org/A001511 - 1
+  static uint64_t get_lsb_index(const uint64_t v) {
+    emp_assert(v);
+    return std::countr_zero(v);
+  }
+
+  // 0 == no drop
+  // should generate sequence (gen == 1, 2, 3, ...)
+  // 0, 0, 1, 0, 1, 2, 1, 0, 1, 2, 1, 3, 1, 2, 1, 0, ...
+  // non-zero interludes are from https://oeis.org/A001511
+  // zeros occur when gen is zero or an exact power of two
+  static uint64_t calc_drop_idx(const uint64_t gen) {
+    const uint64_t leftover = clear_msb(gen);
+    if ( leftover ) return get_lsb_index(leftover) + 1;
+    else return 0;
+  }
+
+  // record a fresh random fingerprint tagged with the current generation
+  void append_fingerprint() {
+    fingerprints.push_back(
+      sgpl::tlrand.Get().GetUInt64()
+    );
+    generations.push_back(
+      generation_counter
+    );
+  }
+
+  // drop_idx == 0 is a no-op,
+  // otherwise erase the drop_idx'th most recent entry (1 == most recent)
+  void maybe_drop_fingerprint(const size_t drop_idx) {
+    if ( drop_idx ) {
+      emp_assert( drop_idx <= fingerprints.size() );
+      fingerprints.erase(
+        std::next( std::rbegin(fingerprints), drop_idx ).base()
+      );
+      generations.erase(
+        std::next( std::rbegin(generations), drop_idx ).base()
+      );
+    }
+  }
+
+  // print how many generations ago each retained fingerprint was recorded
+  void print() const {
+    for ( const auto g : generations ) std::cout << generation_counter - g << " ";
+    std::cout << '\n';
+  }
+
+public:
+
+  PhyloFingerprints() { append_fingerprint(); ++generation_counter; }
+
+  void ElapseGeneration() {
+    ++generation_counter;
+
+    maybe_drop_fingerprint( calc_drop_idx(generation_counter) );
+
+    append_fingerprint();
+  }
+
+  // generations elapsed since the oldest of our fingerprints that other
+  // does not share, or 0 if other shares all of our fingerprints
+  size_t DivergedAtLeast( const PhyloFingerprints& other ) const {
+    // the four-iterator overload stops at the shorter trail,
+    // so other.fingerprints is never read past its end
+    const auto our_mismatch = std::mismatch(
+      std::begin( fingerprints ), std::end( fingerprints ),
+      std::begin( other.fingerprints ), std::end( other.fingerprints )
+    ).first;
+
+    const size_t num_divergent_fingerprints = std::distance(
+      our_mismatch, std::end( fingerprints )
+    );
+
+    if ( num_divergent_fingerprints ) {
+      const size_t first_divergent_generation = generations[
+        generations.size() - num_divergent_fingerprints
+      ];
+      return generation_counter - first_divergent_generation;
+    } else return 0;
+
+  }
+
+  // generations elapsed since the newest fingerprint shared with other,
+  // or the largest size_t value if not even the oldest one is shared
+  size_t DivergedLessThan( const PhyloFingerprints& other ) const {
+    // the four-iterator overload stops at the shorter trail,
+    // so other.fingerprints is never read past its end
+    const auto our_mismatch = std::mismatch(
+      std::begin( fingerprints ), std::end( fingerprints ),
+      std::begin( other.fingerprints ), std::end( other.fingerprints )
+    ).first;
+
+    const size_t num_common_fingerprints = std::distance(
+      std::begin( fingerprints ), our_mismatch
+    );
+
+    if ( num_common_fingerprints ) {
+      const size_t last_common_generation = generations[
+        num_common_fingerprints - 1
+      ];
+      return generation_counter - last_common_generation;
+    } else return static_cast<size_t>( -1 );
+
+  }
+
+};
+
+} // namespace dish2
+
+#endif // #ifndef DISH2_GENOME_PHYLOFINGERPRINTS_HPP_INCLUDE
diff --git a/include/dish2/polyfill/bit_floor.hpp b/include/dish2/polyfill/bit_floor.hpp
new file mode 100644
index 0000000000..e4261273ec
--- /dev/null
+++ b/include/dish2/polyfill/bit_floor.hpp
@@ -0,0 +1,27 @@
+#pragma once
+#ifndef DISH2_POLYFILL_BIT_FLOOR_HPP_INCLUDE
+#define DISH2_POLYFILL_BIT_FLOOR_HPP_INCLUDE
+
+#if __has_include(<bit>)
+#include <bit>
+#endif
+
+#include "bit_width.hpp"
+
+// only polyfill when the standard library lacks std::bit_floor
+#ifndef __cpp_lib_int_pow2
+
+namespace std {
+
+  // largest power of two not greater than x, or 0 if x == 0
+  template< typename T >
+  T bit_floor(T x) {
+    if (x != 0) return T{1} << (std::bit_width(x) - 1);
+    else return 0;
+  }
+
+} // namespace std
+
+#endif // #ifndef __cpp_lib_int_pow2
+
+#endif // #ifndef DISH2_POLYFILL_BIT_FLOOR_HPP_INCLUDE
diff --git a/include/dish2/polyfill/bit_width.hpp b/include/dish2/polyfill/bit_width.hpp
new file mode 100644
index 0000000000..3dc97fdda0
--- /dev/null
+++ b/include/dish2/polyfill/bit_width.hpp
@@ -0,0 +1,34 @@
+#pragma once
+#ifndef DISH2_POLYFILL_BIT_WIDTH_HPP_INCLUDE
+#define DISH2_POLYFILL_BIT_WIDTH_HPP_INCLUDE
+
+#include <cstddef>
+#include <type_traits>
+
+#if __has_include(<bit>)
+#include <bit>
+#endif
+
+// only polyfill when the standard library lacks std::bit_width
+#ifndef __cpp_lib_int_pow2
+
+namespace std {
+
+  // number of bits needed to represent x, 0 if x == 0
+  template< typename T >
+  size_t bit_width(T x) {
+    // a negative value would typically never shift down to zero
+    static_assert( std::is_unsigned_v<T>, "unsigned type required" );
+    size_t res{};
+    while (x) {
+      x >>= 1;
+      ++res;
+    }
+    return res;
+  }
+
+} // namespace std
+
+#endif // #ifndef __cpp_lib_int_pow2
+
+#endif // #ifndef DISH2_POLYFILL_BIT_WIDTH_HPP_INCLUDE
diff --git a/include/dish2/polyfill/countr_zero.hpp b/include/dish2/polyfill/countr_zero.hpp
new file mode 100644
index 0000000000..eed250e35f
--- /dev/null
+++ b/include/dish2/polyfill/countr_zero.hpp
@@ -0,0 +1,37 @@
+#pragma once
+#ifndef DISH2_POLYFILL_COUNTR_ZERO_HPP_INCLUDE
+#define DISH2_POLYFILL_COUNTR_ZERO_HPP_INCLUDE
+
+#include <cstddef>
+#include <limits>
+#include <type_traits>
+
+#if __has_include(<bit>)
+#include <bit>
+#endif
+
+// only polyfill when the standard library lacks std::countr_zero
+#ifndef __cpp_lib_bitops
+
+namespace std {
+
+  // adapted from https://johnmcfarlane.github.io/cnl/bit_8h_source.html
+  // number of consecutive zero bits starting from the least significant bit
+  template< typename T >
+  size_t countr_zero(T x) {
+    static_assert( std::is_unsigned_v<T>, "unsigned type required" );
+    // no bit is set, so every bit of T counts as a trailing zero
+    if (x == 0) return std::numeric_limits<T>::digits;
+    size_t res{};
+    while ( !(x & 1) ) {
+      x >>= 1;
+      ++res;
+    }
+    return res;
+  }
+
+} // namespace std
+
+#endif // #ifndef __cpp_lib_bitops
+
+#endif // #ifndef DISH2_POLYFILL_COUNTR_ZERO_HPP_INCLUDE
diff --git a/tests/dish2/genome/PhyloFingerprints.cpp b/tests/dish2/genome/PhyloFingerprints.cpp
new file mode 100644
index 0000000000..a3f189cb0e
--- /dev/null
+++ b/tests/dish2/genome/PhyloFingerprints.cpp
@@ -0,0 +1,143 @@
+#include <cstddef>
+#include <cstdint>
+#include <iostream>
+#include <vector>
+
+#define CATCH_CONFIG_MAIN
+
+#include "Catch/single_include/catch2/catch.hpp"
+#include "cereal/include/cereal/archives/binary.hpp"
+#include "cereal/include/cereal/archives/json.hpp"
+#include "conduit/include/uitsl/mpi/MpiGuard.hpp"
+
+#include "dish2/genome/PhyloFingerprints.hpp"
+#include "dish2/spec/Spec.hpp"
+
+using Spec = dish2::Spec_default;
+
+const uitsl::MpiGuard guard;
+
+// TEST_CASE("Test Binary Serialization") {
+//
+//   dish2::EventTags original{};
+//
+//   std::stringstream ss;
+//
+//   {
+//     cereal::JSONOutputArchive oarchive(ss); // Create an output archive
+//     oarchive(original); // Write the data to the archive
+//   } // archive goes out of scope, ensuring all contents are flushed
+//
+//   dish2::EventTags dup;
+//   {
+//     cereal::JSONInputArchive iarchive(ss); // Create an input archive
+//     iarchive(dup);
+//   }
+//
+//   REQUIRE( original == dup );
+//
+// }
+//
+// TEST_CASE("Test JSON Serialization") {
+//
+//   dish2::EventTags original{};
+//
+//   std::stringstream ss;
+//
+//   {
+//     cereal::JSONOutputArchive oarchive(ss); // Create an output archive
+//     oarchive(original); // Write the data to the archive
+//   } // archive goes out of scope, ensuring all contents are flushed
+//
+//   dish2::EventTags dup;
+//   {
+//     cereal::JSONInputArchive iarchive(ss); // Create an input archive
+//     iarchive(dup);
+//   }
+//
+//   REQUIRE( original == dup );
+//
+// }
+
+TEST_CASE("get_lsb_index") {
+
+  for (size_t gen{1}; gen < 100; ++gen) {
+    const uint64_t idx = dish2::PhyloFingerprints::get_lsb_index(gen);
+    // bit idx must be set, with every lower bit clear
+    REQUIRE( (gen >> idx) % 2 == 1 );
+    REQUIRE( gen % (uint64_t{1} << idx) == 0 );
+    // std::cout
+    //   << gen << " "
+    //   << dish2::PhyloFingerprints::get_lsb_index(gen) + 1
+    //   << '\n';
+  }
+
+}
+
+TEST_CASE("clear_msb") {
+
+  for (size_t gen{}; gen < 100; ++gen) {
+    // the highest set bit outweighs all lower bits combined
+    REQUIRE( dish2::PhyloFingerprints::clear_msb(gen) <= gen / 2 );
+    // std::cout
+    //   << gen << " "
+    //   << dish2::PhyloFingerprints::clear_msb(gen)
+    //   << '\n';
+  }
+
+}
+
+TEST_CASE("calc_drop_idx") {
+
+  // documented sequence for gen == 1, 2, 3, ...
+  const std::vector<uint64_t> expected{
+    0, 0, 1, 0, 1, 2, 1, 0, 1, 2, 1, 3, 1, 2, 1, 0
+  };
+
+  REQUIRE( dish2::PhyloFingerprints::calc_drop_idx(0) == 0 );
+
+  for (size_t gen{1}; gen <= expected.size(); ++gen) {
+    REQUIRE(
+      dish2::PhyloFingerprints::calc_drop_idx(gen) == expected[gen - 1]
+    );
+    // std::cout
+    //   << gen << " "
+    //   << dish2::PhyloFingerprints::calc_drop_idx(gen)
+    //   << '\n';
+  }
+
+}
+
+TEST_CASE("logrithmic growth") {
+  // TODO
+}
+
+TEST_CASE("Deduction") {
+
+  dish2::PhyloFingerprints parent;
+
+  for (size_t gen{}; gen < 100; ++gen) parent.ElapseGeneration();
+
+  dish2::PhyloFingerprints offspring = parent;
+
+  for (size_t gen{}; gen < 100; ++gen) {
+    const size_t at_least = parent.DivergedAtLeast( offspring );
+    const size_t less_than = parent.DivergedLessThan( offspring );
+
+    std::cout
+      << "actualy diverged " << gen << " generations ago,"
+      << "deduced between "
+      << at_least
+      << " and "
+      << less_than
+      << " generations\n";
+
+    // deduced interval must bracket the true divergence time
+    REQUIRE( at_least <= gen );
+    REQUIRE( gen <= less_than );
+
+    parent.ElapseGeneration();
+    offspring.ElapseGeneration();
+  }
+
+}