Skip to content

Commit

Permalink
Draft PhyloFingerprints and write rudimentary test
Browse files Browse the repository at this point in the history
  • Loading branch information
mmore500 committed May 31, 2021
1 parent c094664 commit 0607c2d
Show file tree
Hide file tree
Showing 5 changed files with 305 additions and 0 deletions.
134 changes: 134 additions & 0 deletions include/dish2/genome/PhyloFingerprints.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
#pragma once
#ifndef DISH2_GENOME_PHYLOFINGERPRINTS_HPP_INCLUDE
#define DISH2_GENOME_PHYLOFINGERPRINTS_HPP_INCLUDE

#include <algorithm>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <iostream>
#include <iterator>
#include <limits>
#include <vector>

#include "../polyfill/bit_floor.hpp"
#include "../polyfill/bit_width.hpp"
#include "../polyfill/countr_zero.hpp"

// #include "../../../third-party/Empirical/include/emp/base/vector.hpp"
#include "../../../third-party/signalgp-lite/include/sgpl/utility/ThreadLocalRandom.hpp"

namespace dish2 {

/**
 * Keeps a thinned sequence of random 64-bit fingerprints, one drawn per
 * generation, together with the generation at which each was drawn.
 * Two instances can be compared with DivergedAtLeast / DivergedLessThan,
 * which locate the first position at which their fingerprints differ.
 *
 * NOTE(review): the comparison is positional, so it appears to assume both
 * instances have elapsed the same number of generations -- confirm.
 */
class PhyloFingerprints {

  // advanced once by the constructor and once per ElapseGeneration call
  uint64_t generation_counter{};

  // ordered from most ancient (index 0 / fingerprints.front())
  // to most recent (index n - 1 / fingerprints.back())
  std::vector<uint64_t> fingerprints;
  // generation_counter value at the time each fingerprint was appended,
  // kept index-aligned with fingerprints
  std::vector<uint64_t> generations;

  /**
   * Count how many leading fingerprints (most ancient first) are identical
   * in this object and in other.
   *
   * Uses the bounded (four-iterator) std::mismatch so that neither sequence
   * is read past its end when the two hold different numbers of fingerprints.
   */
  size_t count_common_fingerprints( const PhyloFingerprints& other ) const {
    const auto first_mismatch = std::mismatch(
      std::begin( fingerprints ), std::end( fingerprints ),
      std::begin( other.fingerprints ), std::end( other.fingerprints )
    ).first;

    return static_cast<size_t>(
      std::distance( std::begin( fingerprints ), first_mismatch )
    );
  }

// todo remove
public:

  // msb == most significant bit
  // returns v with its highest set bit cleared (0 stays 0)
  static uint64_t clear_msb(const uint64_t v) {
    return v - std::bit_floor(v);
  }

  // lsb == lowest set bit
  // returns the zero-based position of the lowest set bit of v,
  // i.e., its number of trailing zero bits
  // not defined for v == 0
  // == https://oeis.org/A001511 - 1
  static uint64_t get_lsb_index(const uint64_t v) {
    emp_assert(v);
    return std::countr_zero(v);
  }

  // which fingerprint to drop at generation gen, counted back from the most
  // recent one (1 == most recent); 0 == no drop
  // for gen = 1, 2, 3, ... generates the sequence
  // 0, 0, 1, 0, 1, 2, 1, 0, 1, 2, 1, 3, 1, 2, 1, 0, ...
  // non-zero interludes are from https://oeis.org/A001511
  // zeros occur when gen is an exact power of two (and for gen == 0)
  static uint64_t calc_drop_idx(const uint64_t gen) {
    const uint64_t leftover = clear_msb(gen);
    if ( leftover ) return get_lsb_index(leftover) + 1;
    else return 0;
  }

  // draw a fresh random fingerprint and record it as belonging to the
  // current generation
  void append_fingerprint() {
    fingerprints.push_back(
      sgpl::tlrand.Get().GetUInt64()
    );
    generations.push_back(
      generation_counter
    );
  }

  // erase the drop_idx'th most recent fingerprint (1 == most recent) along
  // with its generation record; drop_idx == 0 is a no-op
  void maybe_drop_fingerprint(const size_t drop_idx) {
    if ( drop_idx ) {
      // stepping further back than the oldest entry would be out of bounds
      emp_assert( drop_idx <= fingerprints.size() );
      // reverse iterator advanced by drop_idx, converted with base(),
      // designates the element at index size() - drop_idx
      fingerprints.erase(
        std::next( std::rbegin(fingerprints), drop_idx ).base()
      );
      generations.erase(
        std::next( std::rbegin(generations), drop_idx ).base()
      );
    }
  }

  // write the age (in generations) of every retained fingerprint to stdout,
  // most ancient first
  void print() const {
    for ( const auto& g : generations ) {
      std::cout << generation_counter - g << " ";
    }
    std::cout << '\n';
  }

public:

  // start with a single fingerprint recorded at generation 0
  PhyloFingerprints() { append_fingerprint(); ++generation_counter; }

  // advance one generation: thin the retained fingerprints according to
  // calc_drop_idx, then append a fresh fingerprint for the new generation
  void ElapseGeneration() {
    ++generation_counter;

    maybe_drop_fingerprint( calc_drop_idx(generation_counter) );

    append_fingerprint();
  }

  /**
   * Generations elapsed since our oldest retained fingerprint that does not
   * match other's fingerprint at the same position.
   *
   * @return 0 if every one of our fingerprints matches, otherwise the age of
   * the first non-matching fingerprint.
   */
  size_t DivergedAtLeast( const PhyloFingerprints& other ) const {
    const size_t num_common_fingerprints = count_common_fingerprints( other );

    // no divergent fingerprint retained on our side
    if ( num_common_fingerprints == fingerprints.size() ) return 0;

    // the first divergent fingerprint sits directly after the common prefix
    const size_t first_divergent_generation = generations[
      num_common_fingerprints
    ];
    return generation_counter - first_divergent_generation;
  }

  /**
   * Generations elapsed since the most recent fingerprint that we still
   * share with other.
   *
   * @return age of the last common fingerprint, or the largest size_t value
   * when no fingerprint is shared at all.
   */
  size_t DivergedLessThan( const PhyloFingerprints& other ) const {
    const size_t num_common_fingerprints = count_common_fingerprints( other );

    // nothing in common: no finite bound can be given
    if ( num_common_fingerprints == 0 ) {
      return std::numeric_limits<size_t>::max();
    }

    const size_t last_common_generation = generations[
      num_common_fingerprints - 1
    ];
    return generation_counter - last_common_generation;
  }

};

} // namespace dish2

#endif // #ifndef DISH2_GENOME_PHYLOFINGERPRINTS_HPP_INCLUDE
17 changes: 17 additions & 0 deletions include/dish2/polyfill/bit_floor.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
#pragma once
#ifndef DISH2_POLYFILL_BIT_FLOOR_HPP_INCLUDE
#define DISH2_POLYFILL_BIT_FLOOR_HPP_INCLUDE

#include "bit_width.hpp"

namespace std {

// largest power of two that is not greater than x; 0 when x is 0
template< typename T >
T bit_floor(T x) {
  // zero has no set bit, so there is nothing to round down to
  if (x == 0) return 0;

  // position of the highest set bit, counted from zero
  const auto msb_index = std::bit_width(x) - 1;
  return T{1} << msb_index;
}

} // namespace std

#endif // #ifndef DISH2_POLYFILL_BIT_FLOOR_HPP_INCLUDE
19 changes: 19 additions & 0 deletions include/dish2/polyfill/bit_width.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
#pragma once
#ifndef DISH2_POLYFILL_BIT_WIDTH_HPP_INCLUDE
#define DISH2_POLYFILL_BIT_WIDTH_HPP_INCLUDE

namespace std {

// number of bits needed to represent x, i.e., one plus the position of its
// highest set bit; 0 when x is 0
template< typename T >
size_t bit_width(T x) {
  size_t num_bits{};
  // strip one bit per iteration until no set bits remain
  for (; x != 0; x >>= 1) ++num_bits;
  return num_bits;
}

} // namespace std

#endif // #ifndef DISH2_POLYFILL_BIT_WIDTH_HPP_INCLUDE
18 changes: 18 additions & 0 deletions include/dish2/polyfill/countr_zero.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
#pragma once
#ifndef DISH2_POLYFILL_COUNTR_ZERO_HPP_INCLUDE
#define DISH2_POLYFILL_COUNTR_ZERO_HPP_INCLUDE

namespace std {

// adapted from https://johnmcfarlane.github.io/cnl/bit_8h_source.html
// counts the consecutive zero bits of x starting from the least significant
// bit; for x == 0 the result is the number of bits in T (every bit is a
// trailing zero), rather than recursing without end
// intended for unsigned integer types
template< typename T >
size_t countr_zero(T x) {
  size_t res{};
  // slide a single-bit probe upward until it hits a set bit of x or is
  // shifted out of the type entirely (probe becomes 0)
  for (
    T probe{1};
    probe != 0 && (x & probe) == 0;
    probe = static_cast<T>(probe << 1)
  ) ++res;
  return res;
}

} // namespace std

#endif // #ifndef DISH2_POLYFILL_COUNTR_ZERO_HPP_INCLUDE
117 changes: 117 additions & 0 deletions tests/dish2/genome/PhyloFingerprints.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
#include <sstream>

#define CATCH_CONFIG_MAIN

#include "Catch/single_include/catch2/catch.hpp"
#include "cereal/include/cereal/archives/binary.hpp"
#include "cereal/include/cereal/archives/json.hpp"
#include "conduit/include/uitsl/mpi/MpiGuard.hpp"

#include "dish2/genome/PhyloFingerprints.hpp"
#include "dish2/spec/Spec.hpp"

// Spec is referenced only by the commented-out serialization tests below
using Spec = dish2::Spec_default;

// NOTE(review): presumably keeps MPI set up for the lifetime of the test
// binary -- confirm against uitsl::MpiGuard
const uitsl::MpiGuard guard;

// TEST_CASE("Test Binary Serialization") {
//
// dish2::EventTags<Spec> original{};
//
// std::stringstream ss;
//
// {
// cereal::JSONOutputArchive oarchive(ss); // Create an output archive
// oarchive(original); // Write the data to the archive
// } // archive goes out of scope, ensuring all contents are flushed
//
// dish2::EventTags<Spec> dup;
// {
// cereal::JSONInputArchive iarchive(ss); // Create an input archive
// iarchive(dup);
// }
//
// REQUIRE( original == dup );
//
// }
//
// TEST_CASE("Test JSON Serialization") {
//
// dish2::EventTags<Spec> original{};
//
// std::stringstream ss;
//
// {
// cereal::JSONOutputArchive oarchive(ss); // Create an output archive
// oarchive(original); // Write the data to the archive
// } // archive goes out of scope, ensuring all contents are flushed
//
// dish2::EventTags<Spec> dup;
// {
// cereal::JSONInputArchive iarchive(ss); // Create an input archive
// iarchive(dup);
// }
//
// REQUIRE( original == dup );
//
// }

// the index reported by get_lsb_index must designate a set bit with no set
// bits beneath it
TEST_CASE("get_lsb_index") {

  // gen == 0 is skipped: get_lsb_index is not defined for zero
  for (size_t gen{1}; gen < 100; ++gen) {
    const uint64_t lsb_index = dish2::PhyloFingerprints::get_lsb_index(gen);

    // the bit at the reported position is set ...
    REQUIRE( ((gen >> lsb_index) & 1) == 1 );
    // ... and every bit below that position is clear
    REQUIRE( (gen & ((uint64_t{1} << lsb_index) - 1)) == 0 );
  }

}

// clear_msb must remove exactly one bit, and that bit must be the highest
TEST_CASE("clear_msb") {

  // zero has no bit to clear
  REQUIRE( dish2::PhyloFingerprints::clear_msb(0) == 0 );

  for (size_t gen{1}; gen < 100; ++gen) {
    const uint64_t cleared = dish2::PhyloFingerprints::clear_msb(gen);
    const uint64_t removed = gen - cleared;

    // exactly one bit was removed (removed is a power of two)
    REQUIRE( removed != 0 );
    REQUIRE( (removed & (removed - 1)) == 0 );
    // everything left over lies strictly below the removed bit
    REQUIRE( cleared < removed );
  }

}

// calc_drop_idx must follow the documented drop schedule
TEST_CASE("calc_drop_idx") {

  // expected drop indices for generations 0 through 16
  const uint64_t expected[]{
    0, 0, 0, 1, 0, 1, 2, 1, 0, 1, 2, 1, 3, 1, 2, 1, 0
  };
  const size_t num_expected = sizeof(expected) / sizeof(expected[0]);

  for (size_t gen{}; gen < num_expected; ++gen) {
    REQUIRE( dish2::PhyloFingerprints::calc_drop_idx(gen) == expected[gen] );
  }

  // nothing is dropped exactly when gen is a power of two
  for (size_t gen{1}; gen < 100; ++gen) {
    const bool is_power_of_two = (gen & (gen - 1)) == 0;
    const bool no_drop = dish2::PhyloFingerprints::calc_drop_idx(gen) == 0;
    REQUIRE( no_drop == is_power_of_two );
  }

}

TEST_CASE("logrithmic growth") {
// TODO: presumably meant to check that the number of retained fingerprints
// grows logarithmically with elapsed generations; PhyloFingerprints exposes
// no accessor for its fingerprint count yet, so one is needed first
}

// split one lineage into two and check, generation by generation, that the
// deduced divergence bounds bracket the true time since the split
TEST_CASE("Deduction") {

  dish2::PhyloFingerprints parent;

  for (size_t gen{}; gen < 100; ++gen) parent.ElapseGeneration();

  // the two lineages are identical at the moment of the split
  dish2::PhyloFingerprints offspring = parent;

  for (size_t gen{}; gen < 100; ++gen) {
    const size_t diverged_at_least = parent.DivergedAtLeast( offspring );
    const size_t diverged_less_than = parent.DivergedLessThan( offspring );

    std::cout
      << "actualy diverged " << gen << " generations ago,"
      << "deduced between "
      << diverged_at_least
      << " and "
      << diverged_less_than
      << " generations\n";

    // gen is the true number of generations since the split; these checks
    // rely on independently drawn 64-bit fingerprints not colliding
    REQUIRE( diverged_at_least <= gen );
    REQUIRE( gen <= diverged_less_than );

    parent.ElapseGeneration();
    offspring.ElapseGeneration();
  }

}

0 comments on commit 0607c2d

Please sign in to comment.