Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Untangly cleanup v2 #486

Open
wants to merge 13 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ endif()
# Set optimization through command line; see INSTALL.md
if (${CMAKE_BUILD_TYPE} MATCHES Release)
set(EXTRA_FLAGS "-Ofast -march=native -pipe -msse4.2 -funroll-all-loops") # -fprofile-generate=../pgo")
#set(EXTRA_FLAGS "-O0 -march=native -msse4.2 -pg") # for profiling with gprof
set(CMAKE_CXX_FLAGS_RELEASE "-DNDEBUG") # reset CXX_FLAGS to be able to replace -O3 with -Ofast
endif ()

Expand Down
48 changes: 28 additions & 20 deletions src/algorithms/stepindex.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -226,14 +226,13 @@ path_step_index_t::path_step_index_t(const PathHandleGraph& graph,
const uint64_t& nthreads) {
// iterate through the paths, recording steps in the structure we'll use to build the mphf
{
std::vector<step_handle_t> steps;
std::vector<nid_t> nodes;
graph.for_each_step_in_path(
path, [&](const step_handle_t& step) {
steps.push_back(step);
nodes.push_back(graph.get_id(graph.get_handle_of_step(step)));
});
steps.push_back(graph.path_end(path));
steps.push_back(graph.path_end(path)); // dangerous...
// sort the steps, nb. they're unique
ips4o::parallel::sort(steps.begin(), steps.end(), std::less<>(), nthreads);
// build the hash function (quietly)
Expand All @@ -254,15 +253,12 @@ path_step_index_t::path_step_index_t(const PathHandleGraph& graph,
// here, we sort steps by handle and then position
// and build our handle->step list and step->offset maps
// these are steps sorted by the bbhash of their node id, and then their offset in the path
std::vector<std::tuple<uint64_t, uint64_t, step_handle_t>> steps_by_node;
std::vector<std::tuple<uint64_t, uint64_t, step_handle_t, step_it>> steps_by_node;
uint64_t offset = 0;
graph.for_each_step_in_path(
path, [&](const step_handle_t& step) {
steps_by_node.push_back(std::make_tuple(node_mphf->lookup(graph.get_id(graph.get_handle_of_step(step))),
offset,
step));
offset += graph.get_length(graph.get_handle_of_step(step));
});
for (step_it step = steps.begin(); step != steps.end(); ++step) {
steps_by_node.emplace_back(node_mphf->lookup(graph.get_id(graph.get_handle_of_step(*step))), offset, *step, step);
offset += graph.get_length(graph.get_handle_of_step(*step));
}
if (offset == 0) {
std::cerr << "[odgi::algorithms::stepindex] unable to index empty path " << graph.get_path_name(path) << std::endl;
std::abort();
Expand All @@ -275,14 +271,15 @@ path_step_index_t::path_step_index_t(const PathHandleGraph& graph,
node_offset[0] = 0; // first offset is 0 by definition
for (auto& node_step : steps_by_node) {
auto& idx = std::get<0>(node_step);
//auto& offset = std::get<1>(node_step); // just used for sorting
auto& offset = std::get<1>(node_step); // just used for sorting
auto& step = std::get<2>(node_step);
auto& it_step = std::get<3>(node_step);
//std::cerr << "idx = " << idx << " " << as_integers(step)[0] << ":" << as_integers(step)[1] << std::endl;
if (idx != last_idx) {
node_offset[idx] = node_steps.size();
}
step_offset[step_mphf->lookup(step)] = node_steps.size();
node_steps.push_back(step);
node_steps.push_back(std::make_pair(it_step, offset));
last_idx = idx;
}
if (last_idx != node_count-1) {
Expand Down Expand Up @@ -313,33 +310,44 @@ uint64_t path_step_index_t::n_steps_on_node(const nid_t& id) const {
return node_offset[idx+1] - node_offset[idx];
}

std::pair<bool, step_handle_t>
std::pair<bool, std::pair<path_step_index_t::step_it, uint64_t>>
path_step_index_t::get_next_step_on_node(const nid_t& id, const step_handle_t& step) const {
auto node_idx = get_node_idx(id);
auto curr_steps = node_offset[node_idx];
auto next_steps = node_offset[node_idx+1];
auto step_idx = step_offset[get_step_idx(step)];
bool has_next = step_idx + 1 < next_steps;
if (has_next) {
return std::make_pair(true, node_steps[step_idx+1]);
return std::pair(true, node_steps[step_idx+1]);
} else {
step_handle_t empty_step;
return std::make_pair(false, empty_step);
auto empty_step = steps.end();
return std::pair(false, std::pair(empty_step, 0));
}
}

std::pair<bool, step_handle_t>
std::pair<bool, std::pair<path_step_index_t::step_it, uint64_t>>
path_step_index_t::get_prev_step_on_node(const nid_t& id, const step_handle_t& step) const {
auto curr_steps = node_offset[get_node_idx(id)];
auto step_idx = step_offset[get_step_idx(step)];
bool has_prev = step_idx > curr_steps;
if (has_prev) {
return std::make_pair(true, node_steps[step_idx-1]);
return std::pair(true, node_steps[step_idx-1]);
} else {
step_handle_t empty_step;
return std::make_pair(false, empty_step);
auto empty_step = steps.end();
return std::pair(false, std::pair(empty_step, 0));
}
}

// Iterator to the first entry of the stored `steps` vector,
// i.e. the first step recorded for the indexed path.
path_step_index_t::step_it path_step_index_t::path_begin(void) const {
    const step_it first_step = steps.cbegin();
    return first_step;
}

// Iterator to the second-to-last entry of the stored `steps` vector.
// The constructor appends graph.path_end(path) after the real steps, so the
// final element is presumably that end sentinel and the last real step sits
// just before it.
// NOTE(review): this assumes the sentinel is still last after the sort done in
// the constructor, and that `steps` holds at least two elements — confirm.
path_step_index_t::step_it path_step_index_t::path_back(void) const {
    const step_it past_the_end = steps.cend();
    return std::prev(past_the_end, 2);
}


}
}
18 changes: 15 additions & 3 deletions src/algorithms/stepindex.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -96,10 +96,14 @@ struct path_step_index_t {
~path_step_index_t(void);
// map from node id in the path to an index in node_offset
boophf_uint64_t* node_mphf = nullptr;
// steps in the path
std::vector<step_handle_t> steps;
// iterator typedef for convenience
typedef std::vector<step_handle_t>::const_iterator step_it;
// map to the beginning of a range in node_steps
std::vector<uint64_t> node_offset;
// record the steps in positional order by node (index given in node_offset)
std::vector<step_handle_t> node_steps;
std::vector<std::pair<step_it, uint64_t>> node_steps;
// map from step to an index in step_offset
boophf_step_t* step_mphf = nullptr;
// index in node_steps for the given step
Expand All @@ -114,9 +118,17 @@ struct path_step_index_t {
uint64_t n_steps_on_node(const nid_t& id) const;
// these functions require, but do not check, that our step is in the indexed path
// next step on node (sorted by position in path), (false, _) if there is no next step
std::pair<bool, step_handle_t> get_next_step_on_node(const nid_t& id, const step_handle_t& step) const;
std::pair<bool, std::pair<step_it, uint64_t>>
get_next_step_on_node(const nid_t& id, const step_handle_t& step) const;
// prev step on node (sorted by position in path), (false, _) if there is no previous step
std::pair<bool, step_handle_t> get_prev_step_on_node(const nid_t& id, const step_handle_t& step) const;
std::pair<bool, std::pair<step_it, uint64_t>>
get_prev_step_on_node(const nid_t& id, const step_handle_t& step) const;

// get the first step in the path
step_it path_begin(void) const;

// get the last step in the path
step_it path_back(void) const;
};

}
Expand Down
Loading