Skip to content

Commit

Permalink
Merge pull request #585 from pangenome/flip_ref_paths
Browse files Browse the repository at this point in the history
`odgi flip -r/--ref-flips`: flip with respect to a set of paths (used as references)
  • Loading branch information
AndreaGuarracino authored Aug 9, 2024
2 parents a1f169c + 5b9de41 commit 2151dd8
Show file tree
Hide file tree
Showing 3 changed files with 64 additions and 16 deletions.
60 changes: 50 additions & 10 deletions src/algorithms/flip.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,13 @@ using namespace handlegraph;

void flip_paths(graph_t& graph,
graph_t& into,
const std::vector<path_handle_t>& no_flips) {
const std::vector<path_handle_t>& no_flips,
const std::vector<path_handle_t>& ref_flips) {
// Copy the nodes
graph.for_each_handle([&](const handle_t& h) {
into.create_handle(graph.get_sequence(h), graph.get_id(h));
});
// Copy the edges
graph.for_each_handle([&](const handle_t& h) {
graph.follow_edges(h, false, [&](const handle_t& next) {
into.create_edge(into.get_handle(graph.get_id(h), graph.get_is_reverse(h)),
Expand All @@ -22,15 +25,47 @@ void flip_paths(graph_t& graph,
into.get_handle(graph.get_id(h), graph.get_is_reverse(h)));
});
});

// Paths to not flip
ska::flat_hash_set<path_handle_t> no_flip;
for (auto& p : no_flips) { no_flip.insert(p); }

// Paths to use as reference for flipping
ska::flat_hash_set<path_handle_t> ref_flip;
for (auto& p : ref_flips) { ref_flip.insert(p); }

// Prepare all path handles in a vector (for parallel processing)
std::vector<path_handle_t> paths;
std::vector<path_handle_t> into_paths;
// for each path, find its average orientation
graph.for_each_path_handle([&](const path_handle_t& p) { paths.push_back(p); });

// Check whether reference paths are (in general) in fwd or rev orientation
uint64_t ref_rev = 0;
uint64_t ref_fwd = 0;
// OpenMP's reduction: each thread maintains its own private copy of these variables
// during the parallel region, and OpenMP combines them at the end using addition.
#pragma omp parallel for reduction(+:ref_rev,ref_fwd)
for (auto& path : paths) {
if (ref_flip.count(path)) {
graph.for_each_step_in_path(
path,
[&ref_rev,&ref_fwd,&graph](const step_handle_t& s) {
auto h = graph.get_handle_of_step(s);
auto len = graph.get_length(h);
if (graph.get_is_reverse(h)) {
ref_rev += len;
} else {
ref_fwd += len;
}
});
}
}

#pragma omp parallel for
for (auto& path : paths) {
if (!no_flip.count(path)) {
// ref_paths must not be flipped either
if (!no_flip.count(path) && !ref_flip.count(path)) {
// Check path orientation with respect to the graph
uint64_t rev = 0;
uint64_t fwd = 0;
graph.for_each_step_in_path(
Expand All @@ -44,11 +79,20 @@ void flip_paths(graph_t& graph,
fwd += len;
}
});
// those that tend to be reversed more than forward should be flipped
if (rev > fwd) {

// Check if the path should be flipped
bool flip_path = false;
if (ref_flip.size() > 0) {
// if ref paths are reversed, reversed paths should be
// not flipped to stay consistent with the reference
flip_path = ref_rev > ref_fwd ? rev < fwd : rev > fwd;
} else {
// those that tend to be reversed more than forward should be flipped
flip_path = rev > fwd;
}
if (flip_path) {
auto name = graph.get_path_name(path) + "_inv";
auto flipped = into.create_path_handle(name);
into_paths.push_back(flipped);
std::vector<handle_t> v;
graph.for_each_step_in_path(
path,
Expand All @@ -63,7 +107,6 @@ void flip_paths(graph_t& graph,
}
} else {
auto fwd = into.create_path_handle(graph.get_path_name(path));
into_paths.push_back(fwd);
graph.for_each_step_in_path(
path,
[&fwd,&into,&graph](const step_handle_t& s) {
Expand All @@ -75,7 +118,6 @@ void flip_paths(graph_t& graph,
} else {
// add the path as-is
auto fwd = into.create_path_handle(graph.get_path_name(path));
into_paths.push_back(fwd);
graph.for_each_step_in_path(
path,
[&fwd,&into,&graph](const step_handle_t& s) {
Expand All @@ -87,7 +129,6 @@ void flip_paths(graph_t& graph,
}

ska::flat_hash_set<std::pair<handle_t, handle_t>> edges_to_create;

#pragma omp parallel for
for (auto& path : paths) {
// New edges can be due only when paths are flipped
Expand All @@ -104,7 +145,6 @@ void flip_paths(graph_t& graph,
});
}
}

// add missing edges
for (auto edge: edges_to_create) {
into.create_edge(edge.first, edge.second);
Expand Down
3 changes: 2 additions & 1 deletion src/algorithms/flip.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,8 @@ using namespace handlegraph;
/// the graph in the forward orientation.
void flip_paths(graph_t& graph,
graph_t& into,
const std::vector<path_handle_t>& no_flips);
const std::vector<path_handle_t>& no_flips,
const std::vector<path_handle_t>& ref_flips);

}

Expand Down
17 changes: 12 additions & 5 deletions src/subcommand/flip_main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,14 +18,15 @@ int main_flip(int argc, char **argv) {
}
const std::string prog_name = "odgi flip";
argv[0] = (char *) prog_name.c_str();
--argc
;
--argc;

args::ArgumentParser parser("Flip (reverse complement) paths to match the graph.");
args::Group mandatory_opts(parser, "[ MANDATORY ARGUMENTS ]");
args::ValueFlag<std::string> og_in_file(mandatory_opts, "FILE", "Load the succinct variation graph in ODGI format from this *FILE*. The file name usually ends with *.og*. GFA is also supported.", {'i', "idx"});
args::ValueFlag<std::string> og_out_file(mandatory_opts, "FILE", "Write the sorted dynamic succinct variation graph to this file (e.g. *.og*).", {'o', "out"});
args::Group flip_opts(parser, "[ Flip Options ]");
args::ValueFlag<std::string> _no_flips(flip_opts, "FILE", "Don't flip paths listed one per line in FILE.", {'n', "no-flips"});
args::ValueFlag<std::string> _ref_flips(flip_opts, "FILE", "Flip paths to match the orientation of the paths listed one per line in FILE.", {'r', "ref-flips"});
args::Group threading_opts(parser, "[ Threading ]");
args::ValueFlag<uint64_t> nthreads(threading_opts, "N", "Number of threads to use for parallel operations.",
{'t', "threads"});
Expand Down Expand Up @@ -80,7 +81,8 @@ int main_flip(int argc, char **argv) {
graph.set_number_of_threads(num_threads);

std::vector<path_handle_t> no_flips;
if (_no_flips) {
std::vector<path_handle_t> ref_flips;
if (_no_flips || _ref_flips) {
// path loading
auto load_paths = [&](const std::string& path_names_file) {
std::ifstream path_names_in(path_names_file);
Expand Down Expand Up @@ -116,11 +118,16 @@ int main_flip(int argc, char **argv) {
return paths;
};

no_flips = load_paths(args::get(_no_flips));
if (_no_flips) {
no_flips = load_paths(args::get(_no_flips));
}
if (_ref_flips) {
ref_flips = load_paths(args::get(_ref_flips));
}
}

graph_t into;
algorithms::flip_paths(graph, into, no_flips);
algorithms::flip_paths(graph, into, no_flips, ref_flips);

const std::string outfile = args::get(og_out_file);
if (outfile == "-") {
Expand Down

0 comments on commit 2151dd8

Please sign in to comment.