Skip to content

Commit

Permalink
Merge pull request #559 from pangenome/extract_dedup
Browse files Browse the repository at this point in the history
`odgi extract`: avoid duplicated subpaths
  • Loading branch information
AndreaGuarracino authored Feb 12, 2024
2 parents 7bbd41d + 4097440 commit d42a868
Show file tree
Hide file tree
Showing 2 changed files with 20 additions and 0 deletions.
9 changes: 9 additions & 0 deletions src/position.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,15 @@ struct path_range_t {
std::string data;
};

/// Ordering functor for path_range_t, usable as the comparator of ordered containers.
///
/// Ranges are ordered lexicographically by, in this order:
///   1. begin.path
///   2. end.path
///   3. begin.offset
///   4. end.offset
/// No other member of path_range_t is consulted, so two ranges that agree on
/// these four values compare as equivalent.
struct path_range_comparator {
    bool operator() (const path_range_t& lhs, const path_range_t& rhs) const {
        const auto& left_begin = lhs.begin;
        const auto& right_begin = rhs.begin;
        const auto& left_end = lhs.end;
        const auto& right_end = rhs.end;

        // The paths decide first: begin path, then end path.
        if (left_begin.path != right_begin.path) {
            return left_begin.path < right_begin.path;
        }
        if (left_end.path != right_end.path) {
            return left_end.path < right_end.path;
        }

        // Same paths on both ends: fall back to the offsets, begin before end.
        if (left_begin.offset != right_begin.offset) {
            return left_begin.offset < right_begin.offset;
        }
        return left_end.offset < right_end.offset;
    }
};

/// Returns the string stored at index 0 of the given tuple.
///
/// The tuple is received by value, so the string is returned by value too.
/// Returning `std::string&` here would hand the caller a reference into the
/// by-value parameter, which is destroyed no later than the end of the calling
/// full-expression; any later use of that reference would be undefined behaviour.
///
/// @param path_long_start_end tuple whose first element is the string to extract
///        (presumably a path name followed by start/end coordinates — confirm with callers).
/// @return a copy of the tuple's first element.
inline std::string get_long_path_name(std::tuple<std::string, uint64_t, uint64_t> path_long_start_end) {
    return std::get<0>(path_long_start_end);
}
Expand Down
11 changes: 11 additions & 0 deletions src/subcommand/extract_main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -556,6 +556,17 @@ namespace odgi {
return std::binary_search(source_paths_from_path_ranges.begin(), source_paths_from_path_ranges.end(), x);
}), source_paths->end());

// Nodes are never cut during extraction, so path intervals that lie close to each other
// can end up describing exactly the same subpath. Drop the repeated path ranges here so
// that the final subgraph does not contain duplicated subpaths.
{
    std::set<odgi::path_range_t, odgi::path_range_comparator> deduplicated_ranges;

    // Insert one range at a time: a range whose comparator key is already present is
    // rejected by the set, so the first range seen for each key is the one that is kept.
    for (auto range_it = path_ranges.begin(); range_it != path_ranges.end(); ++range_it) {
        deduplicated_ranges.insert(*range_it);
    }

    // Replace the original ranges with the surviving ones, in the set's (comparator) order.
    path_ranges.assign(deduplicated_ranges.begin(), deduplicated_ranges.end());
}

if (max_dist_subpaths > 0) {
// Iterate multiple times to merge subpaths which became mergeable during the first iteration where new nodes were added
Expand Down

0 comments on commit d42a868

Please sign in to comment.