Skip to content

Commit

Permalink
Merge pull request #3068 from vgteam/allow-add-overlap
Browse files Browse the repository at this point in the history
Pre-index non-alt paths to fix #3054
  • Loading branch information
adamnovak authored Oct 29, 2020
2 parents a5407ea + 5e0473e commit f86e9eb
Show file tree
Hide file tree
Showing 6 changed files with 67 additions and 11 deletions.
19 changes: 16 additions & 3 deletions src/graph_synchronizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,17 @@ namespace vg {
using namespace std;

GraphSynchronizer::GraphSynchronizer(VG& graph) : graph(graph) {
// Nothing to do!
// Because in general paths can overlap each other, and because we can't
// build a path index after a path has been modified (since we don't keep
// the ranks up to date internally), we need to build all the indexes up
// front, even if we're just working on a single path.
graph.for_each_path_handle([&](const path_handle_t& path) {
string name = graph.get_path_name(path);
if (!Paths::is_alt(name)) {
// We only care about reference paths.
get_path_index(name);
}
});
}

void GraphSynchronizer::with_path_index(const string& path_name, const function<void(const PathIndex&)>& to_run) {
Expand All @@ -29,7 +39,10 @@ const string& GraphSynchronizer::get_path_sequence(const string& path_name) {

// We need a function to grab the index for a path
PathIndex& GraphSynchronizer::get_path_index(const string& path_name) {


// We don't work on alt paths; there could be too many to pre-index.
assert(!Paths::is_alt(path_name));

if (!indexes.count(path_name)) {
// Not already made. Generate it.
indexes.emplace(piecewise_construct,
Expand Down Expand Up @@ -115,7 +128,7 @@ void GraphSynchronizer::Lock::lock() {
cerr << endl;
}
#endif

// Make them into pos_ts that point left to right, the way Jordan thinks.
pos_t left_pos = make_pos_t(start_left.node, start_left.is_end, 0);
pos_t right_pos = make_pos_t(end_right.node, !end_right.is_end,
Expand Down
12 changes: 7 additions & 5 deletions src/variant_adder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,13 @@ namespace vg {
using namespace std;
using namespace vg::io;

VariantAdder::VariantAdder(VG& graph) : graph(graph), sync(graph) {
VariantAdder::VariantAdder(VG& graph) : graph(graph), sync([&](VG& g) -> VG& {
// Dice nodes in the graph for GCSA indexing *before* constructing the synchronizer.
g.dice_nodes(max_node_size);
return g;
}(this->graph)) {


graph.paths.for_each_name([&](const string& name) {
// Save the names of all the graph paths, so we don't need to lock the
// graph to check them.
Expand All @@ -18,10 +24,6 @@ VariantAdder::VariantAdder(VG& graph) : graph(graph), sync(graph) {
// Show progress if the graph does.
show_progress = graph.show_progress;

// Make sure to dice nodes to 1024 or smaller, the max size that GCSA2
// supports, in case we need to GCSA-index part of the graph.
graph.dice_nodes(max_node_size);

// Configure the aligner to use a full length bonus
aligner.full_length_bonus = 5;
}
Expand Down
4 changes: 2 additions & 2 deletions src/vg.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4004,7 +4004,7 @@ void VG::divide_node(Node* node, vector<int>& positions, vector<Node*>& parts) {
#ifdef debug_divide

#pragma omp critical (cerr)
cerr << omp_get_thread_num() << ": dividing mapping " << pb2json(*m) << endl;
cerr << omp_get_thread_num() << ": dividing mapping " << *m << endl;
#endif

string path_name = paths.mapping_path_name(m);
Expand Down Expand Up @@ -4077,7 +4077,7 @@ void VG::divide_node(Node* node, vector<int>& positions, vector<Node*>& parts) {
#pragma omp critical (cerr)
cerr << omp_get_thread_num() << ": produced mappings:" << endl;
for(auto mapping : mapping_parts) {
cerr << "\t" << pb2json(mapping) << endl;
cerr << "\t" << mapping << endl;
}
#endif
}
Expand Down
24 changes: 24 additions & 0 deletions test/add/multi.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
{
"node": [
{"id": 1, "sequence": "CTTAAAATGATCGGGACTTTTCAAATCTTATTT"}
],
"edge": [
],
"path": [
{"name": "ref", "mapping": [
{"rank": 1, "edit": [
{"from_length": 33, "to_length": 33}
], "position": {"node_id": 1, "offset": 0, "is_reverse": true}}
]},
{"name": "ref2", "mapping": [
{"rank": 1, "edit": [
{"from_length": 33, "to_length": 33}
], "position": {"node_id": 1, "offset": 0}}
]},
{"name": "ref3", "mapping": [
{"rank": 1, "edit": [
{"from_length": 33, "to_length": 33}
], "position": {"node_id": 1, "offset": 0, "is_reverse": true}}
]}
]
}
15 changes: 15 additions & 0 deletions test/add/multi.vcf
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
##fileformat=VCFv4.0
##fileDate=20090805
##source=myImputationProgramV3.1
##reference=1000GenomesPilot-NCBI36
##phasing=partial
##FILTER=<ID=q10,Description="Quality below 10">
##FILTER=<ID=s50,Description="Less than 50% of samples have data">
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT SAMPLE1 SAMPLE2 SAMPLE3 SAMPLE4
ref 18 . TC T 100 PASS . GT 1/0 0/0 0|0 ././1
ref 21 . CGA GAC 100 PASS . GT 0/1 0/0 ./1 ./1/.
ref 23 . A AC 100 PASS . GT 0/0 1/0 . ./0
ref3 18 . TC T 100 PASS . GT 1/0 0/0 0|0 ././1
ref3 21 . CGA GAC 100 PASS . GT 0/1 0/0 ./1 ./1/.
ref3 23 . A AC 100 PASS . GT 0/0 1/0 . ./0
4 changes: 3 additions & 1 deletion test/t/31_vg_add.t
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ BASH_TAP_ROOT=../deps/bash-tap

PATH=../bin:$PATH # for vg

plan tests 11
plan tests 12

vg construct -r add/ref.fa > ref.vg
vg add -v add/benedict.vcf ref.vg > benedict.vg
Expand Down Expand Up @@ -44,6 +44,8 @@ is "$(vg view -c x.vg | jq -c '.path[].mapping[] | select(.rank | not)' | wc -l)

is "$(vg view -Jv add/backward.json | vg add -v add/benedict.vcf - | vg mod --unchop - | vg stats -N -)" "5" "graphs with backward nodes can be added to"

is "$(vg view -Jv add/multi.json | vg add -v add/multi.vcf - | vg mod --unchop - | vg stats -N -)" "5" "graphs with multiple overlapping paths nodes can be added to"

is "$(vg view -Jv add/backward_and_forward.json | vg add -v add/benedict.vcf - | vg mod --unchop - | vg stats -N -)" "5" "graphs with backward and forward nodes can be added to"

rm -rf ref.vg ref.pg benedict.vg benedict2.vg benedict3.vg x-ref.vg x.vg refN.vg no-n.vg with-n.vg ngap.vg ngap-add.vg
Expand Down

2 comments on commit f86e9eb

@adamnovak
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

vg CI tests complete for merge to master. View the full report here.

16 tests passed, 0 tests failed and 0 tests skipped in 14164 seconds

@adamnovak
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

vg CI tests complete for branch v1.28.0. View the full report here.

16 tests passed, 0 tests failed and 0 tests skipped in 13921 seconds

Please sign in to comment.