Skip to content

Commit 63f1b51

Browse files
committed
Pre-index non-alt paths to fix #3054
1 parent fac2dd9 commit 63f1b51

File tree

6 files changed

+67
-11
lines changed

6 files changed

+67
-11
lines changed

src/graph_synchronizer.cpp

Lines changed: 16 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,17 @@ namespace vg {
88
using namespace std;
99

1010
GraphSynchronizer::GraphSynchronizer(VG& graph) : graph(graph) {
11-
// Nothing to do!
11+
// Because in general paths can overlap each other, and because we can't
12+
// build a path index after a path has been modified (since we don't keep
13+
// the ranks up to date internally), we need to build all the indexes up
14+
// front, even if we're just working on a single path.
15+
graph.for_each_path_handle([&](const path_handle_t& path) {
16+
string name = graph.get_path_name(path);
17+
if (!Paths::is_alt(name)) {
18+
// Only pre-index non-alt paths; alt paths are never worked on and there could be too many of them.
19+
get_path_index(name);
20+
}
21+
});
1222
}
1323

1424
void GraphSynchronizer::with_path_index(const string& path_name, const function<void(const PathIndex&)>& to_run) {
@@ -29,7 +39,10 @@ const string& GraphSynchronizer::get_path_sequence(const string& path_name) {
2939

3040
// We need a function to grab the index for a path
3141
PathIndex& GraphSynchronizer::get_path_index(const string& path_name) {
32-
42+
43+
// We don't work on alt paths; there could be too many to pre-index.
44+
assert(!Paths::is_alt(path_name));
45+
3346
if (!indexes.count(path_name)) {
3447
// Not already made. Generate it.
3548
indexes.emplace(piecewise_construct,
@@ -115,7 +128,7 @@ void GraphSynchronizer::Lock::lock() {
115128
cerr << endl;
116129
}
117130
#endif
118-
131+
119132
// Make them into pos_ts that point left to right, the way Jordan thinks.
120133
pos_t left_pos = make_pos_t(start_left.node, start_left.is_end, 0);
121134
pos_t right_pos = make_pos_t(end_right.node, !end_right.is_end,

src/variant_adder.cpp

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,13 @@ namespace vg {
88
using namespace std;
99
using namespace vg::io;
1010

11-
VariantAdder::VariantAdder(VG& graph) : graph(graph), sync(graph) {
11+
VariantAdder::VariantAdder(VG& graph) : graph(graph), sync([&](VG& g) -> VG& {
12+
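// Dice nodes down to the max size GCSA2 supports *before* constructing the synchronizer: its constructor pre-indexes the paths, and those indexes can't be rebuilt once dicing has modified the paths.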
13+
g.dice_nodes(max_node_size);
14+
return g;
15+
}(this->graph)) {
16+
17+
1218
graph.paths.for_each_name([&](const string& name) {
1319
// Save the names of all the graph paths, so we don't need to lock the
1420
// graph to check them.
@@ -18,10 +24,6 @@ VariantAdder::VariantAdder(VG& graph) : graph(graph), sync(graph) {
1824
// Show progress if the graph does.
1925
show_progress = graph.show_progress;
2026

21-
// Make sure to dice nodes to 1024 or smaller, the max size that GCSA2
22-
// supports, in case we need to GCSA-index part of the graph.
23-
graph.dice_nodes(max_node_size);
24-
2527
// Configure the aligner to use a full length bonus
2628
aligner.full_length_bonus = 5;
2729
}

src/vg.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4004,7 +4004,7 @@ void VG::divide_node(Node* node, vector<int>& positions, vector<Node*>& parts) {
40044004
#ifdef debug_divide
40054005

40064006
#pragma omp critical (cerr)
4007-
cerr << omp_get_thread_num() << ": dividing mapping " << pb2json(*m) << endl;
4007+
cerr << omp_get_thread_num() << ": dividing mapping " << *m << endl;
40084008
#endif
40094009

40104010
string path_name = paths.mapping_path_name(m);
@@ -4077,7 +4077,7 @@ void VG::divide_node(Node* node, vector<int>& positions, vector<Node*>& parts) {
40774077
#pragma omp critical (cerr)
40784078
cerr << omp_get_thread_num() << ": produced mappings:" << endl;
40794079
for(auto mapping : mapping_parts) {
4080-
cerr << "\t" << pb2json(mapping) << endl;
4080+
cerr << "\t" << mapping << endl;
40814081
}
40824082
#endif
40834083
}

test/add/multi.json

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
{
2+
"node": [
3+
{"id": 1, "sequence": "CTTAAAATGATCGGGACTTTTCAAATCTTATTT"}
4+
],
5+
"edge": [
6+
],
7+
"path": [
8+
{"name": "ref", "mapping": [
9+
{"rank": 1, "edit": [
10+
{"from_length": 33, "to_length": 33}
11+
], "position": {"node_id": 1, "offset": 0, "is_reverse": true}}
12+
]},
13+
{"name": "ref2", "mapping": [
14+
{"rank": 1, "edit": [
15+
{"from_length": 33, "to_length": 33}
16+
], "position": {"node_id": 1, "offset": 0}}
17+
]},
18+
{"name": "ref3", "mapping": [
19+
{"rank": 1, "edit": [
20+
{"from_length": 33, "to_length": 33}
21+
], "position": {"node_id": 1, "offset": 0, "is_reverse": true}}
22+
]}
23+
]
24+
}

test/add/multi.vcf

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
##fileformat=VCFv4.0
2+
##fileDate=20090805
3+
##source=myImputationProgramV3.1
4+
##reference=1000GenomesPilot-NCBI36
5+
##phasing=partial
6+
##FILTER=<ID=q10,Description="Quality below 10">
7+
##FILTER=<ID=s50,Description="Less than 50% of samples have data">
8+
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
9+
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT SAMPLE1 SAMPLE2 SAMPLE3 SAMPLE4
10+
ref 18 . TC T 100 PASS . GT 1/0 0/0 0|0 ././1
11+
ref 21 . CGA GAC 100 PASS . GT 0/1 0/0 ./1 ./1/.
12+
ref 23 . A AC 100 PASS . GT 0/0 1/0 . ./0
13+
ref3 18 . TC T 100 PASS . GT 1/0 0/0 0|0 ././1
14+
ref3 21 . CGA GAC 100 PASS . GT 0/1 0/0 ./1 ./1/.
15+
ref3 23 . A AC 100 PASS . GT 0/0 1/0 . ./0

test/t/31_vg_add.t

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ BASH_TAP_ROOT=../deps/bash-tap
55

66
PATH=../bin:$PATH # for vg
77

8-
plan tests 11
8+
plan tests 12
99

1010
vg construct -r add/ref.fa > ref.vg
1111
vg add -v add/benedict.vcf ref.vg > benedict.vg
@@ -44,6 +44,8 @@ is "$(vg view -c x.vg | jq -c '.path[].mapping[] | select(.rank | not)' | wc -l)
4444

4545
is "$(vg view -Jv add/backward.json | vg add -v add/benedict.vcf - | vg mod --unchop - | vg stats -N -)" "5" "graphs with backward nodes can be added to"
4646

47+
is "$(vg view -Jv add/multi.json | vg add -v add/multi.vcf - | vg mod --unchop - | vg stats -N -)" "5" "graphs with multiple overlapping paths nodes can be added to"
48+
4749
is "$(vg view -Jv add/backward_and_forward.json | vg add -v add/benedict.vcf - | vg mod --unchop - | vg stats -N -)" "5" "graphs with backward and forward nodes can be added to"
4850

4951
rm -rf ref.vg ref.pg benedict.vg benedict2.vg benedict3.vg x-ref.vg x.vg refN.vg no-n.vg with-n.vg ngap.vg ngap-add.vg

0 commit comments

Comments
 (0)