Skip to content

Commit

Permalink
Merge pull request #4128 from vgteam/gcsa-mem-limit
Browse files Browse the repository at this point in the history
Extend autoindex rewinding functionality to apply to GCSA2 memory use
  • Loading branch information
jeizenga authored Oct 21, 2023
2 parents 26134a9 + 4ecdecb commit 8341a7a
Show file tree
Hide file tree
Showing 7 changed files with 22 additions and 9 deletions.
2 changes: 1 addition & 1 deletion deps/gcsa2
2 changes: 1 addition & 1 deletion src/build_index.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ void build_gcsa_lcp(const HandleGraph& graph,
params.reduceLimit(kmer_bytes);

// set up the input graph using the kmers
gcsa::InputGraph input_graph({ tmpfile }, true);
gcsa::InputGraph input_graph({ tmpfile }, true, params);
// run the GCSA construction
gcsa = new gcsa::GCSA(input_graph, params);
// and the LCP array construction
Expand Down
10 changes: 8 additions & 2 deletions src/index_registry.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3576,6 +3576,8 @@ IndexRegistry VGIndexes::get_vg_index_registry() {
auto params = gcsa::ConstructionParameters();
params.setSteps(IndexingParameters::gcsa_doubling_steps);
params.setLimitBytes(IndexingParameters::gcsa_size_limit);
// we use the literal limit here because this is a measurement of memory use, not an estimate
params.setMemoryLimitBytes(plan->literal_target_memory_usage());

#ifdef debug_index_registry_recipes
cerr << "enumerating k-mers for input pruned graphs:" << endl;
Expand Down Expand Up @@ -3615,7 +3617,7 @@ IndexRegistry VGIndexes::get_vg_index_registry() {

// construct the indexes (giving empty mapping name is sufficient to make
// indexing skip the unfolded code path)
gcsa::InputGraph input_graph(dbg_names, true, gcsa::Alphabet(),
gcsa::InputGraph input_graph(dbg_names, true, params, gcsa::Alphabet(),
mapping_filename);
gcsa::GCSA gcsa_index(input_graph, params);
gcsa::LCPArray lcp_array(input_graph, params);
Expand All @@ -3639,7 +3641,7 @@ IndexRegistry VGIndexes::get_vg_index_registry() {
// update pruning params
IndexingParameters::pruning_walk_length *= IndexingParameters::pruning_walk_length_increase_factor;
IndexingParameters::pruning_max_node_degree *= IndexingParameters::pruning_max_node_degree_decrease_factor;
string msg = "[IndexRegistry]: Exceeded disk use limit while performing k-mer doubling steps. "
string msg = "[IndexRegistry]: Exceeded disk or memory use limit while performing k-mer doubling steps. "
"Rewinding to pruning step with more aggressive pruning to simplify the graph.";
throw RewindPlanException(msg, pruned_graphs);
}
Expand Down Expand Up @@ -4135,6 +4137,10 @@ bool IndexingPlan::is_intermediate(const IndexName& identifier) const {
/// The registry's target memory usage scaled by
/// IndexingParameters::max_memory_proportion; the product is converted to
/// int64_t on return.
int64_t IndexingPlan::target_memory_usage() const {
    const auto unscaled_limit = registry->get_target_memory_usage();
    return IndexingParameters::max_memory_proportion * unscaled_limit;
}

/// The registry's target memory usage exactly as reported, with no
/// max_memory_proportion scaling applied.
int64_t IndexingPlan::literal_target_memory_usage() const {
    const auto unscaled_limit = registry->get_target_memory_usage();
    return unscaled_limit;
}

string IndexingPlan::output_filepath(const IndexName& identifier) const {
return output_filepath(identifier, 0, 1);
Expand Down
5 changes: 4 additions & 1 deletion src/index_registry.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -160,8 +160,11 @@ class IndexingPlan {
/// plan, and false if it is to be preserved.
bool is_intermediate(const IndexName& identifier) const;

/// TODO: is this where this function wants to live?
// TODO: is this where this function wants to live?
/// The memory limit, with a little slosh for prediction inaccuracy
int64_t target_memory_usage() const;
/// The memory limit with no slosh
int64_t literal_target_memory_usage() const;

/// Returns the recipes in the plan that depend on this index, including the one in which
/// it was created (if any)
Expand Down
2 changes: 1 addition & 1 deletion src/subcommand/index_main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -599,7 +599,7 @@ int main_index(int argc, char** argv) {
if (show_progress) {
cerr << "Building the GCSA2 index..." << endl;
}
gcsa::InputGraph input_graph(dbg_names, true, gcsa::Alphabet(), mapping_name);
gcsa::InputGraph input_graph(dbg_names, true, params, gcsa::Alphabet(), mapping_name);
gcsa::GCSA gcsa_index(input_graph, params);
gcsa::LCPArray lcp_array(input_graph, params);
if (show_progress) {
Expand Down
4 changes: 2 additions & 2 deletions src/subcommand/msga_main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -625,8 +625,8 @@ int main_msga(int argc, char** argv) {
write_gcsa_kmers_to_tmpfile(path_graph, idx_kmer_size, limit, head_id, tail_id));
});
// Make the index with the kmers
gcsa::InputGraph input_graph(tmpfiles, true);
gcsa::ConstructionParameters params;
gcsa::ConstructionParameters params;
gcsa::InputGraph input_graph(tmpfiles, true, params);
params.setSteps(doubling_steps);
// build the GCSA index
gcsaidx = new gcsa::GCSA(input_graph, params);
Expand Down
6 changes: 5 additions & 1 deletion test/t/52_vg_autoindex.t
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ BASH_TAP_ROOT=../deps/bash-tap

PATH=../bin:$PATH # for vg

plan tests 47
plan tests 49

rm auto.*

Expand Down Expand Up @@ -151,5 +151,9 @@ is "$(echo $?)" 0 "Indexing is successful after rewinding from k-mer generation"
is "$(vg autoindex -p auto -w map --gcsa-size-limit 2000000 -g graphs/linked_cycles.gfa 2>&1 | grep Rewind | wc -l)" 1 "Running out of room during GCSA2 indexing triggers a rewind"
is "$(echo $?)" 0 "Indexing is successful after rewinding from GCSA2 indexing"

# use the memory limit to trigger a rewind
is "$(vg autoindex -p auto -w map -M 512M -g graphs/linked_cycles.gfa 2>&1 | grep Rewind | wc -l)" 1 "Running out of memory during GCSA2 indexing triggers a rewind"
is "$(echo $?)" 0 "Indexing is successful after rewinding from GCSA2 indexing"

rm auto.*
rm read.fq read.gam

2 comments on commit 8341a7a

@adamnovak
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

vg CI tests complete for merge to master. View the full report here.

16 tests passed, 0 tests failed and 0 tests skipped in 17580 seconds

@adamnovak
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

vg CI tests complete for branch v1.52.0. View the full report here.

16 tests passed, 0 tests failed and 0 tests skipped in 17638 seconds

Please sign in to comment.