Merge pull request #3814 from vgteam/fix-disk-limit
Restore disk limit for GCSA2 indexing in autoindex
jeizenga authored Dec 16, 2022
2 parents 2bc7439 + 9bdb32f commit 8570100
Showing 2 changed files with 3 additions and 62 deletions.
63 changes: 2 additions & 61 deletions src/index_registry.cpp
@@ -96,7 +96,7 @@ double IndexingParameters::pruning_walk_length_increase_factor = 1.5;
double IndexingParameters::pruning_max_node_degree_decrease_factor = 0.75;
int IndexingParameters::gcsa_initial_kmer_length = gcsa::Key::MAX_LENGTH;
int IndexingParameters::gcsa_doubling_steps = gcsa::ConstructionParameters::DOUBLING_STEPS;
int64_t IndexingParameters::gcsa_size_limit = 2ll * 1024ll * 1024ll * 1024ll;
int64_t IndexingParameters::gcsa_size_limit = 2ll * 1024ll * 1024ll * 1024ll * 1024ll;
int64_t IndexingParameters::gbwt_insert_batch_size = gbwt::DynamicGBWT::INSERT_BATCH_SIZE;
int IndexingParameters::gbwt_insert_batch_size_increase_factor = 10;
int IndexingParameters::gbwt_sampling_interval = gbwt::DynamicGBWT::SAMPLE_INTERVAL;
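The restored default is 2 TiB (the added line) in place of the 2 GiB value being removed. The repeated ll suffixes matter: they keep the multiplication in 64-bit arithmetic, since either product overflows a 32-bit int. A minimal standalone sketch (not part of the diff) that just prints the two constants:

    #include <cstdint>
    #include <iostream>

    int main() {
        // Removed value: 2 GiB. Restored value: 2 TiB. The ll suffixes force
        // 64-bit multiplication; without them the products would overflow int.
        int64_t old_limit = 2ll * 1024ll * 1024ll * 1024ll;           // 2147483648
        int64_t new_limit = 2ll * 1024ll * 1024ll * 1024ll * 1024ll;  // 2199023255552
        std::cout << old_limit << " bytes (2 GiB)\n"
                  << new_limit << " bytes (2 TiB)\n";
        return 0;
    }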
@@ -2343,65 +2343,6 @@ IndexRegistry VGIndexes::get_vg_index_registry() {
// MaxNodeID Recipes
////////////////////////////////////

// these recipes actually kinda mess up everything now that we just make the max node id
// alongside the graphs during construction

//#ifdef debug_index_registry_setup
// cerr << "registering MaxNodeID recipes" << endl;
//#endif
//
// // meta-recipe to write max node id down to a file
// auto write_max_node_id = [](const vector<const IndexFile*>& inputs,
// const IndexingPlan* plan,
// const IndexGroup& constructing) {
//
// if (IndexingParameters::verbosity != IndexingParameters::None) {
// cerr << "[IndexRegistry]: Determining node ID interval." << endl;
// }
//
// // TODO: this is pretty unoptimized in that we have to load the whole graph just
// // to read the max node id
//
// assert(constructing.size() == 1);
// assert(inputs.size() == 1);
// vector<vector<string>> all_outputs(constructing.size());
// auto output_index = *constructing.begin();
// auto graph_files = inputs.at(0)->get_filenames();
//
// // test I/O
// for (const string& graph_file : graph_files) {
// ifstream infile;
// init_in(infile, graph_file);
// }
// string output_name = plan->output_filepath(output_index);
// ofstream outfile;
// init_out(outfile, output_name);
//
// VGset graph_set(graph_files);
// nid_t max_node_id = graph_set.max_node_id();
//
// outfile << max_node_id;
//
// all_outputs[0].push_back(output_name);
// return all_outputs;
// };
//
// registry.register_recipe({"MaxNodeID"}, {"VG"},
// [write_max_node_id](const vector<const IndexFile*>& inputs,
// const IndexingPlan* plan,
// AliasGraph& alias_graph,
// const IndexGroup& constructing) {
// return write_max_node_id(inputs, plan, constructing);
// });
//
// registry.register_recipe({"Spliced MaxNodeID"}, {"Spliced VG w/ Transcript Paths"},
// [write_max_node_id](const vector<const IndexFile*>& inputs,
// const IndexingPlan* plan,
// AliasGraph& alias_graph,
// const IndexGroup& constructing) {
// return write_max_node_id(inputs, plan, constructing);
// });

////////////////////////////////////
// GBWT Recipes
////////////////////////////////////
@@ -5077,7 +5018,7 @@ string IndexRegistry::to_dot(const vector<IndexName>& targets) const {
for (size_t priority_idx = 0; priority_idx < recipes.size(); ++priority_idx, ++recipe_idx) {
const auto& recipe = recipes[priority_idx];
string recipe_dot_id = "R" + to_string(recipe_idx);
recipe_to_dot_id[RecipeName(recipe_record.first, recipe_idx)] = recipe_dot_id;
recipe_to_dot_id[RecipeName(recipe_record.first, priority_idx)] = recipe_dot_id;
bool recipe_in_plan = plan_elements.count(RecipeName(recipe_record.first, priority_idx));
if (recipe_in_plan) {
strm << recipe_dot_id << "[label=\"" << priority_idx << "\" shape=circle style=bold];" << endl;
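The one-line to_dot() change swaps the key used when recording a recipe's DOT node ID: the plan_elements check on the following line already looks entries up by RecipeName(group, priority_idx), so storing them under the running recipe_idx counter would not match once more than one index group has been visited. A minimal sketch of the keying pattern, using simplified stand-in types (IndexGroup as a string, RecipeName as a pair; these are assumptions for illustration, not vg's real definitions):

    #include <iostream>
    #include <map>
    #include <string>
    #include <utility>
    #include <vector>

    // Simplified stand-ins for the registry's types; assumptions for illustration only.
    using IndexGroup = std::string;
    using RecipeName = std::pair<IndexGroup, size_t>;

    int main() {
        std::map<RecipeName, std::string> recipe_to_dot_id;
        std::map<IndexGroup, std::vector<std::string>> recipes_by_group = {
            {"GBWT", {"recipe C"}},
            {"GCSA", {"recipe A", "recipe B"}}
        };

        size_t recipe_idx = 0;  // running counter over every recipe in every group
        for (const auto& record : recipes_by_group) {
            for (size_t priority_idx = 0; priority_idx < record.second.size();
                 ++priority_idx, ++recipe_idx) {
                // The fix: key by the within-group priority_idx, not the global
                // recipe_idx, so later lookups by (group, priority) find the entry.
                recipe_to_dot_id[RecipeName(record.first, priority_idx)] =
                    "R" + std::to_string(recipe_idx);
            }
        }

        // A later lookup like to_dot()'s plan_elements check uses priority_idx.
        std::cout << recipe_to_dot_id.at(RecipeName("GCSA", 1)) << std::endl;  // prints R2
        return 0;
    }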
2 changes: 1 addition & 1 deletion src/kmer.cpp
@@ -299,7 +299,7 @@ void write_gcsa_kmers(const HandleGraph& graph, int kmer_size, ostream& out, siz
if (!size_limit_exceeded.load()) {
// we didn't exceed the size limit while waiting for the critical block
if (total_bytes + bytes_required > size_limit) {
cerr << "error: [write_gcsa_kmers()] size limit exceeded" << endl;
cerr << "error: [write_gcsa_kmers()] size limit of " << size_limit << " bytes exceeded" << endl;
size_limit_exceeded.store(1);
}
else {
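The kmer.cpp change only adds the configured limit to the error message, but the surrounding code shows the enforcement pattern: each thread tallies the bytes its kmer batch needs, and a shared atomic flag plus a critical section (per the "critical block" comment in the context above) decide whether the global total would cross size_limit. A minimal standalone sketch of that pattern, assuming OpenMP and using illustrative variable names rather than vg's actual ones:

    #include <atomic>
    #include <cstdint>
    #include <iostream>

    int main() {
        const int64_t size_limit = 1024;          // illustrative limit in bytes
        int64_t total_bytes = 0;                  // shared total, guarded by the critical section
        std::atomic<int> size_limit_exceeded(0);  // sticky flag, checked outside the critical section

        #pragma omp parallel for
        for (int i = 0; i < 64; ++i) {
            int64_t bytes_required = 100;  // stand-in for one thread's kmer batch size
            if (size_limit_exceeded.load()) {
                continue;  // another thread already hit the limit; skip further work
            }
            #pragma omp critical
            {
                if (!size_limit_exceeded.load()) {
                    // mirror of the check in write_gcsa_kmers(): would this batch push us over?
                    if (total_bytes + bytes_required > size_limit) {
                        std::cerr << "error: size limit of " << size_limit
                                  << " bytes exceeded" << std::endl;
                        size_limit_exceeded.store(1);
                    }
                    else {
                        total_bytes += bytes_required;  // commit the batch to the running total
                    }
                }
            }
        }
        std::cout << "wrote " << total_bytes << " of " << size_limit << " bytes" << std::endl;
        return 0;
    }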

2 comments on commit 8570100

@adamnovak
Member


vg CI tests complete for merge to master. View the full report here.

16 tests passed, 0 tests failed and 0 tests skipped in 10904 seconds

@adamnovak
Member


vg CI tests complete for branch v1.45.0. View the full report here.

10 tests passed, 6 tests failed and 0 tests skipped in 8287 seconds

Failed tests:

  • test_sim_chr21_snp1kg (2040 seconds)
  • test_sim_mhc_cactus (204 seconds)
  • test_sim_mhc_snp1kg (395 seconds)
  • test_sim_mhc_snp1kg_mpmap (323 seconds)
  • test_sim_chr21_snp1kg_trained (1811 seconds)
  • test_sim_yeast_cactus (1007 seconds)
