From b0fc8bcddb970397521fd8cce1f3c91316c544be Mon Sep 17 00:00:00 2001 From: lutfia95 Date: Fri, 28 Jan 2022 23:15:33 +0100 Subject: [PATCH 1/8] static assertion check --- config.toml | 15 +- src/config/configReader.cpp | 634 +++++++++++++-------------------- src/config/configReader.hpp | 102 +++--- src/main/adaptive_sampling.hpp | 2 +- src/main/classify.hpp | 137 ++++--- src/main/ibfbuild.hpp | 5 +- src/main/main.cpp | 369 +++++++------------ 7 files changed, 540 insertions(+), 724 deletions(-) diff --git a/config.toml b/config.toml index de8766d..e083470 100644 --- a/config.toml +++ b/config.toml @@ -1,24 +1,23 @@ - -usage = "classify" #["build", "target", "classify"] +usage = "classify" #["build", "target", "classify", "test"] output_directory = 'RB_out' log_directory = 'RB_out/log' [IBF] -kmer_size = 13 #(unsigned integer with default 13) only required for 'usage = "build"' or if target_file/deplete_file is a fasta formate file +kmer_size = 15 #(unsigned integer with default 13) only required for 'usage = "build"' or if target_file/deplete_file is a fasta formate file fragment_size = 100000 #(unsigned integer with default 100000) only required for 'usage = "build" or if target_file/deplete_file is a fasta formate file threads = 3 #(unsigned integer with default 3) -target_files = ['C:/ReadBouncer/build/main/Release/Listeria_monocytogenes_ATCC_19115_.fasta','C:/ReadBouncer/build/main/Release/Pseudomonas_aeruginosa_complete_genome.fasta'] -deplete_files = ['C:/ReadBouncer/build/main/Release/Bacillus_subtilis_complete_genome.fasta','C:/ReadBouncer/build/main/Release/Enterococcus_faecalis_complete_genome.fasta'] -read_files = ['C:/ReadBouncer/build/main/Release/Listeria.fastq','C:/ReadBouncer/build/main/Release/SaccharomycesReal.fasta'] +target_files = ['/mnt/c/ReadBouncerToml/build/main/Release/Listeria_monocytogenes_ATCC_19115_.fasta','/mnt/c/ReadBouncerToml/build/main/Release/Pseudomonas_aeruginosa_complete_genome.fasta'] +deplete_files = ['/mnt/c/ReadBouncerToml/build/main/Release/Bacillus_subtilis_complete_genome.fasta','/mnt/c/ReadBouncerToml/build/main/Release/Enterococcus_faecalis_complete_genome.fasta'] +read_files = ['/mnt/c/ReadBouncerToml/build/main/Release/Listeria.fastq','/mnt/c/ReadBouncerToml/build/main/Release/SaccharomycesReal.fasta'] exp_seq_error_rate = 0.1 #(unsigned float between 0 and 1 default 0.1) chunk_length = 350 #(unsigned integer with default 250) max_chunks = 1 #(unsigned integer with default 5) [MinKNOW] -host = "localhost"#(ip address or name of the computer hosting MinKNOW) -port = 9501 #(port number used fo grpc communication by by MinKNOW instance) +host = "localhost" #(ip address or name of the computer hosting MinKNOW) +port = "9501" #(port number used fo grpc communication by by MinKNOW instance) flowcell = "MS00000" #(name of the flowcell used) diff --git a/src/config/configReader.cpp b/src/config/configReader.cpp index 3630a58..45bf299 100644 --- a/src/config/configReader.cpp +++ b/src/config/configReader.cpp @@ -7,7 +7,7 @@ #include // seqan libraries #include -#include +#include //#include @@ -26,66 +26,181 @@ typedef seqan::BinningDirectory< seqan::InterleavedBloomFilter, seqan::BDConfig< seqan::Dna5, seqan::Normal, seqan::Uncompressed > > TIbf_; +/** + * ConfigReader constructor + * @param tomlFile /path/to/config.toml + */ -configReader::configReader(std::string const fileName) { +ConfigReader::ConfigReader(std::string const tomlFile) { - this->tomlFile = fileName; + this->tomlInputFile = tomlFile; + std::ifstream tomlFileReadBouncer(tomlInputFile, std::ios_base::binary); + this->configuration_ = toml::parse(tomlFileReadBouncer, /*optional -> */ tomlInputFile); + + if (!tomlFileReadBouncer.is_open()) { + std::cerr << "Error parsing the toml file: " << tomlInputFile << '\n'; + } + }; /** - * Find the usage from toml file - * @return: One usage of [build, classify, deplete, target] + * Parse output dir with usage + * */ -std::string configReader::usage() { - - std::ifstream tomlFileReadBouncer(this->tomlFile, std::ios_base::binary); - assert(tomlFileReadBouncer.good()); +void ConfigReader::parse_general(){ + + try + { + this->log_dir = toml::find(this->configuration_, "log_directory"); + this->log_dir = log_dir.make_preferred(); + + if (!std::filesystem::is_directory(this->log_dir) || !std::filesystem::exists(this->log_dir)) + { + std::filesystem::create_directories(this->log_dir); + } + + this->output_dir = toml::find(this->configuration_, "output_directory"); + this->output_dir = this->output_dir.make_preferred(); + + if (!std::filesystem::is_directory(this->output_dir) || !std::filesystem::exists(this->output_dir)) + { + std::filesystem::create_directories(this->output_dir); + } + this->usage = toml::find(configuration_, "usage"); + + } + catch (const toml::exception& e) + { + std::cerr << "Could not parse " << tomlInputFile << std::endl; + std::cerr << e.what() << std::endl; + } + catch (std::out_of_range& e) + { + std::cerr << "Error in " << tomlInputFile << std::endl; + std::cerr << e.what() << std::endl; + + } +} - if (tomlFileReadBouncer.is_open()) { - std::cout << "We could open and read the toml file: " << this->tomlFile << '\n'; - } - else { +/** + * Write log file based on usage path/to/log_dir/configLog.toml + * @param usage [build, classify, target, test] + */ - std::cerr << "We couldn't parse or read the toml file: " << this->tomlFile << '\n'; - } +void ConfigReader::createLog(std::string& usage){ - const auto ReadBouncer = toml::parse(tomlFileReadBouncer, /*optional -> */ this->tomlFile); + std::filesystem::path configLog(this->output_dir); + configLog /= "configLog.toml"; + std::fstream outputLog(configLog, std::ios::app | std::ios::out | std::ios::in); - this->toml = ReadBouncer; - const auto usage = toml::find(ReadBouncer, "usage"); + toml::value tbl; + toml::value target_files(toml::array{}); + for (std::filesystem::path file : IBF_Parsed.target_files) + target_files.push_back(file.string()); - return usage; + toml::value deplete_files(toml::array{}); + for (std::filesystem::path file : IBF_Parsed.deplete_files) + deplete_files.push_back(file.string()); -} + toml::value read_files(toml::array{}); + for (std::filesystem::path file : IBF_Parsed.read_files) + read_files.push_back(file.string()); + + if (usage == "build"){ + + tbl = toml::value{ { + {usage, toml::table{{ + { "target_files", target_files}, + { "deplete_files", deplete_files}, + { "kmer-size", IBF_Parsed.size_k }, + { "threads", IBF_Parsed.threads }, + { "fragment-size", IBF_Parsed.fragment_size} + }} + }, + } }; + + } -/** - * Write a log from toml file - * @return: toml file - */ + else if (usage == "classify"){ + + tbl = toml::value{ { + {usage, toml::table{{ + { "target_files", target_files}, + { "deplete_files", deplete_files}, + { "read_files", read_files}, + { "kmer-size", IBF_Parsed.size_k }, + { "threads", IBF_Parsed.threads }, + { "fragment-size", IBF_Parsed.fragment_size}, + { "exp_seq_error_rate", IBF_Parsed.error_rate}, + { "chunk_length", IBF_Parsed.chunk_length}, + { "max_chunks", IBF_Parsed.max_chunks}, + + }} + }, + } }; + } + + else if (usage == "target"){ + + tbl = toml::value{ { + {usage, toml::table{{ + { "target_files", target_files}, + { "deplete_files", deplete_files}, + { "kmer-size", IBF_Parsed.size_k }, + { "threads", IBF_Parsed.threads }, + { "fragment-size", IBF_Parsed.fragment_size}, + { "exp_seq_error_rate", IBF_Parsed.error_rate}, + { "chunk_length", IBF_Parsed.chunk_length}, + { "max_chunks", IBF_Parsed.max_chunks}, + {"host" , MinKNOW_Parsed.host}, + {"port" , MinKNOW_Parsed.port}, + {"flowcell" , MinKNOW_Parsed.flowcell}, + {"caller", Basecaller_Parsed.caller}, + {"host", Basecaller_Parsed.guppy_host}, + {"port", Basecaller_Parsed.guppy_port}, + {"threads", Basecaller_Parsed.basecall_threads}, + + }} + }, + } }; + } + else if (usage == "test"){ + tbl = toml::value{ { + {usage, toml::table{{ + {"host" , MinKNOW_Parsed.host}, + {"port" , MinKNOW_Parsed.port}, + {"flowcell" , MinKNOW_Parsed.flowcell}, + }} + }, + } }; -std::fstream configReader::writeTOML() { + } + - auto output_fileTOML = toml::find(this->toml, "output_directory"); - output_fileTOML += "/configLog.toml"; - std::fstream tomlOutput(output_fileTOML, std::ios::app | std::ios::out | std::ios::in); + auto start = std::chrono::system_clock::now(); + auto end = std::chrono::system_clock::now(); + std::chrono::duration elapsed_seconds = end - start; + std::time_t end_time = std::chrono::system_clock::to_time_t(end); - return tomlOutput; - + outputLog << "#Computation time: " << std::ctime(&end_time) << '\n'; + outputLog << toml::format(tbl) << '\n'; + outputLog.close(); } /** * Check if the target/deplete input files are IBF or not - * @param : File name - * @return: Bool due to decision + * @param file input deplete/target file + * @return Bool due to decision + * @throw seqan::Exception if fasta file */ -bool configReader::filterException(std::filesystem::path& file) { +bool ConfigReader::filterException(std::filesystem::path& file) { TIbf_ filter; @@ -101,160 +216,112 @@ bool configReader::filterException(std::filesystem::path& file) { return true; } -// Copied from buildIBF (src/main/ibfbuild.hpp) - -void configReader::buildIBF_(IBF_Build_Params& parser) -{ - std::shared_ptr nanolive_logger = spdlog::get("ReadBouncerLog"); - interleave::IBFConfig config{}; - - config.reference_files.emplace_back(parser.reference_file.string()); - config.output_filter_file = parser.bloom_filter_output_path.string(); - config.kmer_size = parser.size_k; - config.threads_build = parser.threads; - config.fragment_length = parser.fragment_size; - config.filter_size = parser.filter_size; - config.verbose = parser.verbose; - - interleave::IBF filter{}; - try - { - interleave::FilterStats stats = filter.create_filter(config); - interleave::print_build_stats(stats); - } - catch (const interleave::IBFBuildException& e) - { - nanolive_logger->error("Error building IBF using the following parameters"); - nanolive_logger->error("Input reference file : " + parser.reference_file.string()); - nanolive_logger->error("Output IBF file : " + parser.bloom_filter_output_path.string()); - nanolive_logger->error("Kmer size : " + parser.size_k); - nanolive_logger->error("Size of reference fragments per bin : " + parser.fragment_size); - nanolive_logger->error("IBF file size in MegaBytes : " + parser.filter_size); - nanolive_logger->error("Building threads : " + parser.threads); - nanolive_logger->error("Error message : " + std::string(e.what())); - nanolive_logger->error("---------------------------------------------------------------------------------------------------"); - nanolive_logger->flush(); - throw; - } - -} - /** - * Parse parameters from toml file to build IBF from the reference sequence(s) - * @param : Toml output file, usage, a list of target and deplete files - * @return: Struct with the needed parameters to construct IBF (all IBF will be constructed within ibfReader) + * Parse all parameters from [IBF] module + * @throw ConfigReader::Exception */ -configReader::IBF_Build_Params configReader::ibfReader(std::fstream& tomlOutput, std::string& usage, - std::vector& target_files_, - std::vector& deplete_files_) { - IBF_Build_Params build_IBF; - int k, t, f; - std::filesystem::path output_fileTOML{}; +void ConfigReader::readIBF(){ + + std::vector rf_tmp; + try { - output_fileTOML = toml::find(this->toml, "output_directory"); - output_fileTOML = output_fileTOML.make_preferred(); - toml::value IBF = toml::find(this->toml, "IBF"); - - k = toml::find_or(this->toml, "IBF", "kmer_size", 13); - t = toml::find_or(this->toml, "IBF", "threads", 1); - f = toml::find_or(this->toml, "IBF", "fragment_size", 100000); + IBF_Parsed.size_k= toml::find_or(this->configuration_, "IBF", "kmer_size", 13); + IBF_Parsed.fragment_size = toml::find_or(this->configuration_, "IBF", "fragment_size", 100000); + IBF_Parsed.threads = toml::find_or(this->configuration_, "IBF", "threads", 1); + IBF_Parsed.error_rate = toml::find_or(this->configuration_, "IBF", "exp_seq_error_rate", 0.1); + IBF_Parsed.chunk_length = toml::find_or(this->configuration_, "IBF", "chunk_length", 250); + IBF_Parsed.max_chunks = toml::find_or(this->configuration_, "IBF", "max_chunks", 5); } catch (std::out_of_range& e) { // TODO: write message in log file - throw ConfigReaderException(e.what()); + throw ConfigReader(e.what()); } - - for (std::filesystem::path file : target_files_) - { - if (!std::filesystem::exists(file)) - { - // TODO: write message in log file - throw ConfigReaderException("[Error] The following target file does not exist: " + file.string()); - } - if (!configReader::filterException(file)) - { - - // TODO: write in log file - std::cout << "The target file is a fasta file, start building ibf ......." << '\n'; - - std::filesystem::path target = std::filesystem::path(output_fileTOML); - target /= file; - target.replace_extension("ibf"); - - build_IBF = { target, file, false, false, k, t, f, 0, true }; - buildIBF_(build_IBF); + try + { + std::vector tmp = toml::find>(this->configuration_, "IBF", "target_files"); + for (std::string s : tmp) + IBF_Parsed.target_files.emplace_back((std::filesystem::path(s)).make_preferred()); + tmp.clear(); + tmp = toml::find>(this->configuration_, "IBF", "deplete_files"); + for (std::string s : tmp) + IBF_Parsed.deplete_files.emplace_back((std::filesystem::path(s)).make_preferred()); + } + catch (toml::exception& e) + { + throw ConfigReader(e.what()); + + } + + for (std::filesystem::path file : IBF_Parsed.target_files) + { + if (!std::filesystem::exists(file)) + { + // TODO: write message in log file + throw ConfigReader("[Error] The following target file does not exist: " + file.string()); + } + } + + for (std::filesystem::path file : IBF_Parsed.deplete_files) + { + if (!std::filesystem::exists(file)) + { + // TODO: write message in log file + throw ConfigReader("[Error] The following deplete file does not exist: " + file.string()); + } + } - } + try + { + rf_tmp = toml::find>(this->configuration_, "IBF", "read_files"); } + catch (toml::exception& e) + { - for (std::filesystem::path file : deplete_files_) + throw ConfigReader(e.what()); + } + + for (std::string file : rf_tmp) { - if (!std::filesystem::exists(file)) + std::filesystem::path rf(file); + rf = rf.make_preferred(); + + if (!std::filesystem::exists(rf)) { // TODO: write message in log file - throw ConfigReaderException("[Error] The following target file does not exist: " + file.string()); + throw ConfigReader("[Error] The following read file does not exist: " + rf.string()); } - - if (!configReader::filterException(file)) + else { - - // TODO: write in log file - std::cout << "The deplete file is a fasta file, start building ibf ......." << '\n'; - - - std::filesystem::path deplete = std::filesystem::path(output_fileTOML); - deplete /= file; - deplete.replace_extension("ibf"); - - build_IBF = { deplete, file, false, false, k, t, f, 0, true }; - buildIBF_(build_IBF); - + IBF_Parsed.read_files.emplace_back(std::move(rf)); } } - - return build_IBF; -}; +} /** - * Parse parameters from toml file for reads classification - * @param : Toml output file, usage, a list of target and deplete files - * @return: Struct with the needed parameters to classify reads + * Parse all parameters from [MinKNOW] module + * @throw ConfigReader::Exception */ -configReader::Classify_Params configReader::classifyReader(std::fstream& tomlOutput, std::string& usage, - std::vector& target_files_, - std::vector& deplete_files_) { +void ConfigReader::readMinKNOW(){ - Classify_Params classifyStruct; - int k, t, f, l, m; - double e; - std::vector read_files{}; - std::filesystem::path output_fileTOML{}; - std::vector rf_tmp{}; try { - std::string out = toml::find(this->toml, "output_directory"); - output_fileTOML = std::filesystem::path(out).make_preferred(); - toml::value IBF = toml::find(this->toml, "IBF"); - - k = toml::find_or(this->toml, "IBF", "kmer_size", 13); - t = toml::find_or(this->toml, "IBF","threads", 1); - f = toml::find_or(this->toml, "IBF", "fragment_size", 100000); - e = toml::find_or(this->toml, "IBF", "exp_seq_error_rate", 0.1); - l = toml::find_or(this->toml, "IBF", "chunk_length", 250); - m = toml::find_or(this->toml, "IBF", "max_chunks", 5); - - rf_tmp = toml::find>(IBF, "read_files"); + toml::value MinKNOW = toml::find(this->configuration_, "MinKNOW"); + MinKNOW_Parsed.flowcell = toml::find(MinKNOW, "flowcell"); + MinKNOW_Parsed.host = toml::find_or(MinKNOW, "host", "127.0.0.1"); + MinKNOW_Parsed.port = toml::find_or(MinKNOW, "port", "9501"); + //channels = toml::find_or>(MinKNOW, "channels", std::vector{}); + } catch (std::out_of_range& e) { @@ -262,100 +329,58 @@ configReader::Classify_Params configReader::classifyReader(std::fstream& tomlOut throw ConfigReaderException(e.what()); } - for (std::string file : rf_tmp) - { - std::filesystem::path rf(file); - rf = rf.make_preferred(); - if (!std::filesystem::exists(rf)) - { - // TODO: write message in log file - throw ConfigReaderException("[Error] The following read file does not exist: " + rf.string()); - } - else - { - read_files.emplace_back(std::move(rf)); - } +} - } +/** + * Parse all parameters from [Basecaller] module + * @throw ConfigReader::Exception + */ - std::vector target_holder{}; - std::vector deplete_holder{}; +void ConfigReader::readBasecaller(){ - for (std::filesystem::path file : target_files_) + try { - if (!std::filesystem::exists(file)) - { - // TODO: write message in log file - throw ConfigReaderException("[Error] The following target file does not exist: " + file.string()); - } - - if (configReader::filterException(file)) - { - target_holder.emplace_back(std::move(file));// If ibf then the target in the same dir - } - else - { - - // TODO: write in log file - std::cout << "The target file is a fasta file, start building ibf ......." << '\n'; - - - std::filesystem::path target = std::filesystem::path(output_fileTOML.string()); - target /= file.filename(); - target.replace_extension("ibf"); - - IBF_Build_Params build_IBF = { target, file, false, false, k, t, f, 0, true }; - buildIBF_(build_IBF); - target_holder.emplace_back(std::move(target)); - } - + toml::value basecaller = toml::find(this->configuration_, "Basecaller"); + Basecaller_Parsed.caller = toml::find_or(basecaller, "caller", "DeepNano"); + Basecaller_Parsed.guppy_host = toml::find(basecaller, "host"); + Basecaller_Parsed.guppy_port = toml::find_or(basecaller, "port", "5555"); + Basecaller_Parsed.basecall_threads = toml::find_or(basecaller, "threads", 3); + //Basecaller_Parsed.guppy_config = toml::find_or(basecaller, "config", "dna_r9.4.1_450bps_fast"); } - - for (std::filesystem::path file : deplete_files_) + catch (std::out_of_range& e) { - if (!std::filesystem::exists(file)) - { - // TODO: write message in log file - throw ConfigReaderException("[Error] The following target file does not exist: " + file.string()); - } - - if (configReader::filterException(file)) - { - deplete_holder.emplace_back(std::move(file));// If ibf then the target in the same dir - } - else - { - - // TODO: write in log file - std::cout << "The deplete file is a fasta file, start building ibf ......." << '\n'; - - - std::filesystem::path deplete = std::filesystem::path(output_fileTOML); - deplete /= file.filename(); - deplete.replace_extension("ibf"); - - IBF_Build_Params build_IBF = { deplete, file, false, false, k, t, f, 0, true }; - buildIBF_(build_IBF); - deplete_holder.emplace_back(std::move(deplete)); - } - + // TODO: write message in log file + throw ConfigReaderException(e.what()); } - classifyStruct = { deplete_holder, target_holder, read_files, output_fileTOML, false, false, 0.95 , e, t, l, m, false }; - - return classifyStruct; +} - }; +/** + * Call private methods to parse parameters from the different three moduls + * @throw ConfigReader::Exception + */ +void ConfigReader::parse(){ + ConfigReader::readIBF(); + ConfigReader::readMinKNOW(); + ConfigReader::readBasecaller(); + +} + +//@TODO /** * Parse parameters from toml file for live reads targeting * @param : Toml output file, usage, a list of target and deplete files * @return: Struct with the needed parameters for live target ((Targeted Sequencing)) */ -configReader::Target_Params configReader::targetReader(std::fstream& tomlOutput, std::string& usage, + +/* + + +ConfigReader::Target_Params ConfigReader::targetReader( std::string& usage, std::vector& target_files_, std::vector& deplete_files_) { @@ -426,7 +451,7 @@ configReader::Target_Params configReader::targetReader(std::fstream& tomlOutput, throw ConfigReaderException("[Error] The following target file does not exist: " + file.string()); } - if (configReader::filterException(file)) + if (ConfigReader::filterException(file)) { target_holder.emplace_back(std::move(file));// If ibf then the target in the same dir } @@ -456,7 +481,7 @@ configReader::Target_Params configReader::targetReader(std::fstream& tomlOutput, throw ConfigReaderException("[Error] The following target file does not exist: " + file.string()); } - if (configReader::filterException(file)) + if (ConfigReader::filterException(file)) { deplete_holder.emplace_back(std::move(file));// If ibf then the target in the same dir } @@ -487,149 +512,4 @@ configReader::Target_Params configReader::targetReader(std::fstream& tomlOutput, significance, e, false, false, false }; return targetStruct; -}; - -/** - * Parse parameters from toml file for live reads depletion -* @param : Toml output file, usage, a list of target and deplete files -* @return: Struct with the needed parameters for live deplete -*/ -/* -configReader::live_depletion_parser_ configReader::depleteReader(std::fstream& tomlOutput, std::string usage, - std::string target_files_, std::string deplete_files_) { - - - std::string target, deplete; - configReader::ibf_build_parser_ build_IBF; - configReader::live_depletion_parser_ depletionStruct; - - auto output_fileTOML = toml::find(this->toml, "output_directory"); - - const auto& IBF = toml::find(this->toml, "IBF"); - const auto& MinKNOW = toml::find(this->toml, "MinKNOW"); - const auto& basecaller = toml::find(this->toml, "Basecaller"); - - - const auto flowcell = toml::find(MinKNOW, "flowcell"); - const auto hostIP = toml::find(MinKNOW, "host"); - toml::value port_ = toml::get(MinKNOW).at("port"); - - std::string device = flowcell; - std::string MinKNOW_host = hostIP; - int MinKNOW_port = toml::get(port_); - - - toml::value threads = toml::get(IBF).at("threads"); - double significance = 0.95; - double error_rate = toml::find(IBF, "exp_seq_error_rate"); - - const auto weights_ = "48"; - std::string weights = weights_; - int classifyThreads = toml::get(threads); - - const auto caller_ = toml::find(basecaller, "caller"); - toml::value basecaller_threads = toml::get(basecaller).at("threads"); - const auto hostCaller_ = toml::find(basecaller, "host"); - toml::value portBasecaller_ = toml::get(basecaller).at("port"); - - int basecallThreads = toml::get(basecaller_threads); - std::string caller = caller_; - std::string hostCaller = hostCaller_; - int portBasecaller = toml::get(portBasecaller_); - - - std::string target_holder, deplete_holder; - std::stringstream s_stream_1(deplete_files_); - - while (s_stream_1.good()) { - - std::string substr1; - getline(s_stream_1, substr1, ','); - - if (substr1.length() > 1) - { - if (configReader::filterException(substr1)) { - - deplete = deplete_files_ + ","; - deplete_holder += deplete;// if ibf then the deplete file.ibf is in the same dir - - } - else if (!configReader::filterException(substr1)) { - - std::cout << "The deplete file is an fasta file, start building ibf ......." << '\n'; - - deplete = output_fileTOML + substr1 + "_.ibf"; - deplete_holder = deplete_holder + deplete + ","; - - toml::value kmerS = toml::get(IBF).at("kmer_size"); - toml::value fragment_size = toml::get(IBF).at("fragment_size"); - // Load parameters to build IBF from given files - int f = toml::get(fragment_size); - int k = toml::get(kmerS); - - build_IBF = { deplete, substr1, false, false, k, classifyThreads, f, 0, true }; - - buildIBF_(build_IBF); - - } - } - - else { - - std::cout << "No deplete file found! " << '\n'; - std::cout << " " << '\n'; - deplete_holder = ","; - } - } - - - std::stringstream s_stream(target_files_); - while (s_stream.good()) { - - std::string substr; - getline(s_stream, substr, ','); - - if (substr.length() > 1) - { - if (configReader::filterException(substr)) { - - target = target_files_ + ","; - target_holder += target; - } - else if (!configReader::filterException(substr)) { - - std::cout << "The target file is an fasta file, start building ibf ......." << '\n'; - - target = output_fileTOML + substr + "_.ibf"; - target_holder = target_holder + target + ","; - - toml::value kmerS = toml::get(IBF).at("kmer_size"); - toml::value fragment_size = toml::get(IBF).at("fragment_size"); - - int f = toml::get(fragment_size); - int k = toml::get(kmerS); - - build_IBF = { target, substr, false, false, k, classifyThreads, f, 0, true }; - - buildIBF_(build_IBF); - - } - } - - else { - - std::cout << "No target file found! " << '\n'; - std::cout << " " << '\n'; - target_holder = ","; - } - } - - - deplete_holder.pop_back(); - target_holder.pop_back(); - - depletionStruct = { MinKNOW_host, device, deplete_holder, target_holder, weights, MinKNOW_port, basecallThreads, classifyThreads, significance, error_rate, false, false, false }; - - return depletionStruct; -}; -*/ +};*/ \ No newline at end of file diff --git a/src/config/configReader.hpp b/src/config/configReader.hpp index 75fccfe..c898d92 100644 --- a/src/config/configReader.hpp +++ b/src/config/configReader.hpp @@ -42,41 +42,16 @@ extern "C" }; -class configReader { +class ConfigReader { public: - // Define many structs as Lyre to keep the reproducibility of Lyra command line - struct IBF_Build_Params - { - std::filesystem::path bloom_filter_output_path{ }; - std::filesystem::path reference_file{}; - bool command = false; - bool show_help = false; - int size_k = 13; - int threads = 1; - int fragment_size = 100000; - int filter_size = 0; - bool verbose = false; - }; - - struct Classify_Params - { - std::vector ibf_deplete_files{ }; - std::vector ibf_target_files{ }; - std::vector read_files{}; - std::filesystem::path out_dir{}; - bool command = false; - bool show_help = false; - double kmer_significance = 0.95; - double error_rate = 0.1; - int threads = 1; - int preLen = 360; - int max_chunks = 1; - bool verbose = false; - }; - - struct Target_Params + toml::basic_value configuration_ {}; + std::filesystem::path output_dir{}; + std::filesystem::path log_dir{}; + std::string usage; + + struct Target_Params// TODO { std::string host = "127.0.0.1"; std::string device{}; @@ -95,41 +70,56 @@ class configReader { bool command = false; bool show_help = false; bool verbose = false; - uint8_t minChannel = 1; - uint8_t maxChannel = 512; + uint16_t minChannel = 1; + uint16_t maxChannel = 512; }; - configReader(std::string const); - - std::string usage( ); - - std::fstream writeTOML(); - - bool filterException(std::filesystem::path& file); + struct IBF_Params + { + int size_k = 13; + int fragment_size = 100000; + int threads = 1; + std::vector target_files{}; + std::vector deplete_files{}; + std::vector read_files{}; + double error_rate = 0.1; + int chunk_length = 360; + int max_chunks = 1; + }IBF_Parsed; - IBF_Build_Params ibfReader(std::fstream& tomlOutput, std::string& usage, - std::vector& target_files_, - std::vector& deplete_files_); + struct MinKNOW_Params + { + std::string host = "127.0.0.1"; + std::string port = "9501"; + std::string flowcell{}; + uint8_t minChannel = 1; + uint8_t maxChannel = 512; + }MinKNOW_Parsed; - void buildIBF_(IBF_Build_Params& parser); + struct Basecaller_Params + { + std::string caller = "DeepNano"; + std::string guppy_host = "127.0.0.1"; + std::string guppy_port = "5555"; + int basecall_threads = 3; + std::string guppy_config = "dna_r9.4.1_450bps_fast"; + }Basecaller_Parsed; - Classify_Params classifyReader(std::fstream& tomlOutput, std::string& usage, - std::vector& target_files_, - std::vector& deplete_files_); + ConfigReader(std::string const); - //live_depletion_parser_ depleteReader(std::fstream& tomlOutput, std::string usage, std::string target_files_, std::string deplete_files_); + void parse_general(); + bool filterException(std::filesystem::path& file); + void parse(); + void createLog(std::string& usage); - Target_Params targetReader(std::fstream& tomlOutput, std::string& usage, + /*Target_Params targetReader( std::string& usage, std::vector& target_files_, - std::vector& deplete_files_); - - - + std::vector& deplete_files_);*/ private: - std::string tomlFile; - toml::basic_value toml; + std::string tomlInputFile{}; + void readIBF(), readMinKNOW(), readBasecaller(); }; diff --git a/src/main/adaptive_sampling.hpp b/src/main/adaptive_sampling.hpp index dccb277..c592d25 100644 --- a/src/main/adaptive_sampling.hpp +++ b/src/main/adaptive_sampling.hpp @@ -512,7 +512,7 @@ void checkRunning(Runner& runner, readuntil::Acquisition* acq) * @parser: input from the command line * @throws: IBFBuildException */ -void adaptive_sampling(configReader::Target_Params& params) +void adaptive_sampling(ConfigReader::Target_Params& params) { std::shared_ptr nanolive_logger = spdlog::get("ReadBouncerLog"); bool withTarget = false; diff --git a/src/main/classify.hpp b/src/main/classify.hpp index 54a7a45..d1707ca 100644 --- a/src/main/classify.hpp +++ b/src/main/classify.hpp @@ -58,13 +58,16 @@ std::vector split(const string& s, char delim) { * classify reads from an input file based on given depletion and/or target filters * @parser : command line input parameters */ -void classify_reads(configReader::Classify_Params& params) +//void classify_reads(ConfigReader::Classify_Params& params) +// Methods in main with vectorS +// void classify_reads(ConfigReader config, std::vector DepletionFilters{}, std::vector TargetFilters{}) +void classify_reads(ConfigReader config) { std::shared_ptr nanolive_logger = spdlog::get("ReadBouncerLog"); // create classification config interleave::ClassifyConfig Conf{}; - for (std::filesystem::path read_file : params.read_files) + for (std::filesystem::path read_file : config.IBF_Parsed.read_files) { // initialize depletion and target filters std::vector DepletionFilters{}; @@ -74,60 +77,106 @@ void classify_reads(configReader::Classify_Params& params) bool target = false; // parse depletion IBF if given as parameter - for (std::filesystem::path deplete_file : params.ibf_deplete_files) + for (std::filesystem::path deplete_file : config.IBF_Parsed.deplete_files) { interleave::IBFMeta filter{}; filter.name = deplete_file.stem().string(); interleave::IBF tf{}; interleave::IBFConfig DepleteIBFconfig{}; - try - { - DepleteIBFconfig.input_filter_file = deplete_file.string(); - interleave::FilterStats stats = tf.load_filter(DepleteIBFconfig); - filter.filter = std::move(tf.getFilter()); - if (params.verbose) + + if (config.filterException(deplete_file)){ + try + { + DepleteIBFconfig.input_filter_file = deplete_file.string(); + interleave::FilterStats stats = tf.load_filter(DepleteIBFconfig); + filter.filter = std::move(tf.getFilter()); interleave::print_load_stats(stats); - deplete = true; + deplete = true; + } + catch (interleave::ParseIBFFileException& e) + { + nanolive_logger->error("Error parsing depletion IBF using the following parameters"); + nanolive_logger->error("Depletion IBF file : " + deplete_file.string()); + nanolive_logger->error("Error message : " + std::string(e.what())); + nanolive_logger->flush(); + throw; + } + + DepletionFilters.emplace_back(std::move(filter)); } - catch (interleave::ParseIBFFileException& e) + + else { - nanolive_logger->error("Error parsing depletion IBF using the following parameters"); - nanolive_logger->error("Depletion IBF file : " + deplete_file.string()); - nanolive_logger->error("Error message : " + std::string(e.what())); - nanolive_logger->flush(); - throw; - } + try + { + ibf_build_parser params; + std::filesystem::path out = std::filesystem::path(config.output_dir); + out /= deplete_file.filename(); + out.replace_extension("ibf"); + params = { out, deplete_file, false, false, config.IBF_Parsed.size_k, config.IBF_Parsed.threads, config.IBF_Parsed.fragment_size, 0, true }; + //tf = buildIBF(params); + filter.filter = buildIBF(params); + } + catch (std::out_of_range& e) + { + throw ConfigReaderException(e.what()); + } DepletionFilters.emplace_back(std::move(filter)); - } + } + } - // parse target IBF if given as parameter - for (std::filesystem::path target_file : params.ibf_target_files) - { - interleave::IBFMeta filter{}; - filter.name = target_file.stem().string(); - interleave::IBF tf{}; - interleave::IBFConfig TargetIBFconfig{}; + // parse target IBF if given as parameter + for (std::filesystem::path target_file : config.IBF_Parsed.target_files) + { + interleave::IBFMeta filter{}; + filter.name = target_file.stem().string(); + interleave::IBF tf{}; + interleave::IBFConfig TargetIBFconfig{}; + if (config.filterException(target_file)){ try { TargetIBFconfig.input_filter_file = target_file.string(); interleave::FilterStats stats = tf.load_filter(TargetIBFconfig); filter.filter = std::move(tf.getFilter()); - if (params.verbose) - interleave::print_load_stats(stats); + interleave::print_load_stats(stats); target = true; } catch (interleave::ParseIBFFileException& e) + { + nanolive_logger->error("Error building IBF for target file using the following parameters"); + nanolive_logger->error("Depletion IBF file : " + target_file.string()); + nanolive_logger->error("Error message : " + std::string(e.what())); + nanolive_logger->flush(); + throw; + } + + TargetFilters.emplace_back(std::move(filter)); + } + + else + { + try { - nanolive_logger->error("Error parsing target IBF using the following parameters"); - nanolive_logger->error("Target IBF file : " + target_file.string()); - nanolive_logger->error("Error message : " + std::string(e.what())); - nanolive_logger->flush(); - throw; + ibf_build_parser params; + std::filesystem::path out = std::filesystem::path(config.output_dir); + out /= target_file.filename(); + out.replace_extension("ibf"); + params = { out, target_file, false, false, config.IBF_Parsed.size_k, config.IBF_Parsed.threads, config.IBF_Parsed.fragment_size, 0, true }; + //tf = buildIBF(params); + filter.filter = buildIBF(params); + } + + catch (std::out_of_range& e) + { + throw ConfigReaderException(e.what()); } TargetFilters.emplace_back(std::move(filter)); - } + } + } + + // parse input reads //interleave::TReads reads; @@ -135,8 +184,9 @@ void classify_reads(configReader::Classify_Params& params) Conf.strata_filter = -1; - Conf.significance = params.kmer_significance; - Conf.error_rate = params.error_rate; + //Conf.significance = params.kmer_significance; + Conf.significance = 0.95; + Conf.error_rate =config.IBF_Parsed.error_rate; uint64_t found = 0; uint16_t failed = 0; @@ -155,7 +205,7 @@ void classify_reads(configReader::Classify_Params& params) std::vector< std::ofstream> targetFastas{}; for (interleave::IBFMeta f : TargetFilters) { - std::filesystem::path outfile(params.out_dir); + std::filesystem::path outfile(config.output_dir); outfile /= f.name + ".fasta"; std::ofstream outf; outf.open(outfile, std::ios::out); @@ -169,7 +219,7 @@ void classify_reads(configReader::Classify_Params& params) targetFastas.emplace_back(std::move(outf)); } - std::filesystem::path outfile(params.out_dir); + std::filesystem::path outfile(config.output_dir); outfile /= "unclassified.fasta"; if (!seqan::open(UnclassifiedOut, seqan::toCString(outfile.string()))) { @@ -206,8 +256,7 @@ void classify_reads(configReader::Classify_Params& params) } // read length has to be at least the size of the prefix used for read classification - if (seqan::length(seq) < params.preLen) - { + if (seqan::length(seq) < config.IBF_Parsed.chunk_length) { too_short++; continue; } @@ -222,10 +271,10 @@ void classify_reads(configReader::Classify_Params& params) // as long as rea uint8_t i = 0; // try to classify read parser.max_chunks times - while (i < params.max_chunks) + while (i < config.IBF_Parsed.max_chunks) { - uint64_t fragend = (i+1) * params.preLen; - uint64_t fragstart = i * params.preLen; + uint64_t fragend = (i+1) * config.IBF_Parsed.chunk_length; + uint64_t fragstart = i * config.IBF_Parsed.chunk_length; // make sure that last fragment ends at last position of the reference sequence if (fragend > length(seq)) fragend = length(seq); seqan::Infix< seqan::CharString >::Type fragment = seqan::infix(seq, fragstart, fragend); @@ -335,7 +384,7 @@ void classify_reads(configReader::Classify_Params& params) sstr << "Number of classified reads : " << found; nanolive_logger->info(sstr.str()); sstr.str(""); - sstr << "Number of of too short reads (len < " << params.preLen << ") : " << too_short; + sstr << "Number of of too short reads (len < " << config.IBF_Parsed.chunk_length << ") : " << too_short; nanolive_logger->info(sstr.str()); sstr.str(""); sstr << "Number of all reads : " << readCounter; @@ -359,7 +408,7 @@ void classify_reads(configReader::Classify_Params& params) std::stringstream sstr; std::cout << "------------------------------- Final Results -------------------------------" << std::endl; std::cout << "Number of classified reads : " << found << std::endl; - std::cout << "Number of of too short reads (len < " << params.preLen << ") : " << too_short << std::endl; + std::cout << "Number of of too short reads (len < " << config.IBF_Parsed.chunk_length << ") : " << too_short << std::endl; std::cout << "Number of all reads : " << readCounter << std::endl; for (interleave::IBFMeta f : TargetFilters) diff --git a/src/main/ibfbuild.hpp b/src/main/ibfbuild.hpp index e0af87d..c19b936 100644 --- a/src/main/ibfbuild.hpp +++ b/src/main/ibfbuild.hpp @@ -10,7 +10,7 @@ * @parser : input from the command line for "build" command * @throws : IBFBuildException */ -void buildIBF(ibf_build_parser & parser) +interleave::TIbf buildIBF(ibf_build_parser & parser) { std::shared_ptr nanolive_logger = spdlog::get("ReadBouncerLog"); interleave::IBFConfig config{}; @@ -24,6 +24,7 @@ void buildIBF(ibf_build_parser & parser) config.verbose = parser.verbose; interleave::IBF filter{}; + //interleave::TIbf filter_out; try { interleave::FilterStats stats = filter.create_filter(config); @@ -44,4 +45,6 @@ void buildIBF(ibf_build_parser & parser) throw; } + return filter.getFilter(); + } \ No newline at end of file diff --git a/src/main/main.cpp b/src/main/main.cpp index 449a183..0b9075f 100644 --- a/src/main/main.cpp +++ b/src/main/main.cpp @@ -92,25 +92,26 @@ void fill_action_queue(SafeQueue& signal_queue, * core function for testing connection to MinKNOW software and testing unblock all reads * @parser : input from the command line */ -void test_connection(connection_test_parser& parser) +void test_connection(ConfigReader config) { + std::cout << "Trying to connect to MinKNOW" << std::endl; - std::cout << "Host : " << parser.host << std::endl; - std::cout << "Port : " << parser.port << std::endl; + std::cout << "Host : " << config.MinKNOW_Parsed.host << std::endl; + std::cout << "Port : " << config.MinKNOW_Parsed.port << std::endl; std::stringstream sstr; - sstr << "Port : " << parser.port; + sstr << "Port : " << config.MinKNOW_Parsed.port; // create ReadUntilClient object and connect to specified device readuntil::ReadUntilClient& client = readuntil::ReadUntilClient::getClient(); - client.setHost(parser.host); - client.setPort(parser.port); + client.setHost(config.MinKNOW_Parsed.host); + client.setPort(config.MinKNOW_Parsed.port); client.setRootPath(NanoLiveRoot); // TODO: throw exception if connection could not be established try { - if (client.connect(parser.device)) + if (client.connect(config.MinKNOW_Parsed.flowcell)) { std::cout << "Connection successfully established!" << std::endl; std::cout << "You can start live-depletion using these settings." << std::endl; @@ -130,24 +131,24 @@ void test_connection(connection_test_parser& parser) throw; } - + bool unblock_all = false;// as default and no changes in toml file! - if (parser.unblock_all) + if (unblock_all) { readuntil::AnalysisConfiguration* an_conf = (readuntil::AnalysisConfiguration*)client.getMinKnowService(readuntil::MinKnowServiceType::ANALYSIS_CONFIGURATION); an_conf->set_break_reads_after_seconds(0.4); // wait until sequencing run has been started - if (parser.verbose) - std::cout << "Waiting for device to start sequencing!" << ::std::endl; + //if (parser.verbose) + std::cout << "Waiting for device to start sequencing!" << ::std::endl; std::cout << "Please start the sequencing run now!" << ::std::endl; readuntil::Acquisition* acq = (readuntil::Acquisition*)client.getMinKnowService(readuntil::MinKnowServiceType::ACQUISITION); if (acq->hasStarted()) { - if (parser.verbose) - std::cout << "Sequencing has begun. Starting live signal processing!" << ::std::endl; + //if (parser.verbose) + std::cout << "Sequencing has begun. Starting live signal processing!" << ::std::endl; nanolive_logger->info("Sequencing has begun. Starting live signal processing!"); nanolive_logger->flush(); @@ -171,7 +172,7 @@ void test_connection(connection_test_parser& parser) } catch (readuntil::DataServiceException& e) { - nanolive_logger->error("Could not start streaming signals from device (" + parser.device + ")"); + nanolive_logger->error("Could not start streaming signals from device (" + config.MinKNOW_Parsed.flowcell + ")"); nanolive_logger->error("Error message : " + std::string(e.what())); nanolive_logger->flush(); throw; @@ -189,11 +190,8 @@ void test_connection(connection_test_parser& parser) // start live signal streaming from ONT MinKNOW std::vector< std::future< void > > tasks; - if (parser.verbose) - { - std::cout << "Start receiving live signals thread" << std::endl; - std::cout << "Start sending unblock messages thread" << std::endl; - } + std::cout << "Start receiving live signals thread" << std::endl; + std::cout << "Start sending unblock messages thread" << std::endl; // create thread for receiving signals from MinKNOW @@ -231,32 +229,23 @@ void signalHandler(int signum) /** * setup global Logger for ReadBouncer */ -void initializeLogger(const std::string& toml_file) -{ - std::ifstream tomlFileReadBouncer(toml_file, std::ios_base::binary); - + +void initializeLogger(ConfigReader config) +{ try { - const toml::value configuration_ = toml::parse(tomlFileReadBouncer, /*optional -> */ toml_file); - std::filesystem::path log_file = toml::find(configuration_, "log_directory"); - - log_file = log_file.make_preferred(); - - if (!std::filesystem::is_directory(log_file) || !std::filesystem::exists(log_file)) - { - std::filesystem::create_directories(log_file); - } - log_file /= "ReadBouncerLog.txt"; - nanolive_logger = spdlog::rotating_logger_mt("ReadBouncerLog", log_file.string() , 1048576 * 5, 100); + readuntil::CSVFile = std::filesystem::path(config.log_dir); + interleave::InterleavedBloomFilterLog = std::filesystem::path(config.log_dir); + interleave::IbfClassificationLog = std::filesystem::path(config.log_dir); + readuntil::ReadUntilClientLog = std::filesystem::path(config.log_dir); + std::filesystem::path ReadBouncerLog (config.log_dir); + + ReadBouncerLog /= "ReadBouncerLog.txt"; + nanolive_logger = spdlog::rotating_logger_mt("ReadBouncerLog", ReadBouncerLog.string() , 1048576 * 5, 100); nanolive_logger->set_level(spdlog::level::debug); } - catch (const toml::exception& e) - { - std::cerr << "Could not parse " << toml_file << std::endl; - std::cerr << e.what() << std::endl; - std::cerr << "Please check the correct syntax of the TOML file in the ReadBouncer User Guide!" << std::endl; - } + catch (const spdlog::spdlog_ex& e) { std::cerr << "Log initialization failed: " << e.what() << std::endl; @@ -311,110 +300,101 @@ double cputime() } #endif -/** -* Set the configuration structs for classify/target -* @param : config object, Toml output file, usage, a list of target and deplete files -*/ - -void inline configurationReader(configReader config, std::string const tomlFile, std::string subcommand, std::fstream& tomlOutput, - std::vector& target_files_, - std::vector& deplete_files_) -{ - - toml::value target_files(toml::array{}); - for (std::filesystem::path file : target_files_) - target_files.push_back(file.string()); - - toml::value deplete_files(toml::array{}); - for (std::filesystem::path file : deplete_files_) - deplete_files.push_back(file.string()); - - if (subcommand == "build") { - - configReader::IBF_Build_Params struct_ = config.ibfReader(tomlOutput, subcommand, target_files_, deplete_files_); - - // Create a log of toml usage - auto tbl = toml::value{ { - {subcommand, toml::table{{ - { "target_files", target_files }, - { "deplete_files", deplete_files }, - { "kmer-size", struct_.size_k }, - { "threads", struct_.threads }, - { "fragment-size", struct_.fragment_size } - }} - }, - } }; +// [Error] +/* +/usr/include/c++/9/bits/stl_uninitialized.h: In instantiation of ‘_ForwardIterator std::uninitialized_copy(_InputIterator, _InputIterator, _ForwardIterator) [with _InputIterator = __gnu_cxx::__normal_iterator >; _ForwardIterator = interleave::IBFMeta*]’: +/usr/include/c++/9/bits/stl_uninitialized.h:307:37: required from ‘_ForwardIterator std::__uninitialized_copy_a(_InputIterator, _InputIterator, _ForwardIterator, std::allocator<_Tp>&) [with _InputIterator = __gnu_cxx::__normal_iterator >; _ForwardIterator = interleave::IBFMeta*; _Tp = interleave::IBFMeta]’ +/usr/include/c++/9/bits/stl_vector.h:555:31: required from ‘std::vector<_Tp, _Alloc>::vector(const std::vector<_Tp, _Alloc>&) [with _Tp = interleave::IBFMeta; _Alloc = std::allocator]’ +/mnt/c/bug29/ReadBouncer/src/main/classify.hpp:66:76: required from here +/usr/include/c++/9/bits/stl_uninitialized.h:127:72: error: static assertion failed: result type must be constructible from value type of input range + 127 | static_assert(is_constructible<_ValueType2, decltype(*__first)>::value, + | ^~~~~ - // chrono: https://en.cppreference.com/w/cpp/chrono - auto start = std::chrono::system_clock::now(); - auto end = std::chrono::system_clock::now(); +*/ +/*std::vector getIBF (ConfigReader config){ - std::chrono::duration elapsed_seconds = end - start; - std::time_t end_time = std::chrono::system_clock::to_time_t(end); + std::vector DepletionFilters{}; + std::vector TargetFilters{}; - tomlOutput << "# Computation time: " << std::ctime(&end_time) << '\n'; + return DepletionFilters; +}*/ - tomlOutput << toml::format(tbl) << '\n'; +void run_program(ConfigReader config){ - tomlOutput.close(); - } - else if (subcommand == "classify") { + config.parse(); // parse all params from the different Moduls (one time parse and stores in struct) + std::string subcommand = config.usage; - configReader::Classify_Params struct_{}; - try + if (subcommand == "build") { + + ibf_build_parser params; + config.createLog(config.usage); + + for (std::filesystem::path file : config.IBF_Parsed.target_files) { - struct_ = config.classifyReader(tomlOutput, subcommand, target_files_, deplete_files_); + if (!std::filesystem::exists(file)) + { + // TODO: write message in log file + throw ConfigReader("[Error] The following target file does not exist: " + file.string()); + } + + if (!config.filterException(file)) + { + // TODO: write in log file + std::cout << "The target file: " << file.filename() << " is a fasta file, start building ibf ......." << '\n'; + std::filesystem::path out = std::filesystem::path(config.output_dir); + out /= file.filename(); + out.replace_extension("ibf"); + + params = { out, file, false, false, config.IBF_Parsed.size_k, config.IBF_Parsed.threads, config.IBF_Parsed.fragment_size, 0, true }; + buildIBF(params); + std::cout <<'\n'; + } } - catch (ConfigReaderException& e) + + for (std::filesystem::path file : config.IBF_Parsed.deplete_files) { - std::cerr << "Error in reading TOML configuration file!" << std::endl; - std::cerr << e.what() << std::endl; - throw; + if (!std::filesystem::exists(file)) + { + // TODO: write message in log file + throw ConfigReader("[Error] The following target file does not exist: " + file.string()); + } + + if (!config.filterException(file)) + { + // TODO: write in log file + std::cout << "The deplete file: " << file.filename() << " is a fasta file, start building ibf ......." << '\n'; + std::filesystem::path out = std::filesystem::path(config.output_dir); + out /= file.filename(); + out.replace_extension("ibf"); + + params = { out, file, false, false, config.IBF_Parsed.size_k, config.IBF_Parsed.threads, config.IBF_Parsed.fragment_size, 0, true }; + buildIBF(params); + std::cout <<'\n'; + } } - classify_reads(struct_); - toml::value read_files(toml::array{}); - for (std::filesystem::path file : struct_.read_files) - read_files.push_back(file.string()); - - toml::value tbl = toml::value{ { - - {subcommand, toml::table{{ - { "deplete_files", deplete_files }, - { "target_files", target_files }, - { "read_files", read_files}, - { "exp_seq_error_rate", struct_.kmer_significance }, - { "threads", struct_.threads }, - { "chunk_length", struct_.preLen }, - { "max_chunks", struct_.max_chunks } - - }} - }, - } }; - - // chrono: https://en.cppreference.com/w/cpp/chrono - auto start = std::chrono::system_clock::now(); - auto end = std::chrono::system_clock::now(); - - std::chrono::duration elapsed_seconds = end - start; - std::time_t end_time = std::chrono::system_clock::to_time_t(end); - - tomlOutput << "# Computation time: " << std::ctime(&end_time) << '\n'; - - tomlOutput << toml::format(tbl) << '\n'; +} + + else if (subcommand == "classify") { - tomlOutput.close(); + config.createLog(config.usage); + classify_reads(config); + } else if (subcommand == "target") { - configReader::Target_Params struct_{}; + //config.createLog(config.usage); + } + + /* + ConfigReader::Target_Params struct_{}; try { - struct_ = config.targetReader(tomlOutput, subcommand, target_files_, deplete_files_); + struct_ = config.targetReader(subcommand, target_files, deplete_files); } catch (ConfigReaderException& e) { @@ -422,43 +402,10 @@ void inline configurationReader(configReader config, std::string const tomlFile, std::cerr << e.what() << std::endl; throw; } + */ - //connection_test_parser cT = { struct_.host, struct_.device, struct_.port, false, false, true, false }; - //test_connection(cT); - - auto tbl1 = toml::value{ { - {subcommand, toml::table{{ - { "flowcell", struct_.device }, - { "host-ip ", struct_.host }, - { "port", struct_.port }, - { "minChannel", struct_.minChannel}, - { "maxChannel", struct_.maxChannel}, - { "depletion-files", deplete_files }, - { "target-files", target_files }, - { "significance", struct_.kmer_significance }, - { "error-rate", struct_.error_rate }, - { "basecall-threads", struct_.basecall_threads }, - { "classification-th", struct_.classify_threads }, - { "caller", struct_.caller }, - { "guppy_host", struct_.guppy_host }, - { "guppy_port", struct_.guppy_port }, - { "guppy_config", struct_.guppy_config } - }} - }, - } }; - - // chrono: https://en.cppreference.com/w/cpp/chrono - auto start = std::chrono::system_clock::now(); - auto end = std::chrono::system_clock::now(); - - std::chrono::duration elapsed_seconds = end - start; - std::time_t end_time = std::chrono::system_clock::to_time_t(end); - - tomlOutput << "# Computation date: " << std::ctime(&end_time) << '\n'; - tomlOutput << toml::format(tbl1) << '\n'; - tomlOutput.close(); - try + /*try { adaptive_sampling(struct_); } @@ -468,11 +415,32 @@ void inline configurationReader(configReader config, std::string const tomlFile, return; } - } + }*/ + else if( subcommand == "test") { -} + try + { + config.createLog(config.usage); + test_connection(config); + } + + catch(std::exception& e) + { + std::cerr << e.what() << std::endl; + return; + } + + + } + + else{ + + std::cerr << "Please define one of the usages: [build, target, classify, test]" << '\n'; + exit(0); + } +} int main(int argc, char const **argv) { @@ -485,84 +453,11 @@ int main(int argc, char const **argv) NanoLiveRoot = binPath.substr(0, binPath.find("bin")); std::string const tomlFile = argv[1]; - initializeLogger(tomlFile); - std::ifstream tomlFileReadBouncer(tomlFile, std::ios_base::binary); - toml::value configuration_{}; - std::filesystem::path log_file{}; - std::filesystem::path output_fileTOML{}; - try - { - configuration_ = toml::parse(tomlFileReadBouncer, /*optional -> */ tomlFile); - log_file = toml::find(configuration_, "log_directory"); - log_file = log_file.make_preferred(); - output_fileTOML = toml::find(configuration_, "output_directory"); - output_fileTOML = output_fileTOML.make_preferred(); - } - catch (toml::exception& e) - { - std::cerr << "Could not parse " << tomlFile << std::endl; - std::cerr << e.what() << std::endl; - return 1; - } - catch (std::out_of_range& e) - { - std::cerr << "Error in " << tomlFile << std::endl; - std::cerr << e.what() << std::endl; - - return 1; - } - - if (!std::filesystem::is_directory(output_fileTOML) || !std::filesystem::exists(output_fileTOML)) - { - std::filesystem::create_directories(output_fileTOML); - } - - configReader config(tomlFile); - - //log files - readuntil::CSVFile = std::filesystem::path(output_fileTOML); - interleave::InterleavedBloomFilterLog = std::filesystem::path(log_file); - interleave::IbfClassificationLog = std::filesystem::path(log_file); - readuntil::ReadUntilClientLog = std::filesystem::path(log_file); - - std::string subcommand = config.usage(); - std::fstream tomlOutput = config.writeTOML(); - - if (subcommand.length() > 1) { - - std::cout << "The usage is: " << subcommand << '\n'; - std::cout << "\n"; - } + ConfigReader config(tomlFile); + config.parse_general(); - else { - - std::cerr << "No usage found in config.TOML file\nPlease define one of the usages: [build, target, classify]" << '\n'; - exit(0); - } - - std::vector target_files{}; - std::vector deplete_files{}; - try - { - const toml::value& IBF = toml::find(configuration_, "IBF"); - std::vector tmp = toml::find>(IBF, "target_files"); - for (std::string s : tmp) - target_files.emplace_back((std::filesystem::path(s)).make_preferred()); - tmp.clear(); - tmp = toml::find>(IBF, "deplete_files"); - for (std::string s : tmp) - deplete_files.emplace_back((std::filesystem::path(s)).make_preferred()); - } - catch (std::out_of_range& e) - { - std::cerr << "Error in " << tomlFile << std::endl; - std::cerr << e.what() << std::endl; - - return 1; - } - - - configurationReader(config, tomlFile, subcommand, tomlOutput, target_files, deplete_files); + initializeLogger(config); + run_program(config); NanoLiveTime.stop(); From 53c0fa057e203959cb744a9351e4bfa109f83157 Mon Sep 17 00:00:00 2001 From: lutfia95 Date: Mon, 31 Jan 2022 15:39:20 +0100 Subject: [PATCH 2/8] prepare for windows test --- config.toml | 2 +- src/main/adaptive_sampling.hpp | 160 ++++++++++----------------------- src/main/classify.hpp | 119 ++++-------------------- src/main/main.cpp | 141 +++++++++++++++++++++++++---- 4 files changed, 191 insertions(+), 231 deletions(-) diff --git a/config.toml b/config.toml index e083470..7c7387f 100644 --- a/config.toml +++ b/config.toml @@ -1,4 +1,4 @@ -usage = "classify" #["build", "target", "classify", "test"] +usage = "target" #["build", "target", "classify", "test"] output_directory = 'RB_out' log_directory = 'RB_out/log' diff --git a/src/main/adaptive_sampling.hpp b/src/main/adaptive_sampling.hpp index c592d25..c30a4e3 100644 --- a/src/main/adaptive_sampling.hpp +++ b/src/main/adaptive_sampling.hpp @@ -512,16 +512,16 @@ void checkRunning(Runner& runner, readuntil::Acquisition* acq) * @parser: input from the command line * @throws: IBFBuildException */ -void adaptive_sampling(ConfigReader::Target_Params& params) +void adaptive_sampling(ConfigReader config, std::vector DepletionFilters, std::vector TargetFilters) { std::shared_ptr nanolive_logger = spdlog::get("ReadBouncerLog"); bool withTarget = false; #if !defined(ARM_BUILD) // first check if basecalling file exists std::filesystem::path weights_file = NanoLiveRoot; - weights_file.append("data"); - weights_file /= "rnn48.txt"; - //weights_file = "rnn48.txt"; + //weights_file.append("data"); + //weights_file /= "rnn48.txt"; + weights_file = "rnn48.txt"; if (!std::filesystem::exists(weights_file)) { nanolive_logger->error("Could not find DeepNano weights file : " + weights_file.string()); @@ -531,81 +531,16 @@ void adaptive_sampling(ConfigReader::Target_Params& params) throw basecall::BasecallerException(estr.str()); } #endif - std::vector DepletionFilters{}; - std::vector TargetFilters{}; - // first load IBFs of host reference sequence - if (params.verbose) - std::cout << "Loading Depletion Interleaved Bloom Filter(s)!" << ::std::endl; - - for (std::filesystem::path deplete_file : params.ibf_deplete_files) - { - interleave::IBFMeta filter{}; - filter.name = deplete_file.stem().string(); - interleave::IBF tf{}; - interleave::IBFConfig DepleteIBFconfig{}; - try - { - DepleteIBFconfig.input_filter_file = deplete_file.string(); - interleave::FilterStats stats = std::move(tf.load_filter(DepleteIBFconfig)); - filter.filter = std::move(tf.getFilter()); - if (params.verbose) - interleave::print_load_stats(stats); - } - catch (interleave::ParseIBFFileException& e) - { - nanolive_logger->error("Error parsing depletion IBF using the following parameters"); - nanolive_logger->error("Depletion IBF file : " + deplete_file.string()); - nanolive_logger->error("Error message : " + std::string(e.what())); - nanolive_logger->flush(); - throw; - } - - DepletionFilters.emplace_back(std::move(filter)); - } - - if (params.verbose) - std::cout << "Loading Target Interleaved Bloom Filter(s)!" << ::std::endl; - // parse target IBF if given as parameter - for (std::filesystem::path target_file : params.ibf_target_files) - { - interleave::IBFMeta filter{}; - filter.name = target_file.stem().string(); - interleave::IBF tf{}; - interleave::IBFConfig TargetIBFconfig{}; - try - { - TargetIBFconfig.input_filter_file = target_file.string(); - interleave::FilterStats stats = std::move(tf.load_filter(TargetIBFconfig)); - filter.filter = std::move(tf.getFilter()); - if (params.verbose) - interleave::print_load_stats(stats); - } - catch (interleave::ParseIBFFileException& e) - { - nanolive_logger->error("Error parsing target IBF using the following parameters"); - nanolive_logger->error("Target IBF file : " + target_file.string()); - nanolive_logger->error("Error message : " + std::string(e.what())); - nanolive_logger->flush(); - throw; - } - - TargetFilters.emplace_back(std::move(filter)); - } + std::cout << "Trying to connect to MinKNOW" << std::endl; + std::cout << "Host : " << config.MinKNOW_Parsed.host << std::endl; + std::cout << "Port : " << config.MinKNOW_Parsed.port << std::endl; - if (params.verbose) - { - std::cout << "Successfully loaded Interleaved Bloom Filter(s)!" << ::std::endl; - std::cout << "Trying to connect to MinKNOW" << std::endl; - std::cout << "Host : " << params.host << std::endl; - std::cout << "Port : " << params.port << std::endl; - } - - nanolive_logger->info("Successfully loaded Interleaved Bloom Filter(s)!"); + //nanolive_logger->info("Successfully loaded Interleaved Bloom Filter(s)!"); nanolive_logger->info("Trying to connect to MinKNOW"); - nanolive_logger->info("Host : " + params.host); + nanolive_logger->info("Host : " + config.MinKNOW_Parsed.host); std::stringstream sstr; - sstr << "Port : " << params.port; + sstr << "Port : " << config.MinKNOW_Parsed.port; nanolive_logger->info(sstr.str()); nanolive_logger->flush(); @@ -613,31 +548,31 @@ void adaptive_sampling(ConfigReader::Target_Params& params) // create ReadUntilClient object and connect to specified device readuntil::ReadUntilClient& client = readuntil::ReadUntilClient::getClient(); - client.setHost(params.host); - client.setPort(params.port); + client.setHost(config.MinKNOW_Parsed.host); + client.setPort(config.MinKNOW_Parsed.port); client.setRootPath(NanoLiveRoot); // TODO: throw exception if connection could not be established - if (client.connect(params.device)) + if (client.connect(config.MinKNOW_Parsed.flowcell)) { - if (params.verbose) - std::cout << "Connection successfully established!" << ::std::endl; - else - { - nanolive_logger->info("Connection successfully established!"); - nanolive_logger->flush(); - } + //if (params.verbose) + std::cout << "Connection successfully established!" << ::std::endl; + //else + //{ + nanolive_logger->info("Connection successfully established!"); + nanolive_logger->flush(); + //} } else { std::cerr << "Could not establish connection to MinKNOW or MinION device" << std::endl; - nanolive_logger->error("Could not establish connection to MinKNOW or MinION device (" + params.device + ")"); + nanolive_logger->error("Could not establish connection to MinKNOW or MinION device (" + config.MinKNOW_Parsed.flowcell + ")"); nanolive_logger->flush(); } // wait until sequencing run has been started - if (params.verbose) - std::cout << "Waiting for device to start sequencing!" << ::std::endl; + //if (params.verbose) + std::cout << "Waiting for device to start sequencing!" << ::std::endl; std::cout << "Please start the sequencing run now!" << ::std::endl; @@ -645,8 +580,8 @@ void adaptive_sampling(ConfigReader::Target_Params& params) if (runner.isRunning = acq->hasStarted()) { - if (params.verbose) - std::cout << "Sequencing has begun. Starting live signal processing!" << ::std::endl; + //if (params.verbose) + std::cout << "Sequencing has begun. Starting live signal processing!" << ::std::endl; nanolive_logger->info("Sequencing has begun. Starting live signal processing!"); nanolive_logger->flush(); @@ -657,25 +592,25 @@ void adaptive_sampling(ConfigReader::Target_Params& params) // seems to be overturned by TOML file configuration readuntil::AnalysisConfiguration* ana_conf = (readuntil::AnalysisConfiguration*)client.getMinKnowService(readuntil::MinKnowServiceType::ANALYSIS_CONFIGURATION); ana_conf->set_break_reads_after_seconds(0.4); - if (params.verbose) - { - nanolive_logger->info("Set break_reads_after_seconds = 0.4"); - nanolive_logger->flush(); - } + //if (params.verbose) + //{ + nanolive_logger->info("Set break_reads_after_seconds = 0.4"); + nanolive_logger->flush(); + //} //setup basecalling basecall::Basecaller* caller; #if defined(_WIN32) - if (stricmp(params.caller.c_str(), "guppy") == 0) + if (stricmp(config.Basecaller_Parsed.caller.c_str(), "guppy") == 0) #else - if (strcasecmp(params.caller.c_str(), "guppy") == 0) + if (strcasecmp(config.Basecaller_Parsed.caller.c_str(), "guppy") == 0) #endif { - std::string basecall_host = params.guppy_host + ":" + params.guppy_port; + std::string basecall_host = config.Basecaller_Parsed.guppy_host + ":" + config.Basecaller_Parsed.guppy_port; try { - caller = new basecall::GuppyBasecaller(basecall_host, params.guppy_config); + caller = new basecall::GuppyBasecaller(basecall_host, config.Basecaller_Parsed.guppy_config); } catch (basecall::BasecallerException& e) { @@ -687,14 +622,14 @@ void adaptive_sampling(ConfigReader::Target_Params& params) } #if !defined(ARM_BUILD) else - caller = new basecall::DeepNanoBasecaller(weights_file, params.basecall_threads); + caller = new basecall::DeepNanoBasecaller(weights_file, config.Basecaller_Parsed.basecall_threads); #endif // create Data Service object // used for streaming live nanopore signals from MinKNOW and sending action messages back data = (readuntil::Data*)client.getMinKnowService(readuntil::MinKnowServiceType::DATA); - data->setChannels(params.minChannel, params.maxChannel); + data->setChannels(config.MinKNOW_Parsed.minChannel, config.MinKNOW_Parsed.maxChannel); // start live streaming of data try { @@ -702,7 +637,7 @@ void adaptive_sampling(ConfigReader::Target_Params& params) } catch (readuntil::DataServiceException& e) { - nanolive_logger->error("Could not start streaming signals from device (" + params.device + ")"); + nanolive_logger->error("Could not start streaming signals from device (" + config.MinKNOW_Parsed.flowcell + ")"); nanolive_logger->error("Error message : " + std::string(e.what())); nanolive_logger->flush(); throw; @@ -728,13 +663,13 @@ void adaptive_sampling(ConfigReader::Target_Params& params) // start live signal streaming from ONT MinKNOW std::vector< std::future< void > > tasks; - if (params.verbose) - { - std::cout << "Start receiving live signals thread" << std::endl; - std::cout << "Start basecalling thread" << std::endl; - std::cout << "Start read classification thread" << std::endl; - std::cout << "Start sending unblock messages thread" << std::endl; - } + //if (params.verbose) + //{ + std::cout << "Start receiving live signals thread" << std::endl; + std::cout << "Start basecalling thread" << std::endl; + std::cout << "Start read classification thread" << std::endl; + std::cout << "Start sending unblock messages thread" << std::endl; + //} @@ -751,11 +686,12 @@ void adaptive_sampling(ConfigReader::Target_Params& params) // create classification config interleave::ClassifyConfig conf{}; conf.strata_filter = -1; - conf.significance = params.kmer_significance; - conf.error_rate = params.error_rate; + //conf.significance = params.kmer_significance; + conf.significance = 0.95; + conf.error_rate = config.IBF_Parsed.error_rate; // create thread/task for classification - for (uint8_t t = 0; t < params.classify_threads; ++t) + for (uint8_t t = 0; t < config.IBF_Parsed.threads; ++t) { tasks.emplace_back(std::async(std::launch::async, &classify_live_reads, std::ref(classification_queue), diff --git a/src/main/classify.hpp b/src/main/classify.hpp index d1707ca..b678c37 100644 --- a/src/main/classify.hpp +++ b/src/main/classify.hpp @@ -61,122 +61,34 @@ std::vector split(const string& s, char delim) { //void classify_reads(ConfigReader::Classify_Params& params) // Methods in main with vectorS // void classify_reads(ConfigReader config, std::vector DepletionFilters{}, std::vector TargetFilters{}) -void classify_reads(ConfigReader config) +void classify_reads(ConfigReader config, std::vector DepletionFilters, std::vector TargetFilters) { std::shared_ptr nanolive_logger = spdlog::get("ReadBouncerLog"); // create classification config interleave::ClassifyConfig Conf{}; - for (std::filesystem::path read_file : config.IBF_Parsed.read_files) - { - // initialize depletion and target filters - std::vector DepletionFilters{}; - std::vector TargetFilters{}; - - bool deplete = false; - bool target = false; - - // parse depletion IBF if given as parameter - for (std::filesystem::path deplete_file : config.IBF_Parsed.deplete_files) - { - interleave::IBFMeta filter{}; - filter.name = deplete_file.stem().string(); - interleave::IBF tf{}; - interleave::IBFConfig DepleteIBFconfig{}; - - if (config.filterException(deplete_file)){ - try - { - DepleteIBFconfig.input_filter_file = deplete_file.string(); - interleave::FilterStats stats = tf.load_filter(DepleteIBFconfig); - filter.filter = std::move(tf.getFilter()); - interleave::print_load_stats(stats); - deplete = true; - } - catch (interleave::ParseIBFFileException& e) - { - nanolive_logger->error("Error parsing depletion IBF using the following parameters"); - nanolive_logger->error("Depletion IBF file : " + deplete_file.string()); - nanolive_logger->error("Error message : " + std::string(e.what())); - nanolive_logger->flush(); - throw; - } + bool deplete = false; + bool target = false; - DepletionFilters.emplace_back(std::move(filter)); - } - - else - { - try - { - ibf_build_parser params; - std::filesystem::path out = std::filesystem::path(config.output_dir); - out /= deplete_file.filename(); - out.replace_extension("ibf"); - params = { out, deplete_file, false, false, config.IBF_Parsed.size_k, config.IBF_Parsed.threads, config.IBF_Parsed.fragment_size, 0, true }; - //tf = buildIBF(params); - filter.filter = buildIBF(params); - } + if(DepletionFilters.size() >= 1){ - catch (std::out_of_range& e) - { - throw ConfigReaderException(e.what()); - } - DepletionFilters.emplace_back(std::move(filter)); - } + deplete = true; } - - // parse target IBF if given as parameter - for (std::filesystem::path target_file : config.IBF_Parsed.target_files) - { - interleave::IBFMeta filter{}; - filter.name = target_file.stem().string(); - interleave::IBF tf{}; - interleave::IBFConfig TargetIBFconfig{}; - if (config.filterException(target_file)){ - try - { - TargetIBFconfig.input_filter_file = target_file.string(); - interleave::FilterStats stats = tf.load_filter(TargetIBFconfig); - filter.filter = std::move(tf.getFilter()); - interleave::print_load_stats(stats); - target = true; - } - catch (interleave::ParseIBFFileException& e) - { - nanolive_logger->error("Error building IBF for target file using the following parameters"); - nanolive_logger->error("Depletion IBF file : " + target_file.string()); - nanolive_logger->error("Error message : " + std::string(e.what())); - nanolive_logger->flush(); - throw; - } - - TargetFilters.emplace_back(std::move(filter)); - } - - else - { - try - { - ibf_build_parser params; - std::filesystem::path out = std::filesystem::path(config.output_dir); - out /= target_file.filename(); - out.replace_extension("ibf"); - params = { out, target_file, false, false, config.IBF_Parsed.size_k, config.IBF_Parsed.threads, config.IBF_Parsed.fragment_size, 0, true }; - //tf = buildIBF(params); - filter.filter = buildIBF(params); - } + else if(TargetFilters.size() >= 1){ - catch (std::out_of_range& e) - { - throw ConfigReaderException(e.what()); - } + target = true; + } + else{ - TargetFilters.emplace_back(std::move(filter)); - } + std::cerr<<"No depletion or target filters have been provided! "<<'\n'; + exit(1); } + //std::cout<< "Size of depletion filters: "<< DepletionFilters.size() << '\n'; + //std::cout<< "Size of target filters: "<< TargetFilters.size() << '\n'; + for (std::filesystem::path read_file : config.IBF_Parsed.read_files) + { // parse input reads //interleave::TReads reads; @@ -418,6 +330,7 @@ void classify_reads(ConfigReader config) //seqan::close(f.outfile); + f.classified=0; } std::cout << "Average Processing Time Read Classification : " << avgClassifyduration << std::endl; diff --git a/src/main/main.cpp b/src/main/main.cpp index 0b9075f..453eaad 100644 --- a/src/main/main.cpp +++ b/src/main/main.cpp @@ -301,24 +301,132 @@ double cputime() #endif -// [Error] -/* -/usr/include/c++/9/bits/stl_uninitialized.h: In instantiation of ‘_ForwardIterator std::uninitialized_copy(_InputIterator, _InputIterator, _ForwardIterator) [with _InputIterator = __gnu_cxx::__normal_iterator >; _ForwardIterator = interleave::IBFMeta*]’: -/usr/include/c++/9/bits/stl_uninitialized.h:307:37: required from ‘_ForwardIterator std::__uninitialized_copy_a(_InputIterator, _InputIterator, _ForwardIterator, std::allocator<_Tp>&) [with _InputIterator = __gnu_cxx::__normal_iterator >; _ForwardIterator = interleave::IBFMeta*; _Tp = interleave::IBFMeta]’ -/usr/include/c++/9/bits/stl_vector.h:555:31: required from ‘std::vector<_Tp, _Alloc>::vector(const std::vector<_Tp, _Alloc>&) [with _Tp = interleave::IBFMeta; _Alloc = std::allocator]’ -/mnt/c/bug29/ReadBouncer/src/main/classify.hpp:66:76: required from here -/usr/include/c++/9/bits/stl_uninitialized.h:127:72: error: static assertion failed: result type must be constructible from value type of input range - 127 | static_assert(is_constructible<_ValueType2, decltype(*__first)>::value, - | ^~~~~ +/** + * Build or load target/deplete IBF's for classify or target usage + * @param config ConfigReader constructor + * @param targetFilter bool to parse target filters/fasta files + * @param depleteFilter bool to parse deplete filters/fasta files + * @return vector of loaded/constructed IBF's + */ -*/ -/*std::vector getIBF (ConfigReader config){ +std::vector getIBF (ConfigReader config, bool targetFilter, bool depleteFilter){ std::vector DepletionFilters{}; std::vector TargetFilters{}; - return DepletionFilters; -}*/ + if(depleteFilter){ + // parse depletion IBF if given as parameter + for (std::filesystem::path deplete_file : config.IBF_Parsed.deplete_files) + { + interleave::IBFMeta filter{}; + filter.name = deplete_file.stem().string(); + interleave::IBF tf{}; + interleave::IBFConfig DepleteIBFconfig{}; + + if (config.filterException(deplete_file)){ + try + { + DepleteIBFconfig.input_filter_file = deplete_file.string(); + interleave::FilterStats stats = tf.load_filter(DepleteIBFconfig); + filter.filter = std::move(tf.getFilter()); + interleave::print_load_stats(stats); + //deplete = true; + } + catch (interleave::ParseIBFFileException& e) + { + nanolive_logger->error("Error parsing depletion IBF using the following parameters"); + nanolive_logger->error("Depletion IBF file : " + deplete_file.string()); + nanolive_logger->error("Error message : " + std::string(e.what())); + nanolive_logger->flush(); + throw; + } + + DepletionFilters.emplace_back(std::move(filter)); + } + + else + { + try + { + ibf_build_parser params; + std::filesystem::path out = std::filesystem::path(config.output_dir); + out /= deplete_file.filename(); + out.replace_extension("ibf"); + params = { out, deplete_file, false, false, config.IBF_Parsed.size_k, config.IBF_Parsed.threads, config.IBF_Parsed.fragment_size, 0, true }; + //tf = buildIBF(params); + filter.filter = buildIBF(params); + //deplete = true; + } + + catch (std::out_of_range& e) + { + throw ConfigReaderException(e.what()); + } + DepletionFilters.emplace_back(std::move(filter)); + } + } + return DepletionFilters; + } + + if(targetFilter){ + for (std::filesystem::path target_file : config.IBF_Parsed.target_files) + { + interleave::IBFMeta filter{}; + filter.name = target_file.stem().string(); + interleave::IBF tf{}; + interleave::IBFConfig TargetIBFconfig{}; + if (config.filterException(target_file)){ + try + { + TargetIBFconfig.input_filter_file = target_file.string(); + interleave::FilterStats stats = tf.load_filter(TargetIBFconfig); + filter.filter = std::move(tf.getFilter()); + interleave::print_load_stats(stats); + //target = true; + } + catch (interleave::ParseIBFFileException& e) + { + nanolive_logger->error("Error building IBF for target file using the following parameters"); + nanolive_logger->error("Depletion IBF file : " + target_file.string()); + nanolive_logger->error("Error message : " + std::string(e.what())); + nanolive_logger->flush(); + throw; + } + + TargetFilters.emplace_back(std::move(filter)); + } + + else + { + try + { + ibf_build_parser params; + std::filesystem::path out = std::filesystem::path(config.output_dir); + out /= target_file.filename(); + out.replace_extension("ibf"); + params = { out, target_file, false, false, config.IBF_Parsed.size_k, config.IBF_Parsed.threads, config.IBF_Parsed.fragment_size, 0, true }; + //tf = buildIBF(params); + filter.filter = buildIBF(params); + } + + catch (std::out_of_range& e) + { + throw ConfigReaderException(e.what()); + } + + TargetFilters.emplace_back(std::move(filter)); + } + } + + return TargetFilters; + } + + } + +/** + * Run ReadBouncer using the provided parameters in config.toml file + * @param config ConfigReader constructor + */ void run_program(ConfigReader config){ @@ -380,14 +488,17 @@ void run_program(ConfigReader config){ else if (subcommand == "classify") { config.createLog(config.usage); - classify_reads(config); + //std::vector DepletionFilters = getIBF(config, false, true); + //std::vector TargetFilters = getIBF(config, true, false); + classify_reads(config, getIBF(config, false, true), getIBF(config, true, false)); } else if (subcommand == "target") { - //config.createLog(config.usage); + config.createLog(config.usage); + adaptive_sampling(config, getIBF(config, false, true), getIBF(config, true, false)); } /* From f8064e8086c7eefbc2db757399ad9fb3155abbd4 Mon Sep 17 00:00:00 2001 From: lutfia95 Date: Mon, 31 Jan 2022 15:51:36 +0100 Subject: [PATCH 3/8] minor comments --- src/config/configReader.cpp | 148 +----------------------------------- src/config/configReader.hpp | 25 ------ src/main/classify.hpp | 7 +- src/main/main.cpp | 44 +++++------ 4 files changed, 20 insertions(+), 204 deletions(-) diff --git a/src/config/configReader.cpp b/src/config/configReader.cpp index 45bf299..330c668 100644 --- a/src/config/configReader.cpp +++ b/src/config/configReader.cpp @@ -44,7 +44,7 @@ ConfigReader::ConfigReader(std::string const tomlFile) { }; /** - * Parse output dir with usage + * Parse main parameters from toml file [log_directory, output_directory and usage] * */ @@ -194,7 +194,7 @@ void ConfigReader::createLog(std::string& usage){ /** - * Check if the target/deplete input files are IBF or not + * Check if the target/deplete input files are IBF or fasta files * @param file input deplete/target file * @return Bool due to decision * @throw seqan::Exception if fasta file @@ -369,147 +369,3 @@ void ConfigReader::parse(){ } - -//@TODO -/** - * Parse parameters from toml file for live reads targeting -* @param : Toml output file, usage, a list of target and deplete files -* @return: Struct with the needed parameters for live target ((Targeted Sequencing)) -*/ - -/* - - -ConfigReader::Target_Params ConfigReader::targetReader( std::string& usage, - std::vector& target_files_, - std::vector& deplete_files_) { - - std::string target, deplete; - IBF_Build_Params build_IBF; - - int k, classifyThreads, f, l, m, basecallThreads; - double e; - double significance = 0.95; - std::filesystem::path output_fileTOML{}; - std::vector rf_tmp{}; - std::vector channels; - std::string device{}; - std::string MinKNOW_host{}; - std::string MinKNOW_port{}; - std::string weights = "48"; - std::string caller{}; - std::string hostCaller{}; - std::string portBasecaller{}; - std::string guppyConfig{}; - try - { - output_fileTOML = std::filesystem::path(toml::find(this->toml, "output_directory")); - output_fileTOML = output_fileTOML.make_preferred(); - toml::value IBF = toml::find(this->toml, "IBF"); - toml::value MinKNOW = toml::find(this->toml, "MinKNOW"); - toml::value basecaller = toml::find(this->toml, "Basecaller"); - - k = toml::find_or(IBF, "kmer_size", 13); - classifyThreads = toml::find_or(IBF, "threads", 1); - f = toml::find_or(IBF, "fragment_size", 100000); - e = toml::find_or(IBF, "exp_seq_error_rate", 0.1); - - device = toml::find(MinKNOW, "flowcell"); - MinKNOW_host = toml::find_or(MinKNOW, "host", "127.0.0.1"); - MinKNOW_port = toml::find_or(MinKNOW, "port", "9501"); - channels = toml::find_or>(MinKNOW, "channels", std::vector{}); - - caller = toml::find_or(basecaller, "caller", "DeepNano"); - basecallThreads = toml::find_or(basecaller, "threads", 3); -#if defined(_WIN32) - if (stricmp(caller.c_str(), "guppy") == 0) -#else - if (strcasecmp(caller.c_str(), "guppy") == 0) -#endif - { - hostCaller = toml::find(basecaller, "host"); - portBasecaller = toml::find_or(basecaller, "port", "5555"); - guppyConfig = toml::find_or(basecaller, "config", "dna_r9.4.1_450bps_fast"); - // TODO: check if guppyConfig is correct configuration file - - } - } - catch (std::out_of_range& e) - { - // TODO: write message in log file - throw ConfigReaderException(e.what()); - } - - std::vector target_holder{}; - std::vector deplete_holder{}; - - for (std::filesystem::path file : target_files_) - { - if (!std::filesystem::exists(file)) - { - // TODO: write message in log file - throw ConfigReaderException("[Error] The following target file does not exist: " + file.string()); - } - - if (ConfigReader::filterException(file)) - { - target_holder.emplace_back(std::move(file));// If ibf then the target in the same dir - } - else - { - - // TODO: write in log file - std::cout << "The target file is a fasta file, start building ibf ......." << '\n'; - - - std::filesystem::path target = std::filesystem::path(output_fileTOML); - target /= file.filename(); - target.replace_extension("ibf"); - - build_IBF = { target, file, false, false, k, classifyThreads, f, 0, true }; - buildIBF_(build_IBF); - target_holder.emplace_back(std::move(target)); - } - - } - - for (std::filesystem::path file : deplete_files_) - { - if (!std::filesystem::exists(file)) - { - // TODO: write message in log file - throw ConfigReaderException("[Error] The following target file does not exist: " + file.string()); - } - - if (ConfigReader::filterException(file)) - { - deplete_holder.emplace_back(std::move(file));// If ibf then the target in the same dir - } - else - { - - // TODO: write in log file - std::cout << "The deplete file is a fasta file, start building ibf ......." << '\n'; - - std::filesystem::path deplete = std::filesystem::path(output_fileTOML); - deplete /= file.filename(); - deplete.replace_extension("ibf"); - - build_IBF = { deplete, file, false, false, k, classifyThreads, f, 0, true }; - buildIBF_(build_IBF); - - deplete_holder.emplace_back(std::move(deplete)); - } - - } - - Target_Params targetStruct; - if (channels.size() == 2 ) - targetStruct = { MinKNOW_host, device, deplete_holder, target_holder, output_fileTOML, hostCaller, portBasecaller, guppyConfig, caller, MinKNOW_port, basecallThreads, classifyThreads, - significance, e, false, false, false, (uint8_t)channels[0], (uint8_t)channels[1] }; - else - targetStruct = { MinKNOW_host, device, deplete_holder, target_holder, output_fileTOML, hostCaller, portBasecaller, guppyConfig, caller, MinKNOW_port, basecallThreads, classifyThreads, - significance, e, false, false, false }; - - return targetStruct; -};*/ \ No newline at end of file diff --git a/src/config/configReader.hpp b/src/config/configReader.hpp index c898d92..dea015b 100644 --- a/src/config/configReader.hpp +++ b/src/config/configReader.hpp @@ -51,28 +51,6 @@ class ConfigReader { std::filesystem::path log_dir{}; std::string usage; - struct Target_Params// TODO - { - std::string host = "127.0.0.1"; - std::string device{}; - std::vector ibf_deplete_files{ }; - std::vector ibf_target_files{ }; - std::filesystem::path out_dir{}; - std::string guppy_host = "127.0.0.1"; - std::string guppy_port = "5555"; - std::string guppy_config = "dna_r9.4.1_450bps_fast"; - std::string caller = "DeepNano"; - std::string port = "9501"; - int basecall_threads = 3; - int classify_threads = 3; - double kmer_significance = 0.95; - double error_rate = 0.1; - bool command = false; - bool show_help = false; - bool verbose = false; - uint16_t minChannel = 1; - uint16_t maxChannel = 512; - }; struct IBF_Params { @@ -112,9 +90,6 @@ class ConfigReader { void parse(); void createLog(std::string& usage); - /*Target_Params targetReader( std::string& usage, - std::vector& target_files_, - std::vector& deplete_files_);*/ private: diff --git a/src/main/classify.hpp b/src/main/classify.hpp index b678c37..e7e3b45 100644 --- a/src/main/classify.hpp +++ b/src/main/classify.hpp @@ -56,11 +56,8 @@ std::vector split(const string& s, char delim) { /** * classify reads from an input file based on given depletion and/or target filters -* @parser : command line input parameters +* @parser : toml input parameters */ -//void classify_reads(ConfigReader::Classify_Params& params) -// Methods in main with vectorS -// void classify_reads(ConfigReader config, std::vector DepletionFilters{}, std::vector TargetFilters{}) void classify_reads(ConfigReader config, std::vector DepletionFilters, std::vector TargetFilters) { std::shared_ptr nanolive_logger = spdlog::get("ReadBouncerLog"); @@ -94,7 +91,6 @@ void classify_reads(ConfigReader config, std::vector Deplet //interleave::TReads reads; //parse_reads(parser.read_file, reads, parser.preLen); - Conf.strata_filter = -1; //Conf.significance = params.kmer_significance; Conf.significance = 0.95; @@ -330,7 +326,6 @@ void classify_reads(ConfigReader config, std::vector Deplet //seqan::close(f.outfile); - f.classified=0; } std::cout << "Average Processing Time Read Classification : " << avgClassifyduration << std::endl; diff --git a/src/main/main.cpp b/src/main/main.cpp index 453eaad..555ad53 100644 --- a/src/main/main.cpp +++ b/src/main/main.cpp @@ -487,46 +487,36 @@ void run_program(ConfigReader config){ else if (subcommand == "classify") { - config.createLog(config.usage); - //std::vector DepletionFilters = getIBF(config, false, true); - //std::vector TargetFilters = getIBF(config, true, false); - classify_reads(config, getIBF(config, false, true), getIBF(config, true, false)); - + try + { + + config.createLog(config.usage); + //std::vector DepletionFilters = getIBF(config, false, true);// avoid copying the IBF's + //std::vector TargetFilters = getIBF(config, true, false);// avoid copying the IBF's + classify_reads(config, getIBF(config, false, true), getIBF(config, true, false)); + } + catch(std::exception& e) + { + std::cerr << e.what() << std::endl; + return; + } } else if (subcommand == "target") { - config.createLog(config.usage); - adaptive_sampling(config, getIBF(config, false, true), getIBF(config, true, false)); - } - - /* - ConfigReader::Target_Params struct_{}; try { - struct_ = config.targetReader(subcommand, target_files, deplete_files); - } - catch (ConfigReaderException& e) - { - std::cerr << "Error in reading TOML configuration file!" << std::endl; - std::cerr << e.what() << std::endl; - throw; - } - */ - - - /*try - { - adaptive_sampling(struct_); + config.createLog(config.usage); + adaptive_sampling(config, getIBF(config, false, true), getIBF(config, true, false)); } catch(std::exception& e) { std::cerr << e.what() << std::endl; return; } + } - }*/ else if( subcommand == "test") { @@ -548,7 +538,7 @@ void run_program(ConfigReader config){ else{ std::cerr << "Please define one of the usages: [build, target, classify, test]" << '\n'; - exit(0); + exit(1); } } From 94d4587f3612a13aa95a3abde6cd7330e76e4c8e Mon Sep 17 00:00:00 2001 From: Ahmad lutfi Date: Mon, 31 Jan 2022 18:21:49 +0100 Subject: [PATCH 4/8] filesystem path --- src/main/main.cpp | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/main/main.cpp b/src/main/main.cpp index 555ad53..9ed225e 100644 --- a/src/main/main.cpp +++ b/src/main/main.cpp @@ -348,11 +348,11 @@ std::vector getIBF (ConfigReader config, bool targetFilter, { try { - ibf_build_parser params; + //ibf_build_parser params; std::filesystem::path out = std::filesystem::path(config.output_dir); out /= deplete_file.filename(); out.replace_extension("ibf"); - params = { out, deplete_file, false, false, config.IBF_Parsed.size_k, config.IBF_Parsed.threads, config.IBF_Parsed.fragment_size, 0, true }; + ibf_build_parser params = { out.string(), deplete_file.string(), false, false, config.IBF_Parsed.size_k, config.IBF_Parsed.threads, config.IBF_Parsed.fragment_size, 0, true }; //tf = buildIBF(params); filter.filter = buildIBF(params); //deplete = true; @@ -400,11 +400,11 @@ std::vector getIBF (ConfigReader config, bool targetFilter, { try { - ibf_build_parser params; + //ibf_build_parser params; std::filesystem::path out = std::filesystem::path(config.output_dir); out /= target_file.filename(); out.replace_extension("ibf"); - params = { out, target_file, false, false, config.IBF_Parsed.size_k, config.IBF_Parsed.threads, config.IBF_Parsed.fragment_size, 0, true }; + ibf_build_parser params = { out.string(), target_file.string(), false, false, config.IBF_Parsed.size_k, config.IBF_Parsed.threads, config.IBF_Parsed.fragment_size, 0, true }; //tf = buildIBF(params); filter.filter = buildIBF(params); } @@ -436,7 +436,7 @@ void run_program(ConfigReader config){ if (subcommand == "build") { - ibf_build_parser params; + //ibf_build_parser params; config.createLog(config.usage); for (std::filesystem::path file : config.IBF_Parsed.target_files) @@ -455,7 +455,7 @@ void run_program(ConfigReader config){ out /= file.filename(); out.replace_extension("ibf"); - params = { out, file, false, false, config.IBF_Parsed.size_k, config.IBF_Parsed.threads, config.IBF_Parsed.fragment_size, 0, true }; + ibf_build_parser params = { out.string(), file.string(), false, false, config.IBF_Parsed.size_k, config.IBF_Parsed.threads, config.IBF_Parsed.fragment_size, 0, true }; buildIBF(params); std::cout <<'\n'; } @@ -477,7 +477,7 @@ void run_program(ConfigReader config){ out /= file.filename(); out.replace_extension("ibf"); - params = { out, file, false, false, config.IBF_Parsed.size_k, config.IBF_Parsed.threads, config.IBF_Parsed.fragment_size, 0, true }; + ibf_build_parser params = { out.string(), file.string(), false, false, config.IBF_Parsed.size_k, config.IBF_Parsed.threads, config.IBF_Parsed.fragment_size, 0, true }; buildIBF(params); std::cout <<'\n'; } From c4f0c987130a2dd83f2e4480c4be4ab520041342 Mon Sep 17 00:00:00 2001 From: Ahmad Lutfi <62063481+lutfia95@users.noreply.github.com> Date: Mon, 31 Jan 2022 23:18:43 +0100 Subject: [PATCH 5/8] weights file --- src/main/adaptive_sampling.hpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/main/adaptive_sampling.hpp b/src/main/adaptive_sampling.hpp index c30a4e3..d6d811b 100644 --- a/src/main/adaptive_sampling.hpp +++ b/src/main/adaptive_sampling.hpp @@ -519,9 +519,9 @@ void adaptive_sampling(ConfigReader config, std::vector Dep #if !defined(ARM_BUILD) // first check if basecalling file exists std::filesystem::path weights_file = NanoLiveRoot; - //weights_file.append("data"); - //weights_file /= "rnn48.txt"; - weights_file = "rnn48.txt"; + weights_file.append("data"); + weights_file /= "rnn48.txt"; + //weights_file = "rnn48.txt"; if (!std::filesystem::exists(weights_file)) { nanolive_logger->error("Could not find DeepNano weights file : " + weights_file.string()); From 2abf06507dc21fc3f412ab95e9b78e9ee47d9f56 Mon Sep 17 00:00:00 2001 From: lutfia95 Date: Wed, 2 Feb 2022 00:19:58 +0100 Subject: [PATCH 6/8] exceptions and windows test --- config.toml | 12 ++++++------ src/config/configReader.cpp | 6 +++--- src/main/main.cpp | 21 +++++++++++++-------- 3 files changed, 22 insertions(+), 17 deletions(-) diff --git a/config.toml b/config.toml index 7c7387f..cbe8935 100644 --- a/config.toml +++ b/config.toml @@ -1,6 +1,6 @@ -usage = "target" #["build", "target", "classify", "test"] +usage = "test" #["build", "target", "classify", "test"] output_directory = 'RB_out' -log_directory = 'RB_out/log' +log_directory = 'RB_out/logs' [IBF] @@ -8,7 +8,7 @@ kmer_size = 15 #(unsigned integer with default 13) only require fragment_size = 100000 #(unsigned integer with default 100000) only required for 'usage = "build" or if target_file/deplete_file is a fasta formate file threads = 3 #(unsigned integer with default 3) target_files = ['/mnt/c/ReadBouncerToml/build/main/Release/Listeria_monocytogenes_ATCC_19115_.fasta','/mnt/c/ReadBouncerToml/build/main/Release/Pseudomonas_aeruginosa_complete_genome.fasta'] -deplete_files = ['/mnt/c/ReadBouncerToml/build/main/Release/Bacillus_subtilis_complete_genome.fasta','/mnt/c/ReadBouncerToml/build/main/Release/Enterococcus_faecalis_complete_genome.fasta'] +deplete_files = ['/mnt/c/ReadBouncerToml/build/main/Release/Bacillus_subtilis_complete_genome.ibf','/mnt/c/ReadBouncerToml/build/main/Release/Enterococcus_faecalis_complete_genome.fasta'] read_files = ['/mnt/c/ReadBouncerToml/build/main/Release/Listeria.fastq','/mnt/c/ReadBouncerToml/build/main/Release/SaccharomycesReal.fasta'] exp_seq_error_rate = 0.1 #(unsigned float between 0 and 1 default 0.1) chunk_length = 350 #(unsigned integer with default 250) @@ -17,12 +17,12 @@ max_chunks = 1 #(unsigned integer with default 5) [MinKNOW] host = "localhost" #(ip address or name of the computer hosting MinKNOW) -port = "9501" #(port number used fo grpc communication by by MinKNOW instance) -flowcell = "MS00000" #(name of the flowcell used) +port = "9501" #(port number used fo grpc communication by by MinKNOW instance) +flowcell = "MS00000" #(name of the flowcell used) [Basecaller] -caller = "DeepNano" #DeepNano/Guppy (default is DeepNano) +caller = "DeepNano" #DeepNano/Guppy (default is DeepNano) host = "127.0.0.1"#(ip address or name of the computer hosting Guppy Basecall Server) port = "9501" #(port number on which the basecall server is running on the host) threads = 3 # (unsigned integer with default 3) diff --git a/src/config/configReader.cpp b/src/config/configReader.cpp index 330c668..f404ae3 100644 --- a/src/config/configReader.cpp +++ b/src/config/configReader.cpp @@ -262,7 +262,7 @@ void ConfigReader::readIBF(){ if (!std::filesystem::exists(file)) { // TODO: write message in log file - throw ConfigReader("[Error] The following target file does not exist: " + file.string()); + throw ConfigReaderException("[Error] The following target file does not exist: " + file.string()); } } @@ -271,7 +271,7 @@ void ConfigReader::readIBF(){ if (!std::filesystem::exists(file)) { // TODO: write message in log file - throw ConfigReader("[Error] The following deplete file does not exist: " + file.string()); + throw ConfigReaderException("[Error] The following deplete file does not exist: " + file.string()); } } @@ -294,7 +294,7 @@ void ConfigReader::readIBF(){ if (!std::filesystem::exists(rf)) { // TODO: write message in log file - throw ConfigReader("[Error] The following read file does not exist: " + rf.string()); + throw ConfigReaderException("[Error] The following read file does not exist: " + rf.string()); } else { diff --git a/src/main/main.cpp b/src/main/main.cpp index 9ed225e..32b81f5 100644 --- a/src/main/main.cpp +++ b/src/main/main.cpp @@ -330,7 +330,6 @@ std::vector getIBF (ConfigReader config, bool targetFilter, interleave::FilterStats stats = tf.load_filter(DepleteIBFconfig); filter.filter = std::move(tf.getFilter()); interleave::print_load_stats(stats); - //deplete = true; } catch (interleave::ParseIBFFileException& e) { @@ -353,9 +352,7 @@ std::vector getIBF (ConfigReader config, bool targetFilter, out /= deplete_file.filename(); out.replace_extension("ibf"); ibf_build_parser params = { out.string(), deplete_file.string(), false, false, config.IBF_Parsed.size_k, config.IBF_Parsed.threads, config.IBF_Parsed.fragment_size, 0, true }; - //tf = buildIBF(params); filter.filter = buildIBF(params); - //deplete = true; } catch (std::out_of_range& e) @@ -382,7 +379,7 @@ std::vector getIBF (ConfigReader config, bool targetFilter, interleave::FilterStats stats = tf.load_filter(TargetIBFconfig); filter.filter = std::move(tf.getFilter()); interleave::print_load_stats(stats); - //target = true; + } catch (interleave::ParseIBFFileException& e) { @@ -405,7 +402,6 @@ std::vector getIBF (ConfigReader config, bool targetFilter, out /= target_file.filename(); out.replace_extension("ibf"); ibf_build_parser params = { out.string(), target_file.string(), false, false, config.IBF_Parsed.size_k, config.IBF_Parsed.threads, config.IBF_Parsed.fragment_size, 0, true }; - //tf = buildIBF(params); filter.filter = buildIBF(params); } @@ -444,7 +440,7 @@ void run_program(ConfigReader config){ if (!std::filesystem::exists(file)) { // TODO: write message in log file - throw ConfigReader("[Error] The following target file does not exist: " + file.string()); + throw ConfigReaderException("[Error] The following target file does not exist: " + file.string()); } if (!config.filterException(file)) @@ -459,6 +455,11 @@ void run_program(ConfigReader config){ buildIBF(params); std::cout <<'\n'; } + + else + { + std::cout<< "[INFO] The following target file is a IBF file: " << file.string() << '\n'; + } } for (std::filesystem::path file : config.IBF_Parsed.deplete_files) @@ -466,7 +467,7 @@ void run_program(ConfigReader config){ if (!std::filesystem::exists(file)) { // TODO: write message in log file - throw ConfigReader("[Error] The following target file does not exist: " + file.string()); + throw ConfigReaderException("[Error] The following deplete file does not exist: " + file.string()); } if (!config.filterException(file)) @@ -480,7 +481,11 @@ void run_program(ConfigReader config){ ibf_build_parser params = { out.string(), file.string(), false, false, config.IBF_Parsed.size_k, config.IBF_Parsed.threads, config.IBF_Parsed.fragment_size, 0, true }; buildIBF(params); std::cout <<'\n'; - } + } + else + { + std::cout<< "[INFO] The following deplete file is a IBF file: " << file.string() << '\n'; + } } } From 3c50e0d46ccb51d0664d4371f6f75fbdb1fc26ae Mon Sep 17 00:00:00 2001 From: lutfia95 Date: Wed, 2 Feb 2022 00:32:20 +0100 Subject: [PATCH 7/8] remove unused headers --- src/config/configReader.hpp | 2 +- src/main/main.cpp | 4 +--- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/src/config/configReader.hpp b/src/config/configReader.hpp index dea015b..1edf6e0 100644 --- a/src/config/configReader.hpp +++ b/src/config/configReader.hpp @@ -8,7 +8,7 @@ #include #include #include "../toml11/toml.hpp" -#include "IBF.hpp" + diff --git a/src/main/main.cpp b/src/main/main.cpp index 32b81f5..69fcde0 100644 --- a/src/main/main.cpp +++ b/src/main/main.cpp @@ -28,10 +28,8 @@ #include "IBF.hpp" // tomel parser -//#include "parsertoml.hpp" #include "configReader.hpp" -// toml library -#include "../toml11/toml.hpp" + // Basecalling library #if !defined(ARM_BUILD) From 0f719d319f59ae22aea16c6f774c4a673d0bacee Mon Sep 17 00:00:00 2001 From: Jens-Uwe Ulrich Date: Thu, 3 Feb 2022 15:53:44 +0100 Subject: [PATCH 8/8] minor bug fixing around toml parsing --- src/config/configReader.cpp | 144 ++++++++++++++++++++++++++---------- src/config/configReader.hpp | 5 +- src/main/main.cpp | 106 +++++++++++++++----------- src/minknow/Data.cpp | 4 +- src/minknow/Data.hpp | 6 +- 5 files changed, 176 insertions(+), 89 deletions(-) diff --git a/src/config/configReader.cpp b/src/config/configReader.cpp index f404ae3..d61de4c 100644 --- a/src/config/configReader.cpp +++ b/src/config/configReader.cpp @@ -35,8 +35,16 @@ ConfigReader::ConfigReader(std::string const tomlFile) { this->tomlInputFile = tomlFile; std::ifstream tomlFileReadBouncer(tomlInputFile, std::ios_base::binary); - this->configuration_ = toml::parse(tomlFileReadBouncer, /*optional -> */ tomlInputFile); + try + { + this->configuration_ = toml::parse(tomlFileReadBouncer, /*optional -> */ tomlInputFile); + } + catch (toml::exception& e) + { + throw ConfigReaderException(e.what()); + } + // TODO: throw ConfigReaderException if (!tomlFileReadBouncer.is_open()) { std::cerr << "Error parsing the toml file: " << tomlInputFile << '\n'; } @@ -72,13 +80,11 @@ void ConfigReader::parse_general(){ } catch (const toml::exception& e) { - std::cerr << "Could not parse " << tomlInputFile << std::endl; - std::cerr << e.what() << std::endl; + throw ConfigReaderException(e.what()); } catch (std::out_of_range& e) { - std::cerr << "Error in " << tomlInputFile << std::endl; - std::cerr << e.what() << std::endl; + throw ConfigReaderException(e.what()); } } @@ -154,12 +160,13 @@ void ConfigReader::createLog(std::string& usage){ { "threads", IBF_Parsed.threads }, { "fragment-size", IBF_Parsed.fragment_size}, { "exp_seq_error_rate", IBF_Parsed.error_rate}, - { "chunk_length", IBF_Parsed.chunk_length}, - { "max_chunks", IBF_Parsed.max_chunks}, {"host" , MinKNOW_Parsed.host}, {"port" , MinKNOW_Parsed.port}, {"flowcell" , MinKNOW_Parsed.flowcell}, + {"MinChannel", MinKNOW_Parsed.minChannel}, + {"MaxChannel", MinKNOW_Parsed.maxChannel}, {"caller", Basecaller_Parsed.caller}, + {"GuppyConfig", Basecaller_Parsed.guppy_config}, {"host", Basecaller_Parsed.guppy_host}, {"port", Basecaller_Parsed.guppy_port}, {"threads", Basecaller_Parsed.basecall_threads}, @@ -238,7 +245,7 @@ void ConfigReader::readIBF(){ catch (std::out_of_range& e) { // TODO: write message in log file - throw ConfigReader(e.what()); + throw ConfigReaderException(e.what()); } try @@ -246,35 +253,59 @@ void ConfigReader::readIBF(){ std::vector tmp = toml::find>(this->configuration_, "IBF", "target_files"); for (std::string s : tmp) IBF_Parsed.target_files.emplace_back((std::filesystem::path(s)).make_preferred()); - tmp.clear(); - tmp = toml::find>(this->configuration_, "IBF", "deplete_files"); - for (std::string s : tmp) - IBF_Parsed.deplete_files.emplace_back((std::filesystem::path(s)).make_preferred()); } catch (toml::exception& e) { - throw ConfigReader(e.what()); - + throw ConfigReaderException(e.what()); } + catch (std::out_of_range& e) + { + // Do nothing + // sometimes we only want to specify deplete files + } + + + try + { + std::vector tmp = toml::find>(this->configuration_, "IBF", "deplete_files"); + for (std::string s : tmp) + IBF_Parsed.deplete_files.emplace_back((std::filesystem::path(s)).make_preferred()); + } + catch (toml::exception& e) + { + throw ConfigReaderException(e.what()); + } + catch (std::out_of_range& e) + { + // Do nothing + // sometimes we only want to specify target files + } + + if (!(this->usage.compare("test") == 0)) + { + if (IBF_Parsed.deplete_files.size() + IBF_Parsed.target_files.size() == 0) + { + throw ConfigReaderException("[Error] At least one target or deplete file has to be specified!"); + } + } for (std::filesystem::path file : IBF_Parsed.target_files) + { + if (!std::filesystem::exists(file)) { - if (!std::filesystem::exists(file)) - { - // TODO: write message in log file - throw ConfigReaderException("[Error] The following target file does not exist: " + file.string()); - } + // TODO: write message in log file + throw ConfigReaderException("[Error] The following target file does not exist: " + file.string()); } + } - for (std::filesystem::path file : IBF_Parsed.deplete_files) + for (std::filesystem::path file : IBF_Parsed.deplete_files) + { + if (!std::filesystem::exists(file)) { - if (!std::filesystem::exists(file)) - { - // TODO: write message in log file - throw ConfigReaderException("[Error] The following deplete file does not exist: " + file.string()); - } + // TODO: write message in log file + throw ConfigReaderException("[Error] The following deplete file does not exist: " + file.string()); } - + } try { @@ -282,9 +313,15 @@ void ConfigReader::readIBF(){ } catch (toml::exception& e) { - - throw ConfigReader(e.what()); + throw ConfigReaderException(e.what()); } + catch (std::out_of_range& e) + { + if (this->usage.compare("classify") == 0) + { + throw ConfigReaderException(e.what()); + } + } for (std::string file : rf_tmp) { @@ -313,15 +350,29 @@ void ConfigReader::readIBF(){ void ConfigReader::readMinKNOW(){ + toml::value MinKNOW; + try + { + MinKNOW = toml::find(this->configuration_, "MinKNOW"); + } + catch (std::out_of_range& e) + { + // Do nothing and use default values + return; + } try { - toml::value MinKNOW = toml::find(this->configuration_, "MinKNOW"); + MinKNOW_Parsed.flowcell = toml::find(MinKNOW, "flowcell"); MinKNOW_Parsed.host = toml::find_or(MinKNOW, "host", "127.0.0.1"); MinKNOW_Parsed.port = toml::find_or(MinKNOW, "port", "9501"); - //channels = toml::find_or>(MinKNOW, "channels", std::vector{}); - + std::vector channels = toml::find_or>(MinKNOW, "channels", std::vector{}); + if (channels.size() == 2) + { + MinKNOW_Parsed.minChannel = (uint16_t) channels[0]; + MinKNOW_Parsed.maxChannel = (uint16_t) channels[1]; + } } catch (std::out_of_range& e) { @@ -338,14 +389,24 @@ void ConfigReader::readMinKNOW(){ void ConfigReader::readBasecaller(){ + toml::value basecaller; + try + { + basecaller = toml::find(this->configuration_, "Basecaller"); + } + catch (std::out_of_range& e) + { + // Do nothing and use default values + return; + } + try { - toml::value basecaller = toml::find(this->configuration_, "Basecaller"); Basecaller_Parsed.caller = toml::find_or(basecaller, "caller", "DeepNano"); - Basecaller_Parsed.guppy_host = toml::find(basecaller, "host"); + Basecaller_Parsed.guppy_host = toml::find_or(basecaller, "host", "127.0.0.1"); Basecaller_Parsed.guppy_port = toml::find_or(basecaller, "port", "5555"); Basecaller_Parsed.basecall_threads = toml::find_or(basecaller, "threads", 3); - //Basecaller_Parsed.guppy_config = toml::find_or(basecaller, "config", "dna_r9.4.1_450bps_fast"); + Basecaller_Parsed.guppy_config = toml::find_or(basecaller, "config", "dna_r9.4.1_450bps_fast"); } catch (std::out_of_range& e) { @@ -362,10 +423,15 @@ void ConfigReader::readBasecaller(){ void ConfigReader::parse(){ - - ConfigReader::readIBF(); - ConfigReader::readMinKNOW(); - ConfigReader::readBasecaller(); - + try + { + ConfigReader::readIBF(); + ConfigReader::readMinKNOW(); + ConfigReader::readBasecaller(); + } + catch (ConfigReaderException& e) + { + throw; + } } diff --git a/src/config/configReader.hpp b/src/config/configReader.hpp index 1edf6e0..857279c 100644 --- a/src/config/configReader.hpp +++ b/src/config/configReader.hpp @@ -70,8 +70,8 @@ class ConfigReader { std::string host = "127.0.0.1"; std::string port = "9501"; std::string flowcell{}; - uint8_t minChannel = 1; - uint8_t maxChannel = 512; + uint16_t minChannel = 1; + uint16_t maxChannel = 512; }MinKNOW_Parsed; struct Basecaller_Params @@ -83,6 +83,7 @@ class ConfigReader { std::string guppy_config = "dna_r9.4.1_450bps_fast"; }Basecaller_Parsed; + ConfigReader() = default; ConfigReader(std::string const); void parse_general(); diff --git a/src/main/main.cpp b/src/main/main.cpp index 69fcde0..146f74f 100644 --- a/src/main/main.cpp +++ b/src/main/main.cpp @@ -359,27 +359,29 @@ std::vector getIBF (ConfigReader config, bool targetFilter, } DepletionFilters.emplace_back(std::move(filter)); } - } + } return DepletionFilters; } - if(targetFilter){ - for (std::filesystem::path target_file : config.IBF_Parsed.target_files) + if(targetFilter) { - interleave::IBFMeta filter{}; - filter.name = target_file.stem().string(); - interleave::IBF tf{}; - interleave::IBFConfig TargetIBFconfig{}; - if (config.filterException(target_file)){ - try + for (std::filesystem::path target_file : config.IBF_Parsed.target_files) + { + interleave::IBFMeta filter{}; + filter.name = target_file.stem().string(); + interleave::IBF tf{}; + interleave::IBFConfig TargetIBFconfig{}; + if (config.filterException(target_file)) { - TargetIBFconfig.input_filter_file = target_file.string(); - interleave::FilterStats stats = tf.load_filter(TargetIBFconfig); - filter.filter = std::move(tf.getFilter()); - interleave::print_load_stats(stats); + try + { + TargetIBFconfig.input_filter_file = target_file.string(); + interleave::FilterStats stats = tf.load_filter(TargetIBFconfig); + filter.filter = std::move(tf.getFilter()); + interleave::print_load_stats(stats); - } - catch (interleave::ParseIBFFileException& e) + } + catch (interleave::ParseIBFFileException& e) { nanolive_logger->error("Error building IBF for target file using the following parameters"); nanolive_logger->error("Depletion IBF file : " + target_file.string()); @@ -388,34 +390,34 @@ std::vector getIBF (ConfigReader config, bool targetFilter, throw; } - TargetFilters.emplace_back(std::move(filter)); - } + TargetFilters.emplace_back(std::move(filter)); + } - else - { - try + else { - //ibf_build_parser params; - std::filesystem::path out = std::filesystem::path(config.output_dir); - out /= target_file.filename(); - out.replace_extension("ibf"); - ibf_build_parser params = { out.string(), target_file.string(), false, false, config.IBF_Parsed.size_k, config.IBF_Parsed.threads, config.IBF_Parsed.fragment_size, 0, true }; - filter.filter = buildIBF(params); - } + try + { + //ibf_build_parser params; + std::filesystem::path out = std::filesystem::path(config.output_dir); + out /= target_file.filename(); + out.replace_extension("ibf"); + ibf_build_parser params = { out.string(), target_file.string(), false, false, config.IBF_Parsed.size_k, config.IBF_Parsed.threads, config.IBF_Parsed.fragment_size, 0, true }; + filter.filter = buildIBF(params); + } - catch (std::out_of_range& e) - { - throw ConfigReaderException(e.what()); - } + catch (std::out_of_range& e) + { + throw ConfigReaderException(e.what()); + } - TargetFilters.emplace_back(std::move(filter)); - } - } + TargetFilters.emplace_back(std::move(filter)); + } + } return TargetFilters; } - } +} /** * Run ReadBouncer using the provided parameters in config.toml file @@ -424,8 +426,14 @@ std::vector getIBF (ConfigReader config, bool targetFilter, void run_program(ConfigReader config){ - - config.parse(); // parse all params from the different Moduls (one time parse and stores in struct) + try + { + config.parse(); // parse all params from the different Moduls (one time parse and stores in struct) + } + catch (ConfigReaderException& e) + { + std::cerr << e.what() << std::endl; + } std::string subcommand = config.usage; if (subcommand == "build") { @@ -438,7 +446,8 @@ void run_program(ConfigReader config){ if (!std::filesystem::exists(file)) { // TODO: write message in log file - throw ConfigReaderException("[Error] The following target file does not exist: " + file.string()); + std::cerr << "[Error] The following target file does not exist: " << file.string() << std::endl; + return; } if (!config.filterException(file)) @@ -456,7 +465,7 @@ void run_program(ConfigReader config){ else { - std::cout<< "[INFO] The following target file is a IBF file: " << file.string() << '\n'; + std::cout<< "[INFO] The following target file is an IBF file: " << file.string() << '\n'; } } @@ -465,7 +474,8 @@ void run_program(ConfigReader config){ if (!std::filesystem::exists(file)) { // TODO: write message in log file - throw ConfigReaderException("[Error] The following deplete file does not exist: " + file.string()); + std::cerr << "[Error] The following deplete file does not exist: " << file.string() << std::endl; + return; } if (!config.filterException(file)) @@ -482,7 +492,7 @@ void run_program(ConfigReader config){ } else { - std::cout<< "[INFO] The following deplete file is a IBF file: " << file.string() << '\n'; + std::cout<< "[INFO] The following deplete file is an IBF file: " << file.string() << '\n'; } } @@ -557,8 +567,18 @@ int main(int argc, char const **argv) NanoLiveRoot = binPath.substr(0, binPath.find("bin")); std::string const tomlFile = argv[1]; - ConfigReader config(tomlFile); - config.parse_general(); + ConfigReader config{}; + try + { + config = ConfigReader(tomlFile); + config.parse_general(); + } + catch (ConfigReaderException& e) + { + std::cerr << "Error in " << tomlFile << std::endl; + std::cerr << e.what() << std::endl; + return 1; + } initializeLogger(config); run_program(config); diff --git a/src/minknow/Data.cpp b/src/minknow/Data.cpp index fa14c66..f2086ed 100644 --- a/src/minknow/Data.cpp +++ b/src/minknow/Data.cpp @@ -305,8 +305,8 @@ namespace readuntil // we want to receive signals from all 512 channels of the MinION // has to be changed in case Flongle or PromethION is used // TODO: set last channel based on device type - setup->set_first_channel((uint32) minChannel); - setup->set_last_channel((uint32) maxChannel); + setup->set_first_channel((int)minChannel); + setup->set_last_channel((int)maxChannel); // we only want to receive calibrated data setup->set_raw_data_type(GetLiveReadsRequest_RawDataType_CALIBRATED); diff --git a/src/minknow/Data.hpp b/src/minknow/Data.hpp index 82583a6..0940df0 100644 --- a/src/minknow/Data.hpp +++ b/src/minknow/Data.hpp @@ -79,8 +79,8 @@ namespace readuntil bool runs = false; bool unblock_all = false; uint8_t actionBatchSize = 50; - uint8_t minChannel = 1; - uint8_t maxChannel = 512; + uint16_t minChannel = 1; + uint16_t maxChannel = 512; void addUnblockAction(GetLiveReadsRequest_Actions* actionList, uint32_t channelNr, uint32_t readNr, const double unblock_duration); void addStopReceivingDataAction(GetLiveReadsRequest_Actions* actionList, uint32_t channelNr, uint32_t readNr); @@ -151,7 +151,7 @@ namespace readuntil unblock_all = unblock; } - inline void setChannels(const uint8_t minC, const uint8_t maxC) + inline void setChannels(const uint16_t minC, const uint16_t maxC) { minChannel = minC; maxChannel = maxC;