From f3a091fa882f18a7f4734df0d878888e4e076b18 Mon Sep 17 00:00:00 2001 From: Dinghua Li Date: Mon, 25 Mar 2019 23:38:30 -0700 Subject: [PATCH 1/5] enable static build; but static link of pthread is dirty --- CMakeLists.txt | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 439adb1..ae02453 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -6,6 +6,7 @@ set(CMAKE_CXX_STANDARD 11) option(USE_POPCNT "Use popcnt hardware instructions" ON) option(USE_BMI2 "Use bmi2 hardware instructions" ON) option(COVERAGE "Generate coverage report" OFF) +option(STATIC_BUILD "Build static executation" OFF) include_directories(src) include_directories(src/sparsepp) @@ -27,17 +28,20 @@ LIST(APPEND OTHER_SOURCE src/sequence_manager.cpp ) +if (STATIC_BUILD) + set(CMAKE_FIND_LIBRARY_SUFFIXES ".a") +endif(STATIC_BUILD) + find_package(ZLIB REQUIRED) find_package(OpenMP REQUIRED) -#set(STATIC_LIBS -lz -ldl -lpthread -lgomp -static-libstdc++ -static-libgcc -static) - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DXXH_INLINE_ALL -D__STDC_FORMAT_MACROS") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wno-unused-function") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fprefetch-loop-arrays -funroll-loops") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D__XFILE__='\"$(subst ${CMAKE_SOURCE_DIR}/,,$(abspath $<))\"'") +set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${ZLIB_EXE_LINKER_FLAGS} ${OpenMP_EXE_LINKER_FLAGS}") if (USE_POPCNT) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mpopcnt") @@ -49,16 +53,26 @@ endif (USE_BMI2) if (COVERAGE) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} --coverage") + set(COV_PY "coverage run") endif(COVERAGE) -set(CMAKE_CXX_FLAGS_DEBUG "-g -ggdb -O1") +set(CMAKE_CXX_FLAGS_DEBUG "-g -ggdb -O0") message(STATUS "Build type: ${CMAKE_BUILD_TYPE}: ${CMAKE_CXX_FLAGS}") add_executable(megahit_core ${OTHER_SOURCE} ${ASMBL_SOURCE} ${IDBA_SOURCE} ${SDBG_SOURCE} 
${LCASM_SOURCE} ${CX1_SOURCE} ${TOOLKIT_SOURCE}) -target_link_libraries(megahit_core ${ZLIB_LIBRARIES} ${OpenMP_CXX_LIBRARIES}) + + +if (STATIC_BUILD) + # TODO too dirty + set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,--whole-archive -lpthread -Wl,--no-whole-archive -static") + set_target_properties(megahit_core PROPERTIES LINK_SEARCH_START_STATIC ON) + set_target_properties(megahit_core PROPERTIES LINK_SEARCH_END_STATIC ON) +endif(STATIC_BUILD) + +target_link_libraries(megahit_core ${ZLIB_LIBRARIES}) add_custom_target( megahit @@ -71,7 +85,6 @@ add_custom_target( simple_test COMMAND rm -rf megahit_out* COMMAND ./megahit --12 ${TEST_DATA}/r1.il.fa.gz,${TEST_DATA}/r2.il.fa.bz2 -1 ${TEST_DATA}/r3_1.fa -2 ${TEST_DATA}/r3_2.fa -r ${TEST_DATA}/r4.fa -o megahit_out1 -t 2 --keep-tmp-files - COMMAND rm -rf megahit_out* COMMAND ./megahit --12 ${TEST_DATA}/r1.il.fa.gz,${TEST_DATA}/r2.il.fa.bz2 -1 ${TEST_DATA}/r3_1.fa -2 ${TEST_DATA}/r3_2.fa -r ${TEST_DATA}/r4.fa -o megahit_out2 -t 2 --keep-tmp-files --kmin-1pass ) From efee59835a6d1f59c245917810dbeb33c4ecc45d Mon Sep 17 00:00:00 2001 From: Dinghua Li Date: Wed, 27 Mar 2019 00:02:42 -0700 Subject: [PATCH 2/5] removed unused idba functions --- src/idba/contig_graph.cpp | 590 -------------------------------------- src/idba/contig_graph.h | 44 --- src/idba/contig_info.cpp | 62 ---- src/idba/contig_info.h | 10 +- src/idba/hash_graph.cpp | 347 +--------------------- src/idba/hash_graph.h | 67 +---- src/idba/sequence.cpp | 47 --- src/idba/sequence.h | 5 - 8 files changed, 8 insertions(+), 1164 deletions(-) delete mode 100644 src/idba/contig_info.cpp diff --git a/src/idba/contig_graph.cpp b/src/idba/contig_graph.cpp index f110111..2c813ec 100644 --- a/src/idba/contig_graph.cpp +++ b/src/idba/contig_graph.cpp @@ -22,17 +22,6 @@ using namespace std; -double ContigGraph::Binormial(int n, int m) -{ - double product = 1; - for (int i = 1; i <= n; ++i) - product *= i; - for (int i = 1; i <= m; ++i) - product /= i; - return 
product; -} - - void ContigGraph::Initialize(const deque &contigs, const deque &contig_infos) { vertices_.clear(); @@ -106,113 +95,12 @@ void ContigGraph::RefreshEdges() num_edges_ = total_degree / 2; } -void ContigGraph::AddAllEdges() -{ - for (int64_t i = 0; i < (int64_t)vertices_.size(); ++i) - { - vertices_[i].in_edges() = 15; - vertices_[i].out_edges() = 15; - } - RefreshEdges(); -} - -void ContigGraph::RemoveAllEdges() -{ - for (int64_t i = 0; i < (int64_t)vertices_.size(); ++i) - { - vertices_[i].in_edges() = 0; - vertices_[i].out_edges() = 0; - } - RefreshEdges(); -} - void ContigGraph::ClearStatus() { for (int64_t i = 0; i < (int64_t)vertices_.size(); ++i) vertices_[i].status().clear(); } -void ContigGraph::MergeSimilarPath() -{ - for (int64_t i = 0; i < (int64_t)vertices_.size(); ++i) - { - for (int strand = 0; strand < 2; ++strand) - { - ContigGraphVertexAdaptor current(&vertices_[i], strand); - - if (current.status().IsDead()) - continue; - - if (current.out_edges().size() > 1) - { - deque neighbors; - GetNeighbors(current, neighbors); - sort(neighbors.begin(), neighbors.end(), CompareContigCoverage); - - for (unsigned j = 0; j < neighbors.size(); ++j) - { - if (neighbors[j].status().IsDead()) - continue; - - for (unsigned k = j+1; k < neighbors.size(); ++k) - { - if (!neighbors[k].status().IsDead() - && neighbors[j].in_edges() == neighbors[k].in_edges() - && neighbors[j].out_edges() == neighbors[k].out_edges() - && neighbors[j].begin_kmer(kmer_size_-1) == neighbors[k].begin_kmer(kmer_size_-1) - && neighbors[j].end_kmer(kmer_size_-1) == neighbors[k].end_kmer(kmer_size_-1) - && GetSimilarity(neighbors[j], neighbors[k]) > 0.98) - { - neighbors[k].status().SetDeadFlag(); - } - } - } - } - } - } - Refresh(); - MergeSimplePaths(); - -} - -int64_t ContigGraph::Prune(int min_length) -{ - uint64_t old_num_vertices = vertices_.size(); - - for (int64_t i = 0; i < (int64_t)vertices_.size(); ++i) - { - for (int strand = 0; strand < 2; ++strand) - { - 
ContigGraphVertexAdaptor current(&vertices_[i], strand); - - if (current.status().IsDead()) - continue; - - if (current.out_edges().size() <= 1) - continue; - - int maximum = 0; - int depth = GetDepth(current, kmer_size_ - 1, maximum, min_length + kmer_size_ - 1); - if (depth > min_length + (int)kmer_size_ - 1) - depth = min_length + (int)kmer_size_ - 1; - - deque neighbors; - GetNeighbors(current, neighbors); - for (unsigned j = 0; j < neighbors.size(); ++j) - { - if (neighbors[j].in_edges().size() == 1 - && neighbors[j].out_edges().size() == 0 - && (int)neighbors[j].contig_size() < depth) - neighbors[j].status().SetDeadFlag(); - } - } - } - Refresh(); - MergeSimplePaths(); - - return old_num_vertices - vertices_.size(); -} - int64_t ContigGraph::Trim(int min_length) { uint64_t old_num_vertices = vertices_.size(); @@ -262,29 +150,6 @@ int64_t ContigGraph::Trim(int min_length, double min_cover) return old_num_vertices - vertices_.size(); } -int64_t ContigGraph::RemoveStandAlone(int min_length) -{ - uint64_t old_num_vertices = vertices_.size(); - - for (int64_t i = 0; i < (int64_t)vertices_.size(); ++i) - { - if (vertices_[i].contig().size() == kmer_size_ - && vertices_[i].contig().IsPalindrome()) - continue; - - if ((vertices_[i].in_edges().empty() && vertices_[i].out_edges().empty()) - && vertices_[i].contig().size() < min_length + kmer_size_ - 1 - ) - { - vertices_[i].status().SetDeadFlag(); - } - } - Refresh(); - MergeSimplePaths(); - - return old_num_vertices - vertices_.size(); -} - int64_t ContigGraph::RemoveDeadEnd(int min_length) { uint64_t num_deadend = 0; @@ -301,22 +166,6 @@ int64_t ContigGraph::RemoveDeadEnd(int min_length) return num_deadend; } -int64_t ContigGraph::RemoveDeadEnd(int min_length, double min_cover) -{ - uint64_t num_deadend = 0; - int l = 1; - while (true) - { - l = min(2*l, min_length); - num_deadend += Trim(l, min_cover); - - if (l == min_length) - break; - } - num_deadend += Trim(min_length); - return num_deadend; -} - int64_t 
ContigGraph::RemoveBubble() { deque candidates; @@ -397,46 +246,6 @@ double ContigGraph::IterateCoverage(int min_length, double min_cover, double max return min_cover; } -double ContigGraph::IterateLocalCoverage(int min_length, double ratio, double min_cover, double max_cover, double factor) -{ - in_kmer_count_table_.reserve(vertices_.size()); - - min_cover = min(min_cover, max_cover); - while (true) - { - bool is_changed = RemoveLocalLowCoverage(min_cover, min_length, ratio); - - if (!is_changed) - break; - - if (min_cover >= max_cover) - break; - - min_cover *= factor; - } - return min_cover; -} - -double ContigGraph::IterateComponentCoverage(int min_length, double ratio, double min_cover, double max_cover, double factor, int max_component_size) -{ - in_kmer_count_table_.reserve(vertices_.size()); - - min_cover = min(min_cover, max_cover); - while (true) - { - bool is_changed = RemoveComponentLowCoverage(min_cover, min_length, ratio, max_component_size); - - if (!is_changed) - break; - - if (min_cover >= max_cover) - break; - - min_cover *= factor; - } - return min_cover; -} - bool ContigGraph::RemoveLowCoverage(double min_cover, int min_length) { bool is_changed = false; @@ -465,207 +274,6 @@ bool ContigGraph::RemoveLowCoverage(double min_cover, int min_length) return is_changed; } -bool ContigGraph::RemoveLocalLowCoverage(double min_cover, int min_length, double ratio) -{ - int region_length = 1000; - //int region_length = 100; - bool is_changed = false; - - for (int64_t i = 0; i < (int64_t)vertices_.size(); ++i) - { - ContigGraphVertexAdaptor current(&vertices_[i]); - - if (current.contig_size() < min_length + kmer_size_ - 1 - && ((current.in_edges().size() <= 1 && current.out_edges().size() <= 1) - || current.in_edges().size() == 0 || current.out_edges().size() == 0) - ) - { - if (is_changed && current.coverage() > min_cover) - continue; - - double mean = LocalCoverage(current, region_length); - double threshold = min_cover; - if (min_cover < mean * ratio) - 
is_changed = true; - else - threshold = mean * ratio; - - if (current.coverage() < threshold) - { - is_changed = true; - current.status().SetDeadFlag(); - } - } - } - - Refresh(); - //Trim(min_length); - MergeSimplePaths(); - - return is_changed; -} - -bool ContigGraph::RemoveComponentLowCoverage(double min_cover, int min_length, double ratio, int max_component_size) -{ - int region_length = 300; - deque > components; - deque component_strings; - GetComponents(components, component_strings); - - deque average_coverage(components.size()); - deque component_id_table(vertices_.size()); - - for (int64_t i = 0; i < (int64_t)components.size(); ++i) - { - double total_kmer_count = 0; - double total = 0; - - for (unsigned j = 0; j < components[i].size(); ++j) - { - total_kmer_count += components[i][j].kmer_count(); - total += components[i][j].contig_size() - kmer_size_ + 1; - component_id_table[components[i][j].id()] = i; - } - - average_coverage[i] = total_kmer_count / total; - } - - bool is_changed = false; - //int max_component_size = 30; - - for (int64_t i = 0; i < (int64_t)vertices_.size(); ++i) - { - ContigGraphVertexAdaptor current(&vertices_[i]); - int id = component_id_table[current.id()]; - - if (components[id].size() <= 10) - continue; - - if (current.contig_size() < min_length + kmer_size_ - 1 - && (current.in_edges().size() <= 1 && current.out_edges().size() <= 1) - //|| current.in_edges().size() == 0 || current.out_edges().size() == 0) - ) - { - if (is_changed && current.coverage() > min_cover) - continue; - - double threshold = min_cover; - double mean = LocalCoverage(current, region_length); - //double mean = average_coverage[id]; - if (min_cover < ratio * mean || ((int)components[id].size() > max_component_size && min_cover < average_coverage[id])) - is_changed = true; - else - threshold = ratio * mean; - - if (current.coverage() < threshold - || ((int)components[id].size() > max_component_size && current.coverage() < average_coverage[id])) - { - 
is_changed = true; - current.status().SetDeadFlag(); - } - } - } - - Refresh(); - MergeSimplePaths(); - - return is_changed; -} - -double ContigGraph::LocalCoverage(ContigGraphVertexAdaptor current, int region_length) -{ - double num_count = 0; - int num_kmer = 0; - LocalCoverageSingle(current, region_length, num_count, num_kmer); - LocalCoverageSingle(current.ReverseComplement(), region_length, num_count, num_kmer); - - if (num_kmer == 0) - //return 1e100; - return 0; - else - return num_count / num_kmer; -} - -double ContigGraph::LocalCoverageSingle(ContigGraphVertexAdaptor current, int region_length, double &total_count, int &total_kmer) -{ - map visited; - deque qu; - qu.push_back(current); - visited[current.id()] = 0; - - int index = 0; - int num_added = 0; - int num_count = 0; - int num_kmer = 0; - while (index < (int)qu.size()) - { - current = qu[index++]; - - if (num_added >= 4 * region_length) - break; - - if (visited.size() > 32) - break; - - if (visited[current.id()] >= region_length) - continue; - - int dist = visited[current.id()]; - - for (int x = 0; x < 4; ++x) - { - if (current.out_edges()[x]) - { - ContigGraphVertexAdaptor next = GetNeighbor(current, x); - if (visited.find(next.id()) == visited.end()) - { - visited[next.id()] = dist + next.num_kmer(); - qu.push_back(next); - - if ((int)next.num_kmer() + dist > region_length) - { - if ((int)next.num_kmer() < region_length) - { - num_count += (int64_t)next.kmer_count() * (region_length - dist) / next.num_kmer(); - num_kmer += region_length - dist; - num_added += region_length - dist; - } - else - { - IdbaKmer begin = next.begin_kmer(kmer_size_); - if (in_kmer_count_table_.find(begin) == in_kmer_count_table_.end()) - { - int in_kmer_count = 0; - for (int i = 0; i < region_length; ++i) - in_kmer_count += next.get_count(i); - in_kmer_count_table_[begin] = in_kmer_count; - } - - num_count += (int64_t)in_kmer_count_table_[begin] * (region_length - dist) / region_length; - num_kmer += region_length - dist; 
- num_added += region_length - dist; - } - } - else - { - num_count += next.kmer_count(); - num_kmer += next.num_kmer(); - num_added += next.num_kmer(); - } - } - } - } - } - - total_count += num_count; - total_kmer += num_kmer; - - if (num_kmer == 0) - return 0; - else - return num_count * 1.0 / num_kmer; -} - void ContigGraph::MergeSimplePaths() { deque contigs; @@ -699,8 +307,6 @@ int64_t ContigGraph::Assemble(deque &contigs, deque &conti } } - //cout << "palindrome " << contigs.size() << endl; - for (int64_t i = 0; i < (int64_t)vertices_.size(); ++i) { if (!vertices_[i].status().Lock(0)) @@ -787,7 +393,6 @@ bool ContigGraph::IsConverged(ContigGraphVertexAdaptor current) reachable[search_node.node] |= (1 << search_node.label); - //cout << (reachable[search_node.node] == current.out_edges()) << " " << reachable[search_node.node] << " " << (int)current.out_edges() << endl; if (reachable[search_node.node] == (int)current.out_edges()) { return true; @@ -886,20 +491,6 @@ int64_t ContigGraph::SplitBranches() return count; } -void ContigGraph::Decomposite() -{ - int64_t last = 0; - for (int i = 0; i < 100; ++i) - { - int64_t split = SplitBranches(); - //cout << split << " " << 2*vertices_.size() << endl; - - if (last == split) - break; - last = split; - } -} - void ContigGraph::GetComponents(deque > &components, deque &component_strings) { components.clear(); @@ -960,98 +551,6 @@ void ContigGraph::GetComponents(deque > &compone ClearStatus(); } -void ContigGraph::GetConsensus(deque &contigs) -{ - deque > components; - deque component_strings; - - GetComponents(components, component_strings); - for (unsigned i = 0; i < components.size(); ++i) - { - ContigGraphVertexAdaptor begin = GetBeginVertexAdaptor(components[i]); - ContigGraphVertexAdaptor end = GetEndVertexAdaptor(components[i]); - - if (begin.is_null() || end.is_null() || !IsValid(components[i])) - { - for (unsigned j = 0; j < components[i].size(); ++j) - contigs.push_back(components[i][j].contig()); - } - else - 
{ - ContigGraphPath path; - FindLongestPath(components[i], path); - Sequence contig; - ContigInfo contig_info; - path.Assemble(contig, contig_info); - contigs.push_back(contig); - } - } -} - -bool ContigGraph::FindPath(ContigGraphVertexAdaptor from, ContigGraphVertexAdaptor to, ContigGraphPath &path) -{ - path.clear(); - map is_used; - map prev; - deque qu; - qu.push_back(from); - prev[from] = ContigGraphVertexAdaptor(NULL); - is_used[from.id()] = true; - - int time = 0; - while (!qu.empty()) - { - if (++time >= 100) - break; - - if (prev.find(to) != prev.end()) - break; - - ContigGraphVertexAdaptor current = qu.front(); - qu.pop_front(); - - deque neighbors; - GetNeighbors(current, neighbors); - for (unsigned i = 0; i < neighbors.size(); ++i) - { - ContigGraphVertexAdaptor next = neighbors[i]; - //if (prev.find(next) == prev.end()) - if (!is_used[next.id()]) - { - is_used[next.id()] = true; - prev[next] = current; - qu.push_back(next); - } - } - } - - if (prev.find(to) != prev.end()) - { - deque tmp; - tmp.push_back(to); - while (!prev[tmp.back()].is_null()) - tmp.push_back(prev[tmp.back()]); - reverse(tmp.begin(), tmp.end()); - for (unsigned i = 0; i < tmp.size(); ++i) - path.Append(tmp[i], -kmer_size_ + 1); - return true; - } - else - return false; -} - -void ContigGraph::GetContigs(deque &contigs, deque &contig_infos) -{ - contigs.resize(vertices_.size()); - contig_infos.resize(vertices_.size()); - - for (int64_t i = 0; i < (int64_t)vertices_.size(); ++i) - { - contigs[i] = vertices_[i].contig(); - contig_infos[i] = vertices_[i].contig_info(); - } -} - double ContigGraph::GetSimilarity(const Sequence &a, const Sequence &b) { vector > table; @@ -1123,81 +622,6 @@ bool ContigGraph::CycleDetect(ContigGraphVertexAdaptor current, map &s return false; } -bool ContigGraph::IsValid(deque &component) -{ - ContigGraphVertexAdaptor begin = GetBeginVertexAdaptor(component); - ContigGraphVertexAdaptor end = GetEndVertexAdaptor(component); - - map status; - if 
(CycleDetect(begin, status)) - return false; - - if (status.size() != component.size()) - return false; - - status.clear(); - end.ReverseComplement(); - if (CycleDetect(end, status)) - return false; - - if (status.size() != component.size()) - return false; - - return true; -} - -void ContigGraph::FindLongestPath(deque &component, ContigGraphPath &path) -{ - ContigGraphVertexAdaptor begin = GetBeginVertexAdaptor(component); - ContigGraphVertexAdaptor end = GetEndVertexAdaptor(component); - - deque order; - TopSort(component, order); - - map dist; - map prev; - dist[begin] = 0; - prev[begin] = ContigGraphVertexAdaptor(NULL); - - for (unsigned i = 0; i < order.size(); ++i) - { - ContigGraphVertexAdaptor current = order[i]; - for (int x = 0; x < 4; ++x) - { - if (current.out_edges()[x]) - { - ContigGraphVertexAdaptor next = GetNeighbor(current, x); - int tmp = dist[current] + (int)current.contig_size() - (int)kmer_size_ + 1; - if (current.id() != next.id() && tmp > dist[next]) - { - dist[next] = tmp; - prev[next] = current; - } - } - } - } - - deque v; - v.push_back(end); - - while (!prev[v.back()].is_null()) - v.push_back(prev[v.back()]); - reverse(v.begin(), v.end()); - - path.clear(); - for (unsigned i = 0; i < v.size(); ++i) - path.Append(v[i], -(int)kmer_size_ + 1); -} - -void ContigGraph::TopSort(deque &component, deque &order) -{ - ContigGraphVertexAdaptor begin = GetBeginVertexAdaptor(component); - - map status; - TopSortDFS(order, begin, status); - reverse(order.begin(), order.end()); -} - void ContigGraph::TopSortDFS(deque &order, ContigGraphVertexAdaptor current, map &status) { if (status[current.id()] == 0) @@ -1233,20 +657,6 @@ int ContigGraph::GetDepth(ContigGraphVertexAdaptor current, int depth, int &maxi return min(maximum, min_length); } -double ContigGraph::FindSimilarPath(ContigGraphVertexAdaptor target, ContigGraphVertexAdaptor start) -{ - if (start.status().IsDead() - || target.begin_kmer(kmer_size_-1) != start.begin_kmer(kmer_size_-1) - || 
target.in_edges() != start.in_edges()) - return 0; - - ContigGraphPath path; - path.Append(start, 0); - - int time = 0; - return FindSimilarPath(target, path, time); -} - double ContigGraph::FindSimilarPath(ContigGraphVertexAdaptor target, ContigGraphPath &path, int &time) { if (++time > 100) diff --git a/src/idba/contig_graph.h b/src/idba/contig_graph.h index c519474..fd49ff6 100644 --- a/src/idba/contig_graph.h +++ b/src/idba/contig_graph.h @@ -34,34 +34,11 @@ class ContigGraph explicit ContigGraph(uint32_t kmer_size = 0) : num_edges_(0), kmer_size_(kmer_size) {} - explicit ContigGraph(uint32_t kmer_size, const std::deque &contigs) - : num_edges_(0), kmer_size_(kmer_size) - { Initialize(contigs); } - - explicit ContigGraph(uint32_t kmer_size, const std::deque &contigs, - const std::deque &contig_infos) - : num_edges_(0), kmer_size_(kmer_size) - { Initialize(contigs, contig_infos); } ~ContigGraph() { clear(); } - double Binormial(int n, int m); - void InitializeTable(); - double Threshold(double k, double mean, double sd, double p_false); - - void Initialize(const std::deque &contigs) - { - std::deque contig_infos(contigs.size()); - Initialize(contigs, contig_infos); - } - void Initialize(const std::deque &contigs, const std::deque &contig_infos); - void BuildEdgeCountTable(); - - HashGraph &edge_count_table() { return edge_count_table_; } - const HashGraph &edge_count_table() const { return edge_count_table_; } - void Refresh(); void RefreshVertices(); void RefreshEdges(); @@ -83,31 +60,21 @@ class ContigGraph next.out_edges().Remove(3 - current.contig()[0]); } - void AddAllEdges(); - void RemoveAllEdges(); void ClearStatus(); void MergeSimplePaths(); - void MergeSimilarPath(); - int64_t Prune(int min_length); int64_t Trim(int min_length); int64_t Trim(int min_length, double min_cover); - int64_t RemoveStandAlone(int min_length); int64_t RemoveDeadEnd(int min_length); - int64_t RemoveDeadEnd(int min_length, double min_cover); int64_t RemoveBubble(); double 
IterateCoverage(int min_length, double min_cover, double max_cover, double factor = 1.1); - double IterateLocalCoverage(int min_length, double ratio, double min_cover, double max_cover, double factor = 1.1); - double IterateComponentCoverage(int min_length, double ratio, double min_cover, double max_cover, double factor = 1.1, int max_component_size = 30); - double IterateComponentCoverage2(int min_length, double ratio, double min_cover, double max_cover, double factor = 1.1, int max_component_size = 30); bool RemoveLowCoverage(double min_cover, int min_length); bool RemoveLocalLowCoverage(double min_cover, int min_length, double ratio); bool RemoveComponentLowCoverage(double min_cover, int min_length, double ratio, int max_component_size); - bool RemoveComponentLowCoverage2(double min_cover, int min_length, double ratio, int max_component_size); double LocalCoverage(ContigGraphVertexAdaptor current, int region_length); double LocalCoverageSingle(ContigGraphVertexAdaptor current, int region_length, double &num_count, int &num_kmer); @@ -133,16 +100,7 @@ class ContigGraph bool IsConverged(ContigGraphVertexAdaptor current); int64_t SplitBranches(); - void Decomposite(); void GetComponents(std::deque > &components, std::deque &component_strings); - void GetConsensus(std::deque &consensus); - - bool FindPath(ContigGraphVertexAdaptor from, ContigGraphVertexAdaptor to, ContigGraphPath &path); - - void SortVertices() - { std::sort(vertices_.begin(), vertices_.end(), CompareContigLength); Refresh(); } - - void GetContigs(std::deque &contigs, std::deque &contig_infos); std::deque &vertices() { return vertices_; } const std::deque &vertices() const { return vertices_; } @@ -260,7 +218,6 @@ class ContigGraph void TopSort(std::deque &component, std::deque &order); void TopSortDFS(std::deque &order, ContigGraphVertexAdaptor current, std::map &status); int GetDepth(ContigGraphVertexAdaptor current, int length, int &maximum, int min_length); - double 
FindSimilarPath(ContigGraphVertexAdaptor target, ContigGraphVertexAdaptor start); double FindSimilarPath(ContigGraphVertexAdaptor target, ContigGraphPath &path, int &time); HashMap begin_kmer_map_; @@ -269,7 +226,6 @@ class ContigGraph uint32_t kmer_size_; HashMap in_kmer_count_table_; - HashGraph edge_count_table_; }; #endif diff --git a/src/idba/contig_info.cpp b/src/idba/contig_info.cpp deleted file mode 100644 index 2b9acaa..0000000 --- a/src/idba/contig_info.cpp +++ /dev/null @@ -1,62 +0,0 @@ -#include "contig_info.h" - -#include -#include -#include -#include - -#include "bit_edges.h" - -using namespace std; - - -istream &operator >>(istream &is, ContigInfo &contig_info) -{ - is.read((char *)&contig_info.in_edges_, sizeof(BitEdges)); - is.read((char *)&contig_info.out_edges_, sizeof(BitEdges)); - is.read((char *)&contig_info.kmer_size_, sizeof(uint16_t)); - is.read((char *)&contig_info.kmer_count_, sizeof(uint32_t)); - - int size = 0; - if (!is.read((char *)&size, sizeof(int))) - return is; - - contig_info.counts_.resize(size); - for (int i = 0; i < size; ++i) - is.read((char *)&contig_info.counts_[i], sizeof(SequenceCountUnitType)); - - return is; -} - -ostream &operator <<(ostream &os, const ContigInfo &contig_info) -{ - os.write((char *)&contig_info.in_edges_, sizeof(BitEdges)); - os.write((char *)&contig_info.out_edges_, sizeof(BitEdges)); - os.write((char *)&contig_info.kmer_size_, sizeof(uint16_t)); - os.write((char *)&contig_info.kmer_count_, sizeof(uint32_t)); - - int size = contig_info.counts_.size(); - os.write((char *)&size, sizeof(int)); - for (int i = 0; i < size; ++i) - os.write((char *)&contig_info.counts_[i], sizeof(SequenceCountUnitType)); - - return os; -} - -void ReadContigInfo(const string &filename, deque &contig_infos) -{ - contig_infos.clear(); - ifstream fin(filename.c_str(), ios_base::in | ios_base::binary); - ContigInfo contig_info; - while (fin >> contig_info) - contig_infos.push_back(contig_info); - -} - -void WriteContigInfo(const 
string &filename, const deque &contig_infos) -{ - ofstream fout(filename.c_str(), ios_base::out | ios_base::binary); - for (unsigned i = 0; i < contig_infos.size(); ++i) - fout << contig_infos[i]; -} - diff --git a/src/idba/contig_info.h b/src/idba/contig_info.h index 2560330..f4bdf49 100644 --- a/src/idba/contig_info.h +++ b/src/idba/contig_info.h @@ -30,9 +30,7 @@ class ContigBuilder; */ class ContigInfo { - friend class ContigBuilder; - friend std::istream &operator >>(std::istream &is, ContigInfo &contig_info); - friend std::ostream &operator <<(std::ostream &os, const ContigInfo &contig_info); + friend class ContigBuilder; public: ContigInfo() @@ -103,11 +101,5 @@ namespace std template <> inline void swap(ContigInfo &x, ContigInfo &y) { x.swap(y); } } -std::istream &operator >>(std::istream &is, ContigInfo &contig_info); -std::ostream &operator <<(std::ostream &os, const ContigInfo &contig_info); - -void ReadContigInfo(const std::string &filename, std::deque &contig_infos); -void WriteContigInfo(const std::string &filename, const std::deque &contig_infos); - #endif diff --git a/src/idba/hash_graph.cpp b/src/idba/hash_graph.cpp index 7e5af19..d692dd9 100644 --- a/src/idba/hash_graph.cpp +++ b/src/idba/hash_graph.cpp @@ -92,271 +92,6 @@ int64_t HashGraph::InsertUncountKmers(const Sequence &seq) return num_kmers; } -int64_t HashGraph::InsertInternalKmers(const Sequence &seq, int min_count) -{ - if (seq.size() < kmer_size_) - return 0; - - IdbaKmer kmer(kmer_size_); - int length = 0; - int64_t num_kmers = 0; - deque found_index; - deque found_kmer; - for (uint64_t i = 0; i < seq.size(); ++i) - { - kmer.ShiftAppend(seq[i]); - length = (seq[i] < 4) ? 
length + 1 : 0; - - if (length < (int)kmer_size_) - continue; - - HashGraphVertexAdaptor adaptor = FindVertexAdaptor(kmer); - if (adaptor.is_null()) - continue; - - if (length > (int)kmer_size_ && seq[i-kmer_size_] < 4) - adaptor.in_edges().Add(3 - seq[i-kmer_size_] + 4); - if (i+1 < seq.size() && seq[i+1] < 4) - adaptor.out_edges().Add(seq[i+1] + 4); - - if (adaptor.count() >= min_count) - { - found_index.push_back(i); - found_kmer.push_back(adaptor); - } - } - - deque flags(seq.size(), 0); - for (uint64_t i = 0; i+1 < found_index.size(); ++i) - { - HashGraphVertexAdaptor from = found_kmer[i]; //FindVertexAdaptor(found_kmer[i]); - HashGraphVertexAdaptor to = found_kmer[i+1]; //FindVertexAdaptor(found_kmer[i+1]); - - if (from.is_null() || to.is_null()) - { - cout << "error" << endl; - continue; - } - - if ((from.out_edges() & 15) == 0 && (to.in_edges() & 15) == 0) - { - for (int j = found_index[i] + 1; j < found_index[i+1]; ++j) - flags[j] = 1; - } - } - - if (found_index.size() > 0) - { - if (found_kmer.front().in_edges() == 0) - { - for (int j = kmer_size_ - 1; j < found_index.front(); ++j) - flags[j] = 1; - } - - if (found_kmer.back().out_edges() == 0) - { - for (int j = found_index.back() + 1; j < (int)seq.size(); ++j) - flags[j] = 1; - } - } - - length = 0; - for (uint64_t i = 0; i < seq.size(); ++i) - { - kmer.ShiftAppend(seq[i]); - length = (seq[i] < 4) ? 
length + 1 : 0; - - if (length < (int)kmer_size_) - continue; - - if (flags[i]) - { - IdbaKmer key = kmer.unique_format(); - - HashGraphVertex &vertex = vertex_table_.find_or_insert(HashGraphVertex(key)); - vertex.count() += 1; - HashGraphVertexAdaptor adaptor(&vertex, kmer != key); - - if (length > (int)kmer_size_ && seq[i-kmer_size_] < 4) - adaptor.in_edges().Add(3 - seq[i-kmer_size_] + 4); - if (i+1 < seq.size() && seq[i+1] < 4) - adaptor.out_edges().Add(seq[i+1] + 4); - - ++num_kmers; - } - } - - return num_kmers; -} - -int64_t HashGraph::InsertEdges(const Sequence &seq) -{ - if (seq.size() < kmer_size_) - return 0; - - IdbaKmer kmer(kmer_size_); - int length = 0; - int64_t num_kmers = 0; - for (uint64_t i = 0; i < seq.size(); ++i) - { - kmer.ShiftAppend(seq[i]); - length = (seq[i] < 4) ? length + 1 : 0; - - if (length < (int)kmer_size_) - continue; - - HashGraphVertexAdaptor adaptor = FindVertexAdaptor(kmer); - - if (adaptor.is_null()) - continue; - - if (length > (int)kmer_size_ && seq[i-kmer_size_] < 4) - adaptor.in_edges().Add(3 - seq[i-kmer_size_]); - if (i+1 < seq.size() && seq[i+1] < 4) - adaptor.out_edges().Add(seq[i+1]); - } - - return num_kmers; -} - -int64_t HashGraph::InsertExistKmers(const Sequence &seq) -{ - if (seq.size() < kmer_size_) - return 0; - - IdbaKmer kmer(kmer_size_); - int length = 0; - int64_t num_kmers = 0; - for (uint64_t i = 0; i < seq.size(); ++i) - { - kmer.ShiftAppend(seq[i]); - length = (seq[i] < 4) ? 
length + 1 : 0; - - if (length < (int)kmer_size_) - continue; - - HashGraphVertexAdaptor adaptor = FindVertexAdaptor(kmer); - - if (adaptor.is_null()) - continue; - - adaptor.count() += 1; - if (length > (int)kmer_size_ && seq[i-kmer_size_] < 4) - adaptor.in_edges().Add(3 - seq[i-kmer_size_]); - if (i+1 < seq.size() && seq[i+1] < 4) - adaptor.out_edges().Add(seq[i+1]); - } - - return num_kmers; -} - - -int64_t HashGraph::RemoveKmers(const Sequence &seq) -{ - if (seq.size() < kmer_size_) - return 0; - - IdbaKmer kmer(kmer_size_); - int length = 0; - int64_t num_kmers = 0; - for (uint64_t i = 0; i < seq.size(); ++i) - { - kmer.ShiftAppend(seq[i]); - length = (seq[i] < 4) ? length + 1 : 0; - - if (length < (int)kmer_size_) - continue; - - IdbaKmer key = kmer.unique_format(); - HashGraphVertex &vertex = *vertex_table_.find(key); - vertex.status().SetDeadFlag(); - - ++num_kmers; - } - - return num_kmers; -} - -int64_t HashGraph::ErodeEnd(int min_cover) -{ - ErodeFunc func(this, min_cover); - vertex_table_.for_each(func); - uint64_t num_eroded_vertice = RefreshVertices(); - RefreshEdges(); - ClearStatus(); - return num_eroded_vertice; -} - -int64_t HashGraph::Trim(int min_length) -{ - deque contigs; - deque contig_infos; - Assemble(contigs, contig_infos); - - for (int64_t i = 0; i < (int64_t)contigs.size(); ++i) - { - if ((contig_infos[i].out_edges() == 0 || contig_infos[i].in_edges() == 0) - && (int)contigs[i].size() < min_length + (int)kmer_size_ - 1) - RemoveKmers(contigs[i]); - } - - uint64_t old_num_vertices = vertex_table_.size(); - Refresh(); - - return old_num_vertices - vertex_table_.size(); -} - -int64_t HashGraph::RemoveDeadEnd(int min_length) -{ - uint64_t num_deadend = 0; - int l = 1; - while (true) - { - l = min(2*l, min_length); - num_deadend += Trim(l); - - if (l == min_length) - break; - } - num_deadend += Trim(min_length); - return num_deadend; -} - -int64_t HashGraph::RemoveLowCoverage(double min_cover, int min_length) -{ - uint64_t old_num_vertices = 
vertex_table_.size(); - - int l = 1; - while (true) - { - l = min(2*l, min_length); - - deque contigs; - deque contig_infos; - Assemble(contigs, contig_infos); - - for (int64_t i = 0; i < (int64_t)contigs.size(); ++i) - { - if (contig_infos[i].kmer_count() * 1.0 / (contigs[i].size() - kmer_size_ + 1) < min_cover - && (int)contigs[i].size() < l + (int)kmer_size_ - 1) - RemoveKmers(contigs[i]); - } - Refresh(); - Trim(l); - - if (l == min_length) - break; - } - - return old_num_vertices - vertex_table_.size(); -} - -int64_t HashGraph::Assemble(std::deque &contigs) -{ - deque contig_infos; - return Assemble(contigs, contig_infos); -} - int64_t HashGraph::Assemble(std::deque &contigs, std::deque &contig_infos) { contigs.clear(); @@ -369,86 +104,6 @@ int64_t HashGraph::Assemble(std::deque &contigs, std::deque 0 && vertex.out_edges().size() > 0) - return; - - if (vertex.status().IsDead()) - return; - - if (vertex.count() < min_cover_) - { - vertex.status().SetDeadFlag(); - - for (int strand = 0; strand < 2; ++strand) - { - HashGraphVertexAdaptor current(&vertex, strand); - for (int x = 0; x < 4; ++x) - { - if (current.out_edges()[x]) - { - current.out_edges().Remove(x); - IdbaKmer kmer = current.kmer(); - kmer.ShiftAppend(x); - HashGraphVertexAdaptor next = hash_graph_->FindVertexAdaptor(kmer); - if (!next.is_null()) - { - next.in_edges().Remove(3 - current.kmer()[0]); - (*this)(next.vertex()); - } - } - } - } - } -} - -void HashGraph::TrimFunc::operator ()(HashGraphVertex &vertex) -{ - if (vertex.in_edges().size() > 0 && vertex.out_edges().size() > 0) - return; - - if (vertex.kmer().IsPalindrome()) - return; - - if (!vertex.status().Lock(0)) - return; - - for (int strand = 0; strand < 2; ++strand) - { - HashGraphVertexAdaptor current(&vertex, strand); - - if (current.in_edges().size() > 0) - continue; - - deque path; - path.push_back(current); - for (int i = 0; i < min_length_; ++i) - { - if (current.out_edges().size() != 1) - return; - - IdbaKmer next_kmer = 
current.kmer(); - next_kmer.ShiftAppend(bit_operation::BitToIndex(current.out_edges())); - HashGraphVertexAdaptor next = hash_graph_->FindVertexAdaptor(next_kmer); - if (next.in_edges().size() != 1) - break; - - if (!next.status().LockPreempt(0)) - return; - - current = next; - path.push_back(current); - } - - if ((int)path.size() < min_length_) - { - for (unsigned i = 0; i < path.size(); ++i) - path[i].status().SetDeadFlag(); - } - } -} - void HashGraph::AssembleFunc::operator ()(HashGraphVertex &vertex) { if (!vertex.status().Lock(0)) @@ -462,7 +117,7 @@ void HashGraph::AssembleFunc::operator ()(HashGraphVertex &vertex) for (int strand = 0; strand < 2; ++strand) { HashGraphVertexAdaptor current(&vertex, strand); - + while (true) { HashGraphVertexAdaptor next; diff --git a/src/idba/hash_graph.h b/src/idba/hash_graph.h index b545faf..15356b4 100644 --- a/src/idba/hash_graph.h +++ b/src/idba/hash_graph.h @@ -51,7 +51,7 @@ class HashGraph iterator end() { return vertex_table_.end(); } HashGraphVertex *InsertVertex(const IdbaKmer &kmer, int count = 1) - { + { IdbaKmer key = kmer.unique_format(); HashGraphVertex &vertex = vertex_table_.find_or_insert(HashGraphVertex(key)); vertex.count() += count; @@ -93,11 +93,6 @@ class HashGraph int64_t InsertKmers(const Sequence &seq) { return InsertKmersWithPrefix(seq, 0, 0); } int64_t InsertKmersWithPrefix(const Sequence &seq, uint64_t prefix, uint64_t umask); int64_t InsertUncountKmers(const Sequence &seq); - int64_t InsertInternalKmers(const Sequence &seq, int min_count = 0); - int64_t InsertEdges(const Sequence &seq); - int64_t InsertExistKmers(const Sequence &seq); - - int64_t RemoveKmers(const Sequence &seq); void RemoveEdge(HashGraphVertexAdaptor &node, int x) { node.out_edges().Remove(x); @@ -136,27 +131,20 @@ class HashGraph void SetCountCap(int cap) { SetCountCapFunc func(cap); vertex_table_.for_each(func); } - void Refresh(int min_count = 0) + void Refresh(int min_count = 0) { RefreshVertices(min_count); RefreshEdges(); 
} int64_t RefreshVertices(int min_count = 0) { RefreshVerticesFunc func(min_count); return vertex_table_.remove_if(func); } void RefreshEdges() { RefreshEdgesFunc func(this); vertex_table_.for_each(func); num_edges_ = func.num_edges(); } - int64_t ErodeEnd(int min_cover); - int64_t Trim(int min_length); - int64_t RemoveDeadEnd(int min_length); - int64_t RemoveLowCoverage(double min_cover, int min_contig = (1 << 20)); - int64_t RemoveBubble(); + int64_t Assemble(std::deque &contigs, std::deque &contig_infos); - int64_t Assemble(std::deque &contigs); - int64_t Assemble(std::deque &contigs, std::deque &contig_infos); - // int64_t TrimSequentially(int min_length); // int64_t RemoveDeadEndSequentially(int min_length); // int64_t RemoveLowCoverageSequentially(double min_cover); // int64_t AssembleSequentially(std::deque &contigs); -// int64_t AssembleSequentially(std::deque &contigs, std::deque &contig_infos); +// int64_t AssembleSequentially(std::deque &contigs, std::deque &contig_infos); void reserve(uint64_t capacity) { vertex_table_.reserve(capacity); } @@ -221,7 +209,7 @@ class HashGraph IdbaKmer rev_comp = kmer; rev_comp.ReverseComplement(); - return contig.GetIdbaKmer(contig.size() - kmer_size_, kmer_size_) == rev_comp; + return contig.GetIdbaKmer(contig.size() - kmer_size_, kmer_size_) == rev_comp; } class BackupEdgesFunc @@ -349,49 +337,6 @@ class HashGraph uint64_t total_degree_; }; - class ErodeFunc - { - public: - ErodeFunc(HashGraph *hash_graph, int min_cover) - { hash_graph_ = hash_graph; min_cover_ = min_cover; } - - void operator ()(HashGraphVertex &vertex); - - private: - HashGraph *hash_graph_; - int min_cover_; - }; - - class TrimFunc - { - public: - TrimFunc(HashGraph *hash_graph, int min_length) - { hash_graph_ = hash_graph; min_length_ = min_length; } - - void operator ()(HashGraphVertex &vertex); - - private: - HashGraph *hash_graph_; - int min_length_; - }; - - class BubbleFunc - { - public: - BubbleFunc(HashGraph *hash_graph) - { hash_graph_ = 
hash_graph; } - ~BubbleFunc() - { } - - void operator ()(HashGraphVertex &vertex); - - std::deque &candidates() { return candidates_; } - - private: - HashGraph *hash_graph_; - std::deque candidates_; - }; - class AssembleFunc { public: @@ -400,7 +345,7 @@ class HashGraph { } ~AssembleFunc() { } - + void operator ()(HashGraphVertex &vertex); std::deque &contigs() { return contigs_; } diff --git a/src/idba/sequence.cpp b/src/idba/sequence.cpp index ee1fc8e..b78c8ba 100644 --- a/src/idba/sequence.cpp +++ b/src/idba/sequence.cpp @@ -116,14 +116,6 @@ bool Sequence::IsPalindrome() const return true; } -void Sequence::TrimN() -{ - int len = size(); - while (len > 0 && !IsValid(bases_[len-1])) - --len; - bases_.resize(len); -} - IdbaKmer Sequence::GetIdbaKmer(uint32_t offset, uint32_t kmer_size) const { IdbaKmer kmer(kmer_size); @@ -203,49 +195,10 @@ void Sequence::Decode() } } -istream &ReadFasta(istream &is, Sequence &seq, string &comment) -{ - string line; - getline(is, line); - - if (!is) - return is; - - comment = line.substr(1); - - return is >> seq; -} ostream &WriteFasta(ostream &os, const Sequence &seq, const string &comment) { return os << ">" << comment << "\n" << seq << "\n"; } -istream &ReadFastq(istream &is, Sequence &seq, string &comment, string &quality) -{ - string line; - getline(is, line); - if (!is) - return is; - - comment = line.substr(1); - - comment = line.substr(1); - is >> seq; - getline(is, line); - - quality = ""; - getline(is, line); - quality = line; -// while (is && is.peek() != '@' && getline(is, line)) -// quality += line; - is.clear(); - - return is; -} - -ostream &WriteFastq(ostream &os, const Sequence &seq, const string &comment, const string &quality) -{ - return os << "@" << comment << "\n" << seq << "\n" << "+" << "\n" << quality << "\n"; -} diff --git a/src/idba/sequence.h b/src/idba/sequence.h index 34d84ef..eebaa7f 100644 --- a/src/idba/sequence.h +++ b/src/idba/sequence.h @@ -72,7 +72,6 @@ class Sequence const Sequence 
&ReverseComplement(); bool IsValid() const; bool IsPalindrome() const; - void TrimN(); IdbaKmer GetIdbaKmer(uint32_t offset, uint32_t kmer_size) const; @@ -107,12 +106,8 @@ template <> inline void swap(Sequence &seq1, Sequence &seq2) { seq1.swap(seq2); } } -std::istream &ReadFasta(std::istream &is, Sequence &seq, std::string &comment); std::ostream &WriteFasta(std::ostream &os, const Sequence &seq, const std::string &comment); -std::istream &ReadFastq(std::istream &is, Sequence &seq, std::string &comment, std::string &quality); -std::ostream &WriteFastq(std::ostream &os, const Sequence &seq, const std::string &comment, const std::string &quality); - #endif From ff6fc02d6ec4f139b05eed0bbc7ffbc096cd6226 Mon Sep 17 00:00:00 2001 From: Dinghua Li Date: Fri, 29 Mar 2019 01:24:31 -0700 Subject: [PATCH 3/5] added hw accel and test option --- CHANGELOG.md | 5 + CMakeLists.txt | 43 +++++---- Dockerfile | 2 + DockerfileNoBMI2 | 12 --- README.md | 72 +++++---------- src/assembly/low_depth_remover.cpp | 3 +- src/definitions.h | 2 +- src/megahit | 142 +++++++++++++++++------------ 8 files changed, 142 insertions(+), 139 deletions(-) delete mode 100644 DockerfileNoBMI2 diff --git a/CHANGELOG.md b/CHANGELOG.md index f0da50d..762191b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,8 @@ +### 1.2.1-beta / 2019-03-29 PST +- Added `--no-hw-accel` option for users whose CPUs do not support BMI2/POPCNT +- Added `--test` option for testing +- Compilable with CMake 2.8 and g++4.8 + ### 1.2.0-beta / 2019-03-24 PST Heavily refactored the whole project: diff --git a/CMakeLists.txt b/CMakeLists.txt index ae02453..877eeaa 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,10 +1,13 @@ cmake_minimum_required(VERSION 2.8) project(megahit) set(CMAKE_VERBOSE_MAKEFILE ON) -set(CMAKE_CXX_STANDARD 11) -option(USE_POPCNT "Use popcnt hardware instructions" ON) -option(USE_BMI2 "Use bmi2 hardware instructions" ON) +if (CMAKE_VERSION VERSION_LESS "3.1") + set(CMAKE_CXX_FLAGS 
"${CMAKE_CXX_FLAGS} -std=c++11") +else () + set(CMAKE_CXX_STANDARD 11) +endif () + option(COVERAGE "Generate coverage report" OFF) option(STATIC_BUILD "Build static executation" OFF) @@ -30,7 +33,7 @@ LIST(APPEND OTHER_SOURCE if (STATIC_BUILD) set(CMAKE_FIND_LIBRARY_SUFFIXES ".a") -endif(STATIC_BUILD) +endif (STATIC_BUILD) find_package(ZLIB REQUIRED) find_package(OpenMP REQUIRED) @@ -43,19 +46,12 @@ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D__XFILE__='\"$(subst ${CMAKE_SOURCE_DI set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${ZLIB_EXE_LINKER_FLAGS} ${OpenMP_EXE_LINKER_FLAGS}") -if (USE_POPCNT) - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mpopcnt") -endif (USE_POPCNT) - -if (USE_BMI2) - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mbmi2 -DUSE_BMI2") -endif (USE_BMI2) - if (COVERAGE) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} --coverage") set(COV_PY "coverage run") -endif(COVERAGE) +endif (COVERAGE) +set(CMAKE_CXX_FLAGS_RELEASE "-O2 -DNDEBUG") set(CMAKE_CXX_FLAGS_DEBUG "-g -ggdb -O0") @@ -63,16 +59,19 @@ message(STATUS "Build type: ${CMAKE_BUILD_TYPE}: ${CMAKE_CXX_FLAGS}") add_executable(megahit_core ${OTHER_SOURCE} ${ASMBL_SOURCE} ${IDBA_SOURCE} ${SDBG_SOURCE} ${LCASM_SOURCE} ${CX1_SOURCE} ${TOOLKIT_SOURCE}) - +add_executable(megahit_core_no_hw_accel ${OTHER_SOURCE} ${ASMBL_SOURCE} ${IDBA_SOURCE} ${SDBG_SOURCE} ${LCASM_SOURCE} + ${CX1_SOURCE} ${TOOLKIT_SOURCE}) +set_target_properties(megahit_core PROPERTIES COMPILE_FLAGS "-mbmi2 -DUSE_BMI2 -mpopcnt") if (STATIC_BUILD) # TODO too dirty set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,--whole-archive -lpthread -Wl,--no-whole-archive -static") - set_target_properties(megahit_core PROPERTIES LINK_SEARCH_START_STATIC ON) - set_target_properties(megahit_core PROPERTIES LINK_SEARCH_END_STATIC ON) -endif(STATIC_BUILD) + set_target_properties(PROPERTIES LINK_SEARCH_START_STATIC ON) + set_target_properties(PROPERTIES LINK_SEARCH_END_STATIC ON) +endif (STATIC_BUILD) target_link_libraries(megahit_core ${ZLIB_LIBRARIES}) 
+target_link_libraries(megahit_core_no_hw_accel ${ZLIB_LIBRARIES}) add_custom_target( megahit @@ -83,14 +82,14 @@ set(TEST_DATA ${CMAKE_SOURCE_DIR}/test_data) add_custom_target( simple_test - COMMAND rm -rf megahit_out* - COMMAND ./megahit --12 ${TEST_DATA}/r1.il.fa.gz,${TEST_DATA}/r2.il.fa.bz2 -1 ${TEST_DATA}/r3_1.fa -2 ${TEST_DATA}/r3_2.fa -r ${TEST_DATA}/r4.fa -o megahit_out1 -t 2 --keep-tmp-files - COMMAND ./megahit --12 ${TEST_DATA}/r1.il.fa.gz,${TEST_DATA}/r2.il.fa.bz2 -1 ${TEST_DATA}/r3_1.fa -2 ${TEST_DATA}/r3_2.fa -r ${TEST_DATA}/r4.fa -o megahit_out2 -t 2 --keep-tmp-files --kmin-1pass + COMMAND ./megahit --test -t 2 --keep-tmp-files + COMMAND ./megahit --test -t 2 --keep-tmp-files --no-hw-accel + COMMAND ./megahit --test -t 2 --keep-tmp-files --kmin-1pass ) -add_dependencies(megahit megahit_core) +add_dependencies(megahit megahit_core megahit_core_no_hw_accel) add_dependencies(simple_test megahit) -install(TARGETS megahit_core DESTINATION bin) +install(TARGETS megahit_core megahit_core_no_hw_accel DESTINATION bin) install(PROGRAMS src/megahit DESTINATION bin) install(DIRECTORY test_data DESTINATION share/${PROJECT_NAME}) diff --git a/Dockerfile b/Dockerfile index 5734ec1..1004f82 100644 --- a/Dockerfile +++ b/Dockerfile @@ -9,3 +9,5 @@ WORKDIR build RUN cmake -DCMAKE_BUILD_TYPE=Release .. RUN make -j4 RUN make install +RUN megahit --test --no-hw-accel +RUN megahit --test --no-hw-accel --kmin-1pass diff --git a/DockerfileNoBMI2 b/DockerfileNoBMI2 deleted file mode 100644 index ee25a71..0000000 --- a/DockerfileNoBMI2 +++ /dev/null @@ -1,12 +0,0 @@ -FROM ubuntu:18.04 -RUN apt-get update -RUN apt-get install -y g++ make zlib1g-dev gzip bzip2 cmake python --no-install-recommends -COPY . /root/megahit -WORKDIR /root/megahit -RUN rm -rf build -RUN mkdir -p build -WORKDIR build -RUN cmake -DCMAKE_BUILD_TYPE=Release -DUSE_BMI2=OFF -DUSE_POPCNT=OFF .. 
-RUN make -j4 -RUN make simple_test -RUN make install diff --git a/README.md b/README.md index 93a3c70..71ded29 100644 --- a/README.md +++ b/README.md @@ -1,84 +1,62 @@ MEGAHIT ======= -[![BioConda Install](https://img.shields.io/conda/dn/bioconda/megahit.svg?style=flag&label=BioConda%20install)](https://anaconda.org/bioconda/megahit) -[![Build Status](https://travis-ci.org/voutcn/megahit.svg?branch=master)](https://travis-ci.org/voutcn/megahit) -[![codecov](https://codecov.io/gh/voutcn/megahit/branch/master/graph/badge.svg)](https://codecov.io/gh/voutcn/megahit) +[![BioConda Install](https://img.shields.io/conda/dn/bioconda/megahit.svg?style=flag&label=BioConda%20install)](https://anaconda.org/bioconda/megahit) [![Build Status](https://travis-ci.org/voutcn/megahit.svg?branch=master)](https://travis-ci.org/voutcn/megahit) [![codecov](https://codecov.io/gh/voutcn/megahit/branch/master/graph/badge.svg)](https://codecov.io/gh/voutcn/megahit) MEGAHIT is an ultra-fast and memory-efficient NGS assembler. It is optimized for metagenomes, but also works well on generic single genome assembly (small or mammalian size) and single-cell assembly. *News* ------ -MEGAHIT v1.2.0-beta is released. Main changes include +MEGAHIT v1.2.1-beta is released. Compared to v1.1.x, its changes include - faster and more memory-efficient than before, by using [BMI2 instructions](https://en.wikipedia.org/wiki/Bit_Manipulation_Instruction_Sets), [sparsepp](https://github.com/greg7mdp/sparsepp) and [xxhash](https://github.com/Cyan4973/xxHash) - refactored with C++11 features - use CMake to build the project - removal of GPU support -It is highly recommended to use v1.2.0-beta. Past versions can be found at the [release](https://github.com/voutcn/megahit/releases) page. +It is highly recommended to use v1.2.1-beta. Past versions can be found at the [release](https://github.com/voutcn/megahit/releases) page. 
Getting Started --------------- -### Run with docker (recommended) +### Running with Linux binaries or docker images (recommended) ``` bash -# in the directory with your input reads -docker run -v $(pwd):/workspace -w /workspace --user $(id -u):$(id -g) vout/megahit \ - megahit -1 YOUR_PE_READ_1.gz -2 YOUR_PE_READ_2.fq.gz -o YOUR_OUTPUT_DIR +https://github.com/voutcn/megahit/releases/download/v1.2.0-beta/MEGAHIT-1.2.0-beta-Linux-static.tar.gz +tar zvxf MEGAHIT-1.2.1-beta-Linux-static +cd MEGAHIT-1.2.1-beta-Linux-static/bin/ +./megahit --test # run on a toy dataset +./megahit -1 YOUR_PE_READ_1.gz -2 YOUR_PE_READ_2.fq.gz -o YOUR_OUTPUT_DIR ``` -If your CPU does not support BMI2 or POPCNT instructions (typically you will see exit code -4), please use the docker images from `vout/megahit-no-popcnt-bmi2`. i.e. +If your CPU does not support BMI2 and/or POPCNT, you may see "exit code -4". In this case, run MEGAHIT with `--no-hw-accel` option. + +You can also run MEGAHIT with its docker images. ``` bash # in the directory with your input reads -docker run -v $(pwd):/workspace -w /workspace --user $(id -u):$(id -g) vout/megahit-no-popcnt-bmi2 \ +docker run -v $(pwd):/workspace -w /workspace --user $(id -u):$(id -g) vout/megahit \ megahit -1 YOUR_PE_READ_1.gz -2 YOUR_PE_READ_2.fq.gz -o YOUR_OUTPUT_DIR ``` -### Build from source +### Building from source #### Prerequisites -- For building: zlib, cmake, gcc/g++ >= 5 +- For building: zlib, cmake >= 2.8, g++ >= 4.8.4 - For running: gzip and bzip2 -#### Build and test - -1. Obtain the source code - - ``` bash - git clone https://github.com/voutcn/megahit.git - cd megahit - git submodule update --init - ``` - -2. Create the build directory - - ``` bash - mkdir build && cd build - ``` - -3. Run cmake - - ``` bash - cmake -DCMAKE_BUILD_TYPE=release .. - ``` - - If your CPU does not support BMI2 instructions (uncommon), run the following command instead - - cmake -DUSE_BMI2=OFF -DCMAKE_BUILD_TYPE=release .. - -4. 
Compile & test - - ``` bash - make -j4 - make simple_test # will test MEGAHIT with a toy dataset - ``` - - If you need to install Megahit to your PATH, run `make install` in the build directory. +``` bash +git clone https://github.com/voutcn/megahit.git +cd megahit +git submodule update --init +mkdir build && cd build +cmake .. -DCMAKE_BUILD_TYPE=Release # add -DCMAKE_INSTALL_PREFIX=YOUR_PREFIX if needed +make -j4 +make simple_test # will test MEGAHIT with a toy dataset +# make install if needed +``` Usage ----- diff --git a/src/assembly/low_depth_remover.cpp b/src/assembly/low_depth_remover.cpp index 902c52a..241e3c6 100644 --- a/src/assembly/low_depth_remover.cpp +++ b/src/assembly/low_depth_remover.cpp @@ -40,7 +40,7 @@ bool RemoveLocalLowDepth(UnitigGraph &graph, double min_depth, uint32_t max_len, bool permanent_rm, uint32_t *num_removed) { bool is_changed = false; bool need_refresh = false; - auto &removed = *num_removed; + uint32_t removed = 0; #pragma omp parallel for reduction(+: removed) for (UnitigGraph::size_type i = 0; i < graph.size(); ++i) { @@ -79,6 +79,7 @@ bool RemoveLocalLowDepth(UnitigGraph &graph, double min_depth, uint32_t max_len, bool set_changed = !permanent_rm; graph.Refresh(set_changed); } + *num_removed = removed; return is_changed; } diff --git a/src/definitions.h b/src/definitions.h index f3de3b0..ecac67e 100644 --- a/src/definitions.h +++ b/src/definitions.h @@ -24,7 +24,7 @@ #include #ifndef PACKAGE_VERSION -#define PACKAGE_VERSION "v1.2.0-beta" +#define PACKAGE_VERSION "v1.2.1-beta" #endif #include "sdbg/sdbg_def.h" diff --git a/src/megahit b/src/megahit index 9e218fe..d581f07 100755 --- a/src/megahit +++ b/src/megahit @@ -85,6 +85,7 @@ Optional Arguments: --mem-flag SdBG builder memory mode 0: minimum; 1: moderate; others: use all memory specified by '-m/--memory' [1] -t/--num-cpu-threads number of CPU threads [# of logical processors] + --no-hw-accel run MEGAHIT without BMI2 and POPCNT hardware instructions Output options: 
-o/--out-dir output directory [./megahit_out] @@ -96,6 +97,7 @@ Optional Arguments: Other Arguments: --continue continue a MEGAHIT run from its last available check point. please set the output directory correctly when using this option. + --test run MEGAHIT on a toy test dataset -h/--help print the usage message -v/--version print version ''' @@ -121,7 +123,7 @@ class Options: self.auto_k = True self.set_list_by_min_max_step = False self.min_count = 2 - self.bin_dir = sys.path[0] + "/" + self.megahit_core = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'megahit_core') self.max_tip_len = -1 self.no_mercy = False self.no_local = False @@ -149,6 +151,7 @@ class Options: self.graph_only = False self.force_overwrite = False self.lib = None + self.test = False opt = Options() @@ -157,13 +160,13 @@ cp = 0 def log_file_name(): if opt.out_prefix == "": - return opt.out_dir + "log" + return os.path.join(opt.out_dir, "log") else: - return opt.out_dir + opt.out_prefix + ".log" + return os.path.join(opt.out_dir, opt.out_prefix + ".log") def opt_file_name(): - return opt.out_dir + "opts.txt" + return os.path.join(opt.out_dir, "opts.txt") def make_out_dir(): @@ -171,11 +174,11 @@ def make_out_dir(): os.mkdir(opt.out_dir) if not opt.continue_mode: - if opt.temp_dir == opt.out_dir + "tmp/": + if opt.temp_dir == os.path.join(opt.out_dir, "tmp"): if not os.path.exists(opt.temp_dir): os.mkdir(opt.temp_dir) else: - opt.temp_dir = tempfile.mkdtemp(dir=opt.temp_dir, prefix='megahit_tmp_') + "/" + opt.temp_dir = tempfile.mkdtemp(dir=opt.temp_dir, prefix='megahit_tmp_') if not os.path.exists(opt.contig_dir): os.mkdir(opt.contig_dir) @@ -214,9 +217,11 @@ def parse_opt(argv): "version", "out-prefix=", "presets=", + "test", + "no-hw-accel", "force", - "graph-only", # deprecated + "graph-only", "max-read-len=", "no-low-local", "cpu-only", @@ -235,7 +240,7 @@ def parse_opt(argv): exit(0) elif option in ("-o", "--out-dir"): if opt.continue_mode == 0: - opt.out_dir = value + "/" + 
opt.out_dir = value elif option in ("-m", "--memory"): opt.host_mem = float(value) elif option == "--min-contig-len": @@ -294,7 +299,7 @@ def parse_opt(argv): opt.out_prefix = value elif option == "--tmp-dir": if opt.continue_mode == 0: - opt.temp_dir = value + "/" + opt.temp_dir = value elif option in ("--cpu-only", "-l", "--max-read-len", "--no-low-local", "--use-gpu", "--gpu-mem"): print("option {} is deprecated!".format(option), file=sys.stderr) @@ -315,13 +320,24 @@ def parse_opt(argv): opt.graph_only = True elif option in ("-f", "--force"): opt.force_overwrite = True - + elif option == "--test": + opt.test = True + elif option == "--no-hw-accel": + opt.megahit_core = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'megahit_core_no_hw_accel') else: raise Usage("Invalid option {}".format(option)) + if opt.test: + test_data_dir = find_test_data_path() + opt.pe12 = [os.path.join(test_data_dir, "r1.il.fa.gz"), os.path.join(test_data_dir, "r2.il.fa.bz2")] + opt.pe1 = [os.path.join(test_data_dir, "r3_1.fa")] + opt.pe2 = [os.path.join(test_data_dir, "r3_2.fa")] + opt.se = [os.path.join(test_data_dir, "r4.fa")] + opt.out_dir = os.path.join(tempfile.mkdtemp(prefix='megahit_test'), 'out') + if opt.temp_dir == "": - opt.temp_dir = opt.out_dir + "tmp/" - opt.contig_dir = opt.out_dir + "intermediate_contigs/" + opt.temp_dir = os.path.join(opt.out_dir, "tmp") + opt.contig_dir = os.path.join(opt.out_dir, "intermediate_contigs") if need_continue: prepare_continue() @@ -422,6 +438,16 @@ def check_opt(): opt.bubble_level = 2 +def find_test_data_path(): + script_path = os.path.dirname(os.path.realpath(__file__)) + for path in [os.path.join(script_path, '..'), os.path.join(script_path, '../share/megahit')]: + test_data_dir = os.path.join(path, 'test_data') + if os.path.isdir(test_data_dir) and all( + f in os.listdir(test_data_dir) for f in ["r1.il.fa.gz", "r2.il.fa.bz2", "r3_1.fa", "r3_2.fa", "r4.fa"]): + return test_data_dir + raise Usage("Test data not found! 
Script path = {}".format(script_path)) + + def check_reads(): # reads global cp @@ -462,7 +488,7 @@ def write_opt(argv): def prepare_continue(): global opt # out_dir is already set if not os.path.exists(opt_file_name()): - print("Cannot find " + opt.out_dir + "opts.txt", file=sys.stderr) + print("Cannot find " + opt_file_name(), file=sys.stderr) print("Please check whether the output directory is correctly set by \"-o\"", file=sys.stderr) print("Now switching to normal mode.", file=sys.stderr) return @@ -487,8 +513,8 @@ def prepare_continue(): parse_opt(argv) opt.last_cp = -1 - if os.path.exists(opt.temp_dir + "cp.txt"): - with open(opt.temp_dir + "cp.txt", "r") as cpf: + if os.path.exists(os.path.join(opt.temp_dir, "cp.txt")): + with open(os.path.join(opt.temp_dir, "cp.txt"), "r") as cpf: for line in cpf: a = line.strip().split() if len(a) == 2 and a[1] == "done": @@ -498,7 +524,7 @@ def prepare_continue(): def check_bin(): - if not os.path.exists(opt.bin_dir + "megahit_core"): + if not os.path.exists(opt.megahit_core): raise Usage("Cannot find megahit_core, please recompile.") @@ -507,21 +533,21 @@ def get_version(): global usage_message global max_k_allowed megahit_version_str = "MEGAHIT " + \ - subprocess.Popen([opt.bin_dir + "megahit_core", "dumpversion"], + subprocess.Popen([opt.megahit_core, "dumpversion"], stdout=subprocess.PIPE).communicate()[0].rstrip().decode('utf-8') - max_k_allowed = int(subprocess.Popen([opt.bin_dir + "megahit_core", "kmax"], + max_k_allowed = int(subprocess.Popen([opt.megahit_core, "kmax"], stdout=subprocess.PIPE).communicate()[0].rstrip().decode('utf-8')) usage_message = usage_message % (max_k_allowed, max_k_allowed, max_k_allowed) def graph_prefix(kmer_k): - if not os.path.exists(opt.temp_dir + "k" + str(kmer_k)): - os.mkdir(opt.temp_dir + "k" + str(kmer_k)) - return opt.temp_dir + "k" + str(kmer_k) + "/" + str(kmer_k) + if not os.path.exists(os.path.join(opt.temp_dir, "k" + str(kmer_k))): + os.mkdir(os.path.join(opt.temp_dir, "k" 
+ str(kmer_k))) + return os.path.join(opt.temp_dir, "k" + str(kmer_k), str(kmer_k)) def contig_prefix(kmer_k): - return opt.contig_dir + "k" + str(kmer_k) + return os.path.join(opt.contig_dir, "k" + str(kmer_k)) def delect_file_if_exist(file_name): @@ -552,7 +578,7 @@ def delete_tmp_after_iter(kmer_k): def write_cp(): global cp - cpf = open(opt.temp_dir + "cp.txt", "a") + cpf = open(os.path.join(opt.temp_dir, "cp.txt"), "a") print(str(cp) + "\t" + "done", file=cpf) cp = cp + 1 cpf.close() @@ -568,24 +594,24 @@ def inpipe_cmd(file_name): def write_lib(): - opt.lib = opt.temp_dir + "reads.lib" + opt.lib = os.path.join(opt.temp_dir, "reads.lib") lib = open(opt.lib, "w") for i in range(0, len(opt.pe12)): print(opt.pe12[i], file=lib) if inpipe_cmd(opt.pe12[i]) != "": - print("interleaved " + opt.temp_dir + "inpipe.pe12." + str(i), file=lib) + print("interleaved " + os.path.join(opt.temp_dir, "inpipe.pe12." + str(i)), file=lib) else: print("interleaved " + opt.pe12[i], file=lib) for i in range(0, len(opt.pe1)): if inpipe_cmd(opt.pe1[i]) != "": - f1 = opt.temp_dir + "inpipe.pe1." + str(i) + f1 = os.path.join(opt.temp_dir, "inpipe.pe1." + str(i)) else: f1 = opt.pe1[i] if inpipe_cmd(opt.pe2[i]) != "": - f2 = opt.temp_dir + "inpipe.pe2." + str(i) + f2 = os.path.join(opt.temp_dir, "inpipe.pe2." + str(i)) else: f2 = opt.pe2[i] @@ -596,7 +622,7 @@ def write_lib(): print(opt.se[i], file=lib) if inpipe_cmd(opt.se[i]) != "": - print("se " + opt.temp_dir + "inpipe.se." + str(i), file=lib) + print("se " + os.path.join(opt.temp_dir, "inpipe.se." 
+ str(i)), file=lib) else: print("se " + opt.se[i], file=lib) @@ -610,7 +636,7 @@ def write_lib(): def build_lib(): global cp if (not opt.continue_mode) or (cp > opt.last_cp): - build_lib_cmd = [opt.bin_dir + "megahit_core", "buildlib", + build_lib_cmd = [opt.megahit_core, "buildlib", opt.lib, opt.lib] @@ -620,26 +646,26 @@ def build_lib(): # create inpipe for i in range(len(opt.pe12)): if inpipe_cmd(opt.pe12[i]) != "": - delect_file_if_exist(opt.temp_dir + "inpipe.pe12." + str(i)) - os.mkfifo(opt.temp_dir + "inpipe.pe12." + str(i)) - fifos.append(opt.temp_dir + "inpipe.pe12." + str(i)) + delect_file_if_exist(os.path.join(opt.temp_dir, "inpipe.pe12." + str(i))) + os.mkfifo(os.path.join(opt.temp_dir, "inpipe.pe12." + str(i))) + fifos.append(os.path.join(opt.temp_dir, "inpipe.pe12." + str(i))) for i in range(len(opt.pe1)): if inpipe_cmd(opt.pe1[i]) != "": - delect_file_if_exist(opt.temp_dir + "inpipe.pe1." + str(i)) - os.mkfifo(opt.temp_dir + "inpipe.pe1." + str(i)) - fifos.append(opt.temp_dir + "inpipe.pe1." + str(i)) + delect_file_if_exist(os.path.join(opt.temp_dir, "inpipe.pe1." + str(i))) + os.mkfifo(os.path.join(opt.temp_dir, "inpipe.pe1." + str(i))) + fifos.append(os.path.join(opt.temp_dir, "inpipe.pe1." + str(i))) if inpipe_cmd(opt.pe2[i]) != "": - delect_file_if_exist(opt.temp_dir + "inpipe.pe2." + str(i)) - os.mkfifo(opt.temp_dir + "inpipe.pe2." + str(i)) - fifos.append(opt.temp_dir + "inpipe.pe2." + str(i)) + delect_file_if_exist(os.path.join(opt.temp_dir, "inpipe.pe2." + str(i))) + os.mkfifo(os.path.join(opt.temp_dir, "inpipe.pe2." + str(i))) + fifos.append(os.path.join(opt.temp_dir, "inpipe.pe2." + str(i))) for i in range(len(opt.se)): if inpipe_cmd(opt.se[i]) != "": - delect_file_if_exist(opt.temp_dir + "inpipe.se." + str(i)) - os.mkfifo(opt.temp_dir + "inpipe.se." + str(i)) - fifos.append(opt.temp_dir + "inpipe.se." + str(i)) + delect_file_if_exist(os.path.join(opt.temp_dir, "inpipe.se." + str(i))) + os.mkfifo(os.path.join(opt.temp_dir, "inpipe.se." 
+ str(i))) + fifos.append(os.path.join(opt.temp_dir, "inpipe.se." + str(i))) logging.info("--- [%s] Converting reads to binary library ---" % datetime.now().strftime("%c")) logging.debug("%s" % " ".join(build_lib_cmd)) @@ -657,27 +683,28 @@ def build_lib(): for i in range(len(opt.pe12)): if inpipe_cmd(opt.pe12[i]) != "": ip_thread12 = subprocess.Popen( - inpipe_cmd(opt.pe12[i]) + " > " + opt.temp_dir + "inpipe.pe12." + str(i), shell=True, + inpipe_cmd(opt.pe12[i]) + " > " + os.path.join(opt.temp_dir, "inpipe.pe12." + str(i)), + shell=True, preexec_fn=os.setsid) pipes.append(ip_thread12) for i in range(len(opt.pe1)): if inpipe_cmd(opt.pe1[i]) != "": ip_thread1 = subprocess.Popen( - inpipe_cmd(opt.pe1[i]) + " > " + opt.temp_dir + "inpipe.pe1." + str(i), shell=True, + inpipe_cmd(opt.pe1[i]) + " > " + os.path.join(opt.temp_dir, "inpipe.pe1." + str(i)), shell=True, preexec_fn=os.setsid) pipes.append(ip_thread1) if inpipe_cmd(opt.pe2[i]) != "": ip_thread2 = subprocess.Popen( - inpipe_cmd(opt.pe2[i]) + " > " + opt.temp_dir + "inpipe.pe2." + str(i), shell=True, + inpipe_cmd(opt.pe2[i]) + " > " + os.path.join(opt.temp_dir, "inpipe.pe2." + str(i)), shell=True, preexec_fn=os.setsid) pipes.append(ip_thread2) for i in range(len(opt.se)): if inpipe_cmd(opt.se[i]) != "": ip_thread_se = subprocess.Popen( - inpipe_cmd(opt.se[i]) + " > " + opt.temp_dir + "inpipe.se." + str(i), shell=True, + inpipe_cmd(opt.se[i]) + " > " + os.path.join(opt.temp_dir, "inpipe.se." 
+ str(i)), shell=True, preexec_fn=os.setsid) pipes.append(ip_thread_se) @@ -756,11 +783,11 @@ def build_first_graph(): "--read_lib_file", opt.lib] if opt.kmin_1pass: - cmd = [opt.bin_dir + "megahit_core", "read2sdbg"] + count_opt + cmd = [opt.megahit_core, "read2sdbg"] + count_opt if not opt.no_mercy: cmd.append("--need_mercy") else: - cmd = [opt.bin_dir + "megahit_core", "count"] + count_opt + cmd = [opt.megahit_core, "count"] + count_opt try: if opt.kmin_1pass: @@ -809,7 +836,7 @@ def build_graph(kmer_k, kmer_from): "-k", str(kmer_k), "--kmer_from", str(kmer_from)] - build_cmd = [opt.bin_dir + "megahit_core", "seq2sdbg"] + build_comm_opt + build_cmd = [opt.megahit_core, "seq2sdbg"] + build_comm_opt file_size = 0 @@ -875,7 +902,7 @@ def iterate(cur_k, step): if (not opt.continue_mode) or (cp > opt.last_cp): next_k = cur_k + step - iterate_cmd = [opt.bin_dir + "megahit_core", "iterate", + iterate_cmd = [opt.megahit_core, "iterate", "-c", contig_prefix(cur_k) + ".contigs.fa", "-b", contig_prefix(cur_k) + ".bubble_seq.fa", "-t", str(opt.num_cpu_threads), @@ -922,7 +949,7 @@ def assemble(cur_k): if opt.max_tip_len >= 0: min_standalone = max(opt.max_tip_len + opt.k_max - 1, opt.min_contig_len) - assembly_cmd = [opt.bin_dir + "megahit_core", "assemble", + assembly_cmd = [opt.megahit_core, "assemble", "-s", graph_prefix(cur_k), "-o", contig_prefix(cur_k), "-t", str(opt.num_cpu_threads), @@ -980,7 +1007,7 @@ def assemble(cur_k): def local_assemble(cur_k, kmer_to): global cp if (not opt.continue_mode) or (cp > opt.last_cp): - la_cmd = [opt.bin_dir + "megahit_core", "local", + la_cmd = [opt.megahit_core, "local", "-c", contig_prefix(cur_k) + ".contigs.fa", "-l", opt.lib, "-t", str(opt.num_cpu_threads), @@ -1017,14 +1044,14 @@ def merge_final(): global cp if (not opt.continue_mode) or (cp > opt.last_cp): logging.info("--- [%s] Merging to output final contigs ---" % (datetime.now().strftime("%c"))) - final_contig_name = opt.out_dir + "final.contigs.fa" + final_contig_name = 
os.path.join(opt.out_dir, "final.contigs.fa") if opt.out_prefix != "": - final_contig_name = opt.out_dir + opt.out_prefix + ".contigs.fa" + final_contig_name = os.path.join(opt.out_dir, opt.out_prefix + ".contigs.fa") with open(final_contig_name, "w") as final_contigs: - merge_cmd = "cat " + opt.contig_dir + "*.final.contigs.fa " + \ + merge_cmd = "cat " + opt.contig_dir + "/*.final.contigs.fa " + \ contig_prefix(opt.k_max) + ".contigs.fa | " + \ - opt.bin_dir + "megahit_core filterbylen " + str(opt.min_contig_len) + opt.megahit_core + " filterbylen " + str(opt.min_contig_len) p = subprocess.Popen(merge_cmd, shell=True, stdout=final_contigs, stderr=subprocess.PIPE) while True: @@ -1117,7 +1144,10 @@ def main(argv=None): if not opt.keep_tmp_files and os.path.exists(opt.temp_dir): shutil.rmtree(opt.temp_dir) - open(opt.out_dir + "done", "w").close() + open(os.path.join(opt.out_dir, "done"), "w").close() + + if not opt.keep_tmp_files and opt.test: + shutil.rmtree(opt.out_dir) logging.info("--- [%s] ALL DONE. Time elapsed: %f seconds ---" % ( datetime.now().strftime("%c"), time.time() - start_time)) From 162a3dd32fd283118bd8ecdf3ca160511711e080 Mon Sep 17 00:00:00 2001 From: Dinghua Li Date: Sun, 31 Mar 2019 00:18:34 -0700 Subject: [PATCH 4/5] further removed unused idba functions & polish readme for new release --- .travis.yml | 7 +- CHANGELOG.md | 2 +- CMakeLists.txt | 4 +- README.md | 21 +-- src/idba/contig_graph.cpp | 279 -------------------------------------- src/idba/contig_graph.h | 60 -------- src/megahit | 4 + 7 files changed, 23 insertions(+), 354 deletions(-) diff --git a/.travis.yml b/.travis.yml index 34f7b98..4831f08 100644 --- a/.travis.yml +++ b/.travis.yml @@ -6,12 +6,15 @@ language: python python: - "2.7" - "3.4" -script: git submodule update --init && mkdir build && cd build && cmake .. -DCMAKE_BUILD_TYPE=Debug -DCOVERAGE=ON && make -j2 simple_test +script: git submodule update --init && mkdir build && cd build && \ + cmake .. 
-DCMAKE_BUILD_TYPE=Debug -DCOVERAGE=ON && make -j2 simple_test && \ + sudo make install && megahit --test && megahit --test --kmin-1pass && \ + megahit --test --no-hw-accel after_success: # Create lcov report - lcov --capture --directory . --output-file coverage.info - lcov --remove coverage.info '/usr/*' --output-file coverage.info # filter system-files - - lcov --remove coverage.info '*xxhash/*' --output-file coverage.info # filter xxhash-files + - lcov --remove coverage.info '*xxHash/*' --output-file coverage.info # filter xxhash-files - lcov --remove coverage.info '*sparsepp/*' --output-file coverage.info # filter sparsepp-files - lcov --list coverage.info # debug info # Uploading report to CodeCov diff --git a/CHANGELOG.md b/CHANGELOG.md index 762191b..634d38b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,4 +1,4 @@ -### 1.2.1-beta / 2019-03-29 PST +### 1.2.1-beta / 2019-03-30 PST - Added `--no-hw-accel` option for users whose CPUs do not support BMI2/POPCNT - Added `--test` option for testing - Compilable with CMake 2.8 and g++4.8 diff --git a/CMakeLists.txt b/CMakeLists.txt index 877eeaa..15b083f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -66,8 +66,8 @@ set_target_properties(megahit_core PROPERTIES COMPILE_FLAGS "-mbmi2 -DUSE_BMI2 - if (STATIC_BUILD) # TODO too dirty set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,--whole-archive -lpthread -Wl,--no-whole-archive -static") - set_target_properties(PROPERTIES LINK_SEARCH_START_STATIC ON) - set_target_properties(PROPERTIES LINK_SEARCH_END_STATIC ON) + set_target_properties(megahit_core megahit_core_no_hw_accel PROPERTIES LINK_SEARCH_START_STATIC ON) + set_target_properties(megahit_core megahit_core_no_hw_accel PROPERTIES LINK_SEARCH_END_STATIC ON) endif (STATIC_BUILD) target_link_libraries(megahit_core ${ZLIB_LIBRARIES}) diff --git a/README.md b/README.md index 71ded29..bc97232 100644 --- a/README.md +++ b/README.md @@ -5,25 +5,26 @@ MEGAHIT MEGAHIT is an ultra-fast and memory-efficient 
NGS assembler. It is optimized for metagenomes, but also works well on generic single genome assembly (small or mammalian size) and single-cell assembly. -*News* +*News: try v1.2.x!* ------ -MEGAHIT v1.2.1-beta is released. Compared to v1.1.x, its changes include +MEGAHIT v1.2.x (beta) is released. Compared to v1.1.x, its changes include - faster and more memory-efficient than before, by using [BMI2 instructions](https://en.wikipedia.org/wiki/Bit_Manipulation_Instruction_Sets), [sparsepp](https://github.com/greg7mdp/sparsepp) and [xxhash](https://github.com/Cyan4973/xxHash) - refactored with C++11 features - use CMake to build the project - removal of GPU support -It is highly recommended to use v1.2.1-beta. Past versions can be found at the [release](https://github.com/voutcn/megahit/releases) page. +Please follow the instructions in [Getting Started](#gst) to try this new version. +Past versions can be found at the [release](https://github.com/voutcn/megahit/releases) page. -Getting Started +Getting Started --------------- ### Running with Linux binaries or docker images (recommended) -``` bash -https://github.com/voutcn/megahit/releases/download/v1.2.0-beta/MEGAHIT-1.2.0-beta-Linux-static.tar.gz +``` sh +https://github.com/voutcn/megahit/releases/download/v1.2.1-beta/MEGAHIT-1.2.1-beta-Linux-static.tar.gz tar zvxf MEGAHIT-1.2.1-beta-Linux-static cd MEGAHIT-1.2.1-beta-Linux-static/bin/ ./megahit --test # run on a toy dataset @@ -34,7 +35,7 @@ If your CPU does not support BMI2 and/or POPCNT, you may see "exit code -4". In You can also run MEGAHIT with its docker images. 
-``` bash +``` sh # in the directory with your input reads docker run -v $(pwd):/workspace -w /workspace --user $(id -u):$(id -g) vout/megahit \ megahit -1 YOUR_PE_READ_1.gz -2 YOUR_PE_READ_2.fq.gz -o YOUR_OUTPUT_DIR @@ -47,7 +48,7 @@ docker run -v $(pwd):/workspace -w /workspace --user $(id -u):$(id -g) vout/mega - For building: zlib, cmake >= 2.8, g++ >= 4.8.4 - For running: gzip and bzip2 -``` bash +``` sh git clone https://github.com/voutcn/megahit.git cd megahit git submodule update --init @@ -63,13 +64,13 @@ Usage To run MEGAHIT with default parameters: -``` bash +``` sh megahit -1 YOUR_PE_READ_1.fq.gz -2 YOUR_PE_READ_2.fq.gz -r YOUR_SE_READ.fq.gz -o YOUR_OUTPUT_DIR ``` If you did not install Megahit to your PATH, just run Megahit with full-path, e.g. -``` bash +``` sh /PATH/TO/MEGAHIT/build/megahit ``` diff --git a/src/idba/contig_graph.cpp b/src/idba/contig_graph.cpp index 2c813ec..ba94193 100644 --- a/src/idba/contig_graph.cpp +++ b/src/idba/contig_graph.cpp @@ -125,31 +125,6 @@ int64_t ContigGraph::Trim(int min_length) return old_num_vertices - vertices_.size(); } -int64_t ContigGraph::Trim(int min_length, double min_cover) -{ - uint64_t old_num_vertices = vertices_.size(); - - for (int64_t i = 0; i < (int64_t)vertices_.size(); ++i) - { - if (vertices_[i].contig().size() == kmer_size_ - && vertices_[i].contig().IsPalindrome()) - continue; - - if ((vertices_[i].in_edges().empty() || vertices_[i].out_edges().empty()) - && vertices_[i].contig().size() < min_length + kmer_size_ - 1 - && (vertices_[i].in_edges().size() + vertices_[i].out_edges().size() <= 1 - && vertices_[i].coverage() < min_cover) - ) - { - vertices_[i].status().SetDeadFlag(); - } - } - Refresh(); - MergeSimplePaths(); - - return old_num_vertices - vertices_.size(); -} - int64_t ContigGraph::RemoveDeadEnd(int min_length) { uint64_t num_deadend = 0; @@ -361,226 +336,6 @@ struct SearchNode int label; }; -bool ContigGraph::IsConverged(ContigGraphVertexAdaptor current) -{ - int TimeLimit = 1000; 
- int DistanceLimit = 300; - map reachable; - queue qu; - - for (int x = 0; x < 4; ++x) - { - if (current.out_edges()[x]) - { - SearchNode search_node; - search_node.node = GetNeighbor(current, x); - search_node.distance = -(int)kmer_size_ + 1; - search_node.label = x; - - //if (!search_node.node.status().IsDead()) - qu.push(search_node); - } - } - - int time = 0; - while (!qu.empty()) - { - if (time++ == TimeLimit) - break; - - SearchNode search_node = qu.front(); - qu.pop(); - - reachable[search_node.node] |= (1 << search_node.label); - - if (reachable[search_node.node] == (int)current.out_edges()) - { - return true; - } - - if (search_node.distance + (int)search_node.node.contig_size() - (int)kmer_size_ + 1 > DistanceLimit) - continue; - - for (int x = 0; x < 4; ++x) - { - if (search_node.node.out_edges()[x]) - { - ContigGraphVertexAdaptor next = GetNeighbor(search_node.node, x); - - SearchNode new_search_node; - new_search_node.node = next; - new_search_node.distance = search_node.distance + (int)search_node.node.contig_size() - (int)kmer_size_ + 1; - new_search_node.label = search_node.label; - -// if (new_search_node.node == current) -// continue; - - if (reachable[new_search_node.node] & (1 << new_search_node.label)) - continue; - - //if (!new_search_node.node.status().IsDead()) - qu.push(new_search_node); - } - } - } - - return false; -} - -int64_t ContigGraph::SplitBranches() -{ - //cout << num_vertices() << " " << num_edges() << endl; - - deque branches; - - int64_t count = 0; - for (int64_t i = 0; i < (int64_t)vertices_.size(); ++i) - { - ContigGraphVertexAdaptor current(&vertices_[i]); - for (int strand = 0; strand < 2; ++strand) - { - if (!IsConverged(current)) - { - ++count; - - branches.push_back(current); - } - - current.ReverseComplement(); - } - } - - set sources; - - for (unsigned i = 0; i < branches.size(); ++i) - sources.insert(branches[i]); - - for (unsigned i = 0; i < branches.size(); ++i) - { - ContigGraphVertexAdaptor u = branches[i]; - - 
for (int x = 0; x < 4; ++x) - { - if (u.out_edges()[x]) - { - ContigGraphVertexAdaptor v = GetNeighbor(u, x); - v.ReverseComplement(); - if (sources.find(v) == sources.end()) - { - sources.insert(v); - branches.push_back(v); - } - //RemoveEdge(u, x); - } - } - } - - for (unsigned i = 0; i < branches.size(); ++i) - { - ContigGraphVertexAdaptor u = branches[i]; - - for (int x = 0; x < 4; ++x) - { - if (u.out_edges()[x]) - RemoveEdge(u, x); - } - } - - RefreshEdges(); - - return count; -} - -void ContigGraph::GetComponents(deque > &components, deque &component_strings) -{ - components.clear(); - component_strings.clear(); - - for (unsigned i = 0; i < vertices().size(); ++i) - { - if (vertices()[i].status().IsUsed()) - continue; - - deque qu; - qu.push_back(ContigGraphVertexAdaptor(&vertices()[i], 0)); - vertices()[i].status().SetUsedFlag(); - - stringstream ss; - for (int index = 0; index < (int)qu.size(); ++index) - { - ContigGraphVertexAdaptor current = qu[index]; - - for (int strand = 0; strand < 2; ++strand) - { - //for (connection_list_iterator p = connections()[current].begin(); p != connections()[current].end(); ++p) - for (int x = 0; x < 4; ++x) - { - if (current.out_edges()[x]) - { - ContigGraphVertexAdaptor next = GetNeighbor(current, x); - - if (strand == 0) - { - ss << current.id() << "_" << current.is_reverse() << "_" << current.contig_size() << "_" << current.kmer_count() << " " - << next.id() << "_" << next.is_reverse() << "_" << next.contig_size() << "_" << next.kmer_count() << endl; - - if (!next.status().IsUsed()) - qu.push_back(next); - } - else - { - ss << next.id() << "_" << next.is_reverse() << "_" << next.contig_size() << "_" << next.kmer_count() << " " - << current.id() << "_" << current.is_reverse() << "_" << current.contig_size() << "_" << current.kmer_count() << endl; - - if (!next.status().IsUsed()) - qu.push_back(next.ReverseComplement()); - } - - next.status().SetUsedFlag(); - } - } - - current.ReverseComplement(); - } - } - - 
components.push_back(qu); - component_strings.push_back(ss.str()); - } - - ClearStatus(); -} - -double ContigGraph::GetSimilarity(const Sequence &a, const Sequence &b) -{ - vector > table; - table.resize(a.size() + 1); - for (unsigned i = 0; i < table.size(); ++i) - table[i].resize(b.size() + 1); - - for (int i = 0; i <= (int)a.size(); ++i) - table[i][0] = i; - - for (int j = 0; j <= (int)b.size(); ++j) - table[0][j] = j; - - for (int i = 1; i <= (int)a.size(); ++i) - { - for (int j = 1; j <= (int)b.size(); ++j) - { - table[i][j] = 1000000000; - if (table[i-1][j] + 1 < table[i][j]) - table[i][j] = table[i-1][j] + 1; - if (table[i][j-1] + 1 < table[i][j]) - table[i][j] = table[i][j-1] + 1; - if (table[i-1][j-1] + (a[i-1] != b[j-1]) < table[i][j]) - table[i][j] = table[i-1][j-1] + (a[i-1] != b[j-1]); - } - } - - return 1.0 - 1.0 * table[a.size()][b.size()] / max(a.size(), b.size()); -} - void ContigGraph::BuildBeginIdbaKmerMap() { begin_kmer_map_.clear(); @@ -657,37 +412,3 @@ int ContigGraph::GetDepth(ContigGraphVertexAdaptor current, int depth, int &maxi return min(maximum, min_length); } -double ContigGraph::FindSimilarPath(ContigGraphVertexAdaptor target, ContigGraphPath &path, int &time) -{ - if (++time > 100) - return 0; - - ContigGraphVertexAdaptor current = path.back(); - if (path.size() > 1.1 * target.contig_size()) - return 0; - else if (current.end_kmer(kmer_size_-1) == target.end_kmer(kmer_size_-1) - && current.out_edges() == target.out_edges()) - { - Sequence contig; - ContigInfo contig_info; - path.Assemble(contig, contig_info); - return GetSimilarity(target.contig(), contig); - } - else - { - double maximum = 0; - deque neighbors; - GetNeighbors(current, neighbors); - for (unsigned i = 0; i < neighbors.size(); ++i) - { - path.Append(neighbors[i], -kmer_size_+1); - double tmp = FindSimilarPath(target, path, time); - path.Pop(); - if (tmp > maximum) - maximum = tmp; - } - return maximum; - } -} - - diff --git a/src/idba/contig_graph.h 
b/src/idba/contig_graph.h index fd49ff6..b57c2da 100644 --- a/src/idba/contig_graph.h +++ b/src/idba/contig_graph.h @@ -65,7 +65,6 @@ class ContigGraph void MergeSimplePaths(); int64_t Trim(int min_length); - int64_t Trim(int min_length, double min_cover); int64_t RemoveDeadEnd(int min_length); int64_t RemoveBubble(); @@ -73,11 +72,6 @@ class ContigGraph double IterateCoverage(int min_length, double min_cover, double max_cover, double factor = 1.1); bool RemoveLowCoverage(double min_cover, int min_length); - bool RemoveLocalLowCoverage(double min_cover, int min_length, double ratio); - bool RemoveComponentLowCoverage(double min_cover, int min_length, double ratio, int max_component_size); - - double LocalCoverage(ContigGraphVertexAdaptor current, int region_length); - double LocalCoverageSingle(ContigGraphVertexAdaptor current, int region_length, double &num_count, int &num_kmer); int64_t Assemble(std::deque &contigs, std::deque &contig_infos); @@ -98,10 +92,6 @@ class ContigGraph } } - bool IsConverged(ContigGraphVertexAdaptor current); - int64_t SplitBranches(); - void GetComponents(std::deque > &components, std::deque &component_strings); - std::deque &vertices() { return vertices_; } const std::deque &vertices() const { return vertices_; } @@ -131,20 +121,6 @@ class ContigGraph ContigGraph(const ContigGraph &); const ContigGraph &operator =(const ContigGraph &); - static bool CompareContigLength(const ContigGraphVertex &x, const ContigGraphVertex &y) - { return x.contig_size() > y.contig_size(); } - - static bool CompareContigCoverage(const ContigGraphVertexAdaptor &x, const ContigGraphVertexAdaptor &y) - { return x.coverage() > y.coverage(); } - - static double GetSimilarity(ContigGraphVertexAdaptor &x, ContigGraphVertexAdaptor &y) - { - Sequence a = x.contig(); - Sequence b = y.contig(); - return GetSimilarity(a, b); - } - static double GetSimilarity(const Sequence &x, const Sequence &y); - void BuildBeginIdbaKmerMap(); bool 
GetNextVertexAdaptor(ContigGraphVertexAdaptor &current, ContigGraphVertexAdaptor &next) @@ -180,45 +156,9 @@ class ContigGraph return ContigGraphVertexAdaptor(); } - ContigGraphVertexAdaptor GetBeginVertexAdaptor(std::deque &component) - { - ContigGraphVertexAdaptor begin; - for (unsigned i = 0; i < component.size(); ++i) - { - if (component[i].in_edges() == 0) - { - if (begin.is_null()) - begin = component[i]; - else - return ContigGraphVertexAdaptor(NULL); - } - } - return begin; - } - - ContigGraphVertexAdaptor GetEndVertexAdaptor(std::deque &component) - { - ContigGraphVertexAdaptor end; - for (unsigned i = 0; i < component.size(); ++i) - { - if (component[i].out_edges() == 0) - { - if (end.is_null()) - end = component[i]; - else - return ContigGraphVertexAdaptor(NULL); - } - } - return end; - } - - bool IsValid(std::deque &component); bool CycleDetect(ContigGraphVertexAdaptor current, std::map &status); - void FindLongestPath(std::deque &component, ContigGraphPath &path); - void TopSort(std::deque &component, std::deque &order); void TopSortDFS(std::deque &order, ContigGraphVertexAdaptor current, std::map &status); int GetDepth(ContigGraphVertexAdaptor current, int length, int &maximum, int min_length); - double FindSimilarPath(ContigGraphVertexAdaptor target, ContigGraphPath &path, int &time); HashMap begin_kmer_map_; std::deque vertices_; diff --git a/src/megahit b/src/megahit index d581f07..2c14bab 100755 --- a/src/megahit +++ b/src/megahit @@ -459,6 +459,10 @@ def check_reads(): raise Usage("Cannot find file " + r) if opt.input_cmd == "" and len(opt.pe1 + opt.pe2 + opt.se + opt.pe12) == 0: raise Usage("No input files or input command!") + opt.pe1 = [os.path.abspath(f) for f in opt.pe1] + opt.pe2 = [os.path.abspath(f) for f in opt.pe2] + opt.pe12 = [os.path.abspath(f) for f in opt.pe12] + opt.se = [os.path.abspath(f) for f in opt.se] write_cp() From b790190edef5b96475075259893f7bc67f599c5c Mon Sep 17 00:00:00 2001 From: Dinghua Li Date: Sun, 31 Mar 2019 00:25:11 
-0700 Subject: [PATCH 5/5] fix travis ci --- .travis.yml | 12 ++++++++---- CMakeLists.txt | 2 +- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/.travis.yml b/.travis.yml index 4831f08..80159e9 100644 --- a/.travis.yml +++ b/.travis.yml @@ -6,10 +6,14 @@ language: python python: - "2.7" - "3.4" -script: git submodule update --init && mkdir build && cd build && \ - cmake .. -DCMAKE_BUILD_TYPE=Debug -DCOVERAGE=ON && make -j2 simple_test && \ - sudo make install && megahit --test && megahit --test --kmin-1pass && \ - megahit --test --no-hw-accel +script: + - mkdir build + - cd build && cmake .. -DCMAKE_BUILD_TYPE=Debug -DCOVERAGE=ON + - make -j2 simple_test + - sudo make install + - megahit --test + - megahit --test --kmin-1pass + - megahit --test --no-hw-accel after_success: # Create lcov report - lcov --capture --directory . --output-file coverage.info diff --git a/CMakeLists.txt b/CMakeLists.txt index 15b083f..0a2bd75 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -52,7 +52,7 @@ if (COVERAGE) endif (COVERAGE) set(CMAKE_CXX_FLAGS_RELEASE "-O2 -DNDEBUG") -set(CMAKE_CXX_FLAGS_DEBUG "-g -ggdb -O0") +set(CMAKE_CXX_FLAGS_DEBUG "-g -ggdb -O1") message(STATUS "Build type: ${CMAKE_BUILD_TYPE}: ${CMAKE_CXX_FLAGS}")