Set up CI with Azure Pipelines (#246)
* Set up CI with Azure Pipelines

[skip ci]

* Update azure-pipelines.yml for Azure Pipelines

* Update azure-pipelines.yml for Azure Pipelines

* fix for azure-pipeline

* polish azure-pipelines

* try assembly on real data with azure pipelines

* fix segmentation fault when there exist length-0 sequences

* remove an incorrect assert in UnitigGraph::Refresh

* add partial assembly to azure pipelines

* add codecov report to azure pipeline

* add more tests and update codecov setting

* release 1.2.9
voutcn authored Oct 14, 2019
1 parent d97f037 commit 36f5d45
Showing 16 changed files with 160 additions and 33 deletions.
15 changes: 1 addition & 14 deletions .travis.yml
@@ -13,17 +13,4 @@ script:
- sudo make install
- megahit --test
- megahit --test --kmin-1pass
- megahit --test --no-hw-accel
after_success:
# Create lcov report
- wget http://downloads.sourceforge.net/ltp/lcov-1.14.tar.gz
- tar zvxf lcov-1.14.tar.gz
- export PATH=lcov-1.14/bin/:${PATH}
- lcov --capture --directory . --output-file coverage.info
- lcov --remove coverage.info '/usr/*' --output-file coverage.info # filter system-files
- lcov --remove coverage.info '*xxhash/*' --output-file coverage.info # filter xxhash-files
- lcov --remove coverage.info '*parallel_hashmap/*' --output-file coverage.info # filter parallel-hashmap-files
- lcov --remove coverage.info '*pprintpp/*' --output-file coverage.info # filter pprintpp files
- lcov --list coverage.info # debug info
# Uploading report to CodeCov
- bash <(curl -s https://codecov.io/bash) -f coverage.info || echo "Codecov did not collect coverage reports"
- megahit --test --no-hw-accel
6 changes: 6 additions & 0 deletions CHANGELOG.md
@@ -1,3 +1,9 @@
### 1.2.9 / 2019-10-13
- Fix segfault triggered by length-zero sequences
- Fix memory detection problem for some outdated MacOS versions
- Fix an incorrect assertion in unitig graph refreshing
- Add `--verbose` to output the full log to the screen

### 1.2.8 / 2019-08-10
- Add intermediate `megahit_core_popcnt` for CPUs that have ABM but not BMI2
- Allow new assembly task with `--continue`
6 changes: 4 additions & 2 deletions CMakeLists.txt
@@ -101,9 +101,11 @@ add_custom_target(
simple_test
COMMAND ./megahit --test -t 2
COMMAND MEGAHIT_NUM_MERCY_FACTOR=1.5 ./megahit --test -t 4 --mem-flag 0 --no-hw-accel
COMMAND ./megahit --test -t 2 --kmin-1pass
COMMAND rm -rf test-random && python3 ../test_data/generate_random_fasta.py > random.fa && ./megahit -r random.fa --k-list 255 --min-count 1 -o test-random
COMMAND ./megahit --test -t 2 --kmin-1pass --prune-level 3 --prune-depth 0
COMMAND rm -rf test-random && python3 ${TEST_DATA}/generate_random_fasta.py > random.fa && ./megahit -r random.fa --k-list 255 --min-count 1 -o test-random
COMMAND rm -rf test-fastg && ./megahit --test -t 2 --mem-flag 2 --keep-tmp-files -o test-fastg
COMMAND rm -rf test-empty && ./megahit -r ${TEST_DATA}/empty.fa -o test-empty
COMMAND rm -rf test-no-contig && ./megahit -r ${TEST_DATA}/r4.fa -o test-no-contig
COMMAND ./megahit_toolkit contig2fastg 59 test-fastg/intermediate_contigs/k59.contigs.fa > 59.fastg
COMMAND ./megahit_toolkit readstat < test-fastg/intermediate_contigs/k59.contigs.fa
)
90 changes: 90 additions & 0 deletions azure-pipelines.yml
@@ -0,0 +1,90 @@
jobs:
- job: ubuntu_1604
pool:
vmImage: 'Ubuntu-16.04'
strategy:
matrix:
python36:
python.version: '3.6'
build.type: 'Debug'
sanitizer: 'ON'
static: 'OFF'
Python27:
python.version: '2.7'
build.type: 'Release'
sanitizer: 'OFF'
static: 'ON'
steps:
- task: UsePythonVersion@0
inputs:
versionSpec: '$(python.version)'
addToPath: true
- script: |
mkdir build
cd build
cmake -DCMAKE_BUILD_TYPE=$(build.type) -DSANITIZER=$(sanitizer) -DSTATIC_BUILD=$(static) ..
make simple_test -j `nproc`
displayName: 'build and test'
- job: macos
strategy:
matrix:
1013:
image: macos-10.13
latest:
image: macos-latest
pool:
vmImage: $(image)
steps:
- script: |
brew install cmake gcc@9 zlib bzip2
displayName: 'install dependencies'
- script: |
mkdir build
cd build
CC=gcc-9 CXX=g++-9 cmake ..
make simple_test -j `sysctl -n hw.physicalcpu`
displayName: 'build and test'
- job: assembly
timeoutInMinutes: 0
strategy:
matrix:
codecov:
build.type: 'Release'
sanitizer: 'OFF'
coverage: 'ON'
sanitize:
build.type: 'Debug'
sanitizer: 'ON'
coverage: 'OFF'
pool:
vmImage: 'Ubuntu-16.04'
steps:
- script: |
mkdir build
cd build
cmake -DCMAKE_BUILD_TYPE=$(build.type) -DSANITIZER=$(sanitizer) -DCOVERAGE=$(coverage) ..
make -j `nproc`
make simple_test
sudo make install
displayName: 'build and test'
- script: |
curl -o- ftp://ftp.sra.ebi.ac.uk/vol1/fastq/SRR752/007/SRR7521507/SRR7521507_1.fastq.gz | gzip -cd | head -4000000 | gzip -1 > 1.fq.gz
curl -o- ftp://ftp.sra.ebi.ac.uk/vol1/fastq/SRR752/007/SRR7521507/SRR7521507_2.fastq.gz | gzip -cd | head -4000000 | gzip -1 > 2.fq.gz
megahit --presets meta-large -1 1.fq.gz -2 2.fq.gz -m5e9 --verbose
displayName: 'assemble'
- script: |
if [ $(coverage) = 'ON' ]; then
wget http://downloads.sourceforge.net/ltp/lcov-1.14.tar.gz
tar zvxf lcov-1.14.tar.gz
export PATH=lcov-1.14/bin/:${PATH}
lcov --capture --directory . --output-file coverage.info
lcov --remove coverage.info '/usr/*' --output-file coverage.info # filter system-files
lcov --remove coverage.info '*xxhash/*' --output-file coverage.info # filter xxhash-files
lcov --remove coverage.info '*parallel_hashmap/*' --output-file coverage.info # filter parallel-hashmap-files
lcov --remove coverage.info '*pprintpp/*' --output-file coverage.info # filter pprintpp files
lcov --list coverage.info # debug info
bash <(curl -s https://codecov.io/bash) -f coverage.info -t $(CODECOV_TOKEN) || echo "Codecov did not collect coverage reports"
fi
displayName: 'codecov'
8 changes: 8 additions & 0 deletions codecov.yml
@@ -0,0 +1,8 @@
coverage:
status:
patch:
default:
target: 0%
project:
default:
target: 0%
1 change: 0 additions & 1 deletion src/assembly/low_depth_remover.h
@@ -16,6 +16,5 @@ bool RemoveLocalLowDepth(UnitigGraph &graph, double min_depth, uint32_t max_len,
uint32_t IterateLocalLowDepth(UnitigGraph &graph, double min_depth,
uint32_t min_len, uint32_t local_width,
double local_ratio, bool permanent_rm = false);
uint32_t RemoveLowDepth(UnitigGraph &graph, double min_depth);

#endif // MEGAHIT_LOW_DEPTH_REMOVER_H
1 change: 0 additions & 1 deletion src/assembly/unitig_graph.cpp
@@ -312,7 +312,6 @@ void UnitigGraph::Refresh(bool set_changed) {
while (true) {
next_adapter = NextSimplePathAdapter(next_adapter);
assert(next_adapter.IsValid());
assert(!(next_adapter.GetFlag() & kDeleted));
if (next_adapter.b() == adapter.b()) {
break;
}
2 changes: 1 addition & 1 deletion src/definitions.h
@@ -25,7 +25,7 @@
#include <stdint.h>

#ifndef PACKAGE_VERSION
#define PACKAGE_VERSION "v1.2.8"
#define PACKAGE_VERSION "v1.2.9"
#endif

#include "sdbg/sdbg_def.h"
6 changes: 4 additions & 2 deletions src/localasm/local_assemble.cpp
@@ -224,9 +224,11 @@ void MapToContigs(const HashMapper &mapper,

void AssembleAndOutput(const HashMapper &mapper, const SeqPackage &read_pkg,
MappingResultCollector &result_collector,
const std::string &output_file, int32_t local_range,
const std::string &output_file,
const int32_t local_range,
const LocalAsmOption &opt) {
size_t min_num_reads = local_range / read_pkg.max_length();
const size_t min_num_reads = read_pkg.max_length() > 0 ?
local_range / read_pkg.max_length(): 1;
xinfo("Minimum number of reads to do local assembly: {}\n", min_num_reads);

Sequence seq, contig_end;
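The guard above matters for the new empty-input tests: with no reads loaded, `read_pkg.max_length()` is 0 and the old integer division is undefined behaviour (typically a SIGFPE). A minimal standalone sketch of the same guard, with hypothetical names rather than MEGAHIT's API:

#include <cstddef>
#include <cstdint>
#include <cstdio>

// Falls back to 1 read when the read package is empty, instead of dividing by zero.
static std::size_t MinReadsForLocalAsm(int32_t local_range, uint32_t max_read_length) {
  return max_read_length > 0 ? local_range / max_read_length : 1;
}

int main() {
  std::printf("%zu\n", MinReadsForLocalAsm(1000, 100));  // 10
  std::printf("%zu\n", MinReadsForLocalAsm(1000, 0));    // 1
  return 0;
}
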
24 changes: 19 additions & 5 deletions src/megahit
@@ -195,6 +195,7 @@ class Options:
self.pe12 = []
self.se = []
self.presets = ''
self.verbose = False

@property
def log_file_name(self):
@@ -321,6 +322,7 @@ def parse_option(argv):
'mem-flag=',
'continue',
'version',
'verbose',
'out-prefix=',
'presets=',
'test',
@@ -398,6 +400,8 @@ def parse_option(argv):
elif option in ('-v', '--version'):
print(software_info.megahit_version)
exit(0)
elif option == '--verbose':
opt.verbose = True
elif option == '--continue':
opt.continue_mode = True
elif option == '--out-prefix':
@@ -591,11 +595,19 @@ def check_reads():


def detect_available_mem():
psize = os.sysconf('SC_PAGE_SIZE')
pcount = os.sysconf('SC_PHYS_PAGES')
if psize < 0 or pcount < 0:
raise SystemError
return psize * pcount
try:
psize = os.sysconf('SC_PAGE_SIZE')
pcount = os.sysconf('SC_PHYS_PAGES')
if psize < 0 or pcount < 0:
raise SystemError
return psize * pcount
except ValueError:
if sys.platform.find("darwin") != -1:
return int(float(os.popen("sysctl hw.memsize").readlines()[0].split()[1]))
elif sys.platform.find("linux") != -1:
return int(float(os.popen("free").readlines()[1].split()[1]) * 1024)
else:
raise


def cpu_dispatch():
@@ -926,6 +938,8 @@ def merge_final(final_k):


def run_sub_command(cmd, msg, verbose=False):
if opt.verbose:
verbose = True
logger.info(msg)
logger.debug('command %s' % ' '.join(cmd))

9 changes: 5 additions & 4 deletions src/sequence/io/binary_reader.h
@@ -12,7 +12,8 @@

class BinaryReader : public BaseSequenceReader {
public:
explicit BinaryReader(const std::string &filename) : is_(filename) {
explicit BinaryReader(const std::string &filename)
: is_(filename), buf_(120) {
if (is_.bad()) {
throw std::invalid_argument("Failed to open file " + filename);
}
@@ -33,14 +34,14 @@ class BinaryReader : public BaseSequenceReader {
if (buf_.size() < num_words) {
buf_.resize(num_words);
}
auto bytes_read = reader_.read(&buf_[0], num_words);
auto bytes_read = reader_.read(buf_.data(), num_words);
assert(bytes_read == num_words * sizeof(buf_[0]));
(void)(bytes_read);

if (!reverse) {
pkg->AppendCompactSequence(&buf_[0], read_len);
pkg->AppendCompactSequence(buf_.data(), read_len);
} else {
pkg->AppendReversedCompactSequence(&buf_[0], read_len);
pkg->AppendReversedCompactSequence(buf_.data(), read_len);
}

num_bases += read_len;
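Giving `buf_` an initial size and switching from `&buf_[0]` to `buf_.data()` sidesteps undefined behaviour: indexing an empty `std::vector` with `operator[]` is UB, while `data()` is well-defined even when the vector is empty. A small illustration of the distinction, independent of the reader class:

#include <cstdint>
#include <vector>

int main() {
  std::vector<uint32_t> buf;       // empty, as buf_ could be before this change
  uint32_t *p = buf.data();        // fine: may be nullptr, but well-defined
  // uint32_t *q = &buf[0];        // undefined behaviour on an empty vector

  buf.resize(120);                 // pre-sizing, as the new constructor does
  uint32_t *q = &buf[0];           // now valid and equal to buf.data()
  (void)p;
  (void)q;
  return 0;
}
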
7 changes: 5 additions & 2 deletions src/sequence/kmer.h
@@ -22,7 +22,10 @@ class Kmer {
using word_type = TWord;
static const unsigned kNumWords = NWords;

Kmer() { std::memset(data_, 0, sizeof(data_)); }
Kmer() {
static_assert(sizeof(*this) == sizeof(TWord) * NWords, "");
std::memset(data_, 0, sizeof(data_));
}

Kmer(const Kmer &kmer) { std::memcpy(data_, kmer.data_, sizeof(data_)); }

@@ -214,7 +217,7 @@

private:
word_type data_[kNumWords];
} __attribute__((packed));
};

namespace std {
template <const unsigned NumWords, typename T>
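Replacing `__attribute__((packed))` with a `static_assert` keeps the guarantee that a `Kmer` occupies exactly `NWords` words while letting the compiler assume normal alignment (packed structs may be read through unaligned accesses, which are slower and, on some targets, unsafe). A minimal sketch of the same pattern in plain C++11, with a hypothetical class name:

#include <cstdint>
#include <cstring>

template <unsigned NWords, typename TWord>
class FixedKmer {
 public:
  FixedKmer() {
    // The only member is an array of TWord, so no padding is expected and the
    // size check holds without forcing a packed layout.
    static_assert(sizeof(FixedKmer) == sizeof(TWord) * NWords,
                  "unexpected padding in FixedKmer");
    std::memset(data_, 0, sizeof(data_));
  }

 private:
  TWord data_[NWords];
};

int main() {
  FixedKmer<4, std::uint64_t> k;  // 32 bytes, same as uint64_t[4]
  (void)k;
  return 0;
}
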
13 changes: 13 additions & 0 deletions src/sequence/sequence_package.h
@@ -259,6 +259,12 @@ class SequencePackage {
}

void AppendStringSequence(const char *from, const char *to, unsigned len) {
if (len == 0) {
// Fake a sequence whose length is 1, as we need all sequences' lengths > 0
// to make `GetSeqID` work
auto fake_sequence = "A";
return AppendStringSequence(fake_sequence, fake_sequence + 1, 1);
}
UpdateLength(len);
std::ptrdiff_t step = from < to ? 1 : -1;
for (auto ptr = from; ptr != to; ptr += step) {
@@ -267,7 +273,14 @@
}

void AppendCompactSequence(const TWord *ptr, unsigned len, bool rev) {
if (len == 0) {
// Fake a sequence whose length is 1, as we need all sequences' lengths > 0
// to make `GetSeqID` work
TWord fake_sequence = 0;
return AppendCompactSequence(&fake_sequence, 1, false);
}
UpdateLength(len);

if (rev) {
auto rptr = ptr + DivCeiling(len, kBasesPerWord) - 1;
unsigned bases_in_last_word = len % kBasesPerWord;
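The comment in the diff only hints at why zero-length sequences are a problem. Under the assumption (simplified, not the actual class) that `GetSeqID` maps a base position back to its sequence by searching the cumulative start offsets, a zero-length entry duplicates an offset and makes that lookup ambiguous; padding it to one base, as the patch does, keeps the offsets strictly increasing. A rough sketch:

#include <algorithm>
#include <cstdint>
#include <vector>

struct TinyPackage {
  std::vector<std::uint64_t> start_pos{0};  // start offset of every sequence

  void Append(unsigned len) {
    if (len == 0) len = 1;  // the "fake" one-base sequence from the patch
    start_pos.push_back(start_pos.back() + len);
  }

  // Which sequence owns base position `pos`?
  std::uint64_t GetSeqID(std::uint64_t pos) const {
    auto it = std::upper_bound(start_pos.begin(), start_pos.end(), pos);
    return static_cast<std::uint64_t>(it - start_pos.begin()) - 1;
  }
};

int main() {
  TinyPackage pkg;
  pkg.Append(3);
  pkg.Append(0);  // stored as length 1; with length 0 the offsets would repeat
  pkg.Append(2);
  return pkg.GetSeqID(4) == 2 ? 0 : 1;  // base 4 belongs to the third sequence
}
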
3 changes: 2 additions & 1 deletion src/sorting/base_engine.cpp
@@ -218,7 +218,8 @@ void BaseSequenceSortingEngine::Lv0PrepareThreadPartition() {
int64_t average = meta_.num_sequences / n_threads_;
meta.seq_from = t * average;
meta.seq_to = t < n_threads_ - 1 ? (t + 1) * average : meta_.num_sequences;
meta.offset_base = Lv0EncodeDiffBase(meta.seq_from);
meta.offset_base = meta.seq_from < meta_.num_sequences ?
Lv0EncodeDiffBase(meta.seq_from) : std::numeric_limits<int64_t>::max();
}

for (unsigned i = 0; i < kNumBuckets; ++i) {
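The sentinel above covers the case, now exercised by the empty-input tests, where a thread's partition is empty. A small numeric sketch (hypothetical values, with `Lv0EncodeDiffBase` stubbed out) of how `seq_from` can reach `num_sequences`:

#include <cstdint>
#include <cstdio>
#include <limits>

int main() {
  const std::int64_t num_sequences = 0;  // e.g. the new test-empty input
  const unsigned n_threads = 4;
  const std::int64_t average = num_sequences / n_threads;  // 0

  for (unsigned t = 0; t < n_threads; ++t) {
    const std::int64_t seq_from = t * average;  // 0, i.e. == num_sequences, for every thread
    const std::int64_t seq_to = t + 1 < n_threads ? (t + 1) * average : num_sequences;
    // Without the guard, Lv0EncodeDiffBase(seq_from) would be asked about a
    // sequence that does not exist; the guard substitutes a harmless sentinel.
    const std::int64_t offset_base =
        seq_from < num_sequences ? /* Lv0EncodeDiffBase(seq_from) */ 0
                                 : std::numeric_limits<std::int64_t>::max();
    std::printf("thread %u: seqs [%lld, %lld), offset_base=%lld\n", t,
                (long long)seq_from, (long long)seq_to, (long long)offset_base);
  }
  return 0;
}
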
Empty file added test_data/empty.fa
Empty file.
2 changes: 2 additions & 0 deletions test_data/r4.fa
@@ -1,2 +1,4 @@
>megahit_ref_example_42_498_1:0:0_2:0:0_12b/1
GGTTTTTTCAATCATCGCCACCAGGTGGTTGGTGATTTTGGGGGGGGCAGAGATGACGGTGGCCACCTGCCCCTGCCTGGCATTGCTTTCCAGAATATCG
>1
NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
