Skip to content

Commit

Permalink
Merge pull request #208 from voutcn/refactor
Browse files Browse the repository at this point in the history
cpu dispatch: detect BMI2 and POPCNT automatically
  • Loading branch information
voutcn committed Apr 14, 2019
2 parents 99bae17 + e2f8655 commit 6e5b638
Show file tree
Hide file tree
Showing 6 changed files with 73 additions and 18 deletions.
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
### 1.2.2-beta / 2019-04-16 PST
- Automatically detect POPCNT/BMI2 and select the correct megahit_core binary

### 1.2.1-beta / 2019-03-30 PST
- Added `--no-hw-accel` option for users whose CPUs do not support BMI2/POPCNT
- Added `--test` option for testing
Expand Down
8 changes: 3 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,15 +24,13 @@ Past versions can be found at the [release](https://github.com/voutcn/megahit/re
### Running with Linux binaries or docker images (recommended)

``` sh
https://github.com/voutcn/megahit/releases/download/v1.2.1-beta/MEGAHIT-1.2.1-beta-Linux-static.tar.gz
tar zvxf MEGAHIT-1.2.1-beta-Linux-static
cd MEGAHIT-1.2.1-beta-Linux-static/bin/
wget https://github.com/voutcn/megahit/releases/download/v1.2.2-beta/MEGAHIT-1.2.2-beta-Linux-static.tar.gz
tar zvxf MEGAHIT-1.2.2-beta-Linux-static.tar.gz
cd MEGAHIT-1.2.2-beta-Linux-static/bin/
./megahit --test # run on a toy dataset
./megahit -1 YOUR_PE_READ_1.gz -2 YOUR_PE_READ_2.fq.gz -o YOUR_OUTPUT_DIR
```

If your CPU does not support BMI2 and/or POPCNT, you may see "exit code -4". In this case, run MEGAHIT with `--no-hw-accel` option.

You can also run MEGAHIT with its docker images.

``` sh
Expand Down
2 changes: 1 addition & 1 deletion src/definitions.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
#include <stdint.h>

#ifndef PACKAGE_VERSION
#define PACKAGE_VERSION "v1.2.1-beta"
#define PACKAGE_VERSION "v1.2.2-beta"
#endif

#include "sdbg/sdbg_def.h"
Expand Down
4 changes: 4 additions & 0 deletions src/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
#include <cstdlib>
#include <cstring>

#include "utils/cpu_dispatch.h"
#include "definitions.h"

int main_assemble(int argc, char **argv);
Expand Down Expand Up @@ -55,6 +56,7 @@ void show_help(const char *program_name) {
" trim trim low quality tail of fastq reads\n"
" filterbylen filter contigs by length\n"
" extractpe extract pe reads and se reads from fasta/fastq files\n"
" checkcpu check whether the run-time CPU supports POPCNT and BMI2"
" dumpversion dump version\n"
" kmax the largest k value supported\n",
program_name);
Expand Down Expand Up @@ -90,6 +92,8 @@ int main(int argc, char **argv) {
return main_filter_by_len(argc - 1, argv + 1);
} else if (strcmp(argv[1], "extractpe") == 0) {
return main_extract_pe(argc - 1, argv + 1);
} else if (strcmp(argv[1], "checkcpu") == 0) {
printf("%d\n", HasPopcnt() && HasBmi2());
} else if (strcmp(argv[1], "dumpversion") == 0) {
printf("%s\n", PACKAGE_VERSION);
} else if (strcmp(argv[1], "kmax") == 0) {
Expand Down
41 changes: 29 additions & 12 deletions src/megahit
Original file line number Diff line number Diff line change
Expand Up @@ -527,6 +527,21 @@ def prepare_continue():
print("Continue from check point " + str(opt.last_cp), file=sys.stderr)


def cpu_dispatch():
    """Choose which megahit_core binary to run and log the decision.

    If ``opt.megahit_core`` already ends with ``megahit_core_no_hw_accel``,
    only a log line is emitted. Otherwise the current ``opt.megahit_core``
    is run with the ``checkcpu`` sub-command and its stdout is read: an
    output of ``'1'`` keeps the current binary, anything else re-points
    ``opt.megahit_core`` at ``megahit_core_no_hw_accel`` located in the
    directory of this script.
    """
    if opt.megahit_core.endswith("megahit_core_no_hw_accel"):
        manual_msg = ("--- [%s] Using megahit_core without POPCNT and BMI2 support, "
                      "because --no-hw-accel option manually specified")
        logging.info(manual_msg % (datetime.now().strftime("%c")))
        return

    # Ask the binary itself whether the run-time CPU has the required features.
    probe = subprocess.Popen([opt.megahit_core, "checkcpu"], stdout=subprocess.PIPE)
    probe_stdout = probe.communicate()[0]
    has_hw_accel = probe_stdout.rstrip().decode('utf-8')

    if has_hw_accel == '1':
        accel_msg = "--- [%s] Using megahit_core with POPCNT and BMI2 support"
        logging.info(accel_msg % (datetime.now().strftime("%c")))
        return

    fallback_msg = ("--- [%s] Using megahit_core without POPCNT and BMI2 support, "
                    "because the features not detected by CPUID ")
    logging.info(fallback_msg % (datetime.now().strftime("%c")))
    # The fallback binary is looked up in the directory of this script.
    script_dir = os.path.dirname(os.path.realpath(__file__))
    opt.megahit_core = os.path.join(script_dir, 'megahit_core_no_hw_accel')


def check_bin():
if not os.path.exists(opt.megahit_core):
raise Usage("Cannot find megahit_core, please recompile.")
Expand Down Expand Up @@ -671,7 +686,7 @@ def build_lib():
os.mkfifo(os.path.join(opt.temp_dir, "inpipe.se." + str(i)))
fifos.append(os.path.join(opt.temp_dir, "inpipe.se." + str(i)))

logging.info("--- [%s] Converting reads to binary library ---" % datetime.now().strftime("%c"))
logging.info("--- [%s] Converting reads to binary library " % datetime.now().strftime("%c"))
logging.debug("%s" % " ".join(build_lib_cmd))

if opt.input_cmd != "":
Expand Down Expand Up @@ -795,11 +810,11 @@ def build_first_graph():

try:
if opt.kmin_1pass:
logging.info("--- [%s] Extracting solid (k+1)-mers and building sdbg for k = %d ---" % (
logging.info("--- [%s] Extracting solid (k+1)-mers and building sdbg for k = %d " % (
datetime.now().strftime("%c"), opt.k_min))
else:
logging.info(
"--- [%s] Extracting solid (k+1)-mers for k = %d ---" % (datetime.now().strftime("%c"), opt.k_min))
"--- [%s] Extracting solid (k+1)-mers for k = %d " % (datetime.now().strftime("%c"), opt.k_min))

logging.debug("cmd: %s" % " ".join(cmd))
p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
Expand Down Expand Up @@ -871,7 +886,7 @@ def build_graph(kmer_k, kmer_from):
build_cmd.append("--need_mercy")

try:
logging.info("--- [%s] Building graph for k = %d ---" % (datetime.now().strftime("%c"), kmer_k))
logging.info("--- [%s] Building graph for k = %d " % (datetime.now().strftime("%c"), kmer_k))
logging.debug("%s" % " ".join(build_cmd))

p = subprocess.Popen(build_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
Expand Down Expand Up @@ -916,7 +931,7 @@ def iterate(cur_k, step):
"-r", opt.lib + ".bin"]

try:
logging.info("--- [%s] Extracting iterative edges from k = %d to %d ---" % (
logging.info("--- [%s] Extracting iterative edges from k = %d to %d " % (
datetime.now().strftime("%c"), cur_k, next_k))
logging.debug("cmd: %s" % " ".join(iterate_cmd))

Expand Down Expand Up @@ -981,7 +996,7 @@ def assemble(cur_k):

try:
logging.info(
"--- [%s] Assembling contigs from SdBG for k = %d ---" % (datetime.now().strftime("%c"), cur_k))
"--- [%s] Assembling contigs from SdBG for k = %d " % (datetime.now().strftime("%c"), cur_k))
logging.debug("cmd: %s" % " ".join(assembly_cmd))

p = subprocess.Popen(assembly_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
Expand Down Expand Up @@ -1018,7 +1033,7 @@ def local_assemble(cur_k, kmer_to):
"-o", contig_prefix(cur_k) + ".local.fa",
"--kmax", str(kmer_to)]
try:
logging.info("--- [%s] Local assembling k = %d ---" % (datetime.now().strftime("%c"), cur_k))
logging.info("--- [%s] Local assembling k = %d " % (datetime.now().strftime("%c"), cur_k))
logging.debug("cmd: %s" % " ".join(la_cmd))

p = subprocess.Popen(la_cmd, stderr=subprocess.PIPE)
Expand Down Expand Up @@ -1047,7 +1062,7 @@ def local_assemble(cur_k, kmer_to):
def merge_final():
global cp
if (not opt.continue_mode) or (cp > opt.last_cp):
logging.info("--- [%s] Merging to output final contigs ---" % (datetime.now().strftime("%c")))
logging.info("--- [%s] Merging to output final contigs " % (datetime.now().strftime("%c")))
final_contig_name = os.path.join(opt.out_dir, "final.contigs.fa")
if opt.out_prefix != "":
final_contig_name = os.path.join(opt.out_dir, opt.out_prefix + ".contigs.fa")
Expand Down Expand Up @@ -1101,21 +1116,23 @@ def main(argv=None):
logging.getLogger('').addHandler(console)

logging.info(megahit_version_str)
logging.info("--- [%s] Start assembly. Number of CPU threads %d ---" % (
logging.info("--- [%s] Start assembly. Number of CPU threads %d " % (
datetime.now().strftime("%c"), opt.num_cpu_threads))
logging.info("--- [%s] Available memory: %d, used: %d" % (
datetime.now().strftime("%c"), detect_available_mem(), opt.host_mem))

cpu_dispatch()

if not opt.continue_mode:
write_opt(argv[1:]) # for --continue

write_lib()
build_lib()

if set_max_k_by_lib():
logging.info("--- [%s] k-max reset to: %d ---" % (datetime.now().strftime("%c"), opt.k_max))
logging.info("--- [%s] k-max reset to: %d " % (datetime.now().strftime("%c"), opt.k_max))

logging.info("--- [%s] k list: %s ---" % (datetime.now().strftime("%c"), ','.join(map(str, opt.k_list))))
logging.info("--- [%s] k list: %s " % (datetime.now().strftime("%c"), ','.join(map(str, opt.k_list))))

build_first_graph()

Expand Down Expand Up @@ -1153,7 +1170,7 @@ def main(argv=None):
if not opt.keep_tmp_files and opt.test:
shutil.rmtree(opt.out_dir)

logging.info("--- [%s] ALL DONE. Time elapsed: %f seconds ---" % (
logging.info("--- [%s] ALL DONE. Time elapsed: %f seconds " % (
datetime.now().strftime("%c"), time.time() - start_time))

except Usage as err:
Expand Down
33 changes: 33 additions & 0 deletions src/utils/cpu_dispatch.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
//
// Created by vout on 4/14/19.
//

#ifndef MEGAHIT_CPU_DISPATCH_H
#define MEGAHIT_CPU_DISPATCH_H

// Returns true when the run-time CPU reports POPCNT support.
//
// Executes CPUID with EAX = 1 and tests bit 23 of the ECX value it returns.
inline bool HasPopcnt() {
  const unsigned kPopcntMask = 1U << 23U;
  unsigned reg_a, reg_b, reg_c, reg_d;
#ifdef _MSC_VER
  int regs[4];
  __cpuid(regs, 1);
  reg_a = regs[0];
  reg_b = regs[1];
  reg_c = regs[2];
  reg_d = regs[3];
#else
  // "0"(1) ties the input to output operand 0, i.e. loads EAX with leaf 1.
  asm volatile("cpuid\n\t"
               : "=a"(reg_a), "=b"(reg_b), "=c"(reg_c), "=d"(reg_d)
               : "0"(1));
#endif
  return (reg_c & kPopcntMask) != 0U;
}

// Returns true when the run-time CPU reports BMI2 support.
//
// BMI2 is read from bit 8 of EBX after CPUID with EAX = 7, ECX = 0. Leaf 7
// is only defined when CPUID leaf 0 reports a maximum basic leaf of at least
// 7; querying a leaf above the maximum yields unrelated register contents on
// some processors, which could be misread as "BMI2 present". The maximum
// leaf is therefore checked first and false is returned when leaf 7 is not
// available.
inline bool HasBmi2() {
  unsigned eax, ebx, ecx, edx;
#ifdef _MSC_VER
  int cpuid[4];
  // Leaf 0: EAX holds the highest supported basic CPUID leaf.
  __cpuid(cpuid, 0);
  if (static_cast<unsigned>(cpuid[0]) < 7U) {
    return false;
  }
  __cpuidex(cpuid, 7, 0);
  eax = cpuid[0], ebx = cpuid[1], ecx = cpuid[2], edx = cpuid[3];
#else
  // Leaf 0: EAX holds the highest supported basic CPUID leaf.
  asm volatile("cpuid\n\t" : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx) : "0" (0));
  if (eax < 7U) {
    return false;
  }
  // "0"(7) loads EAX with the leaf, "2"(0) loads ECX with the sub-leaf.
  asm volatile("cpuid\n\t" : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx) : "0" (7), "2" (0));
#endif
  return ((ebx >> 8U) & 1U) != 0U;
}


#endif //MEGAHIT_CPU_DISPATCH_H

0 comments on commit 6e5b638

Please sign in to comment.