From 6bb28c0eb18c1b50487a69cb37ac20ffb8fdb3a9 Mon Sep 17 00:00:00 2001 From: Dinghua Li Date: Sun, 14 Apr 2019 12:47:20 -0700 Subject: [PATCH 1/2] cpu dispatch: detect BMI2 and POPCNT automatically --- CHANGELOG.md | 3 +++ README.md | 2 -- src/definitions.h | 2 +- src/main.cpp | 4 ++++ src/megahit | 41 ++++++++++++++++++++++++++++------------ src/utils/cpu_dispatch.h | 33 ++++++++++++++++++++++++++++++++ 6 files changed, 70 insertions(+), 15 deletions(-) create mode 100644 src/utils/cpu_dispatch.h diff --git a/CHANGELOG.md b/CHANGELOG.md index 634d38b..8ef1f13 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,6 @@ +### 1.2.2-beta / 2019-04-16 PST +- Automatically detect POPCNT/BMI2 and select the correct megahit_core binary + ### 1.2.1-beta / 2019-03-30 PST - Added `--no-hw-accel` option for users whose CPUs do not support BMI2/POPCNT - Added `--test` option for testing diff --git a/README.md b/README.md index bc97232..3e64705 100644 --- a/README.md +++ b/README.md @@ -31,8 +31,6 @@ cd MEGAHIT-1.2.1-beta-Linux-static/bin/ ./megahit -1 YOUR_PE_READ_1.gz -2 YOUR_PE_READ_2.fq.gz -o YOUR_OUTPUT_DIR ``` -If your CPU does not support BMI2 and/or POPCNT, you may see "exit code -4". In this case, run MEGAHIT with `--no-hw-accel` option. - You can also run MEGAHIT with its docker images. 
``` sh diff --git a/src/definitions.h b/src/definitions.h index ecac67e..b5c451b 100644 --- a/src/definitions.h +++ b/src/definitions.h @@ -24,7 +24,7 @@ #include #ifndef PACKAGE_VERSION -#define PACKAGE_VERSION "v1.2.1-beta" +#define PACKAGE_VERSION "v1.2.2-beta" #endif #include "sdbg/sdbg_def.h" diff --git a/src/main.cpp b/src/main.cpp index acf7b16..29a68d2 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -22,6 +22,7 @@ #include #include +#include "utils/cpu_dispatch.h" #include "definitions.h" int main_assemble(int argc, char **argv); @@ -55,6 +56,7 @@ void show_help(const char *program_name) { " trim trim low quality tail of fastq reads\n" " filterbylen filter contigs by length\n" " extractpe extract pe reads and se reads from fasta/fastq files\n" + " checkcpu check whether the run-time CPU supports POPCNT and BMI2\n" " dumpversion dump version\n" " kmax the largest k value supported\n", program_name); @@ -90,6 +92,8 @@ int main(int argc, char **argv) { return main_filter_by_len(argc - 1, argv + 1); } else if (strcmp(argv[1], "extractpe") == 0) { return main_extract_pe(argc - 1, argv + 1); + } else if (strcmp(argv[1], "checkcpu") == 0) { + printf("%d\n", HasPopcnt() && HasBmi2()); } else if (strcmp(argv[1], "dumpversion") == 0) { printf("%s\n", PACKAGE_VERSION); } else if (strcmp(argv[1], "kmax") == 0) { diff --git a/src/megahit b/src/megahit index 2c14bab..8cd1bad 100755 --- a/src/megahit +++ b/src/megahit @@ -527,6 +527,21 @@ def prepare_continue(): print("Continue from check point " + str(opt.last_cp), file=sys.stderr) +def cpu_dispatch(): + if opt.megahit_core.endswith("megahit_core_no_hw_accel"): + logging.info("--- [%s] Using megahit_core without POPCNT and BMI2 support, " + "because --no-hw-accel option manually specified" % (datetime.now().strftime("%c"))) + else: + has_hw_accel = subprocess.Popen([opt.megahit_core, "checkcpu"], + stdout=subprocess.PIPE).communicate()[0].rstrip().decode('utf-8') + if has_hw_accel == '1': + logging.info("--- [%s] Using 
megahit_core with POPCNT and BMI2 support" % (datetime.now().strftime("%c"))) + else: + logging.info("--- [%s] Using megahit_core without POPCNT and BMI2 support, " + "because the features not detected by CPUID " % (datetime.now().strftime("%c"))) + opt.megahit_core = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'megahit_core_no_hw_accel') + + def check_bin(): if not os.path.exists(opt.megahit_core): raise Usage("Cannot find megahit_core, please recompile.") @@ -671,7 +686,7 @@ def build_lib(): os.mkfifo(os.path.join(opt.temp_dir, "inpipe.se." + str(i))) fifos.append(os.path.join(opt.temp_dir, "inpipe.se." + str(i))) - logging.info("--- [%s] Converting reads to binary library ---" % datetime.now().strftime("%c")) + logging.info("--- [%s] Converting reads to binary library " % datetime.now().strftime("%c")) logging.debug("%s" % " ".join(build_lib_cmd)) if opt.input_cmd != "": @@ -795,11 +810,11 @@ def build_first_graph(): try: if opt.kmin_1pass: - logging.info("--- [%s] Extracting solid (k+1)-mers and building sdbg for k = %d ---" % ( + logging.info("--- [%s] Extracting solid (k+1)-mers and building sdbg for k = %d " % ( datetime.now().strftime("%c"), opt.k_min)) else: logging.info( - "--- [%s] Extracting solid (k+1)-mers for k = %d ---" % (datetime.now().strftime("%c"), opt.k_min)) + "--- [%s] Extracting solid (k+1)-mers for k = %d " % (datetime.now().strftime("%c"), opt.k_min)) logging.debug("cmd: %s" % " ".join(cmd)) p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE) @@ -871,7 +886,7 @@ def build_graph(kmer_k, kmer_from): build_cmd.append("--need_mercy") try: - logging.info("--- [%s] Building graph for k = %d ---" % (datetime.now().strftime("%c"), kmer_k)) + logging.info("--- [%s] Building graph for k = %d " % (datetime.now().strftime("%c"), kmer_k)) logging.debug("%s" % " ".join(build_cmd)) p = subprocess.Popen(build_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE) @@ -916,7 +931,7 @@ def iterate(cur_k, step): "-r", 
opt.lib + ".bin"] try: - logging.info("--- [%s] Extracting iterative edges from k = %d to %d ---" % ( + logging.info("--- [%s] Extracting iterative edges from k = %d to %d " % ( datetime.now().strftime("%c"), cur_k, next_k)) logging.debug("cmd: %s" % " ".join(iterate_cmd)) @@ -981,7 +996,7 @@ def assemble(cur_k): try: logging.info( - "--- [%s] Assembling contigs from SdBG for k = %d ---" % (datetime.now().strftime("%c"), cur_k)) + "--- [%s] Assembling contigs from SdBG for k = %d " % (datetime.now().strftime("%c"), cur_k)) logging.debug("cmd: %s" % " ".join(assembly_cmd)) p = subprocess.Popen(assembly_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE) @@ -1018,7 +1033,7 @@ def local_assemble(cur_k, kmer_to): "-o", contig_prefix(cur_k) + ".local.fa", "--kmax", str(kmer_to)] try: - logging.info("--- [%s] Local assembling k = %d ---" % (datetime.now().strftime("%c"), cur_k)) + logging.info("--- [%s] Local assembling k = %d " % (datetime.now().strftime("%c"), cur_k)) logging.debug("cmd: %s" % " ".join(la_cmd)) p = subprocess.Popen(la_cmd, stderr=subprocess.PIPE) @@ -1047,7 +1062,7 @@ def local_assemble(cur_k, kmer_to): def merge_final(): global cp if (not opt.continue_mode) or (cp > opt.last_cp): - logging.info("--- [%s] Merging to output final contigs ---" % (datetime.now().strftime("%c"))) + logging.info("--- [%s] Merging to output final contigs " % (datetime.now().strftime("%c"))) final_contig_name = os.path.join(opt.out_dir, "final.contigs.fa") if opt.out_prefix != "": final_contig_name = os.path.join(opt.out_dir, opt.out_prefix + ".contigs.fa") @@ -1101,11 +1116,13 @@ def main(argv=None): logging.getLogger('').addHandler(console) logging.info(megahit_version_str) - logging.info("--- [%s] Start assembly. Number of CPU threads %d ---" % ( + logging.info("--- [%s] Start assembly. 
Number of CPU threads %d " % ( datetime.now().strftime("%c"), opt.num_cpu_threads)) logging.info("--- [%s] Available memory: %d, used: %d" % ( datetime.now().strftime("%c"), detect_available_mem(), opt.host_mem)) + cpu_dispatch() + if not opt.continue_mode: write_opt(argv[1:]) # for --continue @@ -1113,9 +1130,9 @@ def main(argv=None): build_lib() if set_max_k_by_lib(): - logging.info("--- [%s] k-max reset to: %d ---" % (datetime.now().strftime("%c"), opt.k_max)) + logging.info("--- [%s] k-max reset to: %d " % (datetime.now().strftime("%c"), opt.k_max)) - logging.info("--- [%s] k list: %s ---" % (datetime.now().strftime("%c"), ','.join(map(str, opt.k_list)))) + logging.info("--- [%s] k list: %s " % (datetime.now().strftime("%c"), ','.join(map(str, opt.k_list)))) build_first_graph() @@ -1153,7 +1170,7 @@ def main(argv=None): if not opt.keep_tmp_files and opt.test: shutil.rmtree(opt.out_dir) - logging.info("--- [%s] ALL DONE. Time elapsed: %f seconds ---" % ( + logging.info("--- [%s] ALL DONE. Time elapsed: %f seconds " % ( datetime.now().strftime("%c"), time.time() - start_time)) except Usage as err: diff --git a/src/utils/cpu_dispatch.h b/src/utils/cpu_dispatch.h new file mode 100644 index 0000000..5871abe --- /dev/null +++ b/src/utils/cpu_dispatch.h @@ -0,0 +1,33 @@ +// +// Created by vout on 4/14/19. 
+// + +#ifndef MEGAHIT_CPU_DISPATCH_H +#define MEGAHIT_CPU_DISPATCH_H + +inline bool HasPopcnt() { + unsigned eax, ebx, ecx, edx; +#ifdef _MSC_VER + int cpuid[4]; +__cpuid(cpuid, 1); +eax = cpuid[0], ebx = cpuid[1], ecx = cpuid[2], edx = cpuid[3]; +#else + asm volatile("cpuid\n\t" : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx) : "0" (1)); +#endif + return ecx >> 23U & 1U; +} + +inline bool HasBmi2() { + unsigned eax, ebx, ecx, edx; +#ifdef _MSC_VER + int cpuid[4]; +__cpuidex(cpuid, 7, 0); +eax = cpuid[0], ebx = cpuid[1], ecx = cpuid[2], edx = cpuid[3]; +#else + asm volatile("cpuid\n\t" : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx) : "0" (7), "2" (0)); +#endif + return ebx >> 8U & 1U; +} + + +#endif //MEGAHIT_CPU_DISPATCH_H From e2f8655f6d913e99f7069d7215d3b893c07654d4 Mon Sep 17 00:00:00 2001 From: Dinghua Li Date: Sun, 14 Apr 2019 12:51:48 -0700 Subject: [PATCH 2/2] prepare to release 1.2.2 --- README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 3e64705..eccd077 100644 --- a/README.md +++ b/README.md @@ -24,9 +24,9 @@ Past versions can be found at the [release](https://github.com/voutcn/megahit/re ### Running with Linux binaries or docker images (recommended) ``` sh -https://github.com/voutcn/megahit/releases/download/v1.2.1-beta/MEGAHIT-1.2.1-beta-Linux-static.tar.gz -tar zvxf MEGAHIT-1.2.1-beta-Linux-static -cd MEGAHIT-1.2.1-beta-Linux-static/bin/ +wget https://github.com/voutcn/megahit/releases/download/v1.2.2-beta/MEGAHIT-1.2.2-beta-Linux-static.tar.gz +tar zvxf MEGAHIT-1.2.2-beta-Linux-static.tar.gz +cd MEGAHIT-1.2.2-beta-Linux-static/bin/ ./megahit --test # run on a toy dataset ./megahit -1 YOUR_PE_READ_1.gz -2 YOUR_PE_READ_2.fq.gz -o YOUR_OUTPUT_DIR ```