diff --git a/.gitignore b/.gitignore index 6446e0a..2db5f96 100644 --- a/.gitignore +++ b/.gitignore @@ -14,7 +14,8 @@ pprinseqc*.tar.gz src/*.fastq !src/test_F.fastq !src/test_R.fastq - +config.guess +config.sub # Prerequisites *.d diff --git a/Makefile.in b/Makefile.in index 8d2259c..9676e9d 100644 --- a/Makefile.in +++ b/Makefile.in @@ -13,6 +13,7 @@ bindir = @bindir@ srcdir = @srcdir@ VPATH = @srcdir@ +ACLOCAL_AMFLAGS = -I m4 --install all test install uninstall pprinseqc: cd src && $(MAKE) $@ @@ -20,7 +21,7 @@ all test install uninstall pprinseqc: clean : FORCE cd src && $(MAKE) $@ -rm -rf autom4te.cache - -rm -f config.h config.log aclocal.m4 config.log config.status configure install-sh Makefile + -rm -f config.h config.log aclocal.m4 config.log config.status configure install-sh Makefile config.guess config.sub dist : $(distdir).tar.gz @@ -35,12 +36,16 @@ $(distdir): FORCE cp $(srcdir)/config.h.in $(distdir) cp $(srcdir)/install-sh $(distdir) cp $(srcdir)/Makefile.in $(distdir) + cp $(srcdir)/config.sub $(distdir) + cp $(srcdir)/config.guess $(distdir) cp $(srcdir)/src/Makefile.in $(distdir)/src cp $(srcdir)/src/main.cpp $(distdir)/src cp $(srcdir)/src/reads.cpp $(distdir)/src cp $(srcdir)/src/reads.h $(distdir)/src cp $(srcdir)/src/test_F.fastq $(distdir)/src cp $(srcdir)/src/test_R.fastq $(distdir)/src + cp $(srcdir)/src/test_F.fastq.gz $(distdir)/src + cp $(srcdir)/src/test_R.fastq.gz $(distdir)/src cp $(srcdir)/src/bloom_filter.hpp $(distdir)/src cp $(srcdir)/src/README $(distdir)/src diff --git a/README.md b/README.md index 7556401..0f9d266 100644 --- a/README.md +++ b/README.md @@ -11,4 +11,5 @@ parallel c prinseq 1. automake 2. g++ 3. make +4. boost-devel diff --git a/configure.ac b/configure.ac index 26ef726..81cfe3d 100644 --- a/configure.ac +++ b/configure.ac @@ -2,7 +2,7 @@ # Process this file with autoconf to produce a configure script. AC_PREREQ([2.69]) -AC_INIT([pprinseqc], [0.1], [garbanyo@gmail.com]) +AC_INIT([pprinseqc], [0.9], [garbanyo@gmail.com]) ##AM_INIT_AUTOMAKE AC_CONFIG_SRCDIR([src/main.cpp]) AC_LANG([C++]) @@ -15,6 +15,9 @@ AC_CONFIG_HEADERS([config.h]) # Checks for programs. AC_PROG_CXX AC_PROG_INSTALL +AX_BOOST_BASE([1.41], [], AC_MSG_ERROR([Could not find a useful version of boost])) +AX_BOOST_FILESYSTEM +AX_BOOST_IOSTREAMS # Checks for libraries. diff --git a/m4/ax_boost_base.m4 b/m4/ax_boost_base.m4 new file mode 100644 index 0000000..2bce519 --- /dev/null +++ b/m4/ax_boost_base.m4 @@ -0,0 +1,301 @@ +# =========================================================================== +# https://www.gnu.org/software/autoconf-archive/ax_boost_base.html +# =========================================================================== +# +# SYNOPSIS +# +# AX_BOOST_BASE([MINIMUM-VERSION], [ACTION-IF-FOUND], [ACTION-IF-NOT-FOUND]) +# +# DESCRIPTION +# +# Test for the Boost C++ libraries of a particular version (or newer) +# +# If no path to the installed boost library is given the macro searchs +# under /usr, /usr/local, /opt and /opt/local and evaluates the +# $BOOST_ROOT environment variable. Further documentation is available at +# . +# +# This macro calls: +# +# AC_SUBST(BOOST_CPPFLAGS) / AC_SUBST(BOOST_LDFLAGS) +# +# And sets: +# +# HAVE_BOOST +# +# LICENSE +# +# Copyright (c) 2008 Thomas Porschberg +# Copyright (c) 2009 Peter Adolphs +# +# Copying and distribution of this file, with or without modification, are +# permitted in any medium without royalty provided the copyright notice +# and this notice are preserved. This file is offered as-is, without any +# warranty. + +#serial 43 + +# example boost program (need to pass version) +m4_define([_AX_BOOST_BASE_PROGRAM], + [AC_LANG_PROGRAM([[ +#include +]],[[ +(void) ((void)sizeof(char[1 - 2*!!((BOOST_VERSION) < ($1))])); +]])]) + +AC_DEFUN([AX_BOOST_BASE], +[ +AC_ARG_WITH([boost], + [AS_HELP_STRING([--with-boost@<:@=ARG@:>@], + [use Boost library from a standard location (ARG=yes), + from the specified location (ARG=), + or disable it (ARG=no) + @<:@ARG=yes@:>@ ])], + [ + AS_CASE([$withval], + [no],[want_boost="no";_AX_BOOST_BASE_boost_path=""], + [yes],[want_boost="yes";_AX_BOOST_BASE_boost_path=""], + [want_boost="yes";_AX_BOOST_BASE_boost_path="$withval"]) + ], + [want_boost="yes"]) + + +AC_ARG_WITH([boost-libdir], + [AS_HELP_STRING([--with-boost-libdir=LIB_DIR], + [Force given directory for boost libraries. + Note that this will override library path detection, + so use this parameter only if default library detection fails + and you know exactly where your boost libraries are located.])], + [ + AS_IF([test -d "$withval"], + [_AX_BOOST_BASE_boost_lib_path="$withval"], + [AC_MSG_ERROR([--with-boost-libdir expected directory name])]) + ], + [_AX_BOOST_BASE_boost_lib_path=""]) + +BOOST_LDFLAGS="" +BOOST_CPPFLAGS="" +AS_IF([test "x$want_boost" = "xyes"], + [_AX_BOOST_BASE_RUNDETECT([$1],[$2],[$3])]) +AC_SUBST(BOOST_CPPFLAGS) +AC_SUBST(BOOST_LDFLAGS) +]) + + +# convert a version string in $2 to numeric and affect to polymorphic var $1 +AC_DEFUN([_AX_BOOST_BASE_TONUMERICVERSION],[ + AS_IF([test "x$2" = "x"],[_AX_BOOST_BASE_TONUMERICVERSION_req="1.20.0"],[_AX_BOOST_BASE_TONUMERICVERSION_req="$2"]) + _AX_BOOST_BASE_TONUMERICVERSION_req_shorten=`expr $_AX_BOOST_BASE_TONUMERICVERSION_req : '\([[0-9]]*\.[[0-9]]*\)'` + _AX_BOOST_BASE_TONUMERICVERSION_req_major=`expr $_AX_BOOST_BASE_TONUMERICVERSION_req : '\([[0-9]]*\)'` + AS_IF([test "x$_AX_BOOST_BASE_TONUMERICVERSION_req_major" = "x"], + [AC_MSG_ERROR([You should at least specify libboost major version])]) + _AX_BOOST_BASE_TONUMERICVERSION_req_minor=`expr $_AX_BOOST_BASE_TONUMERICVERSION_req : '[[0-9]]*\.\([[0-9]]*\)'` + AS_IF([test "x$_AX_BOOST_BASE_TONUMERICVERSION_req_minor" = "x"], + [_AX_BOOST_BASE_TONUMERICVERSION_req_minor="0"]) + _AX_BOOST_BASE_TONUMERICVERSION_req_sub_minor=`expr $_AX_BOOST_BASE_TONUMERICVERSION_req : '[[0-9]]*\.[[0-9]]*\.\([[0-9]]*\)'` + AS_IF([test "X$_AX_BOOST_BASE_TONUMERICVERSION_req_sub_minor" = "X"], + [_AX_BOOST_BASE_TONUMERICVERSION_req_sub_minor="0"]) + _AX_BOOST_BASE_TONUMERICVERSION_RET=`expr $_AX_BOOST_BASE_TONUMERICVERSION_req_major \* 100000 \+ $_AX_BOOST_BASE_TONUMERICVERSION_req_minor \* 100 \+ $_AX_BOOST_BASE_TONUMERICVERSION_req_sub_minor` + AS_VAR_SET($1,$_AX_BOOST_BASE_TONUMERICVERSION_RET) +]) + +dnl Run the detection of boost should be run only if $want_boost +AC_DEFUN([_AX_BOOST_BASE_RUNDETECT],[ + _AX_BOOST_BASE_TONUMERICVERSION(WANT_BOOST_VERSION,[$1]) + succeeded=no + + + AC_REQUIRE([AC_CANONICAL_HOST]) + dnl On 64-bit systems check for system libraries in both lib64 and lib. + dnl The former is specified by FHS, but e.g. Debian does not adhere to + dnl this (as it rises problems for generic multi-arch support). + dnl The last entry in the list is chosen by default when no libraries + dnl are found, e.g. when only header-only libraries are installed! + AS_CASE([${host_cpu}], + [x86_64],[libsubdirs="lib64 libx32 lib lib64"], + [ppc64|s390x|sparc64|aarch64|ppc64le],[libsubdirs="lib64 lib lib64"], + [libsubdirs="lib"] + ) + + dnl allow for real multi-arch paths e.g. /usr/lib/x86_64-linux-gnu. Give + dnl them priority over the other paths since, if libs are found there, they + dnl are almost assuredly the ones desired. + AS_CASE([${host_cpu}], + [i?86],[multiarch_libsubdir="lib/i386-${host_os}"], + [multiarch_libsubdir="lib/${host_cpu}-${host_os}"] + ) + + dnl first we check the system location for boost libraries + dnl this location ist chosen if boost libraries are installed with the --layout=system option + dnl or if you install boost with RPM + AS_IF([test "x$_AX_BOOST_BASE_boost_path" != "x"],[ + AC_MSG_CHECKING([for boostlib >= $1 ($WANT_BOOST_VERSION) includes in "$_AX_BOOST_BASE_boost_path/include"]) + AS_IF([test -d "$_AX_BOOST_BASE_boost_path/include" && test -r "$_AX_BOOST_BASE_boost_path/include"],[ + AC_MSG_RESULT([yes]) + BOOST_CPPFLAGS="-I$_AX_BOOST_BASE_boost_path/include" + for _AX_BOOST_BASE_boost_path_tmp in $multiarch_libsubdir $libsubdirs; do + AC_MSG_CHECKING([for boostlib >= $1 ($WANT_BOOST_VERSION) lib path in "$_AX_BOOST_BASE_boost_path/$_AX_BOOST_BASE_boost_path_tmp"]) + AS_IF([test -d "$_AX_BOOST_BASE_boost_path/$_AX_BOOST_BASE_boost_path_tmp" && test -r "$_AX_BOOST_BASE_boost_path/$_AX_BOOST_BASE_boost_path_tmp" ],[ + AC_MSG_RESULT([yes]) + BOOST_LDFLAGS="-L$_AX_BOOST_BASE_boost_path/$_AX_BOOST_BASE_boost_path_tmp"; + break; + ], + [AC_MSG_RESULT([no])]) + done],[ + AC_MSG_RESULT([no])]) + ],[ + if test X"$cross_compiling" = Xyes; then + search_libsubdirs=$multiarch_libsubdir + else + search_libsubdirs="$multiarch_libsubdir $libsubdirs" + fi + for _AX_BOOST_BASE_boost_path_tmp in /usr /usr/local /opt /opt/local ; do + if test -d "$_AX_BOOST_BASE_boost_path_tmp/include/boost" && test -r "$_AX_BOOST_BASE_boost_path_tmp/include/boost" ; then + for libsubdir in $search_libsubdirs ; do + if ls "$_AX_BOOST_BASE_boost_path_tmp/$libsubdir/libboost_"* >/dev/null 2>&1 ; then break; fi + done + BOOST_LDFLAGS="-L$_AX_BOOST_BASE_boost_path_tmp/$libsubdir" + BOOST_CPPFLAGS="-I$_AX_BOOST_BASE_boost_path_tmp/include" + break; + fi + done + ]) + + dnl overwrite ld flags if we have required special directory with + dnl --with-boost-libdir parameter + AS_IF([test "x$_AX_BOOST_BASE_boost_lib_path" != "x"], + [BOOST_LDFLAGS="-L$_AX_BOOST_BASE_boost_lib_path"]) + + AC_MSG_CHECKING([for boostlib >= $1 ($WANT_BOOST_VERSION)]) + CPPFLAGS_SAVED="$CPPFLAGS" + CPPFLAGS="$CPPFLAGS $BOOST_CPPFLAGS" + export CPPFLAGS + + LDFLAGS_SAVED="$LDFLAGS" + LDFLAGS="$LDFLAGS $BOOST_LDFLAGS" + export LDFLAGS + + AC_REQUIRE([AC_PROG_CXX]) + AC_LANG_PUSH(C++) + AC_COMPILE_IFELSE([_AX_BOOST_BASE_PROGRAM($WANT_BOOST_VERSION)],[ + AC_MSG_RESULT(yes) + succeeded=yes + found_system=yes + ],[ + ]) + AC_LANG_POP([C++]) + + + + dnl if we found no boost with system layout we search for boost libraries + dnl built and installed without the --layout=system option or for a staged(not installed) version + if test "x$succeeded" != "xyes" ; then + CPPFLAGS="$CPPFLAGS_SAVED" + LDFLAGS="$LDFLAGS_SAVED" + BOOST_CPPFLAGS= + if test -z "$_AX_BOOST_BASE_boost_lib_path" ; then + BOOST_LDFLAGS= + fi + _version=0 + if test -n "$_AX_BOOST_BASE_boost_path" ; then + if test -d "$_AX_BOOST_BASE_boost_path" && test -r "$_AX_BOOST_BASE_boost_path"; then + for i in `ls -d $_AX_BOOST_BASE_boost_path/include/boost-* 2>/dev/null`; do + _version_tmp=`echo $i | sed "s#$_AX_BOOST_BASE_boost_path##" | sed 's/\/include\/boost-//' | sed 's/_/./'` + V_CHECK=`expr $_version_tmp \> $_version` + if test "x$V_CHECK" = "x1" ; then + _version=$_version_tmp + fi + VERSION_UNDERSCORE=`echo $_version | sed 's/\./_/'` + BOOST_CPPFLAGS="-I$_AX_BOOST_BASE_boost_path/include/boost-$VERSION_UNDERSCORE" + done + dnl if nothing found search for layout used in Windows distributions + if test -z "$BOOST_CPPFLAGS"; then + if test -d "$_AX_BOOST_BASE_boost_path/boost" && test -r "$_AX_BOOST_BASE_boost_path/boost"; then + BOOST_CPPFLAGS="-I$_AX_BOOST_BASE_boost_path" + fi + fi + dnl if we found something and BOOST_LDFLAGS was unset before + dnl (because "$_AX_BOOST_BASE_boost_lib_path" = ""), set it here. + if test -n "$BOOST_CPPFLAGS" && test -z "$BOOST_LDFLAGS"; then + for libsubdir in $libsubdirs ; do + if ls "$_AX_BOOST_BASE_boost_path/$libsubdir/libboost_"* >/dev/null 2>&1 ; then break; fi + done + BOOST_LDFLAGS="-L$_AX_BOOST_BASE_boost_path/$libsubdir" + fi + fi + else + if test "x$cross_compiling" != "xyes" ; then + for _AX_BOOST_BASE_boost_path in /usr /usr/local /opt /opt/local ; do + if test -d "$_AX_BOOST_BASE_boost_path" && test -r "$_AX_BOOST_BASE_boost_path" ; then + for i in `ls -d $_AX_BOOST_BASE_boost_path/include/boost-* 2>/dev/null`; do + _version_tmp=`echo $i | sed "s#$_AX_BOOST_BASE_boost_path##" | sed 's/\/include\/boost-//' | sed 's/_/./'` + V_CHECK=`expr $_version_tmp \> $_version` + if test "x$V_CHECK" = "x1" ; then + _version=$_version_tmp + best_path=$_AX_BOOST_BASE_boost_path + fi + done + fi + done + + VERSION_UNDERSCORE=`echo $_version | sed 's/\./_/'` + BOOST_CPPFLAGS="-I$best_path/include/boost-$VERSION_UNDERSCORE" + if test -z "$_AX_BOOST_BASE_boost_lib_path" ; then + for libsubdir in $libsubdirs ; do + if ls "$best_path/$libsubdir/libboost_"* >/dev/null 2>&1 ; then break; fi + done + BOOST_LDFLAGS="-L$best_path/$libsubdir" + fi + fi + + if test -n "$BOOST_ROOT" ; then + for libsubdir in $libsubdirs ; do + if ls "$BOOST_ROOT/stage/$libsubdir/libboost_"* >/dev/null 2>&1 ; then break; fi + done + if test -d "$BOOST_ROOT" && test -r "$BOOST_ROOT" && test -d "$BOOST_ROOT/stage/$libsubdir" && test -r "$BOOST_ROOT/stage/$libsubdir"; then + version_dir=`expr //$BOOST_ROOT : '.*/\(.*\)'` + stage_version=`echo $version_dir | sed 's/boost_//' | sed 's/_/./g'` + stage_version_shorten=`expr $stage_version : '\([[0-9]]*\.[[0-9]]*\)'` + V_CHECK=`expr $stage_version_shorten \>\= $_version` + if test "x$V_CHECK" = "x1" && test -z "$_AX_BOOST_BASE_boost_lib_path" ; then + AC_MSG_NOTICE(We will use a staged boost library from $BOOST_ROOT) + BOOST_CPPFLAGS="-I$BOOST_ROOT" + BOOST_LDFLAGS="-L$BOOST_ROOT/stage/$libsubdir" + fi + fi + fi + fi + + CPPFLAGS="$CPPFLAGS $BOOST_CPPFLAGS" + export CPPFLAGS + LDFLAGS="$LDFLAGS $BOOST_LDFLAGS" + export LDFLAGS + + AC_LANG_PUSH(C++) + AC_COMPILE_IFELSE([_AX_BOOST_BASE_PROGRAM($WANT_BOOST_VERSION)],[ + AC_MSG_RESULT(yes) + succeeded=yes + found_system=yes + ],[ + ]) + AC_LANG_POP([C++]) + fi + + if test "x$succeeded" != "xyes" ; then + if test "x$_version" = "x0" ; then + AC_MSG_NOTICE([[We could not detect the boost libraries (version $1 or higher). If you have a staged boost library (still not installed) please specify \$BOOST_ROOT in your environment and do not give a PATH to --with-boost option. If you are sure you have boost installed, then check your version number looking in . See http://randspringer.de/boost for more documentation.]]) + else + AC_MSG_NOTICE([Your boost libraries seems to old (version $_version).]) + fi + # execute ACTION-IF-NOT-FOUND (if present): + ifelse([$3], , :, [$3]) + else + AC_DEFINE(HAVE_BOOST,,[define if the Boost library is available]) + # execute ACTION-IF-FOUND (if present): + ifelse([$2], , :, [$2]) + fi + + CPPFLAGS="$CPPFLAGS_SAVED" + LDFLAGS="$LDFLAGS_SAVED" + +]) diff --git a/m4/ax_boost_filesystem.m4 b/m4/ax_boost_filesystem.m4 new file mode 100644 index 0000000..c392f9d --- /dev/null +++ b/m4/ax_boost_filesystem.m4 @@ -0,0 +1,118 @@ +# =========================================================================== +# https://www.gnu.org/software/autoconf-archive/ax_boost_filesystem.html +# =========================================================================== +# +# SYNOPSIS +# +# AX_BOOST_FILESYSTEM +# +# DESCRIPTION +# +# Test for Filesystem library from the Boost C++ libraries. The macro +# requires a preceding call to AX_BOOST_BASE. Further documentation is +# available at . +# +# This macro calls: +# +# AC_SUBST(BOOST_FILESYSTEM_LIB) +# +# And sets: +# +# HAVE_BOOST_FILESYSTEM +# +# LICENSE +# +# Copyright (c) 2009 Thomas Porschberg +# Copyright (c) 2009 Michael Tindal +# Copyright (c) 2009 Roman Rybalko +# +# Copying and distribution of this file, with or without modification, are +# permitted in any medium without royalty provided the copyright notice +# and this notice are preserved. This file is offered as-is, without any +# warranty. + +#serial 27 + +AC_DEFUN([AX_BOOST_FILESYSTEM], +[ + AC_ARG_WITH([boost-filesystem], + AS_HELP_STRING([--with-boost-filesystem@<:@=special-lib@:>@], + [use the Filesystem library from boost - it is possible to specify a certain library for the linker + e.g. --with-boost-filesystem=boost_filesystem-gcc-mt ]), + [ + if test "$withval" = "no"; then + want_boost="no" + elif test "$withval" = "yes"; then + want_boost="yes" + ax_boost_user_filesystem_lib="" + else + want_boost="yes" + ax_boost_user_filesystem_lib="$withval" + fi + ], + [want_boost="yes"] + ) + + if test "x$want_boost" = "xyes"; then + AC_REQUIRE([AC_PROG_CC]) + CPPFLAGS_SAVED="$CPPFLAGS" + CPPFLAGS="$CPPFLAGS $BOOST_CPPFLAGS" + export CPPFLAGS + + LDFLAGS_SAVED="$LDFLAGS" + LDFLAGS="$LDFLAGS $BOOST_LDFLAGS" + export LDFLAGS + + LIBS_SAVED=$LIBS + LIBS="$LIBS $BOOST_SYSTEM_LIB" + export LIBS + + AC_CACHE_CHECK(whether the Boost::Filesystem library is available, + ax_cv_boost_filesystem, + [AC_LANG_PUSH([C++]) + AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[@%:@include ]], + [[using namespace boost::filesystem; + path my_path( "foo/bar/data.txt" ); + return 0;]])], + ax_cv_boost_filesystem=yes, ax_cv_boost_filesystem=no) + AC_LANG_POP([C++]) + ]) + if test "x$ax_cv_boost_filesystem" = "xyes"; then + AC_DEFINE(HAVE_BOOST_FILESYSTEM,,[define if the Boost::Filesystem library is available]) + BOOSTLIBDIR=`echo $BOOST_LDFLAGS | sed -e 's/@<:@^\/@:>@*//'` + if test "x$ax_boost_user_filesystem_lib" = "x"; then + for libextension in `ls -r $BOOSTLIBDIR/libboost_filesystem* 2>/dev/null | sed 's,.*/lib,,' | sed 's,\..*,,'` ; do + ax_lib=${libextension} + AC_CHECK_LIB($ax_lib, exit, + [BOOST_FILESYSTEM_LIB="-l$ax_lib"; AC_SUBST(BOOST_FILESYSTEM_LIB) link_filesystem="yes"; break], + [link_filesystem="no"]) + done + if test "x$link_filesystem" != "xyes"; then + for libextension in `ls -r $BOOSTLIBDIR/boost_filesystem* 2>/dev/null | sed 's,.*/,,' | sed -e 's,\..*,,'` ; do + ax_lib=${libextension} + AC_CHECK_LIB($ax_lib, exit, + [BOOST_FILESYSTEM_LIB="-l$ax_lib"; AC_SUBST(BOOST_FILESYSTEM_LIB) link_filesystem="yes"; break], + [link_filesystem="no"]) + done + fi + else + for ax_lib in $ax_boost_user_filesystem_lib boost_filesystem-$ax_boost_user_filesystem_lib; do + AC_CHECK_LIB($ax_lib, exit, + [BOOST_FILESYSTEM_LIB="-l$ax_lib"; AC_SUBST(BOOST_FILESYSTEM_LIB) link_filesystem="yes"; break], + [link_filesystem="no"]) + done + + fi + if test "x$ax_lib" = "x"; then + AC_MSG_ERROR(Could not find a version of the library!) + fi + if test "x$link_filesystem" != "xyes"; then + AC_MSG_ERROR(Could not link against $ax_lib !) + fi + fi + + CPPFLAGS="$CPPFLAGS_SAVED" + LDFLAGS="$LDFLAGS_SAVED" + LIBS="$LIBS_SAVED" + fi +]) diff --git a/m4/ax_boost_iostreams.m4 b/m4/ax_boost_iostreams.m4 new file mode 100644 index 0000000..8f27f85 --- /dev/null +++ b/m4/ax_boost_iostreams.m4 @@ -0,0 +1,116 @@ +# =========================================================================== +# https://www.gnu.org/software/autoconf-archive/ax_boost_iostreams.html +# =========================================================================== +# +# SYNOPSIS +# +# AX_BOOST_IOSTREAMS +# +# DESCRIPTION +# +# Test for IOStreams library from the Boost C++ libraries. The macro +# requires a preceding call to AX_BOOST_BASE. Further documentation is +# available at . +# +# This macro calls: +# +# AC_SUBST(BOOST_IOSTREAMS_LIB) +# +# And sets: +# +# HAVE_BOOST_IOSTREAMS +# +# LICENSE +# +# Copyright (c) 2008 Thomas Porschberg +# +# Copying and distribution of this file, with or without modification, are +# permitted in any medium without royalty provided the copyright notice +# and this notice are preserved. This file is offered as-is, without any +# warranty. + +#serial 21 + +AC_DEFUN([AX_BOOST_IOSTREAMS], +[ + AC_ARG_WITH([boost-iostreams], + AS_HELP_STRING([--with-boost-iostreams@<:@=special-lib@:>@], + [use the IOStreams library from boost - it is possible to specify a certain library for the linker + e.g. --with-boost-iostreams=boost_iostreams-gcc-mt-d-1_33_1 ]), + [ + if test "$withval" = "no"; then + want_boost="no" + elif test "$withval" = "yes"; then + want_boost="yes" + ax_boost_user_iostreams_lib="" + else + want_boost="yes" + ax_boost_user_iostreams_lib="$withval" + fi + ], + [want_boost="yes"] + ) + + if test "x$want_boost" = "xyes"; then + AC_REQUIRE([AC_PROG_CC]) + CPPFLAGS_SAVED="$CPPFLAGS" + CPPFLAGS="$CPPFLAGS $BOOST_CPPFLAGS" + export CPPFLAGS + + LDFLAGS_SAVED="$LDFLAGS" + LDFLAGS="$LDFLAGS $BOOST_LDFLAGS" + export LDFLAGS + + AC_CACHE_CHECK(whether the Boost::IOStreams library is available, + ax_cv_boost_iostreams, + [AC_LANG_PUSH([C++]) + AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[@%:@include + @%:@include + ]], + [[std::string input = "Hello World!"; + namespace io = boost::iostreams; + io::filtering_istream in(boost::make_iterator_range(input)); + return 0; + ]])], + ax_cv_boost_iostreams=yes, ax_cv_boost_iostreams=no) + AC_LANG_POP([C++]) + ]) + if test "x$ax_cv_boost_iostreams" = "xyes"; then + AC_DEFINE(HAVE_BOOST_IOSTREAMS,,[define if the Boost::IOStreams library is available]) + BOOSTLIBDIR=`echo $BOOST_LDFLAGS | sed -e 's/@<:@^\/@:>@*//'` + if test "x$ax_boost_user_iostreams_lib" = "x"; then + for libextension in `ls $BOOSTLIBDIR/libboost_iostreams*.so* $BOOSTLIBDIR/libboost_iostream*.dylib* $BOOSTLIBDIR/libboost_iostreams*.a* 2>/dev/null | sed 's,.*/,,' | sed -e 's;^lib\(boost_iostreams.*\)\.so.*$;\1;' -e 's;^lib\(boost_iostream.*\)\.dylib.*$;\1;' -e 's;^lib\(boost_iostreams.*\)\.a.*$;\1;'` ; do + ax_lib=${libextension} + AC_CHECK_LIB($ax_lib, exit, + [BOOST_IOSTREAMS_LIB="-l$ax_lib"; AC_SUBST(BOOST_IOSTREAMS_LIB) link_iostreams="yes"; break], + [link_iostreams="no"]) + done + if test "x$link_iostreams" != "xyes"; then + for libextension in `ls $BOOSTLIBDIR/boost_iostreams*.dll* $BOOSTLIBDIR/boost_iostreams*.a* 2>/dev/null | sed 's,.*/,,' | sed -e 's;^\(boost_iostreams.*\)\.dll.*$;\1;' -e 's;^\(boost_iostreams.*\)\.a.*$;\1;'` ; do + ax_lib=${libextension} + AC_CHECK_LIB($ax_lib, exit, + [BOOST_IOSTREAMS_LIB="-l$ax_lib"; AC_SUBST(BOOST_IOSTREAMS_LIB) link_iostreams="yes"; break], + [link_iostreams="no"]) + done + fi + + else + for ax_lib in $ax_boost_user_iostreams_lib boost_iostreams-$ax_boost_user_iostreams_lib; do + AC_CHECK_LIB($ax_lib, main, + [BOOST_IOSTREAMS_LIB="-l$ax_lib"; AC_SUBST(BOOST_IOSTREAMS_LIB) link_iostreams="yes"; break], + [link_iostreams="no"]) + done + + fi + if test "x$ax_lib" = "x"; then + AC_MSG_ERROR(Could not find a version of the library!) + fi + if test "x$link_iostreams" != "xyes"; then + AC_MSG_ERROR(Could not link against $ax_lib !) + fi + fi + + CPPFLAGS="$CPPFLAGS_SAVED" + LDFLAGS="$LDFLAGS_SAVED" + fi +]) diff --git a/src/Makefile.in b/src/Makefile.in index dd4910f..3d1d9cd 100644 --- a/src/Makefile.in +++ b/src/Makefile.in @@ -12,7 +12,7 @@ bindir = @bindir@ srcdir = @srcdir@ VPATH = @srcdir@ -CXXFLAGS = -std=c++11 -Wall -O3 +CXXFLAGS = -std=c++11 -Wall -O3 -pthread -lboost_filesystem -lboost_system -lboost_iostreams -lz all: pprinseqc @@ -24,8 +24,8 @@ comp: pprinseqc compp: pprinseqc - time ./prinseq-lite.pl -fastq test_F.fastq -fastq2 test_R.fastq -min_len 5 -out_good Test_good -out_bad Test_bad -no_qual_header -ns_max_n 2 - time ./pprinseqc -fastq test_F.fastq -fastq2 test_R.fastq -min_len 5 -ns_max_n 2 + time -v ./prinseq-lite.pl -fastq stenotrophomonas_R1.fastq -fastq2 stenotrophomonas_R2.fastq -min_len 5 -out_good Test_good -out_bad Test_bad -no_qual_header -ns_max_n 2 + time -v ./pprinseqc -fastq stenotrophomonas_R1.fastq -fastq2 stenotrophomonas_R2.fastq -min_len 5 -ns_max_n 2 -out_name Test_time_ pprinseqc: main.o reads.o $(CXX) $(CXXFLAGS) -o pprinseqc main.o reads.o @@ -37,7 +37,17 @@ test: all ./pprinseqc -fastq test_F.fastq -fastq2 test_R.fastq -out_format 1 -out_name Test_1 && \ ./pprinseqc -fastq test_F.fastq -fastq2 test_R.fastq -trim_qual_right=25 -out_name Test_2 && \ ./pprinseqc -fastq test_F.fastq -fastq2 test_R.fastq -min_qual_mean 25 -ns_max_n 2 -out_name Test_3 && \ + ./pprinseqc -fastq test_F.fastq.gz -fastq2 test_R.fastq.gz -out_format 1 -out_name Test_1Z && \ + ./pprinseqc -fastq test_F.fastq.gz -fastq2 test_R.fastq.gz -trim_qual_right=25 -out_name Test_2Z && \ + ./pprinseqc -fastq test_F.fastq.gz -fastq2 test_R.fastq.gz -min_qual_mean 25 -ns_max_n 2 -out_name Test_3Z && \ echo "*** ALL TEST PASSED***" + +test_single: all + time -v ./pprinseqc -fastq stenotrophomonas_R1.fastq -min_len 5000 -derep -out_name Test_1_Single_t1 -out_gz -threads 1 + time -v ./pprinseqc -fastq stenotrophomonas_R1.fastq -min_len 5000 -derep -out_name Test_1_Single_t2 -out_gz -threads 2 + time -v ./pprinseqc -fastq stenotrophomonas_R1.fastq -min_len 5000 -derep -out_name Test_1_Single_t5 -out_gz -threads 5 + time -v ./pprinseqc -fastq stenotrophomonas_R1.fastq -min_len 5000 -derep -out_name Test_1_Single_t10 -out_gz -threads 10 + install: install -d $(DESTDIR)$(bindir)/bin @@ -52,6 +62,8 @@ clean: -rm -f pprinseqc -rm -f Test*fastq -rm -f Test*fasta + -rm -f Test*fastq.gz + -rm -f Test*fasta.gz -rm -f *.o cc: diff --git a/src/README b/src/README index 0418fc1..89c9b08 100644 --- a/src/README +++ b/src/README @@ -6,6 +6,8 @@ CXXFLAGS = -std=c++11 -O2 -Wall CXXFLAGS = -std=c++11 -Wall CXXFLAGS = -std=c++11 -Wall -fno-omit-frame-pointer -ggdb +CXXFLAGS = -std=c++11 -Wall -pthread + sudo sh -c " echo 0 > /proc/sys/kernel/kptr_restrict" sudo perf record -g ./pprinseqc -fastq stenotrophomonas_R1.fastq -fastq2 stenotrophomonas_R2.fastq -min_len 5000 -ns_max_n 2 diff --git a/src/main.cpp b/src/main.cpp index 476e68c..8497965 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -20,12 +20,26 @@ #include "reads.h" using namespace std; -int main (int argc, char **argv) -{ +#ifndef PTHREAD +#define PTHREAD +#include +#endif + +#include +#include +#include +#include + + +pthread_mutex_t write_mutex=PTHREAD_MUTEX_INITIALIZER; +pthread_mutex_t read_mutex=PTHREAD_MUTEX_INITIALIZER; +pthread_mutex_t read_mutex2=PTHREAD_MUTEX_INITIALIZER; + char *forward_read_file = NULL; char *reverse_read_file = NULL; + boost::filesystem::path p1,p2; string out_ext="fastq"; - int index; + int ii; int out_format=0; // 0 fastq, 1 fasta int min_qual_score=0; int ns_max_n=-1; @@ -36,7 +50,7 @@ int main (int argc, char **argv) int max_len=0; float max_gc=100; float min_gc=0; - opterr = 0; + int opterr = 0; int derep; float entropy_threshold=0.5 ; int lc_entropy=0; @@ -55,6 +69,10 @@ int main (int argc, char **argv) int rm_header=0; int trim_tail_left=0; int trim_tail_right=0; + int threads=5; + int out_gz=0; + + std::string line; struct option longopts[] = { { "fastq" , required_argument , NULL , 1 }, @@ -81,19 +99,37 @@ int main (int argc, char **argv) { "rm_header" , no_argument , &rm_header, 1 }, { "trim_tail_left" , required_argument , NULL , 20 }, { "trim_tail_right" , required_argument , NULL , 21 }, + { "out_gz" , no_argument , &out_gz , 1 }, + { "threads" , required_argument , NULL , 22 }, {0,0,0,0} }; + +struct arg_struct { + single_read * read; + bloom_filter * filter; +}; +struct arg_struct_pair { + pair_read * read; + bloom_filter * filter; +}; +void* do_single (void * arguments); +void* do_pair (void* arguments); + +int main (int argc, char **argv) +{ // Readin inout from the command line while ((c = getopt_long_only(argc, argv, "",longopts, NULL)) != -1) switch (c) { case 1: forward_read_file = optarg; + p1 = optarg; break; case 2: reverse_read_file = optarg; + p2=optarg; break; case 3: out_format = atoi(optarg); @@ -164,6 +200,9 @@ int main (int argc, char **argv) case 21: trim_tail_right=atoi(optarg); break; + case 22: + threads=atoi(optarg); + break; case 0: // getopt set a variable break; @@ -178,63 +217,143 @@ int main (int argc, char **argv) default: abort (); } - - printf ("forward_read_file = %s ,reverse_read_file =%s\n ", forward_read_file, reverse_read_file); + +/* printf ("forward_read_file = %s ,reverse_read_file =%s\n ", forward_read_file, reverse_read_file); cout << "random string " << out_name << " out format " << out_format << endl ; cout << "ns_max_n " << ns_max_n << endl ; - for (index = optind; index < argc; index++) - printf ("Non-option argument %s\n", argv[index]); - + cout << " extension()----------: " << p1.extension() << '\n'; + for (ii = optind; ii < argc; ii++) + printf ("Non-option argument %s\n", argv[ii]); +*/ /////////// open input files - ifstream inFile_f; - ifstream inFile_r; - inFile_f.open(forward_read_file); - if (!inFile_f) { + + istream * inFile_r; + istream * inFile_f; + ifstream *file; + ifstream *file2; + boost::iostreams::filtering_streambuf inbuf; + boost::iostreams::filtering_streambuf inbuf2; + + + file = new ifstream(p1.native() , std::ios_base::in | std::ios_base::binary); + file2 = new ifstream(p2.native() , std::ios_base::in | std::ios_base::binary); + + if (!(*file)) { cerr << "Error: can not open " << forward_read_file << endl ; return 1; } if (reverse_read_file) { - inFile_r.open(reverse_read_file); - if (!inFile_r) { + if (!(*file2)) { cerr << "Error: can not open " << reverse_read_file << endl ; return 1; } + } + + + + if (p1.extension()==".gz") { + inbuf.push(boost::iostreams::gzip_decompressor()); } - -////////// open and name oupput files - ofstream bad_out_file_R1; - ofstream single_out_file_R1; - ofstream good_out_file_R1; - ofstream bad_out_file_R2; - ofstream single_out_file_R2; - ofstream good_out_file_R2; + + + if (p2.extension()==".gz") { + inbuf2.push(boost::iostreams::gzip_decompressor()); + } + - if (out_format == 1 ) { out_ext = "fasta";} + inbuf.push(*file); + inFile_f = new std::istream(&inbuf); + inbuf2.push(*file2); + inFile_r = new std::istream(&inbuf2); + + + +// while(std::getline(*inFile_f, line)) { +// std::cout << line << std::endl; +// } +// std::cout << "bla" << line << "bla" << std::endl; +////////// open and name oupput files + ofstream *tmp_bad_out_file_R1; + ofstream *tmp_single_out_file_R1; + ofstream *tmp_good_out_file_R1; + ofstream *tmp_bad_out_file_R2; + ofstream *tmp_single_out_file_R2; + ofstream *tmp_good_out_file_R2; + ostream *bad_out_file_R1= NULL; + ostream *single_out_file_R1= NULL; + ostream *good_out_file_R1= NULL; + ostream *bad_out_file_R2= NULL; + ostream *single_out_file_R2= NULL; + ostream *good_out_file_R2= NULL; + + boost::iostreams::filtering_streambuf out_bad_R1_buf; + boost::iostreams::filtering_streambuf out_bad_R2_buf; + boost::iostreams::filtering_streambuf out_single_R1_buf; + boost::iostreams::filtering_streambuf out_single_R2_buf; + boost::iostreams::filtering_streambuf out_good_R1_buf; + boost::iostreams::filtering_streambuf out_good_R2_buf; + + if (out_format == 1 ) { out_ext = "fasta";} + if (out_gz == 1 ) { out_ext = out_ext + ".gz"; } if (reverse_read_file) { - bad_out_file_R1.open(out_name + "_bad_out_R1." + out_ext ); - single_out_file_R1.open(out_name + "_single_out_R1." + out_ext ); - good_out_file_R1.open(out_name + "_good_out_R1." + out_ext); - bad_out_file_R2.open(out_name + "_bad_out_R2." + out_ext ); - single_out_file_R2.open(out_name + "_single_out_R2." + out_ext); - good_out_file_R2.open(out_name + "_good_out_R2." + out_ext); + tmp_bad_out_file_R1= new std::ofstream(out_name + "_bad_out_R1." + out_ext ); + tmp_single_out_file_R1= new std::ofstream(out_name + "_single_out_R1." + out_ext ); + tmp_good_out_file_R1= new std::ofstream(out_name + "_good_out_R1." + out_ext); + tmp_bad_out_file_R2= new std::ofstream(out_name + "_bad_out_R2." + out_ext ); + tmp_single_out_file_R2= new std::ofstream(out_name + "_single_out_R2." + out_ext); + tmp_good_out_file_R2= new std::ofstream(out_name + "_good_out_R2." + out_ext); + if (out_gz) { + out_bad_R1_buf.push(boost::iostreams::gzip_compressor()); + out_bad_R2_buf.push(boost::iostreams::gzip_compressor()); + out_single_R1_buf.push(boost::iostreams::gzip_compressor()); + out_single_R2_buf.push(boost::iostreams::gzip_compressor()); + out_good_R1_buf.push(boost::iostreams::gzip_compressor()); + out_good_R2_buf.push(boost::iostreams::gzip_compressor()); + } + out_bad_R1_buf.push(*tmp_bad_out_file_R1); + out_bad_R2_buf.push(*tmp_bad_out_file_R2); + out_single_R1_buf.push(*tmp_single_out_file_R1); + out_single_R2_buf.push(*tmp_single_out_file_R2); + out_good_R1_buf.push(*tmp_good_out_file_R1); + out_good_R2_buf.push(*tmp_good_out_file_R2); + + bad_out_file_R1= new std::ostream(&out_bad_R1_buf); + bad_out_file_R2= new std::ostream(&out_bad_R2_buf); + single_out_file_R1= new std::ostream(&out_single_R1_buf); + single_out_file_R2= new std::ostream(&out_single_R2_buf); + good_out_file_R1= new std::ostream(&out_good_R1_buf); + good_out_file_R2= new std::ostream(&out_good_R2_buf); + + } else { - bad_out_file_R1.open(out_name + "_bad_out." + out_ext ); - good_out_file_R1.open(out_name + "_good_out." + out_ext); + tmp_good_out_file_R1= new std::ofstream(out_name + "_good_out." + out_ext); + tmp_bad_out_file_R1= new std::ofstream(out_name + "_bad_out." + out_ext); + if (out_gz) { + out_good_R1_buf.push(boost::iostreams::gzip_compressor()); + out_bad_R1_buf.push(boost::iostreams::gzip_compressor()); + } + out_good_R1_buf.push(*tmp_good_out_file_R1); + out_bad_R1_buf.push(*tmp_bad_out_file_R1); + + good_out_file_R1= new std::ostream(&out_good_R1_buf); + bad_out_file_R1= new std::ostream(&out_bad_R1_buf); } + - single_read read_f(inFile_f); - single_read read_r(inFile_r); - pair_read read_rf(inFile_f,inFile_r); + single_read read_f(*inFile_f); + single_read read_r(*inFile_r); + pair_read read_rf(*inFile_f,*inFile_r); + if (reverse_read_file) { - read_rf.set_outputs(bad_out_file_R1,single_out_file_R1,good_out_file_R1, - bad_out_file_R2,single_out_file_R2,good_out_file_R2); + read_rf.set_outputs(*bad_out_file_R1,*single_out_file_R1,*good_out_file_R1, + *bad_out_file_R2,*single_out_file_R2,*good_out_file_R2); read_rf.set_out_format(out_format); } else { - read_f.set_outputs(bad_out_file_R1,single_out_file_R1,good_out_file_R1); + read_f.set_outputs(*bad_out_file_R1,*bad_out_file_R1,*good_out_file_R1); } bloom_parameters parameters; bloom_filter *filter=NULL; @@ -249,70 +368,125 @@ int main (int argc, char **argv) parameters.compute_optimal_parameters(); filter = new bloom_filter(parameters); } - + + + // main loop if (reverse_read_file) { ////////////////////////////////////////for pair end - while(read_rf.read_read()) { - //read_rf.read1->trim_qual_right("mean","lt",5,10,30); - if (trim_tail_left) {read_rf.trim_tail_left(trim_tail_left);} - if (trim_tail_right) {read_rf.trim_tail_right(trim_tail_right);} - if (trim_qual_right) {read_rf.trim_qual_right("mean","lt",trim_qual_step,trim_qual_window,trim_qual_right_threshold);} - if (trim_qual_left) {read_rf.trim_qual_left("mean","lt",trim_qual_step,trim_qual_window,trim_qual_left_threshold);} - if (ns_max_n > -1 ) {read_rf.ns_max_n(ns_max_n);} - if (min_qual_mean) {read_rf.min_qual_mean(min_qual_mean);} - if (min_qual_score) { read_rf.min_qual_score(min_qual_score);} - if (noiupac) {read_rf.noiupac();} - if (min_len) {read_rf.min_len(min_len);} - if (max_len) {read_rf.max_len(max_len);} - if (max_gc < 100) {read_rf.max_gc(max_gc);} - if (min_gc > 0) {read_rf.min_gc(min_gc);} - if (derep) { - read_rf.set_read_status(filter->contains(read_rf.read1->seq_seq),filter->contains(read_rf.read2->seq_seq)); - filter->insert(read_rf.read1->seq_seq); - filter->insert(read_rf.read2->seq_seq); - } - - if (lc_entropy) {read_rf.entropy(entropy_threshold);} - if (lc_dust) {read_rf.dust(dust_threshold);} - if (rm_header) {read_rf.rm_header();} - read_rf.print(); - } + vector v2(threads); + vector tthreads(threads); + vector ttt2(threads); + for (ii=0 ; ii -1 ) {read_f.ns_max_n(ns_max_n);} - if (min_qual_mean) {read_f.min_qual_mean(min_qual_mean);} - if (min_qual_score) { read_f.min_qual_score(min_qual_score);} - if (noiupac) {read_f.noiupac();} - if (min_len) {read_f.min_len(min_len);} - if (max_len) {read_f.max_len(max_len);} - if (max_gc < 100) {read_f.max_gc(max_gc);} - if (min_gc > 0) {read_f.min_gc(min_gc);} - if (derep) { - if(filter->contains(read_f.seq_seq)) { read_f.set_read_status(2);} - filter->insert(read_f.seq_seq); - - } + //////////// pthreads magic + vector v(threads,*inFile_f); + vector tthreads(threads); + vector ttt(threads); + // declare structure for the thread - if (lc_entropy) {read_f.entropy(entropy_threshold);} - if (lc_dust) {read_f.dust(dust_threshold);} - if (rm_header) {read_f.rm_header();} - read_f.print(out_format); + + + for (ii=0 ; iiclose(); if (reverse_read_file){ - inFile_r.close(); +// inFile_r->close(); } + return 0; } +void* do_single (void * arguments) { + struct arg_struct *args = (arg_struct*) arguments; + single_read * read=args->read; + bloom_filter* filter=args->filter; + while( read->read_read( &read_mutex)) { + if (trim_tail_left) {read->trim_tail_left(trim_tail_left);} + if (trim_tail_right) {read->trim_tail_right(trim_tail_right);} + if (trim_qual_right) {read->trim_qual_right("mean","lt",trim_qual_step,trim_qual_window,trim_qual_right_threshold);} + if (trim_qual_left) {read->trim_qual_left("mean","lt",trim_qual_step,trim_qual_window,trim_qual_left_threshold);} + if (ns_max_n > -1 ) {read->ns_max_n(ns_max_n);} + if (min_qual_mean) {read->min_qual_mean(min_qual_mean);} + if (min_qual_score) {read->min_qual_score(min_qual_score);} + if (noiupac) {read->noiupac();} + if (min_len) {read->min_len(min_len);} + if (max_len) {read->max_len(max_len);} + if (max_gc < 100) {read->max_gc(max_gc);} + if (min_gc > 0) {read->min_gc(min_gc);} + if (derep) { + if(filter->contains(read->seq_seq)) { read->set_read_status(2);} + filter->insert(read->seq_seq); + + } + + if (lc_entropy) {read->entropy(entropy_threshold);} + if (lc_dust) {read->dust(dust_threshold);} + if (rm_header) {read->rm_header();} + pthread_mutex_lock(& write_mutex); + read->print(out_format); + pthread_mutex_unlock(& write_mutex); + } + pthread_exit(NULL); + +} +void* do_pair (void * arguments) { + struct arg_struct_pair *args = (arg_struct_pair*) arguments; + pair_read * read=args->read; + bloom_filter* filter=args->filter; + while(read->read_read(&read_mutex, &read_mutex2)) { + //read_rf.read1->trim_qual_right("mean","lt",5,10,30); + if (trim_tail_left) {read->trim_tail_left(trim_tail_left);} + if (trim_tail_right) {read->trim_tail_right(trim_tail_right);} + if (trim_qual_right) {read->trim_qual_right("mean","lt",trim_qual_step,trim_qual_window,trim_qual_right_threshold);} + if (trim_qual_left) {read->trim_qual_left("mean","lt",trim_qual_step,trim_qual_window,trim_qual_left_threshold);} + if (ns_max_n > -1 ) {read->ns_max_n(ns_max_n);} + if (min_qual_mean) {read->min_qual_mean(min_qual_mean);} + if (min_qual_score) { read->min_qual_score(min_qual_score);} + if (noiupac) {read->noiupac();} + if (min_len) {read->min_len(min_len);} + if (max_len) {read->max_len(max_len);} + if (max_gc < 100) {read->max_gc(max_gc);} + if (min_gc > 0) {read->min_gc(min_gc);} + if (derep) { + read->set_read_status(filter->contains(read->read1->seq_seq),filter->contains(read->read2->seq_seq)); + filter->insert(read->read1->seq_seq); + filter->insert(read->read2->seq_seq); + } + + if (lc_entropy) {read->entropy(entropy_threshold);} + if (lc_dust) {read->dust(dust_threshold);} + if (rm_header) {read->rm_header();} + pthread_mutex_lock(& write_mutex); + read->print(); + pthread_mutex_unlock(& write_mutex); + } + pthread_exit(NULL); +} \ No newline at end of file diff --git a/src/reads.cpp b/src/reads.cpp index 6db96d9..e22206a 100644 --- a/src/reads.cpp +++ b/src/reads.cpp @@ -13,6 +13,11 @@ #include #endif +#ifndef PTHREAD +#define PTHREAD +#include +#endif + #include "reads.h" #include #include @@ -22,6 +27,14 @@ using namespace std; single_read::single_read(istream &is): file1(is) { fastq_to_fasta.assign("^@"); } + + single_read::single_read(void) : file1(cin){ // starndar input + fastq_to_fasta.assign("^@"); + } + + void single_read::set_inputs(istream &is) { + file1.rdbuf(is.rdbuf()); + } void single_read::set_outputs(ostream& bad_out_file, ostream& single_out_file, ostream& good_out_file) { bad_out=bad_out_file.rdbuf(); @@ -29,14 +42,17 @@ using namespace std; good_out=good_out_file.rdbuf(); } - int single_read::read_read(void) { + int single_read::read_read(pthread_mutex_t * read_mutex) { read_status=0; + pthread_mutex_lock(read_mutex); if (getline(file1,seq_name, '\n')) { getline(file1, seq_seq, '\n'); getline(file1, seq_sep, '\n'); getline(file1, seq_qual, '\n'); + pthread_mutex_unlock(read_mutex); return 1; } else { + pthread_mutex_unlock(read_mutex); return 0; } } @@ -373,14 +389,30 @@ void single_read::trim_tail_right(int num) { ////////////////////////////////////////////////////////////////////////////// + + +///////////////////////////////////////////////////////////////////////////// + pair_read::pair_read(istream &is1, istream &is2): file1(is1),file2(is2) { read1= new single_read(file1); read2= new single_read(file2); } + + pair_read::pair_read(void):file1(cin),file2(cin) { + read1= new single_read(file1); + read2= new single_read(file2); + } + + void pair_read::set_inputs(istream &read_f,istream &read_r) { + // read1->file1.rdbuf(read_f.rdbuf()); + // read2->file1.rdbuf(read_r.rdbuf()); + read1->set_inputs(read_f); + read2->set_inputs(read_r); + } - int pair_read::read_read(void) { - return read1->read_read() * read2->read_read(); + int pair_read::read_read(pthread_mutex_t* read_mutex_1, pthread_mutex_t* read_mutex_2) { + return read1->read_read(read_mutex_1) * read2->read_read(read_mutex_2); } diff --git a/src/reads.h b/src/reads.h index 4d8b158..4cf61b0 100644 --- a/src/reads.h +++ b/src/reads.h @@ -13,14 +13,21 @@ #include #endif +#ifndef PTHREAD +#define PTHREAD +#include +#endif + using namespace std; class single_read { public: single_read(istream &is); + single_read(void); + void set_inputs(istream &is); void set_outputs(ostream& bad_out_file, ostream& single_out_file, ostream& good_out_file); - int read_read(void); + int read_read(pthread_mutex_t * read_mutex); void ns_max_n(int ns_max_n); // int max_n_p(int ns_max_p); void print(int out_form); @@ -61,7 +68,9 @@ class single_read { class pair_read { public: pair_read(istream &is1, istream &is2); - int read_read(void); + pair_read(void); + void set_inputs(istream &read_f,istream &read_r); + int read_read(pthread_mutex_t* read_mutex_1, pthread_mutex_t* read_mutex_2); void print(void); void set_outputs(ostream& bad_out_file1, ostream& single_out_file1, ostream& good_out_file1, ostream& bad_out_file2, ostream& single_out_file2, ostream& good_out_file2); @@ -100,3 +109,5 @@ class pair_read { }; string random_string( size_t length ); + + diff --git a/src/test_F.fastq.gz b/src/test_F.fastq.gz new file mode 100644 index 0000000..eae7df0 Binary files /dev/null and b/src/test_F.fastq.gz differ diff --git a/src/test_R.fastq.gz b/src/test_R.fastq.gz new file mode 100644 index 0000000..dab1b1e Binary files /dev/null and b/src/test_R.fastq.gz differ