diff --git a/CMakeLists.txt b/CMakeLists.txt index c3eed33..d8d9e5a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -45,7 +45,6 @@ include(cmake/vars.cmake) # Add subdirectories add_subdirectory(ext) -add_subdirectory(include) add_subdirectory(src) add_subdirectory(demo) add_subdirectory(share) diff --git a/README.md b/README.md index caa609c..8e1f0e7 100644 --- a/README.md +++ b/README.md @@ -7,7 +7,7 @@ Integrated Singular Value Decomposition (iSVD) * https://github.com/emfomy/isvd ### Documentation -* http://emfomy.github.io/isvd +* Please download from https://github.com/emfomy/isvd/releases or build using **make doc** (see below) on your own computer. ### Author * Mu Yang <> @@ -33,9 +33,10 @@ Integrated Singular Value Decomposition (iSVD) Please use the following commands to create Makefiles ``` +cd mkdir build cd build -cmake +cmake .. ``` ### Options @@ -43,7 +44,7 @@ cmake Use the following command to set options ``` -ccmake +ccmake .. ``` The following table are the main options @@ -67,20 +68,28 @@ The following table are the main options ### Makefile -The following table are the main make rules +The following table are the main Makefile rules -| Command | Detail | -|----------------|--------------------------------| -| `make all` | build all libraries | -| `make install` | install package | -| `make check` | build and run unit tests | -| `make doc` | build documentation | -| `make help` | display make-rules | +| Command | Detail | Options | +|----------------|--------------------------------|--------------------------------| +| `make all` | build all libraries | | +| `make install` | install package | | +| `make check` | build and run unit tests | Require `ISVD_BUILD_TEST` | +| `make doc` | build documentation | Require `ISVD_BUILD_DOC` | +| `make help` | display make-rules | | + +### Test installation + +* Set `ISVD_BUILD_TEST` using **ccmake**. (Also recommended to unset `ISVD_VERBOSE_TEST` if GPU is enabled). 
+* Run **make check** + * Known issue: RealSingle_WenYinIntegration.Test.#/s_integrate_wen_yin_# always fail the test. ## Usage * Define `ISVD_USE_ILP64` before include `isvd.h` to use 64-bit integer. * All 64bit libraries and executables are named with suffix "`_64`". +* The header files are located in `build/include` +* The libraries are located in `build/lib` ### Libraries diff --git a/check/CMakeLists.txt b/check/CMakeLists.txt index e22c4d4..a47dafb 100644 --- a/check/CMakeLists.txt +++ b/check/CMakeLists.txt @@ -35,7 +35,7 @@ function(ISVD_SET_TARGET_CHECK_CPU target) isvd_set_target_mpi(${target} CXX) isvd_set_target_blas(${target}) isvd_set_target_gtest(${target}) - target_link_libraries(${target} checkisvd extmmio) + target_link_libraries(${target} checkisvd_gpu_none checkisvd_core_la extmmio) target_compile_definitions(${target} PUBLIC "-DISVD_DATA_PATH=\"${PROJECT_SOURCE_DIR}/data\"") endfunction() @@ -45,18 +45,18 @@ function(ISVD_SET_TARGET_CHECK_GPU target) isvd_set_target_blas(${target}) isvd_set_target_gtest(${target}) isvd_set_target_gpu(${target}) - target_link_libraries(${target} checkisvd_gpu extmmio) + target_link_libraries(${target} checkisvd_gpu_magma checkisvd_core_la extmmio) target_compile_definitions(${target} PUBLIC "-DISVD_DATA_PATH=\"${PROJECT_SOURCE_DIR}/data\"") endfunction() # Set include paths -include_directories("${PROJECT_CONFIG_DIR}/include/c" - "${CMAKE_CURRENT_CONFIG_DIR}/src" - "${PROJECT_CONFIG_DIR}/src" - SYSTEM "${PROJECT_SOURCE_DIR}/ext") +include_directories("${CMAKE_CURRENT_CONFIG_DIR}/lib" + "${PROJECT_BINARY_DIR}/include/c" + "${PROJECT_BINARY_DIR}/lib" + SYSTEM "${PROJECT_SOURCE_DIR}/ext/mmio") # Add subdirectories -add_subdirectory(src) +add_subdirectory(lib) # Configure files isvd_configure_x_fn("${CMAKE_CURRENT_SOURCE_DIR}/check" "${CMAKE_CURRENT_CONFIG_DIR}/check" "${ISVD_S_TYPES}") diff --git a/check/check/libisvd/core/stage/@x@_integrate_hierarchical_reduction.cxx 
b/check/check/libisvd/core/stage/@x@_integrate_hierarchical_reduction.cxx index d592a5a..33e9f1b 100644 --- a/check/check/libisvd/core/stage/@x@_integrate_hierarchical_reduction.cxx +++ b/check/check/libisvd/core/stage/@x@_integrate_hierarchical_reduction.cxx @@ -1,6 +1,6 @@ #include #include -#include +#include #include #include @@ -10,7 +10,7 @@ #define serr 1e-3 #define derr 1e-8 -typedef @xtype@ isvd_val_t; +typedef @xtype_____@ isvd_val_t; TEST(@XStr@_HierarchicalReductionIntegration, Test) { @@ -26,7 +26,7 @@ TEST(@XStr@_HierarchicalReductionIntegration, Test) { // Read Qs file = fopen(QS_PATH, "r"); - ASSERT_NE(file, (void*)(nullptr)); + ASSERT_NE(file, nullptr); ASSERT_EQ(mm_read_banner(file, &matcode), 0); EXPECT_TRUE(mm_is_array(matcode)) << mm_typecode_to_str(matcode); EXPECT_TRUE(mm_is_real(matcode)) << mm_typecode_to_str(matcode); @@ -47,7 +47,7 @@ TEST(@XStr@_HierarchicalReductionIntegration, Test) { // Read Qbar file = fopen(Q_PATH, "r"); - ASSERT_NE(file, (void*)(nullptr)); + ASSERT_NE(file, nullptr); ASSERT_EQ(mm_read_banner(file, &matcode), 0); EXPECT_TRUE(mm_is_array(matcode)) << mm_typecode_to_str(matcode); EXPECT_TRUE(mm_is_real(matcode)) << mm_typecode_to_str(matcode); @@ -92,12 +92,12 @@ TEST(@XStr@_HierarchicalReductionIntegration, Test) { isvd_int_t ldqt = l; // Run stage - isvd_@x@IntegrateHierarchicalReduction(param, nullptr, 0, nullptr, 0, qst, ldqst, qt, ldqt); + isvd_@x@IntegrateHierarchicalReduction(param, NULL, 0, NULL, 0, qst, ldqst, qt, ldqt); // Gather results isvd_val_t *qt_ = isvd_@x@malloc(l * Pmb); isvd_int_t ldqt_ = l; - MPI_Gather(qt, mb*ldqt, MPI_@X_TYPE@, qt_, mb*ldqt, MPI_@X_TYPE@, mpi_root, MPI_COMM_WORLD); + MPI_Gather(qt, mb*ldqt, MPI_@XTYPE@, qt_, mb*ldqt, MPI_@XTYPE@, mpi_root, MPI_COMM_WORLD); if ( mpi_rank == mpi_root ) { // Compute space diff --git a/check/check/libisvd/core/stage/@x@_integrate_kolmogorov_nagumo.cxx b/check/check/libisvd/core/stage/@x@_integrate_kolmogorov_nagumo.cxx index a8f1cfd..e120dd7 100644 
--- a/check/check/libisvd/core/stage/@x@_integrate_kolmogorov_nagumo.cxx +++ b/check/check/libisvd/core/stage/@x@_integrate_kolmogorov_nagumo.cxx @@ -1,6 +1,6 @@ #include #include -#include +#include #include #include @@ -15,7 +15,7 @@ #define serr 1e-3 #define derr 1e-8 -typedef @xtype@ isvd_val_t; +typedef @xtype_____@ isvd_val_t; TEST(@XStr@_KolmogorovNagumoIntegration, Test) { @@ -31,7 +31,7 @@ TEST(@XStr@_KolmogorovNagumoIntegration, Test) { // Read Qs file = fopen(QS_PATH, "r"); - ASSERT_NE(file, (void*)(nullptr)); + ASSERT_NE(file, nullptr); ASSERT_EQ(mm_read_banner(file, &matcode), 0); EXPECT_TRUE(mm_is_array(matcode)) << mm_typecode_to_str(matcode); EXPECT_TRUE(mm_is_real(matcode)) << mm_typecode_to_str(matcode); @@ -52,7 +52,7 @@ TEST(@XStr@_KolmogorovNagumoIntegration, Test) { // Read Qbar file = fopen(Q_PATH, "r"); - ASSERT_NE(file, (void*)(nullptr)); + ASSERT_NE(file, nullptr); ASSERT_EQ(mm_read_banner(file, &matcode), 0); EXPECT_TRUE(mm_is_array(matcode)) << mm_typecode_to_str(matcode); EXPECT_TRUE(mm_is_real(matcode)) << mm_typecode_to_str(matcode); @@ -107,7 +107,7 @@ TEST(@XStr@_KolmogorovNagumoIntegration, Test) { // Gather results isvd_val_t *qt_ = isvd_@x@malloc(l * Pmb); isvd_int_t ldqt_ = l; - MPI_Gather(qt, mb*ldqt, MPI_@X_TYPE@, qt_, mb*ldqt, MPI_@X_TYPE@, mpi_root, MPI_COMM_WORLD); + MPI_Gather(qt, mb*ldqt, MPI_@XTYPE@, qt_, mb*ldqt, MPI_@XTYPE@, mpi_root, MPI_COMM_WORLD); // Check results if ( mpi_rank == mpi_root ) { diff --git a/check/check/libisvd/core/stage/@x@_integrate_wen_yin.cxx b/check/check/libisvd/core/stage/@x@_integrate_wen_yin.cxx index 44f0729..a0a753b 100644 --- a/check/check/libisvd/core/stage/@x@_integrate_wen_yin.cxx +++ b/check/check/libisvd/core/stage/@x@_integrate_wen_yin.cxx @@ -1,6 +1,6 @@ #include #include -#include +#include #include #include @@ -15,7 +15,7 @@ #define serr 1e-3 #define derr 1e-8 -typedef @xtype@ isvd_val_t; +typedef @xtype_____@ isvd_val_t; TEST(@XStr@_WenYinIntegration, Test) { @@ -31,7 +31,7 @@ 
TEST(@XStr@_WenYinIntegration, Test) { // Read Qs file = fopen(QS_PATH, "r"); - ASSERT_NE(file, (void*)(nullptr)); + ASSERT_NE(file, nullptr); ASSERT_EQ(mm_read_banner(file, &matcode), 0); EXPECT_TRUE(mm_is_array(matcode)) << mm_typecode_to_str(matcode); EXPECT_TRUE(mm_is_real(matcode)) << mm_typecode_to_str(matcode); @@ -52,7 +52,7 @@ TEST(@XStr@_WenYinIntegration, Test) { // Read Qbar file = fopen(Q_PATH, "r"); - ASSERT_NE(file, (void*)(nullptr)); + ASSERT_NE(file, nullptr); ASSERT_EQ(mm_read_banner(file, &matcode), 0); EXPECT_TRUE(mm_is_array(matcode)) << mm_typecode_to_str(matcode); EXPECT_TRUE(mm_is_real(matcode)) << mm_typecode_to_str(matcode); @@ -107,7 +107,7 @@ TEST(@XStr@_WenYinIntegration, Test) { // Gather results isvd_val_t *qt_ = isvd_@x@malloc(l * Pmb); isvd_int_t ldqt_ = l; - MPI_Gather(qt, mb*ldqt, MPI_@X_TYPE@, qt_, mb*ldqt, MPI_@X_TYPE@, mpi_root, MPI_COMM_WORLD); + MPI_Gather(qt, mb*ldqt, MPI_@XTYPE@, qt_, mb*ldqt, MPI_@XTYPE@, mpi_root, MPI_COMM_WORLD); // Check results if ( mpi_rank == mpi_root ) { diff --git a/check/check/libisvd/core/stage/@x@_orthogonalize_gramian.cxx b/check/check/libisvd/core/stage/@x@_orthogonalize_gramian.cxx index 26c7ef0..d72eaca 100644 --- a/check/check/libisvd/core/stage/@x@_orthogonalize_gramian.cxx +++ b/check/check/libisvd/core/stage/@x@_orthogonalize_gramian.cxx @@ -1,6 +1,6 @@ #include #include -#include +#include #include #include @@ -10,7 +10,7 @@ #define serr 1e-3 #define derr 1e-8 -typedef @xtype@ isvd_val_t; +typedef @xtype_____@ isvd_val_t; TEST(@XStr@_GramianOrthogonalization, Test) { @@ -25,7 +25,7 @@ TEST(@XStr@_GramianOrthogonalization, Test) { // Read Ys file = fopen(YS_PATH, "r"); - ASSERT_NE(file, (void*)(nullptr)); + ASSERT_NE(file, nullptr); ASSERT_EQ(mm_read_banner(file, &matcode), 0); EXPECT_TRUE(mm_is_array(matcode)) << mm_typecode_to_str(matcode); EXPECT_TRUE(mm_is_real(matcode)) << mm_typecode_to_str(matcode); @@ -46,7 +46,7 @@ TEST(@XStr@_GramianOrthogonalization, Test) { // Read Qs file = 
fopen(QS_PATH, "r"); - ASSERT_NE(file, (void*)(nullptr)); + ASSERT_NE(file, nullptr); ASSERT_EQ(mm_read_banner(file, &matcode), 0); EXPECT_TRUE(mm_is_array(matcode)) << mm_typecode_to_str(matcode); EXPECT_TRUE(mm_is_real(matcode)) << mm_typecode_to_str(matcode); @@ -90,12 +90,12 @@ TEST(@XStr@_GramianOrthogonalization, Test) { isvd_@x@Omatcopy('N', Nl, mj, 1.0, yst0 + param.rowidxbegin * ldyst0, ldyst0, yst, ldyst); // Run - isvd_@x@OrthogonalizeGramian(param, nullptr, 0, nullptr, 0, yst, ldyst); + isvd_@x@OrthogonalizeGramian(param, NULL, 0, NULL, 0, yst, ldyst); // Gather results isvd_val_t *qst_ = isvd_@x@malloc(Pmb * ldyst); isvd_int_t ldqst_ = ldyst; - MPI_Gather(yst, mb*ldyst, MPI_@X_TYPE@, qst_, mb*ldyst, MPI_@X_TYPE@, mpi_root, MPI_COMM_WORLD); + MPI_Gather(yst, mb*ldyst, MPI_@XTYPE@, qst_, mb*ldyst, MPI_@XTYPE@, mpi_root, MPI_COMM_WORLD); if ( mpi_rank == mpi_root ) { // Compute space diff --git a/check/check/libisvd/core/stage/@x@_postprocess_gramian.cxx b/check/check/libisvd/core/stage/@x@_postprocess_gramian.cxx index ce9152a..3262efb 100644 --- a/check/check/libisvd/core/stage/@x@_postprocess_gramian.cxx +++ b/check/check/libisvd/core/stage/@x@_postprocess_gramian.cxx @@ -1,6 +1,6 @@ #include #include -#include +#include #include #include @@ -13,7 +13,7 @@ #define serr 1e-3 #define derr 1e-8 -typedef @xtype@ isvd_val_t; +typedef @xtype_____@ isvd_val_t; typedef enum { GatherUV, @@ -34,14 +34,14 @@ static void test( char dista, char ordera, const JobUV jobuv ) { MM_typecode matcode; // Check arguments - const char dista_ = isvd_arg2char("DISTA", dista, "CR", nullptr); - const char ordera_ = isvd_arg2char("ORDERA", ordera, "CR", nullptr); + const char dista_ = isvd_arg2char("DISTA", dista, "CR", NULL); + const char ordera_ = isvd_arg2char("ORDERA", ordera, "CR", NULL); ASSERT_NE(dista_, '\0'); ASSERT_NE(ordera_, '\0'); // Read A file = fopen(A_PATH, "r"); - ASSERT_NE(file, (void*)(nullptr)); + ASSERT_NE(file, nullptr); ASSERT_EQ(mm_read_banner(file, 
&matcode), 0); EXPECT_TRUE(mm_is_array(matcode)) << mm_typecode_to_str(matcode); EXPECT_TRUE(mm_is_real(matcode)) << mm_typecode_to_str(matcode); @@ -72,7 +72,7 @@ static void test( char dista, char ordera, const JobUV jobuv ) { // Read Q file = fopen(Q_PATH, "r"); - ASSERT_NE(file, (void*)(nullptr)); + ASSERT_NE(file, nullptr); ASSERT_EQ(mm_read_banner(file, &matcode), 0); EXPECT_TRUE(mm_is_array(matcode)) << mm_typecode_to_str(matcode); EXPECT_TRUE(mm_is_real(matcode)) << mm_typecode_to_str(matcode); @@ -97,7 +97,7 @@ static void test( char dista, char ordera, const JobUV jobuv ) { // Read S file = fopen(S_PATH, "r"); - ASSERT_NE(file, (void*)(nullptr)); + ASSERT_NE(file, nullptr); ASSERT_EQ(mm_read_banner(file, &matcode), 0); EXPECT_TRUE(mm_is_array(matcode)) << mm_typecode_to_str(matcode); EXPECT_TRUE(mm_is_real(matcode)) << mm_typecode_to_str(matcode); @@ -120,7 +120,7 @@ static void test( char dista, char ordera, const JobUV jobuv ) { // Read U file = fopen(U_PATH, "r"); - ASSERT_NE(file, (void*)(nullptr)); + ASSERT_NE(file, nullptr); ASSERT_EQ(mm_read_banner(file, &matcode), 0); EXPECT_TRUE(mm_is_array(matcode)) << mm_typecode_to_str(matcode); EXPECT_TRUE(mm_is_real(matcode)) << mm_typecode_to_str(matcode); @@ -146,7 +146,7 @@ static void test( char dista, char ordera, const JobUV jobuv ) { // Read V file = fopen(V_PATH, "r"); - ASSERT_NE(file, (void*)(nullptr)); + ASSERT_NE(file, nullptr); ASSERT_EQ(mm_read_banner(file, &matcode), 0); EXPECT_TRUE(mm_is_array(matcode)) << mm_typecode_to_str(matcode); EXPECT_TRUE(mm_is_real(matcode)) << mm_typecode_to_str(matcode); @@ -216,7 +216,7 @@ static void test( char dista, char ordera, const JobUV jobuv ) { case GatherUV: { // Run stage - isvd_@x@PostprocessGramian(param, nullptr, 0, nullptr, 0, dista_, ordera_, + isvd_@x@PostprocessGramian(param, NULL, 0, NULL, 0, dista_, ordera_, a, lda, qt, ldqt, s, ut_, ldut_, vt_, ldvt_, mpi_root, mpi_root); break; @@ -232,12 +232,12 @@ static void test( char dista, char ordera, 
const JobUV jobuv ) { isvd_int_t ldvt = l; // Run stage - isvd_@x@PostprocessGramian(param, nullptr, 0, nullptr, 0, dista_, ordera_, + isvd_@x@PostprocessGramian(param, NULL, 0, NULL, 0, dista_, ordera_, a, lda, qt, ldqt, s, ut, ldut, vt, ldvt, -1, -1); // Gather results - MPI_Gather(ut, mb*ldut, MPI_@X_TYPE@, ut_, mb*ldut, MPI_@X_TYPE@, mpi_root, MPI_COMM_WORLD); - MPI_Gather(vt, nb*ldvt, MPI_@X_TYPE@, vt_, nb*ldvt, MPI_@X_TYPE@, mpi_root, MPI_COMM_WORLD); + MPI_Gather(ut, mb*ldut, MPI_@XTYPE@, ut_, mb*ldut, MPI_@XTYPE@, mpi_root, MPI_COMM_WORLD); + MPI_Gather(vt, nb*ldvt, MPI_@XTYPE@, vt_, nb*ldvt, MPI_@XTYPE@, mpi_root, MPI_COMM_WORLD); // Deallocate memory isvd_free(ut); @@ -249,8 +249,8 @@ static void test( char dista, char ordera, const JobUV jobuv ) { case NoUV: { // Run stage - isvd_@x@PostprocessGramian(param, nullptr, 0, nullptr, 0, dista_, ordera_, - a, lda, qt, ldqt, s, nullptr, 0, nullptr, 0, -2, -2); + isvd_@x@PostprocessGramian(param, NULL, 0, NULL, 0, dista_, ordera_, + a, lda, qt, ldqt, s, NULL, 0, NULL, 0, -2, -2); break; } diff --git a/check/check/libisvd/core/stage/@x@_postprocess_symmetric.cxx b/check/check/libisvd/core/stage/@x@_postprocess_symmetric.cxx index acfe9e0..e5ac46d 100644 --- a/check/check/libisvd/core/stage/@x@_postprocess_symmetric.cxx +++ b/check/check/libisvd/core/stage/@x@_postprocess_symmetric.cxx @@ -1,6 +1,6 @@ #include #include -#include +#include #include #include @@ -12,7 +12,7 @@ #define serr 1e-3 #define derr 1e-8 -typedef @xtype@ isvd_val_t; +typedef @xtype_____@ isvd_val_t; typedef enum { GatherUV, @@ -33,14 +33,14 @@ static void test( char dista, char ordera, const JobUV jobuv ) { MM_typecode matcode; // Check arguments - const char dista_ = isvd_arg2char("DISTA", dista, "CR", nullptr); - const char ordera_ = isvd_arg2char("ORDERA", ordera, "CR", nullptr); + const char dista_ = isvd_arg2char("DISTA", dista, "CR", NULL); + const char ordera_ = isvd_arg2char("ORDERA", ordera, "CR", NULL); ASSERT_NE(dista_, '\0'); 
ASSERT_NE(ordera_, '\0'); // Read A file = fopen(A_PATH, "r"); - ASSERT_NE(file, (void*)(nullptr)); + ASSERT_NE(file, nullptr); ASSERT_EQ(mm_read_banner(file, &matcode), 0); EXPECT_TRUE(mm_is_array(matcode)) << mm_typecode_to_str(matcode); EXPECT_TRUE(mm_is_real(matcode)) << mm_typecode_to_str(matcode); @@ -71,7 +71,7 @@ static void test( char dista, char ordera, const JobUV jobuv ) { // Read Q file = fopen(Q_PATH, "r"); - ASSERT_NE(file, (void*)(nullptr)); + ASSERT_NE(file, nullptr); ASSERT_EQ(mm_read_banner(file, &matcode), 0); EXPECT_TRUE(mm_is_array(matcode)) << mm_typecode_to_str(matcode); EXPECT_TRUE(mm_is_real(matcode)) << mm_typecode_to_str(matcode); @@ -96,7 +96,7 @@ static void test( char dista, char ordera, const JobUV jobuv ) { // Read S file = fopen(S_PATH, "r"); - ASSERT_NE(file, (void*)(nullptr)); + ASSERT_NE(file, nullptr); ASSERT_EQ(mm_read_banner(file, &matcode), 0); EXPECT_TRUE(mm_is_array(matcode)) << mm_typecode_to_str(matcode); EXPECT_TRUE(mm_is_real(matcode)) << mm_typecode_to_str(matcode); @@ -119,7 +119,7 @@ static void test( char dista, char ordera, const JobUV jobuv ) { // Read U file = fopen(U_PATH, "r"); - ASSERT_NE(file, (void*)(nullptr)); + ASSERT_NE(file, nullptr); ASSERT_EQ(mm_read_banner(file, &matcode), 0); EXPECT_TRUE(mm_is_array(matcode)) << mm_typecode_to_str(matcode); EXPECT_TRUE(mm_is_real(matcode)) << mm_typecode_to_str(matcode); @@ -186,8 +186,8 @@ static void test( char dista, char ordera, const JobUV jobuv ) { case GatherUV: { // Run stage - isvd_@x@PostprocessSymmetric(param, nullptr, 0, nullptr, 0, dista_, ordera_, - a, lda, qt, ldqt, s, ut_, ldut_, nullptr, 0, mpi_root, -2); + isvd_@x@PostprocessSymmetric(param, NULL, 0, NULL, 0, dista_, ordera_, + a, lda, qt, ldqt, s, ut_, ldut_, NULL, 0, mpi_root, -2); break; } @@ -199,11 +199,11 @@ static void test( char dista, char ordera, const JobUV jobuv ) { isvd_int_t ldut = l; // Run stage - isvd_@x@PostprocessSymmetric(param, nullptr, 0, nullptr, 0, dista_, ordera_, - a, lda, 
qt, ldqt, s, ut, ldut, nullptr, 0, -1, -2); + isvd_@x@PostprocessSymmetric(param, NULL, 0, NULL, 0, dista_, ordera_, + a, lda, qt, ldqt, s, ut, ldut, NULL, 0, -1, -2); // Gather results - MPI_Gather(ut, mb*ldut, MPI_@X_TYPE@, ut_, mb*ldut, MPI_@X_TYPE@, mpi_root, MPI_COMM_WORLD); + MPI_Gather(ut, mb*ldut, MPI_@XTYPE@, ut_, mb*ldut, MPI_@XTYPE@, mpi_root, MPI_COMM_WORLD); // Deallocate memory isvd_free(ut); @@ -214,8 +214,8 @@ static void test( char dista, char ordera, const JobUV jobuv ) { case NoUV: { // Run stage - isvd_@x@PostprocessSymmetric(param, nullptr, 0, nullptr, 0, dista_, ordera_, - a, lda, qt, ldqt, s, nullptr, 0, nullptr, 0, -2, -2); + isvd_@x@PostprocessSymmetric(param, NULL, 0, NULL, 0, dista_, ordera_, + a, lda, qt, ldqt, s, NULL, 0, NULL, 0, -2, -2); break; } diff --git a/check/check/libisvd/core/stage/@x@_sketch_gaussian_projection.cxx b/check/check/libisvd/core/stage/@x@_sketch_gaussian_projection.cxx index 4b7941e..86c2dea 100644 --- a/check/check/libisvd/core/stage/@x@_sketch_gaussian_projection.cxx +++ b/check/check/libisvd/core/stage/@x@_sketch_gaussian_projection.cxx @@ -1,6 +1,6 @@ #include #include -#include +#include #include #include @@ -10,7 +10,7 @@ #define serr 1e-1 #define derr 1e-6 -typedef @xtype@ isvd_val_t; +typedef @xtype_____@ isvd_val_t; static void test( char dista, char ordera ) { @@ -25,14 +25,14 @@ static void test( char dista, char ordera ) { MM_typecode matcode; // Check arguments - const char dista_ = isvd_arg2char("DISTA", dista, "CR", nullptr); - const char ordera_ = isvd_arg2char("ORDERA", ordera, "CR", nullptr); + const char dista_ = isvd_arg2char("DISTA", dista, "CR", NULL); + const char ordera_ = isvd_arg2char("ORDERA", ordera, "CR", NULL); ASSERT_NE(dista_, '\0'); ASSERT_NE(ordera_, '\0'); // Read A file = fopen(A_PATH, "r"); - ASSERT_NE(file, (void*)(nullptr)); + ASSERT_NE(file, nullptr); ASSERT_EQ(mm_read_banner(file, &matcode), 0); EXPECT_TRUE(mm_is_array(matcode)) << mm_typecode_to_str(matcode); 
EXPECT_TRUE(mm_is_real(matcode)) << mm_typecode_to_str(matcode); @@ -63,7 +63,7 @@ static void test( char dista, char ordera ) { // Read Ys file = fopen(YS_PATH, "r"); - ASSERT_NE(file, (void*)(nullptr)); + ASSERT_NE(file, nullptr); ASSERT_EQ(mm_read_banner(file, &matcode), 0); EXPECT_TRUE(mm_is_array(matcode)) << mm_typecode_to_str(matcode); EXPECT_TRUE(mm_is_real(matcode)) << mm_typecode_to_str(matcode); @@ -119,13 +119,13 @@ static void test( char dista, char ordera ) { isvd_int_t ldyst = Nl; // Run stage - isvd_@x@SketchGaussianProjection(param, nullptr, 0, nullptr, 0, dista_, ordera_, a, lda, yst, ldyst, seed, mpi_root); + isvd_@x@SketchGaussianProjection(param, NULL, 0, NULL, 0, dista_, ordera_, a, lda, yst, ldyst, seed, mpi_root); #if defined(ISVD_USE_MKL) // Gather results isvd_val_t *yst_ = isvd_@x@malloc(Nl * Pmb); isvd_int_t ldyst_ = Nl; - MPI_Gather(yst, mb*ldyst, MPI_@X_TYPE@, yst_, mb*ldyst, MPI_@X_TYPE@, mpi_root, MPI_COMM_WORLD); + MPI_Gather(yst, mb*ldyst, MPI_@XTYPE@, yst_, mb*ldyst, MPI_@XTYPE@, mpi_root, MPI_COMM_WORLD); // Check results if ( mpi_rank == mpi_root ) { diff --git a/check/check/libisvd/gpu/stage/@x@_postprocess_gramian_gpu.cxx b/check/check/libisvd/gpu/stage/@x@_postprocess_gramian_gpu.cxx index a208e06..2f6cfb1 100644 --- a/check/check/libisvd/gpu/stage/@x@_postprocess_gramian_gpu.cxx +++ b/check/check/libisvd/gpu/stage/@x@_postprocess_gramian_gpu.cxx @@ -1,6 +1,6 @@ #include #include -#include +#include #include #include @@ -13,7 +13,7 @@ #define serr 1e-3 #define derr 1e-8 -typedef @xtype@ isvd_val_t; +typedef @xtype_____@ isvd_val_t; typedef enum { GatherUV, @@ -34,14 +34,14 @@ static void test( char dista, char ordera, const JobUV jobuv ) { MM_typecode matcode; // Check arguments - const char dista_ = isvd_arg2char("DISTA", dista, "CR", nullptr); - const char ordera_ = isvd_arg2char("ORDERA", ordera, "CR", nullptr); + const char dista_ = isvd_arg2char("DISTA", dista, "CR", NULL); + const char ordera_ = isvd_arg2char("ORDERA", 
ordera, "CR", NULL); ASSERT_NE(dista_, '\0'); ASSERT_NE(ordera_, '\0'); // Read A file = fopen(A_PATH, "r"); - ASSERT_NE(file, (void*)(nullptr)); + ASSERT_NE(file, nullptr); ASSERT_EQ(mm_read_banner(file, &matcode), 0); EXPECT_TRUE(mm_is_array(matcode)) << mm_typecode_to_str(matcode); EXPECT_TRUE(mm_is_real(matcode)) << mm_typecode_to_str(matcode); @@ -72,7 +72,7 @@ static void test( char dista, char ordera, const JobUV jobuv ) { // Read Q file = fopen(Q_PATH, "r"); - ASSERT_NE(file, (void*)(nullptr)); + ASSERT_NE(file, nullptr); ASSERT_EQ(mm_read_banner(file, &matcode), 0); EXPECT_TRUE(mm_is_array(matcode)) << mm_typecode_to_str(matcode); EXPECT_TRUE(mm_is_real(matcode)) << mm_typecode_to_str(matcode); @@ -97,7 +97,7 @@ static void test( char dista, char ordera, const JobUV jobuv ) { // Read S file = fopen(S_PATH, "r"); - ASSERT_NE(file, (void*)(nullptr)); + ASSERT_NE(file, nullptr); ASSERT_EQ(mm_read_banner(file, &matcode), 0); EXPECT_TRUE(mm_is_array(matcode)) << mm_typecode_to_str(matcode); EXPECT_TRUE(mm_is_real(matcode)) << mm_typecode_to_str(matcode); @@ -120,7 +120,7 @@ static void test( char dista, char ordera, const JobUV jobuv ) { // Read U file = fopen(U_PATH, "r"); - ASSERT_NE(file, (void*)(nullptr)); + ASSERT_NE(file, nullptr); ASSERT_EQ(mm_read_banner(file, &matcode), 0); EXPECT_TRUE(mm_is_array(matcode)) << mm_typecode_to_str(matcode); EXPECT_TRUE(mm_is_real(matcode)) << mm_typecode_to_str(matcode); @@ -146,7 +146,7 @@ static void test( char dista, char ordera, const JobUV jobuv ) { // Read V file = fopen(V_PATH, "r"); - ASSERT_NE(file, (void*)(nullptr)); + ASSERT_NE(file, nullptr); ASSERT_EQ(mm_read_banner(file, &matcode), 0); EXPECT_TRUE(mm_is_array(matcode)) << mm_typecode_to_str(matcode); EXPECT_TRUE(mm_is_real(matcode)) << mm_typecode_to_str(matcode); @@ -216,7 +216,7 @@ static void test( char dista, char ordera, const JobUV jobuv ) { case GatherUV: { // Run stage - isvd_@x@PostprocessGramian_gpu(param, nullptr, 0, nullptr, 0, dista_, ordera_, 
+ isvd_@x@PostprocessGramian_gpu(param, NULL, 0, NULL, 0, dista_, ordera_, a, lda, qt, ldqt, s, ut_, ldut_, vt_, ldvt_, mpi_root, mpi_root); break; @@ -232,12 +232,12 @@ static void test( char dista, char ordera, const JobUV jobuv ) { isvd_int_t ldvt = l; // Run stage - isvd_@x@PostprocessGramian_gpu(param, nullptr, 0, nullptr, 0, dista_, ordera_, + isvd_@x@PostprocessGramian_gpu(param, NULL, 0, NULL, 0, dista_, ordera_, a, lda, qt, ldqt, s, ut, ldut, vt, ldvt, -1, -1); // Gather results - MPI_Gather(ut, mb*ldut, MPI_@X_TYPE@, ut_, mb*ldut, MPI_@X_TYPE@, mpi_root, MPI_COMM_WORLD); - MPI_Gather(vt, nb*ldvt, MPI_@X_TYPE@, vt_, nb*ldvt, MPI_@X_TYPE@, mpi_root, MPI_COMM_WORLD); + MPI_Gather(ut, mb*ldut, MPI_@XTYPE@, ut_, mb*ldut, MPI_@XTYPE@, mpi_root, MPI_COMM_WORLD); + MPI_Gather(vt, nb*ldvt, MPI_@XTYPE@, vt_, nb*ldvt, MPI_@XTYPE@, mpi_root, MPI_COMM_WORLD); // Deallocate memory isvd_free(ut); @@ -249,8 +249,8 @@ static void test( char dista, char ordera, const JobUV jobuv ) { case NoUV: { // Run stage - isvd_@x@PostprocessGramian_gpu(param, nullptr, 0, nullptr, 0, dista_, ordera_, - a, lda, qt, ldqt, s, nullptr, 0, nullptr, 0, -2, -2); + isvd_@x@PostprocessGramian_gpu(param, NULL, 0, NULL, 0, dista_, ordera_, + a, lda, qt, ldqt, s, NULL, 0, NULL, 0, -2, -2); break; } diff --git a/check/check/libisvd/gpu/stage/@x@_postprocess_symmetric_gpu.cxx b/check/check/libisvd/gpu/stage/@x@_postprocess_symmetric_gpu.cxx index 5737672..d041385 100644 --- a/check/check/libisvd/gpu/stage/@x@_postprocess_symmetric_gpu.cxx +++ b/check/check/libisvd/gpu/stage/@x@_postprocess_symmetric_gpu.cxx @@ -1,6 +1,6 @@ #include #include -#include +#include #include #include @@ -12,7 +12,7 @@ #define serr 1e-3 #define derr 1e-8 -typedef @xtype@ isvd_val_t; +typedef @xtype_____@ isvd_val_t; typedef enum { GatherUV, @@ -33,14 +33,14 @@ static void test( char dista, char ordera, const JobUV jobuv ) { MM_typecode matcode; // Check arguments - const char dista_ = isvd_arg2char("DISTA", dista, "CR", 
nullptr); - const char ordera_ = isvd_arg2char("ORDERA", ordera, "CR", nullptr); + const char dista_ = isvd_arg2char("DISTA", dista, "CR", NULL); + const char ordera_ = isvd_arg2char("ORDERA", ordera, "CR", NULL); ASSERT_NE(dista_, '\0'); ASSERT_NE(ordera_, '\0'); // Read A file = fopen(A_PATH, "r"); - ASSERT_NE(file, (void*)(nullptr)); + ASSERT_NE(file, nullptr); ASSERT_EQ(mm_read_banner(file, &matcode), 0); EXPECT_TRUE(mm_is_array(matcode)) << mm_typecode_to_str(matcode); EXPECT_TRUE(mm_is_real(matcode)) << mm_typecode_to_str(matcode); @@ -71,7 +71,7 @@ static void test( char dista, char ordera, const JobUV jobuv ) { // Read Q file = fopen(Q_PATH, "r"); - ASSERT_NE(file, (void*)(nullptr)); + ASSERT_NE(file, nullptr); ASSERT_EQ(mm_read_banner(file, &matcode), 0); EXPECT_TRUE(mm_is_array(matcode)) << mm_typecode_to_str(matcode); EXPECT_TRUE(mm_is_real(matcode)) << mm_typecode_to_str(matcode); @@ -96,7 +96,7 @@ static void test( char dista, char ordera, const JobUV jobuv ) { // Read S file = fopen(S_PATH, "r"); - ASSERT_NE(file, (void*)(nullptr)); + ASSERT_NE(file, nullptr); ASSERT_EQ(mm_read_banner(file, &matcode), 0); EXPECT_TRUE(mm_is_array(matcode)) << mm_typecode_to_str(matcode); EXPECT_TRUE(mm_is_real(matcode)) << mm_typecode_to_str(matcode); @@ -119,7 +119,7 @@ static void test( char dista, char ordera, const JobUV jobuv ) { // Read U file = fopen(U_PATH, "r"); - ASSERT_NE(file, (void*)(nullptr)); + ASSERT_NE(file, nullptr); ASSERT_EQ(mm_read_banner(file, &matcode), 0); EXPECT_TRUE(mm_is_array(matcode)) << mm_typecode_to_str(matcode); EXPECT_TRUE(mm_is_real(matcode)) << mm_typecode_to_str(matcode); @@ -186,8 +186,8 @@ static void test( char dista, char ordera, const JobUV jobuv ) { case GatherUV: { // Run stage - isvd_@x@PostprocessSymmetric_gpu(param, nullptr, 0, nullptr, 0, dista_, ordera_, - a, lda, qt, ldqt, s, ut_, ldut_, nullptr, 0, mpi_root, -2); + isvd_@x@PostprocessSymmetric_gpu(param, NULL, 0, NULL, 0, dista_, ordera_, + a, lda, qt, ldqt, s, ut_, 
ldut_, NULL, 0, mpi_root, -2); break; } @@ -199,11 +199,11 @@ static void test( char dista, char ordera, const JobUV jobuv ) { isvd_int_t ldut = l; // Run stage - isvd_@x@PostprocessSymmetric_gpu(param, nullptr, 0, nullptr, 0, dista_, ordera_, - a, lda, qt, ldqt, s, ut, ldut, nullptr, 0, -1, -2); + isvd_@x@PostprocessSymmetric_gpu(param, NULL, 0, NULL, 0, dista_, ordera_, + a, lda, qt, ldqt, s, ut, ldut, NULL, 0, -1, -2); // Gather results - MPI_Gather(ut, mb*ldut, MPI_@X_TYPE@, ut_, mb*ldut, MPI_@X_TYPE@, mpi_root, MPI_COMM_WORLD); + MPI_Gather(ut, mb*ldut, MPI_@XTYPE@, ut_, mb*ldut, MPI_@XTYPE@, mpi_root, MPI_COMM_WORLD); // Deallocate memory isvd_free(ut); @@ -214,8 +214,8 @@ static void test( char dista, char ordera, const JobUV jobuv ) { case NoUV: { // Run stage - isvd_@x@PostprocessSymmetric_gpu(param, nullptr, 0, nullptr, 0, dista_, ordera_, - a, lda, qt, ldqt, s, nullptr, 0, nullptr, 0, -2, -2); + isvd_@x@PostprocessSymmetric_gpu(param, NULL, 0, NULL, 0, dista_, ordera_, + a, lda, qt, ldqt, s, NULL, 0, NULL, 0, -2, -2); break; } diff --git a/check/check/libisvd/gpu/stage/@x@_sketch_gaussian_projection_gpu.cxx b/check/check/libisvd/gpu/stage/@x@_sketch_gaussian_projection_gpu.cxx index 31edc73..609de5a 100644 --- a/check/check/libisvd/gpu/stage/@x@_sketch_gaussian_projection_gpu.cxx +++ b/check/check/libisvd/gpu/stage/@x@_sketch_gaussian_projection_gpu.cxx @@ -1,6 +1,6 @@ #include #include -#include +#include #include #include @@ -10,7 +10,7 @@ #define serr 1e-1 #define derr 1e-6 -typedef @xtype@ isvd_val_t; +typedef @xtype_____@ isvd_val_t; static void test( char dista, char ordera ) { @@ -25,14 +25,14 @@ static void test( char dista, char ordera ) { MM_typecode matcode; // Check arguments - const char dista_ = isvd_arg2char("DISTA", dista, "CR", nullptr); - const char ordera_ = isvd_arg2char("ORDERA", ordera, "CR", nullptr); + const char dista_ = isvd_arg2char("DISTA", dista, "CR", NULL); + const char ordera_ = isvd_arg2char("ORDERA", ordera, "CR", NULL); 
ASSERT_NE(dista_, '\0'); ASSERT_NE(ordera_, '\0'); // Read A file = fopen(A_PATH, "r"); - ASSERT_NE(file, (void*)(nullptr)); + ASSERT_NE(file, nullptr); ASSERT_EQ(mm_read_banner(file, &matcode), 0); EXPECT_TRUE(mm_is_array(matcode)) << mm_typecode_to_str(matcode); EXPECT_TRUE(mm_is_real(matcode)) << mm_typecode_to_str(matcode); @@ -63,7 +63,7 @@ static void test( char dista, char ordera ) { // Read Ys file = fopen(YS_PATH, "r"); - ASSERT_NE(file, (void*)(nullptr)); + ASSERT_NE(file, nullptr); ASSERT_EQ(mm_read_banner(file, &matcode), 0); EXPECT_TRUE(mm_is_array(matcode)) << mm_typecode_to_str(matcode); EXPECT_TRUE(mm_is_real(matcode)) << mm_typecode_to_str(matcode); @@ -119,13 +119,13 @@ static void test( char dista, char ordera ) { isvd_int_t ldyst = Nl; // Run stage - isvd_@x@SketchGaussianProjection_gpu(param, nullptr, 0, nullptr, 0, dista_, ordera_, a, lda, yst, ldyst, seed, mpi_root); + isvd_@x@SketchGaussianProjection_gpu(param, NULL, 0, NULL, 0, dista_, ordera_, a, lda, yst, ldyst, seed, mpi_root); #if defined(ISVD_USE_MKL) // Gather results isvd_val_t *yst_ = isvd_@x@malloc(Nl * Pmb); isvd_int_t ldyst_ = Nl; - MPI_Gather(yst, mb*ldyst, MPI_@X_TYPE@, yst_, mb*ldyst, MPI_@X_TYPE@, mpi_root, MPI_COMM_WORLD); + MPI_Gather(yst, mb*ldyst, MPI_@XTYPE@, yst_, mb*ldyst, MPI_@XTYPE@, mpi_root, MPI_COMM_WORLD); // Check results if ( mpi_rank == mpi_root ) { diff --git a/check/lib/CMakeLists.txt b/check/lib/CMakeLists.txt new file mode 100644 index 0000000..59e4584 --- /dev/null +++ b/check/lib/CMakeLists.txt @@ -0,0 +1,42 @@ +# The CMake setting of 'check/lib' + +isvd_set_config_var() + +# Configure files +file(COPY "${PROJECT_CONFIG_DIR}/src/lib/" DESTINATION "${CMAKE_CURRENT_CONFIG_DIR}") +file(GLOB_RECURSE cfiles "${CMAKE_CURRENT_CONFIG_DIR}/*.c") +foreach(cfile ${cfiles}) + string(REGEX REPLACE "\\.c$" ".cxx" cxxfile ${cfile}) + file(RENAME ${cfile} ${cxxfile}) +endforeach() + +file(RENAME "${CMAKE_CURRENT_CONFIG_DIR}/libisvd/core/param.cxx" + 
"${CMAKE_CURRENT_CONFIG_DIR}/libisvd/core/param.c") + +# checkisvd_core_la +file(GLOB_RECURSE files "${CMAKE_CURRENT_CONFIG_DIR}/libisvd/core/*" + "${CMAKE_CURRENT_CONFIG_DIR}/libisvd/la/*" + "${CMAKE_CURRENT_CONFIG_DIR}/libisvd/util/*") +add_library(checkisvd_core_la EXCLUDE_FROM_ALL ${files}) +isvd_set_target(checkisvd_core_la) +isvd_set_target_omp(checkisvd_core_la CXX) +isvd_set_target_mpi(checkisvd_core_la CXX) +isvd_set_target_blas(checkisvd_core_la) +isvd_set_target_gtest(checkisvd_core_la) + +# checkisvd_gpu_none +file(GLOB_RECURSE files "${CMAKE_CURRENT_CONFIG_DIR}/libisvd/nogpu/*") +add_library(checkisvd_gpu_none EXCLUDE_FROM_ALL ${files}) +isvd_set_target(checkisvd_gpu_none) +isvd_set_target_mpi(checkisvd_gpu_none CXX) +isvd_set_target_gtest(checkisvd_gpu_none) + +# checkisvd_gpu_magma +if(ISVD_USE_GPU) + file(GLOB_RECURSE files "${CMAKE_CURRENT_CONFIG_DIR}/libisvd/gpu/*") + add_library(checkisvd_gpu_magma EXCLUDE_FROM_ALL ${files}) + isvd_set_target(checkisvd_gpu_magma) + isvd_set_target_mpi(checkisvd_gpu_magma CXX) + isvd_set_target_gpu(checkisvd_gpu_magma) + isvd_set_target_gtest(checkisvd_gpu_magma) +endif() diff --git a/check/src/CMakeLists.txt b/check/src/CMakeLists.txt deleted file mode 100644 index bebedf5..0000000 --- a/check/src/CMakeLists.txt +++ /dev/null @@ -1,41 +0,0 @@ -# The CMake setting of 'check/src' - -isvd_set_config_var() - -# Configure files -file(COPY "${PROJECT_CONFIG_DIR}/src/" DESTINATION "${CMAKE_CURRENT_CONFIG_DIR}") -file(GLOB_RECURSE cfiles "${CMAKE_CURRENT_CONFIG_DIR}/*.c") -foreach(cfile ${cfiles}) - string(REGEX REPLACE "\\.c$" ".cxx" cxxfile ${cfile}) - file(RENAME ${cfile} ${cxxfile}) -endforeach() - -file(RENAME "${CMAKE_CURRENT_CONFIG_DIR}/libisvd/core/param.cxx" - "${CMAKE_CURRENT_CONFIG_DIR}/libisvd/core/param.c") - -# checkisvd -file(GLOB_RECURSE files "${CMAKE_CURRENT_CONFIG_DIR}/libisvd/core/*" - "${CMAKE_CURRENT_CONFIG_DIR}/libisvd/la/*" - "${CMAKE_CURRENT_CONFIG_DIR}/libisvd/util/*" - 
"${CMAKE_CURRENT_CONFIG_DIR}/libisvd/nogpu/*") -add_library(checkisvd EXCLUDE_FROM_ALL ${files}) -isvd_set_target(checkisvd) -isvd_set_target_omp(checkisvd CXX) -isvd_set_target_mpi(checkisvd CXX) -isvd_set_target_blas(checkisvd) -isvd_set_target_gtest(checkisvd) - -# checkisvd_gpu -if(ISVD_USE_GPU) - file(GLOB_RECURSE files "${CMAKE_CURRENT_CONFIG_DIR}/libisvd/core/*" - "${CMAKE_CURRENT_CONFIG_DIR}/libisvd/la/*" - "${CMAKE_CURRENT_CONFIG_DIR}/libisvd/util/*" - "${CMAKE_CURRENT_CONFIG_DIR}/libisvd/gpu/*") - add_library(checkisvd_gpu EXCLUDE_FROM_ALL ${files}) - isvd_set_target(checkisvd_gpu) - isvd_set_target_omp(checkisvd_gpu CXX) - isvd_set_target_mpi(checkisvd_gpu CXX) - isvd_set_target_blas(checkisvd_gpu) - isvd_set_target_gpu(checkisvd_gpu) - isvd_set_target_gtest(checkisvd_gpu) -endif() diff --git a/cmake/display.cmake b/cmake/display.cmake index d125268..3e635cd 100644 --- a/cmake/display.cmake +++ b/cmake/display.cmake @@ -153,4 +153,8 @@ if(ISVD_BUILD_BIN) if(ISVD_BLAS STREQUAL "MKL" AND NOT ISVD_OMP) message(${DEPRECATION} "${Esc}[1;33mOpenMP is not enabled. Recommended to use it for better performance.${Esc}[0m") endif() + + if(CMAKE_COMPILER_IS_GNUCC AND NOT CMAKE_C_COMPILER_VERSION VERSION_GREATER 7 AND ISVD_USE_GPU AND CMAKE_BUILD_TYPE STREQUAL "Debug") + message(${DEPRECATION} "${Esc}[1;33mGPU routines with GCC under 7.0 might crash in debug mode.${Esc}[0m") + endif() endif() diff --git a/cmake/options.cmake b/cmake/options.cmake index 3a3158c..e5ea5f7 100644 --- a/cmake/options.cmake +++ b/cmake/options.cmake @@ -9,7 +9,7 @@ endif() option(ISVD_BUILD_LIB "Build libraries." "ON") option(ISVD_BUILD_DEMO "Build demo codes." "ON") option(ISVD_BUILD_TEST "Build unit tests." "OFF") -option(ISVD_BUILD_DOC "Build documentation." "OFF") +option(ISVD_BUILD_DOC "Build documentation." "ON") option(ISVD_USE_GPU "Enable GPU support." "OFF") option(ISVD_VERBOSE_TEST "Verbose unit tests." 
"ON") @@ -79,4 +79,4 @@ endif() # Set environment variables list(APPEND ENVS "OMP_NUM_THREADS=${OMP_THRDS}") -list(APPEND ENVS "ASAN_OPTIONS=color=always:protect_shadow_gap=0:replace_intrin=0:detect_leaks=0") +list(APPEND ENVS "ASAN_OPTIONS=color=always:protect_shadow_gap=0") diff --git a/cmake/vars.cmake b/cmake/vars.cmake index 97d3464..c69fbcd 100644 --- a/cmake/vars.cmake +++ b/cmake/vars.cmake @@ -1,11 +1,11 @@ # Types definitions -macro(ISVD_SET_TYPES x_ xtype_ x_type_ XName_ XStr_) +macro(ISVD_SET_TYPES x_ xtype_ XName_ XStr_) string(TOLOWER "${x_}" x) string(TOUPPER "${x_}" X) - set(xtype ${xtype_}) + set(xtype_____ ${xtype_}) - string(TOUPPER "${x_type_}" X_TYPE) + string(TOUPPER "${xtype_}" XTYPE) string(TOLOWER "${XName_}" xname) set(XName "${XName_}") @@ -17,25 +17,25 @@ unset(ISVD_S_TYPES) unset(ISVD_D_TYPES) unset(ISVD_C_TYPES) unset(ISVD_Z_TYPES) -list(APPEND ISVD_S_TYPES "s" "float" "float" "Real Single" "RealSingle") -list(APPEND ISVD_D_TYPES "d" "double" "double" "Real Double" "RealDouble") -list(APPEND ISVD_C_TYPES "c" "_Complex float" "complex_float" "Complex Single" "ComplexSingle") -list(APPEND ISVD_Z_TYPES "z" "_Complex double" "complex_double" "Complex Double" "ComplexDouble") +list(APPEND ISVD_S_TYPES "s" "isvd_s_val_t" "Real Single" "RealSingle") +list(APPEND ISVD_D_TYPES "d" "isvd_d_val_t" "Real Double" "RealDouble") +list(APPEND ISVD_C_TYPES "c" "isvd_c_val_t" "Complex Single" "ComplexSingle") +list(APPEND ISVD_Z_TYPES "z" "isvd_z_val_t" "Complex Double" "ComplexDouble") # BLAS definitions set( - ISVD_LA_BLAS_TYPE_DEFINE + ISVD_TYPE_MACRO_DEFINE "#define CHAR1 char" "#define INT isvd_int_t" - "#define REAL4 float" - "#define REAL8 double" - "#define COMP4 _Complex float" - "#define COMP8 _Complex double" + "#define REAL4 isvd_s_val_t" + "#define REAL8 isvd_d_val_t" + "#define COMP4 isvd_c_val_t" + "#define COMP8 isvd_z_val_t" ) -string(REPLACE ";" "\n" ISVD_LA_BLAS_TYPE_DEFINE "${ISVD_LA_BLAS_TYPE_DEFINE}") +string(REPLACE ";" "\n" 
ISVD_TYPE_MACRO_DEFINE "${ISVD_TYPE_MACRO_DEFINE}") set( - ISVD_LA_BLAS_TYPE_UNDEF + ISVD_TYPE_MACRO_UNDEF "#undef CHAR1" "#undef INT" "#undef REAL4" @@ -43,4 +43,4 @@ set( "#undef COMP4" "#undef COMP8" ) -string(REPLACE ";" "\n" ISVD_LA_BLAS_TYPE_UNDEF "${ISVD_LA_BLAS_TYPE_UNDEF}") +string(REPLACE ";" "\n" ISVD_TYPE_MACRO_UNDEF "${ISVD_TYPE_MACRO_UNDEF}") diff --git a/cpplint/CMakeLists.txt b/cpplint/CMakeLists.txt index 4863902..642d100 100644 --- a/cpplint/CMakeLists.txt +++ b/cpplint/CMakeLists.txt @@ -5,15 +5,14 @@ find_program(CMAKE_CPPLINT cpplint) mark_as_advanced(CMAKE_CPPLINT) # Macro -function(SET_CPPLINT_TARGET path) +function(SET_CPPLINT_TARGET path linelength filter) string(REPLACE "/" "_" target ${path}) - add_custom_target( - cpplint_${target} - COMMAND ${CMAKE_CPPLINT} --recursive --root=${PROJECT_CONFIG_DIR}/${path} ${ARGN} ${path} - WORKING_DIRECTORY ${PROJECT_CONFIG_DIR} + add_custom_command( + TARGET cpplint POST_BUILD + COMMAND ${CMAKE_CPPLINT} --recursive --root="${PROJECT_BINARY_DIR}/${path}" --linelength=${linelength} --filter=${filter} ${path} + WORKING_DIRECTORY "${PROJECT_BINARY_DIR}" COMMENT "Run cpplint for ${path}" ) - add_dependencies(cpplint cpplint_${target}) endfunction() # CppLint @@ -24,5 +23,5 @@ add_custom_target( COMMENT "Run cpplint" ) -set_cpplint_target(include/c --linelength=128 --filter=-build/include,-build/include_order,-readability/casting,-whitespace/blank_line,-whitespace/braces,-whitespace/parens) -set_cpplint_target(src --linelength=128 --filter=-build/include,-build/include_order,-build/include_what_you_use,-readability/casting,-whitespace/blank_line,-whitespace/braces,-whitespace/parens) +set_cpplint_target(include/c 128 "-build/include,-build/include_order,-readability/casting,-whitespace/blank_line,-whitespace/braces,-whitespace/parens") +set_cpplint_target(lib 128 
"-build/include,-build/include_order,-build/include_what_you_use,-readability/casting,-whitespace/blank_line,-whitespace/braces,-whitespace/parens") diff --git a/demo/CMakeLists.txt b/demo/CMakeLists.txt index ba8b38a..5447745 100644 --- a/demo/CMakeLists.txt +++ b/demo/CMakeLists.txt @@ -23,9 +23,13 @@ macro(SET_TARGET name files) endmacro() # Set include paths -include_directories("${PROJECT_CONFIG_DIR}/include/c") +include_directories("${PROJECT_BINARY_DIR}/include/c") # C demo set_target(cdemo cdemo.c ${PROJECT_SOURCE_DIR}/data/a.mtx /dev/null /dev/null /dev/null) target_link_libraries(isvd_cdemo isvd extmmio) -target_include_directories(isvd_cdemo ${SYSTEM} PUBLIC "${PROJECT_SOURCE_DIR}/ext") +target_include_directories(isvd_cdemo ${SYSTEM} PUBLIC "${PROJECT_SOURCE_DIR}/ext/mmio") + +# C test +set_target(ctest ctest.c) +target_link_libraries(isvd_ctest isvd) diff --git a/demo/cdemo.c b/demo/cdemo.c index 7cae048..7060168 100644 --- a/demo/cdemo.c +++ b/demo/cdemo.c @@ -10,7 +10,7 @@ #include #include #include -#include +#include //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// /// Main function @@ -53,7 +53,7 @@ int main( int argc, char **argv ) { if ( !mm_is_array(matcode) || !mm_is_real(matcode) || !mm_is_general(matcode) ) { if ( mpi_rank == mpi_root ) { fprintf(stderr, "Sorry, this application does not support "); - fprintf(stderr, "Market Market type: [%s]\n", mm_typecode_to_str(matcode)); + fprintf(stderr, "Matrix Market type: [%s]\n", mm_typecode_to_str(matcode)); MPI_Abort(MPI_COMM_WORLD, 1); } MPI_Barrier(MPI_COMM_WORLD); diff --git a/demo/ctest.c b/demo/ctest.c new file mode 100644 index 0000000..c8e2fcb --- /dev/null +++ b/demo/ctest.c @@ -0,0 +1,40 @@ +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +/// \file demo/ctest.c +/// \brief The C test code +/// +/// \author Mu Yang <> +/// 
\copyright MIT License +/// + +#include +#include +#include +#include + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +/// Main function +/// +int main( int argc, char **argv ) { + + isvd_init(&argc, &argv, MPI_COMM_WORLD); + + const mpi_int_t mpi_size = isvd_getMpiSize(MPI_COMM_WORLD); + const mpi_int_t mpi_rank = isvd_getMpiRank(MPI_COMM_WORLD); + const mpi_int_t mpi_root = 0; + + if ( mpi_rank == mpi_root ) { + printf("iSVD C demo\n"); + isvd_printEnvironment(MPI_COMM_WORLD); + } + + const isvd_int_t m = 100; + const isvd_int_t n = 1000; + + double *a = isvd_dmalloc(m * n); + + isvd_vdRngGaussianDriver(0, 0, m*n, a, 0.0, 1.0); + + isvd_finalize(); + + return 0; +} diff --git a/doxygen/Doxyfile b/doxygen/Doxyfile index 5a0d8cd..abc5966 100644 --- a/doxygen/Doxyfile +++ b/doxygen/Doxyfile @@ -152,7 +152,8 @@ FULL_PATH_NAMES = YES # will be relative from the directory where doxygen is started. # This tag requires that the tag FULL_PATH_NAMES is set to YES. -STRIP_FROM_PATH = @PROJECT_CONFIG_DIR@ +STRIP_FROM_PATH = @CMAKE_CURRENT_CONFIG_DIR@ \ + @PROJECT_CONFIG_DIR@/src # The STRIP_FROM_INC_PATH tag can be used to strip a user-defined part of the # path mentioned in the documentation of a class, which tells the reader which @@ -161,7 +162,7 @@ STRIP_FROM_PATH = @PROJECT_CONFIG_DIR@ # specify the list of include paths that are normally passed to the compiler # using the -I flag. -STRIP_FROM_INC_PATH = @PROJECT_CONFIG_DIR@ +STRIP_FROM_INC_PATH = # If the SHORT_NAMES tag is set to YES, doxygen will generate much shorter (but # less readable) file names. 
This can be useful is your file systems doesn't @@ -785,7 +786,6 @@ WARN_LOGFILE = INPUT = @PROJECT_SOURCE_DIR@/README.md \ @PROJECT_CONFIG_DIR@/doxygen \ - @PROJECT_CONFIG_DIR@/include \ @PROJECT_CONFIG_DIR@/src # This tag can be used to specify the character encoding of the source files diff --git a/doxygen/example.dox b/doxygen/example.dox new file mode 100644 index 0000000..b4ed890 --- /dev/null +++ b/doxygen/example.dox @@ -0,0 +1,3 @@ +/** + \example demo/cdemo.c +*/ \ No newline at end of file diff --git a/doxygen/ext/mkl.h b/doxygen/ext/mkl.h new file mode 100644 index 0000000..e7d83fc --- /dev/null +++ b/doxygen/ext/mkl.h @@ -0,0 +1,4 @@ +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +/// \file ext/mkl.h +/// \brief The Intel MKL header. +/// diff --git a/doxygen/ext/mpi.h b/doxygen/ext/mpi.h new file mode 100644 index 0000000..a74402d --- /dev/null +++ b/doxygen/ext/mpi.h @@ -0,0 +1,4 @@ +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +/// \file ext/mpi.h +/// \brief The MPI header. +/// diff --git a/doxygen/ext/omp.h b/doxygen/ext/omp.h new file mode 100644 index 0000000..1de9750 --- /dev/null +++ b/doxygen/ext/omp.h @@ -0,0 +1,4 @@ +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +/// \file ext/omp.h +/// \brief The OpenMP header. +/// diff --git a/doxygen/tutorial/core/detail.dox b/doxygen/tutorial/core/detail.dox new file mode 100644 index 0000000..f786b2a --- /dev/null +++ b/doxygen/tutorial/core/detail.dox @@ -0,0 +1,36 @@ +/** + @page tutorial_core_notation Notations + +

Format Notations

+ Name | Format | Example + -------------------------------|----------------------------------------|-------------------------------- + scalars | normal italic | \f$m, \alpha, N\f$ + vectors | bold italic lowercase | \f$\boldsymbol{v}, \boldsymbol{\beta}\f$ + matrices | bold italic uppercase | \f$\boldsymbol{A}, \boldsymbol{\Omega}\f$ + combined matrices | bold Fraktur uppercase | \f$\boldsymbol{\mathfrak{Q}}, \boldsymbol{\mathfrak{Y}}\f$ + submatrices | sans serif / Fraktur uppercase | \f$\mathsf{U}, \mathfrak{Y}\f$ + things in \f$i\f$-th sketch | subscript bracketed | \f$\boldsymbol{Q}_{[i]}, \boldsymbol{Y}_{[i]}\f$ + things in \f$j\f$-th process | superscript parenthesized / angled | \f$\mathsf{V}^{(j)}, \mathsf{A}^{\langle j \rangle}\f$ +

Parameter Notations

+ + - \f$m\f$: the number of rows of the input matrix \f$\boldsymbol{A}\f$. + - \f$n\f$: the number of columns of the input matrix \f$\boldsymbol{A}\f$. + - \f$k\f$: the desired rank of approximate SVD. + - \f$p\f$: the oversampling parameter. + - \f$l\f$: the dimension of randomized sketches, i.e., \f$l=k+p\f$. + - \f$N\f$: the number of random sketches. + - \f$P\f$: the number of MPI processors. + - \f$m_b\f$: the row dimensions of a row-block, i.e., \f$m_b = \lceil \frac{m}{P} \rceil\f$. + - \f$n_b\f$: the column dimensions of a column-block, i.e., \f$n_b = \lceil \frac{n}{P} \rceil\f$. + - \f$m^{(j)}\f$: the exact row dimensions of the \f$j\f$-th row-block. + - For \f$j < P-1\f$, \f$m^{(j)} = m_b\f$. + - For \f$j = P-1\f$, \f$m^{(j)} = m - m_b(P-1)\f$. + - \f$m^{(0)} + m^{(1)} + \dotsb + m^{(P-1)} = m\f$. + - \f$n^{(j)}\f$: the exact column dimensions of the \f$j\f$-th column-block. + - For \f$j < P-1\f$, \f$n^{(j)} = n_b\f$. + - For \f$j = P-1\f$, \f$n^{(j)} = n - n_b(P-1)\f$. + - \f$n^{(0)} + n^{(1)} + \dotsb + n^{(P-1)} = n\f$. + + \see isvd_Param +*/ diff --git a/doxygen/tutorial/core/example.dox b/doxygen/tutorial/core/example.dox index b1dcde7..60ca091 100644 --- a/doxygen/tutorial/core/example.dox +++ b/doxygen/tutorial/core/example.dox @@ -1,52 +1,72 @@ /** - \page tutorial_core_example iSVD Example + \page tutorial_core_example Example Code -

iSVD

+ \ref isvd_dIsvd "iSVD driver" solves an approximate low-rank singular value decomposition of matrix 𝑨. \ref demo/cdemo.c is an example code. In the following, we describe the details of each part of this example. - iSVD solves an approximate low-rank singular value decomposition of matrix `a`. - -

Example

- - The following is an example of using \ref isvd_dIsvd "iSVD driver". The code is also available in `/demo/cdemo.c`. - - \includelineno demo/cdemo.c - -

Initialize Environment

+

Initialize Environment

The iSVD environment and random seed should be set up before calling the driver. \ref isvd_init also initialize the MPI environment and the MAGMA environment, and \ref isvd_printEnvironment display the the MPI environment and the MAGMA environment. - If one needs CPU only, one may use \ref isvd_init_cpu, \ref isvd_printEnvironment_cpu, and \ref isvd_finalize_cpu. + If one needs CPU only, one may use \ref isvd_init_cpu, \ref isvd_printEnvironment_cpu, and \ref isvd_finalize_cpu instead. \snippetlineno demo/cdemo.c init-isvd -

Load Data

+

Load Data

+ - In this example, we load the data from a Matrix Market format file. Note the all the MPI nodes should load the matrix. + In this example, we load the data from a Matrix Market format file. Note that all the MPI processors should load the matrix. \snippetlineno demo/cdemo.c load-data -

Allocate matrices

+

Allocate matrices

The output matrices should be allocated before calling the driver. See \ref isvd_dIsvd for the detail of matrix sizes. \snippetlineno demo/cdemo.c allocate-matrix -

Run iSVD

+

Run iSVD

+ - The iSVD driver compute the approximate low-rank singular value decomposition. See \ref isvd_dIsvd for the detail of the arguments. + The iSVD driver computes the approximate low-rank singular value decomposition. See \ref isvd_dIsvd for the detail of the arguments. The 𝑗-th MPI-rank stores the 𝑗-th block of the input matrix 𝑨 (see \ref tutorial_core_storage for detail). \snippetlineno demo/cdemo.c run-isvd -

Get Executing Time

+

Get Executing Time

- iSVD also records the executing time of each stage. + The iSVD driver also records the executing time of each stage. \snippetlineno demo/cdemo.c display-time -

Finalize

+

Finalize

Remember to \ref isvd_finalize "finalize" the environment. \snippetlineno demo/cdemo.c final-isvd +

Possible Result

+ + One may use **make run_cdemo** to run \ref demo/cdemo.c. If you want to compile it directly, please refer the usage guide in the main page. The following are the possible results of the example code. + + \code{.txt} + iSVD C demo + iSVD 1.2.0, 32-bit isvd_int_t, 64-bit pointer + 4 MPI processors, 4 OpenMP threads per process + + No GPU support + + Using Real Double Precision Gaussian Projection Sketching + Using Real Double Precision Gramian Orthogonalization + Using Real Double Precision Kolmogorov-Nagumo Integration + Using Real Double Precision Gramian Postprocessing + + Sketching ...................... done + Orthogonalizing ................ done + Integrating .................... done + Postprocessing ................. done + + Average total computing time: 0.085510 seconds. + Average sketching time: 0.030245 seconds. + Average orthogonalizing time: 0.028071 seconds. + Average integrating time: 0.026233 seconds. + Average postprocessing time: 0.000962 seconds. + \endcode + */ diff --git a/doxygen/tutorial/core/stage.dox b/doxygen/tutorial/core/stage.dox index f94ae11..a19f483 100644 --- a/doxygen/tutorial/core/stage.dox +++ b/doxygen/tutorial/core/stage.dox @@ -1,9 +1,31 @@ /** - \page tutorial_core_stage iSVD Stages + @page tutorial_core_stage Stages + + The iSVD algorithm contains four stages: sketching, orthogonalization, integration, and postprocessing. 
+ + Stage | Input | Output + --------------------|----------------------------------------------------|-------------------------------- + sketching | \f$\boldsymbol{A}\f$ | \f$\boldsymbol{\mathfrak{Y}}\f$ + orthogonalization | \f$\boldsymbol{\mathfrak{Y}}\f$ | \f$\boldsymbol{\mathfrak{Q}}\f$ + integration | \f$\boldsymbol{\mathfrak{Q}}\f$ | \f$\overline{\boldsymbol{Q}}\f$ + postprocessing | \f$\boldsymbol{A}, \overline{\boldsymbol{Q}}\f$ | \f$\boldsymbol{U}, \boldsymbol{V}, \boldsymbol{\sigma}\f$ + + Note that the Fraktur matrices are defined as + @f[ + \boldsymbol{\mathfrak{Y}} = + \begin{bmatrix} + \boldsymbol{Y}_{[0]} & \boldsymbol{Y}_{[1]} & \cdots & \boldsymbol{Y}_{[N-1]} + \end{bmatrix}, + \qquad + \boldsymbol{\mathfrak{Q}} = + \begin{bmatrix} + \boldsymbol{Q}_{[0]} & \boldsymbol{Q}_{[1]} & \cdots & \boldsymbol{Q}_{[N-1]} + \end{bmatrix}. + @f]

Sketching

- The sketching stage randomly sketches \ref isvd_Param::num_sketch "𝑁" rank-\ref isvd_Param::dim_sketch "𝑙" column subspaces \f$\boldsymbol{Y}_{[i]}\f$ of the input matrix \f$\boldsymbol{A}\f$. + The sketching stage randomly sketches 𝑁 rank-𝑙 column subspaces \f$\boldsymbol{Y}_{[i]}\f$ of the input matrix \f$\boldsymbol{A}\f$. - The \ref isvd_dSketchGaussianProjection "Gramian Projection Sketching" multiples \f$\boldsymbol{A}\f$ by some random matrices using Gaussian normal distribution. @@ -23,9 +45,8 @@

Postprocessing

- The postprocessing stage forms a rank-\ref isvd_Param::rank "𝑘" approximate SVD of \f$\boldsymbol{A}\f$ in the range of \f$\overline{\boldsymbol{Q}}\f$. + The postprocessing stage forms a rank-𝑘 approximate SVD of \f$\boldsymbol{A}\f$ in the range of \f$\overline{\boldsymbol{Q}}\f$. - The \ref isvd_dPostprocessGramian "Gramian Postprocessing" finds approximate SVD using the eigen-decomposition of \f$\overline{\boldsymbol{Q}}^\top \boldsymbol{A} \boldsymbol{A}^\top \overline{\boldsymbol{Q}}\f$. - The \ref isvd_dPostprocessSymmetric "Symmetric Postprocessing" finds approximate SVD using the eigen-decomposition of \f$\overline{\boldsymbol{Q}}^\top \boldsymbol{A} \overline{\boldsymbol{Q}}\f$ for symmetric input matrix \f$\boldsymbol{A}\f$. - */ diff --git a/doxygen/tutorial/core/storage.dox b/doxygen/tutorial/core/storage.dox new file mode 100644 index 0000000..875354c --- /dev/null +++ b/doxygen/tutorial/core/storage.dox @@ -0,0 +1,58 @@ +/** + \page tutorial_core_storage Parallelism Storage + +

Block-Row Storage

+ + In Block-Row Parallelism, we store the matrices in row-blocks. The \f$j\f$-th block is stored in the \f$j\f$-th MPI-rank. + + @f[ + \boldsymbol{A} = + \begin{bmatrix} + \mathsf{A}^{(0)} \\ \mathsf{A}^{(1)} \\ \vdots \\ \mathsf{A}^{(P-1)} + \end{bmatrix}, + \qquad + \boldsymbol{\mathfrak{Y}} = + \begin{bmatrix} + \mathfrak{Y}^{(0)} \\ \mathfrak{Y}^{(1)} \\ \vdots \\ \mathfrak{Y}^{(P-1)} + \end{bmatrix}, + \qquad + \boldsymbol{\mathfrak{Q}} = + \begin{bmatrix} + \mathfrak{Q}^{(0)} \\ \mathfrak{Q}^{(1)} \\ \vdots \\ \mathfrak{Q}^{(P-1)} + \end{bmatrix}, + \qquad + \overline{\boldsymbol{Q}} = + \begin{bmatrix} + \overline{\mathsf{Q}}^{(0)} \\ \overline{\mathsf{Q}}^{(1)} \\ \vdots \\ \overline{\mathsf{Q}}^{(P-1)} + \end{bmatrix}, + \qquad + \boldsymbol{U} = + \begin{bmatrix} + \mathsf{U}^{(0)} \\ \mathsf{U}^{(1)} \\ \vdots \\ \mathsf{U}^{(P-1)} + \end{bmatrix}, + \qquad + \boldsymbol{V} = + \begin{bmatrix} + \mathsf{V}^{(0)} \\ \mathsf{V}^{(1)} \\ \vdots \\ \mathsf{V}^{(P-1)} + \end{bmatrix}. + @f] + + The \f$j\f$-th block of the input matrices should contain \f$m^{(j)}\f$ rows, and the \f$j\f$-th block of the output matrices should contain \f$m_b\f$ rows. + + For example, let \f$\boldsymbol{A}\f$ be a matrix with 11 rows and let \f$P\f$ be 4. Then \f$\mathsf{A}^{(0)}\f$ are the (0-2)-th rows, \f$\mathsf{A}^{(1)}\f$ are the (3-5)-th rows, \f$\mathsf{A}^{(2)}\f$ are the (6-8)-th rows, \f$\mathsf{A}^{(3)}\f$ are the (9-10)-th rows. + +

Block-Column Storage

+ + In Block-Column Parallelism, we store the input matrix \f$\boldsymbol{A}\f$ in column-blocks. The \f$j\f$-th block is stored in the \f$j\f$-th MPI-rank. + + @f[ + \boldsymbol{A} = + \begin{bmatrix} + \mathsf{A}^{\langle 0 \rangle} & \mathsf{A}^{\langle 1 \rangle} & \cdots & \mathsf{A}^{\langle P-1 \rangle} + \end{bmatrix}. + @f] + + The \f$j\f$-th block of the input matrices should contain \f$n^{(j)}\f$ columns, and the \f$j\f$-th block of the input matrices should contain \f$n_b\f$ columns. + + For example, let \f$\boldsymbol{A}\f$ be a matrix with 11 columns and let \f$P\f$ be 4. Then \f$\mathsf{A}^{\langle 0 \rangle}\f$ are the (0-2)-th columns, \f$\mathsf{A}^{\langle 1 \rangle}\f$ are the (3-5)-th columns, \f$\mathsf{A}^{\langle 2 \rangle}\f$ are the (6-8)-th columns, \f$\mathsf{A}^{\langle 3 \rangle}\f$ are the (9-10)-th columns. +*/ diff --git a/doxygen/tutorial/main.dox b/doxygen/tutorial/main.dox index b01f899..b0d8a87 100644 --- a/doxygen/tutorial/main.dox +++ b/doxygen/tutorial/main.dox @@ -1,8 +1,10 @@ /** \page tutorial_main Tutorial -

Subpages

+

Subpages

- \subpage tutorial_core_example + - \subpage tutorial_core_notation - \subpage tutorial_core_stage + - \subpage tutorial_core_storage */ diff --git a/include/CMakeLists.txt b/include/CMakeLists.txt deleted file mode 100644 index 24c4f82..0000000 --- a/include/CMakeLists.txt +++ /dev/null @@ -1,11 +0,0 @@ -# The CMake setting of 'include/' - -isvd_set_config_var() - -# Configure files -isvd_configure_x_fn("${CMAKE_CURRENT_SOURCE_DIR}" "${CMAKE_CURRENT_CONFIG_DIR}" "${ISVD_S_TYPES}") -isvd_configure_x_fn("${CMAKE_CURRENT_SOURCE_DIR}" "${CMAKE_CURRENT_CONFIG_DIR}" "${ISVD_D_TYPES}") -isvd_configure_fn("${CMAKE_CURRENT_SOURCE_DIR}" "${CMAKE_CURRENT_CONFIG_DIR}") - -# Set install rule -install(DIRECTORY "${CMAKE_CURRENT_CONFIG_DIR}/c/" DESTINATION include/c) diff --git a/include/c/isvd/core/@x@_stage.h b/include/c/isvd/core/@x@_stage.h deleted file mode 100644 index 4c2b9ff..0000000 --- a/include/c/isvd/core/@x@_stage.h +++ /dev/null @@ -1,79 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -/// \file include/c/isvd/core/@x@_stage.h -/// \brief The iSVD stages. 
-/// -/// \author Mu Yang <> -/// \copyright MIT License -/// - -#ifndef ISVD_CORE_@X@_STAGE_H_ -#define ISVD_CORE_@X@_STAGE_H_ - -#include -#include - -#if defined(__cplusplus) -extern "C" { -#endif // __cplusplus - -// Sketching -void isvd_@x@SketchGaussianProjection( - const isvd_Param param, const @xtype@ *argv, const isvd_int_t argc, @xtype@ *retv, const isvd_int_t retc, - const char dista, const char ordera, const @xtype@ *a, const isvd_int_t lda, - @xtype@ *yst, const isvd_int_t ldyst, const isvd_int_t seed, const mpi_int_t mpi_root -); - -// Orthogonalization -void isvd_@x@OrthogonalizeTallSkinnyQr( - const isvd_Param param, const @xtype@ *argv, const isvd_int_t argc, @xtype@ *retv, const isvd_int_t retc, - @xtype@ *yst, const isvd_int_t ldyst -); - -void isvd_@x@OrthogonalizeGramian( - const isvd_Param param, const @xtype@ *argv, const isvd_int_t argc, @xtype@ *retv, const isvd_int_t retc, - @xtype@ *yst, const isvd_int_t ldyst -); - -// Integration -void isvd_@x@IntegrateKolmogorovNagumo( - const isvd_Param param, const @xtype@ *argv, const isvd_int_t argc, @xtype@ *retv, const isvd_int_t retc, - const @xtype@ *yst, const isvd_int_t ldyst, @xtype@ *qt, const isvd_int_t ldqt -); - -void isvd_@x@IntegrateWenYin( - const isvd_Param param, const @xtype@ *argv, const isvd_int_t argc, @xtype@ *retv, const isvd_int_t retc, - const @xtype@ *yst, const isvd_int_t ldyst, @xtype@ *qt, const isvd_int_t ldqt -); - -void isvd_@x@IntegrateHierarchicalReduction( - const isvd_Param param, const @xtype@ *argv, const isvd_int_t argc, @xtype@ *retv, const isvd_int_t retc, - @xtype@ *yst, const isvd_int_t ldyst, @xtype@ *qt, const isvd_int_t ldqt -); - -// Postprocessing -void isvd_@x@PostprocessTallSkinnyQr( - const isvd_Param param, const @xtype@ *argv, const isvd_int_t argc, @xtype@ *retv, const isvd_int_t retc, - const char dista, const char ordera, const @xtype@ *a, const isvd_int_t lda, const @xtype@ *qt, const isvd_int_t ldqt, - @xtype@ *s, @xtype@ *ut, const 
isvd_int_t ldut, @xtype@ *vt, const isvd_int_t ldvt, - const mpi_int_t ut_root, const mpi_int_t vt_root -); - -void isvd_@x@PostprocessGramian( - const isvd_Param param, const @xtype@ *argv, const isvd_int_t argc, @xtype@ *retv, const isvd_int_t retc, - const char dista, const char ordera, const @xtype@ *a, const isvd_int_t lda, const @xtype@ *qt, const isvd_int_t ldqt, - @xtype@ *s, @xtype@ *ut, const isvd_int_t ldut, @xtype@ *vt, const isvd_int_t ldvt, - const mpi_int_t ut_root, const mpi_int_t vt_root -); - -void isvd_@x@PostprocessSymmetric( - const isvd_Param param, const @xtype@ *argv, const isvd_int_t argc, @xtype@ *retv, const isvd_int_t retc, - const char dista, const char ordera, const @xtype@ *a, const isvd_int_t lda, const @xtype@ *qt, const isvd_int_t ldqt, - @xtype@ *s, @xtype@ *ut, const isvd_int_t ldut, @xtype@ *vt, const isvd_int_t ldvt, - const mpi_int_t ut_root, const mpi_int_t vt_root -); - -#if defined(__cplusplus) -} -#endif // __cplusplus - -#endif // ISVD_CORE_@X@_STAGE_H_ diff --git a/include/c/isvd/util/memory.h b/include/c/isvd/util/memory.h deleted file mode 100644 index c238b9e..0000000 --- a/include/c/isvd/util/memory.h +++ /dev/null @@ -1,42 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -/// \file include/c/isvd/util/memory.h -/// \brief The memory utilities. 
-/// -/// \author Mu Yang <> -/// \copyright MIT License -/// - -#ifndef ISVD_UTIL_MEMORY_H_ -#define ISVD_UTIL_MEMORY_H_ - -#include - -#if defined(__cplusplus) -extern "C" { -#endif // __cplusplus - -isvd_int_t* isvd_imalloc( const size_t num ); -float* isvd_smalloc( const size_t num ); -double* isvd_dmalloc( const size_t num ); -_Complex float* isvd_cmalloc( const size_t num ); -_Complex double* isvd_zmalloc( const size_t num ); - -void isvd_free( void *ptr ); - -void isvd_imemset0( isvd_int_t *ptr, const size_t num ); -void isvd_smemset0( float *ptr, const size_t num ); -void isvd_dmemset0( double *ptr, const size_t num ); -void isvd_cmemset0( _Complex float *ptr, const size_t num ); -void isvd_zmemset0( _Complex double *ptr, const size_t num ); - -void isvd_imemcpy( isvd_int_t *dst, const isvd_int_t *src, const size_t num ); -void isvd_smemcpy( float *dst, const float *src, const size_t num ); -void isvd_dmemcpy( double *dst, const double *src, const size_t num ); -void isvd_cmemcpy( _Complex float *dst, const _Complex float *src, const size_t num ); -void isvd_zmemcpy( _Complex double *dst, const _Complex double *src, const size_t num ); - -#if defined(__cplusplus) -} -#endif // __cplusplus - -#endif // ISVD_UTIL_MEMORY_H_ diff --git a/share/isvd/isvd-config.cmake b/share/isvd/isvd-config.cmake deleted file mode 100644 index bfcc580..0000000 --- a/share/isvd/isvd-config.cmake +++ /dev/null @@ -1,3 +0,0 @@ -# Compute paths -set(ISVD_INCLUDES "@CMAKE_INSTALL_PREFIX@/include") -set(ISVD_LIBRARIES "@CMAKE_INSTALL_PREFIX@/@LIB_FOLDER@/libisvd.so") diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 369cf18..8a9d2a0 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -2,83 +2,18 @@ isvd_set_config_var() +# Shared library SET(BUILD_SHARED_LIBS ON) - -# RPath SET(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_PREFIX}/${LIB_FOLDER}") set(CMAKE_INSTALL_RPATH_USE_LINK_PATH ON) -# Macro -macro(SET_TARGET target files) - # Set library - add_library(${target} SHARED 
${files}) - isvd_set_target(${target}) - set_property(TARGET ${target} PROPERTY VERSION ${ISVD_VERSION}) - install(TARGETS ${target} LIBRARY DESTINATION ${LIB_FOLDER}) -endmacro() - -# Set include paths -include_directories("${PROJECT_CONFIG_DIR}/include/c" - "${CMAKE_CURRENT_CONFIG_DIR}") - # Configure files isvd_configure_x_fn("${CMAKE_CURRENT_SOURCE_DIR}" "${CMAKE_CURRENT_CONFIG_DIR}" "${ISVD_S_TYPES}") isvd_configure_x_fn("${CMAKE_CURRENT_SOURCE_DIR}" "${CMAKE_CURRENT_CONFIG_DIR}" "${ISVD_D_TYPES}") isvd_configure_fn("${CMAKE_CURRENT_SOURCE_DIR}" "${CMAKE_CURRENT_CONFIG_DIR}") -# Break -if(NOT ISVD_BUILD_LIB) - return() -endif() - -# libisvd -set_target(isvd /dev/null) -set_property(TARGET isvd PROPERTY LINKER_LANGUAGE C) - -# libisvd_core -file(GLOB_RECURSE files "${CMAKE_CURRENT_CONFIG_DIR}/libisvd/core/*" - "${CMAKE_CURRENT_CONFIG_DIR}/libisvd/util/*") -set_target(isvd_core "${files}") -isvd_set_target_omp(isvd_core C) -isvd_set_target_mpi(isvd_core C) -isvd_set_target_blas(isvd_core) -target_link_libraries(isvd isvd_core) - -# libisvd_la -if(ISVD_OMP) - string(TOLOWER ${ISVD_OMP} isvd_omp) - set(isvd_omp "_${isvd_omp}") -else() - set(isvd_omp "") -endif() -if(ISVD_USE_MKL) - set(isvd_la isvd_la_mkl${isvd_omp}) -else() - set(isvd_la isvd_la_blas${isvd_omp}) -endif(ISVD_USE_MKL) -file(GLOB_RECURSE files "${CMAKE_CURRENT_CONFIG_DIR}/libisvd/la/*") -set_target(${isvd_la} "${files}") -isvd_set_target_omp(${isvd_la} C) -isvd_set_target_mpi(${isvd_la} C) -isvd_set_target_blas(${isvd_la}) -target_link_libraries(isvd ${isvd_la}) - -# libisvd_gpu_none -file(GLOB_RECURSE files "${CMAKE_CURRENT_CONFIG_DIR}/libisvd/nogpu/*") -set_target(isvd_gpu_none "${files}") -isvd_set_target_omp(isvd_gpu_none C) -isvd_set_target_mpi(isvd_gpu_none C) -isvd_set_target_blas(isvd_gpu_none) +add_subdirectory(include) +execute_process(COMMAND ${CMAKE_COMMAND} -E create_symlink "${CMAKE_CURRENT_CONFIG_DIR}/include" "${PROJECT_BINARY_DIR}/include") -# libisvd_gpu -if(ISVD_USE_GPU) - 
file(GLOB_RECURSE files "${CMAKE_CURRENT_CONFIG_DIR}/libisvd/gpu/*") - set_target(isvd_gpu_magma "${files}") - isvd_set_target_omp(isvd_gpu_magma C) - isvd_set_target_mpi(isvd_gpu_magma C) - isvd_set_target_blas(isvd_gpu_magma) - isvd_set_target_gpu(isvd_gpu_magma) - target_link_libraries(isvd isvd_gpu_magma) -else() - target_link_libraries(isvd isvd_gpu_none) -endif(ISVD_USE_GPU) +add_subdirectory(lib) +execute_process(COMMAND ${CMAKE_COMMAND} -E create_symlink "${CMAKE_CURRENT_CONFIG_DIR}/lib" "${PROJECT_BINARY_DIR}/lib") diff --git a/src/include/CMakeLists.txt b/src/include/CMakeLists.txt new file mode 100644 index 0000000..c9c9494 --- /dev/null +++ b/src/include/CMakeLists.txt @@ -0,0 +1,4 @@ +# The CMake setting of 'src/include/' + +# Set install rule +install(DIRECTORY "${CMAKE_CURRENT_CONFIG_DIR}/c/" DESTINATION include/c) diff --git a/include/c/isvd.h b/src/include/c/isvd.h similarity index 98% rename from include/c/isvd.h rename to src/include/c/isvd.h index 49e5fc7..19656dc 100644 --- a/include/c/isvd.h +++ b/src/include/c/isvd.h @@ -9,7 +9,6 @@ #ifndef ISVD_H_ #define ISVD_H_ -#include #include #include #include diff --git a/include/c/isvd/config.h b/src/include/c/isvd/config.h similarity index 100% rename from include/c/isvd/config.h rename to src/include/c/isvd/config.h diff --git a/include/c/isvd/core.h b/src/include/c/isvd/core.h similarity index 100% rename from include/c/isvd/core.h rename to src/include/c/isvd/core.h diff --git a/include/c/isvd/core/@x@_driver.h b/src/include/c/isvd/core/@x@_driver.h similarity index 76% rename from include/c/isvd/core/@x@_driver.h rename to src/include/c/isvd/core/@x@_driver.h index 616537e..69d7942 100644 --- a/include/c/isvd/core/@x@_driver.h +++ b/src/include/c/isvd/core/@x@_driver.h @@ -19,10 +19,10 @@ extern "C" { void isvd_@x@Isvd( const char *alg_s, const char *alg_o, const char *alg_i, const char *alg_p, const isvd_int_t m, const isvd_int_t n, const isvd_int_t k, const isvd_int_t p, const isvd_int_t N, - 
const @xtype@ *argv[4], const isvd_int_t argc[4], @xtype@ *retv[4], const isvd_int_t retc[4], + const @xtype_____@ *argv[4], const isvd_int_t argc[4], @xtype_____@ *retv[4], const isvd_int_t retc[4], double time[4], FILE *stream, - const char dista, const char ordera, const @xtype@ *a, const isvd_int_t lda, - @xtype@ *s, @xtype@ *ut, const isvd_int_t ldut, @xtype@ *vt, const isvd_int_t ldvt, + const char dista, const char ordera, const @xtype_____@ *a, const isvd_int_t lda, + @xtype_____@ *s, @xtype_____@ *ut, const isvd_int_t ldut, @xtype_____@ *vt, const isvd_int_t ldvt, const isvd_int_t seed, const mpi_int_t ut_root, const mpi_int_t vt_root, const mpi_int_t mpi_root, const isvd_MpiComm mpi_comm ); diff --git a/src/include/c/isvd/core/@x@_stage.h b/src/include/c/isvd/core/@x@_stage.h new file mode 100644 index 0000000..c54cdb8 --- /dev/null +++ b/src/include/c/isvd/core/@x@_stage.h @@ -0,0 +1,82 @@ +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +/// \file include/c/isvd/core/@x@_stage.h +/// \brief The iSVD stages. 
+/// +/// \author Mu Yang <> +/// \copyright MIT License +/// + +#ifndef ISVD_CORE_@X@_STAGE_H_ +#define ISVD_CORE_@X@_STAGE_H_ + +#include +#include + +#if defined(__cplusplus) +extern "C" { +#endif // __cplusplus + +// Sketching +void isvd_@x@SketchGaussianProjection( + const isvd_Param param, const @xtype_____@ *argv, const isvd_int_t argc, @xtype_____@ *retv, const isvd_int_t retc, + const char dista, const char ordera, const @xtype_____@ *a, const isvd_int_t lda, + @xtype_____@ *yst, const isvd_int_t ldyst, const isvd_int_t seed, const mpi_int_t mpi_root +); + +// Orthogonalization +void isvd_@x@OrthogonalizeTallSkinnyQr( + const isvd_Param param, const @xtype_____@ *argv, const isvd_int_t argc, @xtype_____@ *retv, const isvd_int_t retc, + @xtype_____@ *yst, const isvd_int_t ldyst +); + +void isvd_@x@OrthogonalizeGramian( + const isvd_Param param, const @xtype_____@ *argv, const isvd_int_t argc, @xtype_____@ *retv, const isvd_int_t retc, + @xtype_____@ *yst, const isvd_int_t ldyst +); + +// Integration +void isvd_@x@IntegrateKolmogorovNagumo( + const isvd_Param param, const @xtype_____@ *argv, const isvd_int_t argc, @xtype_____@ *retv, const isvd_int_t retc, + const @xtype_____@ *yst, const isvd_int_t ldyst, @xtype_____@ *qt, const isvd_int_t ldqt +); + +void isvd_@x@IntegrateWenYin( + const isvd_Param param, const @xtype_____@ *argv, const isvd_int_t argc, @xtype_____@ *retv, const isvd_int_t retc, + const @xtype_____@ *yst, const isvd_int_t ldyst, @xtype_____@ *qt, const isvd_int_t ldqt +); + +void isvd_@x@IntegrateHierarchicalReduction( + const isvd_Param param, const @xtype_____@ *argv, const isvd_int_t argc, @xtype_____@ *retv, const isvd_int_t retc, + @xtype_____@ *yst, const isvd_int_t ldyst, @xtype_____@ *qt, const isvd_int_t ldqt +); + +// Postprocessing +void isvd_@x@PostprocessTallSkinnyQr( + const isvd_Param param, const @xtype_____@ *argv, const isvd_int_t argc, @xtype_____@ *retv, const isvd_int_t retc, + const char dista, const char ordera, + 
const @xtype_____@ *a, const isvd_int_t lda, const @xtype_____@ *qt, const isvd_int_t ldqt, + @xtype_____@ *s, @xtype_____@ *ut, const isvd_int_t ldut, @xtype_____@ *vt, const isvd_int_t ldvt, + const mpi_int_t ut_root, const mpi_int_t vt_root +); + +void isvd_@x@PostprocessGramian( + const isvd_Param param, const @xtype_____@ *argv, const isvd_int_t argc, @xtype_____@ *retv, const isvd_int_t retc, + const char dista, const char ordera, + const @xtype_____@ *a, const isvd_int_t lda, const @xtype_____@ *qt, const isvd_int_t ldqt, + @xtype_____@ *s, @xtype_____@ *ut, const isvd_int_t ldut, @xtype_____@ *vt, const isvd_int_t ldvt, + const mpi_int_t ut_root, const mpi_int_t vt_root +); + +void isvd_@x@PostprocessSymmetric( + const isvd_Param param, const @xtype_____@ *argv, const isvd_int_t argc, @xtype_____@ *retv, const isvd_int_t retc, + const char dista, const char ordera, + const @xtype_____@ *a, const isvd_int_t lda, const @xtype_____@ *qt, const isvd_int_t ldqt, + @xtype_____@ *s, @xtype_____@ *ut, const isvd_int_t ldut, @xtype_____@ *vt, const isvd_int_t ldvt, + const mpi_int_t ut_root, const mpi_int_t vt_root +); + +#if defined(__cplusplus) +} +#endif // __cplusplus + +#endif // ISVD_CORE_@X@_STAGE_H_ diff --git a/include/c/isvd/core/env.h b/src/include/c/isvd/core/env.h similarity index 100% rename from include/c/isvd/core/env.h rename to src/include/c/isvd/core/env.h diff --git a/include/c/isvd/core/param.h b/src/include/c/isvd/core/param.h similarity index 94% rename from include/c/isvd/core/param.h rename to src/include/c/isvd/core/param.h index 59d849b..56e38fc 100644 --- a/include/c/isvd/core/param.h +++ b/src/include/c/isvd/core/param.h @@ -19,6 +19,8 @@ extern "C" { /// \ingroup c_core_module /// \brief The parameters. /// +/// \see \ref tutorial_core_notation +/// typedef struct { /// The MPI communicator. @@ -36,10 +38,10 @@ typedef struct { /// \f$n\f$ The number of columns of the matrix. 
const isvd_int_t ncol; - /// \f$m_j\f$ The number of rows of current MPI process. + /// \f$m^{(j)}\f$ The number of rows of current MPI process. const isvd_int_t nrow_proc; - /// \f$n_j\f$ The number of columns of current MPI process. + /// \f$n^{(j)}\f$ The number of columns of current MPI process. const isvd_int_t ncol_proc; /// \f$m_b = \lceil \frac{m}{P} \rceil\f$ The number of rows per MPI process. diff --git a/include/c/isvd/def.h b/src/include/c/isvd/def.h similarity index 50% rename from include/c/isvd/def.h rename to src/include/c/isvd/def.h index f4f9a2c..a346232 100644 --- a/include/c/isvd/def.h +++ b/src/include/c/isvd/def.h @@ -9,9 +9,11 @@ #ifndef ISVD_DEF_H_ #define ISVD_DEF_H_ +#if !defined(DOXYGEN_SHOULD_SKIP_THIS) #if !defined(__STDC_FORMAT_MACROS) #define __STDC_FORMAT_MACROS -#endif +#endif // __STDC_FORMAT_MACROS +#endif // DOXYGEN_SHOULD_SKIP_THIS #include #include @@ -26,31 +28,57 @@ #undef imaginary #undef complex +#if !defined(DOXYGEN_SHOULD_SKIP_THIS) #if !defined(__cplusplus) #define ISVD_UNKNOWN #else // __cplusplus #define ISVD_UNKNOWN ... #endif // __cplusplus +#endif // DOXYGEN_SHOULD_SKIP_THIS #if defined(__cplusplus) extern "C" { #endif // __cplusplus /// \ingroup c_core_module -/// \brief The type of index. +/// \brief The type of index. (N = 32/64) +#if !defined(DOXYGEN_SHOULD_SKIP_THIS) #if !defined(ISVD_USE_ILP64) -typedef int32_t isvd_int_t; +typedef int32_t isvd_int_t; #else // ISVD_USE_ILP64 -typedef int64_t isvd_int_t; +typedef int64_t isvd_int_t; #endif // ISVD_USE_ILP64 +#else // DOXYGEN_SHOULD_SKIP_THIS +typedef intN_t isvd_int_t; +#endif // DOXYGEN_SHOULD_SKIP_THIS /// \ingroup c_core_module -/// \brief The type of unsigned index. +/// \brief The type of unsigned index. 
(N = 32/64) +#if !defined(DOXYGEN_SHOULD_SKIP_THIS) #if !defined(ISVD_USE_ILP64) -typedef uint32_t isvd_uint_t; +typedef uint32_t isvd_uint_t; #else // ISVD_USE_ILP64 -typedef uint64_t isvd_uint_t; +typedef uint64_t isvd_uint_t; #endif // ISVD_USE_ILP64 +#else // DOXYGEN_SHOULD_SKIP_THIS +typedef uintN_t isvd_uint_t; +#endif // DOXYGEN_SHOULD_SKIP_THIS + +/// \ingroup c_core_module +/// \brief The type of real single floating point. +typedef float isvd_s_val_t; + +/// \ingroup c_core_module +/// \brief The type of real double floating point. +typedef double isvd_d_val_t; + +/// \ingroup c_core_module +/// \brief The type of complex single floating point. +typedef _Complex float isvd_c_val_t; + +/// \ingroup c_core_module +/// \brief The type of complex double floating point. +typedef _Complex double isvd_z_val_t; /// \ingroup c_core_module /// \brief The type of MPI index. @@ -62,7 +90,11 @@ typedef int omp_int_t; /// \ingroup c_core_module /// \brief The MPI communicator type. +#if !defined(DOXYGEN_SHOULD_SKIP_THIS) typedef int isvd_MpiComm; +#else // DOXYGEN_SHOULD_SKIP_THIS +typedef MPI_Comm isvd_MpiComm; +#endif // DOXYGEN_SHOULD_SKIP_THIS #if defined(__cplusplus) } diff --git a/include/c/isvd/gpu.h b/src/include/c/isvd/gpu.h similarity index 100% rename from include/c/isvd/gpu.h rename to src/include/c/isvd/gpu.h diff --git a/include/c/isvd/gpu/@x@_stage.h b/src/include/c/isvd/gpu/@x@_stage.h similarity index 59% rename from include/c/isvd/gpu/@x@_stage.h rename to src/include/c/isvd/gpu/@x@_stage.h index 5607219..62cefab 100644 --- a/include/c/isvd/gpu/@x@_stage.h +++ b/src/include/c/isvd/gpu/@x@_stage.h @@ -18,60 +18,63 @@ extern "C" { //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// /// \ingroup c_gpu_@x@_stage_module -/// \brief GPU Gaussian Projection Sketching (@xname@ precision) +/// \brief GPU Gaussian Projection Sketching (@xname@ precision). 
/// /// \copydetails isvd_@x@SketchGaussianProjection /// /// \attention Set \ref isvd_gpu_memory_limit as the limit of GPU memory usage. /// void isvd_@x@SketchGaussianProjection_gpu( - const isvd_Param param, const @xtype@ *argv, const isvd_int_t argc, @xtype@ *retv, const isvd_int_t retc, - const char dista, const char ordera, const @xtype@ *a, const isvd_int_t lda, - @xtype@ *yst, const isvd_int_t ldyst, const isvd_int_t seed, const mpi_int_t mpi_root + const isvd_Param param, const @xtype_____@ *argv, const isvd_int_t argc, @xtype_____@ *retv, const isvd_int_t retc, + const char dista, const char ordera, const @xtype_____@ *a, const isvd_int_t lda, + @xtype_____@ *yst, const isvd_int_t ldyst, const isvd_int_t seed, const mpi_int_t mpi_root ); //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// /// \ingroup c_gpu_@x@_stage_module -/// \brief GPU Tall-Skinny QR Postprocessing (@xname@ precision) +/// \brief GPU Tall-Skinny QR Postprocessing (@xname@ precision). /// /// \copydetails isvd_@x@PostprocessTallSkinnyQr /// /// \attention Set \ref isvd_gpu_memory_limit as the limit of GPU memory usage. 
/// void isvd_@x@PostprocessTallSkinnyQr_gpu( - const isvd_Param param, const @xtype@ *argv, const isvd_int_t argc, @xtype@ *retv, const isvd_int_t retc, - const char dista, const char ordera, const @xtype@ *a, const isvd_int_t lda, const @xtype@ *qt, const isvd_int_t ldqt, - @xtype@ *s, @xtype@ *ut, const isvd_int_t ldut, @xtype@ *vt, const isvd_int_t ldvt, + const isvd_Param param, const @xtype_____@ *argv, const isvd_int_t argc, @xtype_____@ *retv, const isvd_int_t retc, + const char dista, const char ordera, + const @xtype_____@ *a, const isvd_int_t lda, const @xtype_____@ *qt, const isvd_int_t ldqt, + @xtype_____@ *s, @xtype_____@ *ut, const isvd_int_t ldut, @xtype_____@ *vt, const isvd_int_t ldvt, const mpi_int_t ut_root, const mpi_int_t vt_root ); //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// /// \ingroup c_gpu_@x@_stage_module -/// \brief GPU Gramian Postprocessing (@xname@ precision) +/// \brief GPU Gramian Postprocessing (@xname@ precision). /// /// \copydetails isvd_@x@PostprocessGramian /// /// \attention Set \ref isvd_gpu_memory_limit as the limit of GPU memory usage. 
/// void isvd_@x@PostprocessGramian_gpu( - const isvd_Param param, const @xtype@ *argv, const isvd_int_t argc, @xtype@ *retv, const isvd_int_t retc, - const char dista, const char ordera, const @xtype@ *a, const isvd_int_t lda, const @xtype@ *qt, const isvd_int_t ldqt, - @xtype@ *s, @xtype@ *ut, const isvd_int_t ldut, @xtype@ *vt, const isvd_int_t ldvt, + const isvd_Param param, const @xtype_____@ *argv, const isvd_int_t argc, @xtype_____@ *retv, const isvd_int_t retc, + const char dista, const char ordera, + const @xtype_____@ *a, const isvd_int_t lda, const @xtype_____@ *qt, const isvd_int_t ldqt, + @xtype_____@ *s, @xtype_____@ *ut, const isvd_int_t ldut, @xtype_____@ *vt, const isvd_int_t ldvt, const mpi_int_t ut_root, const mpi_int_t vt_root ); //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// /// \ingroup c_gpu_@x@_stage_module -/// \brief GPU Symmetric Postprocessing (@xname@ precision) +/// \brief GPU Symmetric Postprocessing (@xname@ precision). /// /// \copydetails isvd_@x@PostprocessSymmetric /// /// \attention Set \ref isvd_gpu_memory_limit as the limit of GPU memory usage. 
/// void isvd_@x@PostprocessSymmetric_gpu( - const isvd_Param param, const @xtype@ *argv, const isvd_int_t argc, @xtype@ *retv, const isvd_int_t retc, - const char dista, const char ordera, const @xtype@ *a, const isvd_int_t lda, const @xtype@ *qt, const isvd_int_t ldqt, - @xtype@ *s, @xtype@ *ut, const isvd_int_t ldut, @xtype@ *vt, const isvd_int_t ldvt, + const isvd_Param param, const @xtype_____@ *argv, const isvd_int_t argc, @xtype_____@ *retv, const isvd_int_t retc, + const char dista, const char ordera, + const @xtype_____@ *a, const isvd_int_t lda, const @xtype_____@ *qt, const isvd_int_t ldqt, + @xtype_____@ *s, @xtype_____@ *ut, const isvd_int_t ldut, @xtype_____@ *vt, const isvd_int_t ldvt, const mpi_int_t ut_root, const mpi_int_t vt_root ); diff --git a/include/c/isvd/gpu/env.h b/src/include/c/isvd/gpu/env.h similarity index 100% rename from include/c/isvd/gpu/env.h rename to src/include/c/isvd/gpu/env.h diff --git a/include/c/isvd/la.h b/src/include/c/isvd/la.h similarity index 100% rename from include/c/isvd/la.h rename to src/include/c/isvd/la.h diff --git a/include/c/isvd/la/blas.h b/src/include/c/isvd/la/blas.h similarity index 100% rename from include/c/isvd/la/blas.h rename to src/include/c/isvd/la/blas.h diff --git a/include/c/isvd/la/blas/blas1.h b/src/include/c/isvd/la/blas/blas1.h similarity index 99% rename from include/c/isvd/la/blas/blas1.h rename to src/include/c/isvd/la/blas/blas1.h index f0e6b67..ec24ebf 100644 --- a/include/c/isvd/la/blas/blas1.h +++ b/src/include/c/isvd/la/blas/blas1.h @@ -15,7 +15,7 @@ extern "C" { #endif // __cplusplus -@ISVD_LA_BLAS_TYPE_DEFINE@ +@ISVD_TYPE_MACRO_DEFINE@ #if !defined(DOXYGEN_SHOULD_SKIP_THIS) @@ -169,7 +169,7 @@ static inline void isvd_zdScal( ) { zdscal_(&n, &alpha, x, &incx); } //\} -@ISVD_LA_BLAS_TYPE_UNDEF@ +@ISVD_TYPE_MACRO_UNDEF@ #if defined(__cplusplus) } diff --git a/include/c/isvd/la/blas/blas2.h b/src/include/c/isvd/la/blas/blas2.h similarity index 91% rename from 
include/c/isvd/la/blas/blas2.h rename to src/include/c/isvd/la/blas/blas2.h index ae4d7e2..96ffe77 100644 --- a/include/c/isvd/la/blas/blas2.h +++ b/src/include/c/isvd/la/blas/blas2.h @@ -15,13 +15,13 @@ extern "C" { #endif // __cplusplus -@ISVD_LA_BLAS_TYPE_DEFINE@ +@ISVD_TYPE_MACRO_DEFINE@ #if !defined(DOXYGEN_SHOULD_SKIP_THIS) #endif // DOXYGEN_SHOULD_SKIP_THIS -@ISVD_LA_BLAS_TYPE_UNDEF@ +@ISVD_TYPE_MACRO_UNDEF@ #if defined(__cplusplus) } diff --git a/include/c/isvd/la/blas/blas3.h b/src/include/c/isvd/la/blas/blas3.h similarity index 99% rename from include/c/isvd/la/blas/blas3.h rename to src/include/c/isvd/la/blas/blas3.h index 0769a96..9c89688 100644 --- a/include/c/isvd/la/blas/blas3.h +++ b/src/include/c/isvd/la/blas/blas3.h @@ -15,7 +15,7 @@ extern "C" { #endif // __cplusplus -@ISVD_LA_BLAS_TYPE_DEFINE@ +@ISVD_TYPE_MACRO_DEFINE@ #if !defined(DOXYGEN_SHOULD_SKIP_THIS) @@ -102,7 +102,7 @@ static inline void isvd_zSyrk( ) { zherk_(&uplo, &trans, &n, &k, &alpha, a, &lda, &beta, c, &ldc); } //\} -@ISVD_LA_BLAS_TYPE_UNDEF@ +@ISVD_TYPE_MACRO_UNDEF@ #if defined(__cplusplus) } diff --git a/include/c/isvd/la/blas/blas_like.h b/src/include/c/isvd/la/blas/blas_like.h similarity index 98% rename from include/c/isvd/la/blas/blas_like.h rename to src/include/c/isvd/la/blas/blas_like.h index 39f25c6..978172c 100644 --- a/include/c/isvd/la/blas/blas_like.h +++ b/src/include/c/isvd/la/blas/blas_like.h @@ -15,7 +15,7 @@ extern "C" { #endif // __cplusplus -@ISVD_LA_BLAS_TYPE_DEFINE@ +@ISVD_TYPE_MACRO_DEFINE@ //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// /// \ingroup c_la_blas_like_module @@ -96,7 +96,7 @@ void isvd_zGemmt( ); //\} -@ISVD_LA_BLAS_TYPE_UNDEF@ +@ISVD_TYPE_MACRO_UNDEF@ #if defined(__cplusplus) } diff --git a/include/c/isvd/la/lapack.h b/src/include/c/isvd/la/lapack.h similarity index 100% rename from include/c/isvd/la/lapack.h rename to src/include/c/isvd/la/lapack.h diff --git 
a/include/c/isvd/la/lapack/auxiliary.h b/src/include/c/isvd/la/lapack/auxiliary.h similarity index 97% rename from include/c/isvd/la/lapack/auxiliary.h rename to src/include/c/isvd/la/lapack/auxiliary.h index 66f04d9..8bd65e1 100644 --- a/include/c/isvd/la/lapack/auxiliary.h +++ b/src/include/c/isvd/la/lapack/auxiliary.h @@ -15,7 +15,7 @@ extern "C" { #endif // __cplusplus -@ISVD_LA_BLAS_TYPE_DEFINE@ +@ISVD_TYPE_MACRO_DEFINE@ #if !defined(DOXYGEN_SHOULD_SKIP_THIS) @@ -54,7 +54,7 @@ bool isvd_Lsame( const CHAR1 ca, const CHAR1 cb ); /// \brief Tests two character strings for equality regardless of the case. bool isvd_Lsamen( const INT n, const CHAR1 *sa, const CHAR1 *sb ); -@ISVD_LA_BLAS_TYPE_UNDEF@ +@ISVD_TYPE_MACRO_UNDEF@ #if defined(__cplusplus) } diff --git a/include/c/isvd/la/lapack/least_square.h b/src/include/c/isvd/la/lapack/least_square.h similarity index 97% rename from include/c/isvd/la/lapack/least_square.h rename to src/include/c/isvd/la/lapack/least_square.h index 15f17d6..98dd62e 100644 --- a/include/c/isvd/la/lapack/least_square.h +++ b/src/include/c/isvd/la/lapack/least_square.h @@ -15,7 +15,7 @@ extern "C" { #endif // __cplusplus -@ISVD_LA_BLAS_TYPE_DEFINE@ +@ISVD_TYPE_MACRO_DEFINE@ //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// /// \ingroup c_la_lapack_ls_module @@ -41,7 +41,7 @@ void isvd_zGesvd( const CHAR1 jobu, const CHAR1 jobvt, const INT m, const INT n, const INT ldu, COMP8 *v, const INT ldvt ); //\} -@ISVD_LA_BLAS_TYPE_UNDEF@ +@ISVD_TYPE_MACRO_UNDEF@ #if defined(__cplusplus) } diff --git a/include/c/isvd/la/lapack/linear_equation.h b/src/include/c/isvd/la/lapack/linear_equation.h similarity index 95% rename from include/c/isvd/la/lapack/linear_equation.h rename to src/include/c/isvd/la/lapack/linear_equation.h index a339cd7..83fdd1f 100644 --- a/include/c/isvd/la/lapack/linear_equation.h +++ b/src/include/c/isvd/la/lapack/linear_equation.h @@ -15,7 +15,7 @@ 
extern "C" { #endif // __cplusplus -@ISVD_LA_BLAS_TYPE_DEFINE@ +@ISVD_TYPE_MACRO_DEFINE@ //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// /// \ingroup c_la_lapack_le_module @@ -27,7 +27,7 @@ void isvd_cGeinv( const INT n, COMP4 *a, const INT lda ); void isvd_zGeinv( const INT n, COMP8 *a, const INT lda ); //\} -@ISVD_LA_BLAS_TYPE_UNDEF@ +@ISVD_TYPE_MACRO_UNDEF@ #if defined(__cplusplus) } diff --git a/include/c/isvd/la/vml.h b/src/include/c/isvd/la/vml.h similarity index 100% rename from include/c/isvd/la/vml.h rename to src/include/c/isvd/la/vml.h diff --git a/include/c/isvd/la/vml/mathematical.h b/src/include/c/isvd/la/vml/mathematical.h similarity index 97% rename from include/c/isvd/la/vml/mathematical.h rename to src/include/c/isvd/la/vml/mathematical.h index ca43144..1401138 100644 --- a/include/c/isvd/la/vml/mathematical.h +++ b/src/include/c/isvd/la/vml/mathematical.h @@ -15,7 +15,7 @@ extern "C" { #endif // __cplusplus -@ISVD_LA_BLAS_TYPE_DEFINE@ +@ISVD_TYPE_MACRO_DEFINE@ //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// /// \ingroup c_la_vml_math_module @@ -47,7 +47,7 @@ void isvd_vcSqrt( const INT n, const COMP4 *a, COMP4 *y ); void isvd_vzSqrt( const INT n, const COMP8 *a, COMP8 *y ); //\} -@ISVD_LA_BLAS_TYPE_UNDEF@ +@ISVD_TYPE_MACRO_UNDEF@ #if defined(__cplusplus) } diff --git a/include/c/isvd/la/vml/power_root.h b/src/include/c/isvd/la/vml/power_root.h similarity index 95% rename from include/c/isvd/la/vml/power_root.h rename to src/include/c/isvd/la/vml/power_root.h index d41dab7..e3dca13 100644 --- a/include/c/isvd/la/vml/power_root.h +++ b/src/include/c/isvd/la/vml/power_root.h @@ -15,7 +15,7 @@ extern "C" { #endif // __cplusplus -@ISVD_LA_BLAS_TYPE_DEFINE@ +@ISVD_TYPE_MACRO_DEFINE@ 
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// /// \ingroup c_la_vml_math_module @@ -27,7 +27,7 @@ void isvd_vcDiv( const INT n, const COMP4 *a, const COMP4 *b, COMP4 *y ); void isvd_vzDiv( const INT n, const COMP8 *a, const COMP8 *b, COMP8 *y ); //\} -@ISVD_LA_BLAS_TYPE_UNDEF@ +@ISVD_TYPE_MACRO_UNDEF@ #if defined(__cplusplus) } diff --git a/include/c/isvd/la/vsl.h b/src/include/c/isvd/la/vsl.h similarity index 77% rename from include/c/isvd/la/vsl.h rename to src/include/c/isvd/la/vsl.h index 80d28f3..f4e5d60 100644 --- a/include/c/isvd/la/vsl.h +++ b/src/include/c/isvd/la/vsl.h @@ -11,6 +11,7 @@ #include #include +#include //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// /// \defgroup c_la_vsl_service_module Service Routines @@ -24,4 +25,10 @@ /// \brief The VSL Distribution Generators /// +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +/// \defgroup c_la_vsl_driver_module Drivers +/// \ingroup c_la_vsl_module +/// \brief The VSL Drivers +/// + #endif // ISVD_LA_VSL_H_ diff --git a/include/c/isvd/la/vsl/distribution.h b/src/include/c/isvd/la/vsl/distribution.h similarity index 95% rename from include/c/isvd/la/vsl/distribution.h rename to src/include/c/isvd/la/vsl/distribution.h index f405a80..24ee155 100644 --- a/include/c/isvd/la/vsl/distribution.h +++ b/src/include/c/isvd/la/vsl/distribution.h @@ -16,7 +16,7 @@ extern "C" { #endif // __cplusplus -@ISVD_LA_BLAS_TYPE_DEFINE@ +@ISVD_TYPE_MACRO_DEFINE@ //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// /// \ingroup c_la_vsl_distribution_module @@ -26,7 +26,7 @@ void isvd_vsRngGaussian( isvd_VSLStreamStatePtr stream, const INT n, REAL4 *r, c void isvd_vdRngGaussian( isvd_VSLStreamStatePtr stream, 
const INT n, REAL8 *r, const REAL8 a, const REAL8 sigma ); //\} -@ISVD_LA_BLAS_TYPE_UNDEF@ +@ISVD_TYPE_MACRO_UNDEF@ #if defined(__cplusplus) } diff --git a/src/include/c/isvd/la/vsl/driver.h b/src/include/c/isvd/la/vsl/driver.h new file mode 100644 index 0000000..8d05a6a --- /dev/null +++ b/src/include/c/isvd/la/vsl/driver.h @@ -0,0 +1,35 @@ +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +/// \file include/c/isvd/la/vsl/driver.h +/// \brief The VSL driver header. +/// +/// \author Mu Yang <> +/// \copyright MIT License +/// + +#ifndef ISVD_LA_VSL_DRIVER_H_ +#define ISVD_LA_VSL_DRIVER_H_ + +#include +#include + +#if defined(__cplusplus) +extern "C" { +#endif // __cplusplus + +@ISVD_TYPE_MACRO_DEFINE@ + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +/// \ingroup c_la_vsl_driver_module +/// \brief Generates normally distributed random numbers in parallel. +//\{ +void isvd_vsRngGaussianDriver( const INT seed, const INT nskip, const INT n, REAL4 *r, const REAL4 a, const REAL4 sigma ); +void isvd_vdRngGaussianDriver( const INT seed, const INT nskip, const INT n, REAL8 *r, const REAL8 a, const REAL8 sigma ); +//\} + +@ISVD_TYPE_MACRO_UNDEF@ + +#if defined(__cplusplus) +} +#endif // __cplusplus + +#endif // ISVD_LA_VSL_DRIVER_H_ diff --git a/include/c/isvd/la/vsl/service.h b/src/include/c/isvd/la/vsl/service.h similarity index 96% rename from include/c/isvd/la/vsl/service.h rename to src/include/c/isvd/la/vsl/service.h index 05139de..134c34d 100644 --- a/include/c/isvd/la/vsl/service.h +++ b/src/include/c/isvd/la/vsl/service.h @@ -15,7 +15,7 @@ extern "C" { #endif // __cplusplus -@ISVD_LA_BLAS_TYPE_DEFINE@ +@ISVD_TYPE_MACRO_DEFINE@ typedef INT* isvd_VSLStreamStatePtr; @@ -34,7 +34,7 @@ void isvd_vslDeleteStream( isvd_VSLStreamStatePtr *streamp ); /// \brief Initializes the stream by the skip-ahead method. 
void isvd_vslSkipAheadStream( isvd_VSLStreamStatePtr stream, const INT nskip ); -@ISVD_LA_BLAS_TYPE_UNDEF@ +@ISVD_TYPE_MACRO_UNDEF@ #if defined(__cplusplus) } diff --git a/include/c/isvd/util.h b/src/include/c/isvd/util.h similarity index 100% rename from include/c/isvd/util.h rename to src/include/c/isvd/util.h diff --git a/include/c/isvd/util/io.h b/src/include/c/isvd/util/io.h similarity index 72% rename from include/c/isvd/util/io.h rename to src/include/c/isvd/util/io.h index bb270ac..892ebc7 100644 --- a/include/c/isvd/util/io.h +++ b/src/include/c/isvd/util/io.h @@ -15,9 +15,13 @@ extern "C" { #endif // __cplusplus -void isvd_ifget( FILE *stream, isvd_int_t *varp ); -void isvd_sfget( FILE *stream, float *varp ); -void isvd_dfget( FILE *stream, double *varp ); +@ISVD_TYPE_MACRO_DEFINE@ + +void isvd_ifget( FILE *stream, INT *varp ); +void isvd_sfget( FILE *stream, REAL4 *varp ); +void isvd_dfget( FILE *stream, REAL8 *varp ); + +@ISVD_TYPE_MACRO_UNDEF@ #if defined(__cplusplus) } diff --git a/src/include/c/isvd/util/memory.h b/src/include/c/isvd/util/memory.h new file mode 100644 index 0000000..cd1fa37 --- /dev/null +++ b/src/include/c/isvd/util/memory.h @@ -0,0 +1,46 @@ +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +/// \file include/c/isvd/util/memory.h +/// \brief The memory utilities. 
+/// +/// \author Mu Yang <> +/// \copyright MIT License +/// + +#ifndef ISVD_UTIL_MEMORY_H_ +#define ISVD_UTIL_MEMORY_H_ + +#include + +#if defined(__cplusplus) +extern "C" { +#endif // __cplusplus + +@ISVD_TYPE_MACRO_DEFINE@ + +INT* isvd_imalloc( const size_t num ); +REAL4* isvd_smalloc( const size_t num ); +REAL8* isvd_dmalloc( const size_t num ); +COMP4* isvd_cmalloc( const size_t num ); +COMP8* isvd_zmalloc( const size_t num ); + +void isvd_free( void *ptr ); + +void isvd_imemset0( INT *ptr, const size_t num ); +void isvd_smemset0( REAL4 *ptr, const size_t num ); +void isvd_dmemset0( REAL8 *ptr, const size_t num ); +void isvd_cmemset0( COMP4 *ptr, const size_t num ); +void isvd_zmemset0( COMP8 *ptr, const size_t num ); + +void isvd_imemcpy( INT *dst, const INT *src, const size_t num ); +void isvd_smemcpy( REAL4 *dst, const REAL4 *src, const size_t num ); +void isvd_dmemcpy( REAL8 *dst, const REAL8 *src, const size_t num ); +void isvd_cmemcpy( COMP4 *dst, const COMP4 *src, const size_t num ); +void isvd_zmemcpy( COMP8 *dst, const COMP8 *src, const size_t num ); + +@ISVD_TYPE_MACRO_UNDEF@ + +#if defined(__cplusplus) +} +#endif // __cplusplus + +#endif // ISVD_UTIL_MEMORY_H_ diff --git a/include/c/isvd/util/mpi.h b/src/include/c/isvd/util/mpi.h similarity index 61% rename from include/c/isvd/util/mpi.h rename to src/include/c/isvd/util/mpi.h index 8655673..54da529 100644 --- a/include/c/isvd/util/mpi.h +++ b/src/include/c/isvd/util/mpi.h @@ -15,6 +15,18 @@ extern "C" { #endif // __cplusplus +#if !defined(DOXYGEN_SHOULD_SKIP_THIS) +#if !defined(ISVD_USE_ILP64) +#define MPI_ISVD_INT_T MPI_INTEGER4 +#else // ISVD_USE_ILP64 +#define MPI_ISVD_INT_T MPI_INTEGER8 +#endif // ISVD_USE_ILP64 +#define MPI_ISVD_S_VAL_T MPI_REAL4 +#define MPI_ISVD_D_VAL_T MPI_REAL8 +#define MPI_ISVD_C_VAL_T MPI_COMPLEX8 +#define MPI_ISVD_Z_VAL_T MPI_COMPLEX16 +#endif // DOXYGEN_SHOULD_SKIP_THIS + mpi_int_t isvd_getMpiSize( const isvd_MpiComm comm ); mpi_int_t isvd_getMpiRank( const 
isvd_MpiComm comm ); diff --git a/include/c/isvd/util/omp.h b/src/include/c/isvd/util/omp.h similarity index 93% rename from include/c/isvd/util/omp.h rename to src/include/c/isvd/util/omp.h index ebbea8c..2bf865b 100644 --- a/include/c/isvd/util/omp.h +++ b/src/include/c/isvd/util/omp.h @@ -15,6 +15,7 @@ extern "C" { #endif // __cplusplus +omp_int_t isvd_getOmpMaxSize( void ); omp_int_t isvd_getOmpSize( void ); omp_int_t isvd_getOmpRank( void ); diff --git a/src/lib/CMakeLists.txt b/src/lib/CMakeLists.txt new file mode 100644 index 0000000..00643ce --- /dev/null +++ b/src/lib/CMakeLists.txt @@ -0,0 +1,73 @@ +# The CMake setting of 'src/lib' + +isvd_set_config_var() + +# Shared library +SET(BUILD_SHARED_LIBS ON) +SET(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_PREFIX}/${LIB_FOLDER}") +set(CMAKE_INSTALL_RPATH_USE_LINK_PATH ON) + +# Macro +macro(SET_TARGET target files) + # Set library + add_library(${target} SHARED ${files}) + isvd_set_target(${target}) + set_property(TARGET ${target} PROPERTY VERSION ${ISVD_VERSION}) + set_property(TARGET ${target} PROPERTY LIBRARY_OUTPUT_DIRECTORY "${CMAKE_CURRENT_CONFIG_DIR}") + install(TARGETS ${target} LIBRARY DESTINATION ${LIB_FOLDER}) +endmacro() + +# Set include paths +include_directories("${CMAKE_CURRENT_CONFIG_DIR}/../include/c" + "${CMAKE_CURRENT_CONFIG_DIR}") + +# Break +if(NOT ISVD_BUILD_LIB) + return() +endif() + +# libisvd +set_target(isvd /dev/null) +set_property(TARGET isvd PROPERTY LINKER_LANGUAGE C) + +# libisvd_core +file(GLOB_RECURSE files "${CMAKE_CURRENT_CONFIG_DIR}/libisvd/core/*" + "${CMAKE_CURRENT_CONFIG_DIR}/libisvd/util/*") +set_target(isvd_core "${files}") +isvd_set_target_mpi(isvd_core C) +target_link_libraries(isvd isvd_core) + +# libisvd_la +if(ISVD_OMP) + string(TOLOWER ${ISVD_OMP} isvd_omp) + set(isvd_omp "_${isvd_omp}") +else() + set(isvd_omp "") +endif() +if(ISVD_USE_MKL) + set(isvd_la isvd_la_mkl${isvd_omp}) +else() + set(isvd_la isvd_la_blas${isvd_omp}) +endif(ISVD_USE_MKL) +file(GLOB_RECURSE files 
"${CMAKE_CURRENT_CONFIG_DIR}/libisvd/la/*") +set_target(${isvd_la} "${files}") +isvd_set_target_omp(${isvd_la} C) +isvd_set_target_mpi(${isvd_la} C) +isvd_set_target_blas(${isvd_la}) +target_link_libraries(isvd ${isvd_la}) + +# libisvd_gpu_none +file(GLOB_RECURSE files "${CMAKE_CURRENT_CONFIG_DIR}/libisvd/nogpu/*") +set_target(isvd_gpu_none "${files}") +isvd_set_target_mpi(isvd_gpu_none C) + +# libisvd_gpu +if(ISVD_USE_GPU) + file(GLOB_RECURSE files "${CMAKE_CURRENT_CONFIG_DIR}/libisvd/gpu/*") + set_target(isvd_gpu_magma "${files}") + isvd_set_target_mpi(isvd_gpu_magma C) + isvd_set_target_gpu(isvd_gpu_magma) + target_link_libraries(isvd isvd_gpu_magma) +else() + target_link_libraries(isvd isvd_gpu_none) +endif(ISVD_USE_GPU) diff --git a/src/libisvd.h b/src/lib/libisvd.h similarity index 88% rename from src/libisvd.h rename to src/lib/libisvd.h index 1353227..49a6c7c 100644 --- a/src/libisvd.h +++ b/src/lib/libisvd.h @@ -1,5 +1,5 @@ //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -/// \file src/libisvd.h +/// \file lib/libisvd.h /// \brief The main library header. /// /// \author Mu Yang <> @@ -9,8 +9,8 @@ #ifndef LIBISVD_H_ #define LIBISVD_H_ -#include #include +#include #include #if defined(ISVD_USE_GPU) diff --git a/src/libisvd/core.h b/src/lib/libisvd/core.h similarity index 91% rename from src/libisvd/core.h rename to src/lib/libisvd/core.h index 3dceb78..6342534 100644 --- a/src/libisvd/core.h +++ b/src/lib/libisvd/core.h @@ -1,5 +1,5 @@ //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -/// \file src/libisvd/core.h +/// \file lib/libisvd/core.h /// \brief The core header. 
/// /// \author Mu Yang <> diff --git a/src/libisvd/core/@x@_arg.h b/src/lib/libisvd/core/@x@_arg.h similarity index 95% rename from src/libisvd/core/@x@_arg.h rename to src/lib/libisvd/core/@x@_arg.h index 0124a68..53a143c 100644 --- a/src/libisvd/core/@x@_arg.h +++ b/src/lib/libisvd/core/@x@_arg.h @@ -1,5 +1,5 @@ //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -/// \file src/libisvd/core/@x@_arg.h +/// \file lib/libisvd/core/@x@_arg.h /// \brief The @xname@ precision drivers argument utilities. /// /// \author Mu Yang <> @@ -24,14 +24,14 @@ static inline isvd_fun_t isvd_arg2@x@AlgS( const char *arg ) { if ( !strcmp(arg, "GP") ) return (isvd_fun_t) isvd_@x@SketchGaussianProjection; if ( !strcmp(arg, "GP_gpu") ) return (isvd_fun_t) isvd_@x@SketchGaussianProjection_gpu; fprintf(stderr, "Unknown sketching abbreviation \"%s\"!\n", arg); - return nullptr; + return NULL; } static inline isvd_fun_t isvd_arg2@x@AlgO( const char *arg ) { if ( !strcmp(arg, "TS") ) return (isvd_fun_t) isvd_@x@OrthogonalizeTallSkinnyQr; if ( !strcmp(arg, "GR") ) return (isvd_fun_t) isvd_@x@OrthogonalizeGramian; fprintf(stderr, "Unknown orthogonalization abbreviation \"%s\"!\n", arg); - return nullptr; + return NULL; } static inline isvd_fun_t isvd_arg2@x@AlgI( const char *arg ) { @@ -39,7 +39,7 @@ static inline isvd_fun_t isvd_arg2@x@AlgI( const char *arg ) { if ( !strcmp(arg, "WY") ) return (isvd_fun_t) isvd_@x@IntegrateWenYin; if ( !strcmp(arg, "HR") ) return (isvd_fun_t) isvd_@x@IntegrateHierarchicalReduction; fprintf(stderr, "Unknown integration abbreviation \"%s\"!\n", arg); - return nullptr; + return NULL; } static inline isvd_fun_t isvd_arg2@x@AlgP( const char *arg ) { @@ -50,21 +50,21 @@ static inline isvd_fun_t isvd_arg2@x@AlgP( const char *arg ) { if ( !strcmp(arg, "GR_gpu") ) return (isvd_fun_t) isvd_@x@PostprocessGramian_gpu; if ( !strcmp(arg, "SY_gpu") ) return (isvd_fun_t) 
isvd_@x@PostprocessSymmetric_gpu; fprintf(stderr, "Unknown postprocess abbreviation \"%s\"!\n", arg); - return nullptr; + return NULL; } static inline const char* isvd_arg2@x@AlgNameS( const char *arg ) { if ( !strcmp(arg, "GP") ) return "@XName@ Precision Gaussian Projection Sketching"; if ( !strcmp(arg, "GP_gpu") ) return "@XName@ Precision GPU Gaussian Projection Sketching"; fprintf(stderr, "Unknown sketching abbreviation \"%s\"!\n", arg); - return nullptr; + return NULL; } static inline const char* isvd_arg2@x@AlgNameO( const char *arg ) { if ( !strcmp(arg, "TS") ) return "@XName@ Precision Tall-Skinny QR Orthogonalization"; if ( !strcmp(arg, "GR") ) return "@XName@ Precision Gramian Orthogonalization"; fprintf(stderr, "Unknown orthogonalization abbreviation \"%s\"!\n", arg); - return nullptr; + return NULL; } static inline const char* isvd_arg2@x@AlgNameI( const char *arg ) { @@ -72,7 +72,7 @@ static inline const char* isvd_arg2@x@AlgNameI( const char *arg ) { if ( !strcmp(arg, "WY") ) return "@XName@ Precision Wen-Yin Integration"; if ( !strcmp(arg, "HR") ) return "@XName@ Precision Hierarchical Reduction Integration"; fprintf(stderr, "Unknown integration abbreviation \"%s\"!\n", arg); - return nullptr; + return NULL; } static inline const char* isvd_arg2@x@AlgNameP( const char *arg ) { @@ -83,7 +83,7 @@ static inline const char* isvd_arg2@x@AlgNameP( const char *arg ) { if ( !strcmp(arg, "GR_gpu") ) return "@XName@ Precision GPU Gramian Postprocessing"; if ( !strcmp(arg, "SY_gpu") ) return "@XName@ Precision GPU Symmetric Postprocessing"; fprintf(stderr, "Unknown postprocess abbreviation \"%s\"!\n", arg); - return nullptr; + return NULL; } #if defined(__cplusplus) diff --git a/src/libisvd/core/driver/@x@_isvd.c b/src/lib/libisvd/core/driver/@x@_isvd.c similarity index 65% rename from src/libisvd/core/driver/@x@_isvd.c rename to src/lib/libisvd/core/driver/@x@_isvd.c index 489da9d..e1174f3 100644 --- a/src/libisvd/core/driver/@x@_isvd.c +++ 
b/src/lib/libisvd/core/driver/@x@_isvd.c @@ -1,5 +1,5 @@ //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -/// \file src/libisvd/core/driver/@x@_isvd.c +/// \file lib/libisvd/core/driver/@x@_isvd.c /// \brief The iSVD driver (@xname@ precision). /// /// \author Mu Yang <> @@ -54,9 +54,9 @@ /// \param[in] ordera The storage ordering of 𝑨.
/// `'C'`: column-major ordering.
/// `'R'`: row-major ordering. -/// \param[in] a, lda The column/row-block 𝑨 (\f$m \times n_j\f$) and its leading dimension.
-/// \b dista = `'C'`: the size must be \f$m \times n_j\f$.
-/// \b dista = `'R'`: the size must be \f$m_j \times n\f$. +/// \param[in] a, lda The column/row-block 𝑨 (\f$m \times n^{(j)}\f$) and its leading dimension.
+/// \b dista = `'C'`: the size must be \f$m \times n^{(j)}\f$.
+/// \b dista = `'R'`: the size must be \f$m^{(j)} \times n\f$. /// \param[in] s The vector 𝝈 (\f$k \times 1\f$). /// \param[in] ut, ldut The matrix 𝑼 (row-major) and its leading dimension.
/// \b ut_root ≥ 0: the size must be \f$Pm_b \times k\f$, and \b ldut must be \f$l\f$.
@@ -94,36 +94,41 @@ /// \param[out] ut Replaced by the left singular vectors 𝑼 (row-major). /// \param[out] vt Replaced by the right singular vectors 𝑽 (row-major). /// +/// \note If \b argc ≠ `NULL` and \b argc[i] < 0, then a default argument query is assumed on the i-th stage; +/// the routine only returns the first \b retc[i] default arguments in \b retv[i]. +/// +/// \see isvd_Param +/// void isvd_@x@Isvd( - const char *alg_s, - const char *alg_o, - const char *alg_i, - const char *alg_p, - const isvd_int_t m, - const isvd_int_t n, - const isvd_int_t k, - const isvd_int_t p, - const isvd_int_t N, - const @xtype@ *argv[4], - const isvd_int_t argc[4], - @xtype@ *retv[4], - const isvd_int_t retc[4], - double time[4], - FILE *stream, - const char dista, - const char ordera, - const @xtype@ *a, - const isvd_int_t lda, - @xtype@ *s, - @xtype@ *ut, - const isvd_int_t ldut, - @xtype@ *vt, - const isvd_int_t ldvt, - const isvd_int_t seed, - const mpi_int_t ut_root, - const mpi_int_t vt_root, - const mpi_int_t mpi_root, - const isvd_MpiComm mpi_comm + const char *alg_s, + const char *alg_o, + const char *alg_i, + const char *alg_p, + const isvd_int_t m, + const isvd_int_t n, + const isvd_int_t k, + const isvd_int_t p, + const isvd_int_t N, + const @xtype_____@ *argv[4], + const isvd_int_t argc[4], + @xtype_____@ *retv[4], + const isvd_int_t retc[4], + double time[4], + FILE *stream, + const char dista, + const char ordera, + const @xtype_____@ *a, + const isvd_int_t lda, + @xtype_____@ *s, + @xtype_____@ *ut, + const isvd_int_t ldut, + @xtype_____@ *vt, + const isvd_int_t ldvt, + const isvd_int_t seed, + const mpi_int_t ut_root, + const mpi_int_t vt_root, + const mpi_int_t mpi_root, + const isvd_MpiComm mpi_comm ) { const mpi_int_t mpi_rank = isvd_getMpiRank(MPI_COMM_WORLD); @@ -136,10 +141,10 @@ void isvd_@x@Isvd( const char *opts_i[] = {"KN", "WY", "HR"}; const char *opts_p[] = {"TS", "GR", "SY", "TS_gpu", "GR_gpu", "SY_gpu"}; - const char *alg_s_ = 
isvd_arg2str("ALG_S", alg_s, opts_s, nullptr, lenof(opts_s)); - const char *alg_o_ = isvd_arg2str("ALG_O", alg_o, opts_o, nullptr, lenof(opts_o)); - const char *alg_i_ = isvd_arg2str("ALG_I", alg_i, opts_i, nullptr, lenof(opts_i)); - const char *alg_p_ = isvd_arg2str("ALG_P", alg_p, opts_p, nullptr, lenof(opts_p)); + const char *alg_s_ = isvd_arg2str("ALG_S", alg_s, opts_s, NULL, lenof(opts_s)); + const char *alg_o_ = isvd_arg2str("ALG_O", alg_o, opts_o, NULL, lenof(opts_o)); + const char *alg_i_ = isvd_arg2str("ALG_I", alg_i, opts_i, NULL, lenof(opts_i)); + const char *alg_p_ = isvd_arg2str("ALG_P", alg_p, opts_p, NULL, lenof(opts_p)); if ( !alg_s_ || !alg_o_ || !alg_i_ || !alg_p_ ) return; isvd_fun_t fun_s = isvd_arg2@x@AlgS(alg_s_); @@ -147,14 +152,6 @@ void isvd_@x@Isvd( isvd_fun_t fun_i = isvd_arg2@x@AlgI(alg_i_); isvd_fun_t fun_p = isvd_arg2@x@AlgP(alg_p_); - if ( stream != nullptr && mpi_rank == mpi_root ) { - fprintf(stream, "Using %s\n", isvd_arg2@x@AlgNameS(alg_s_)); - fprintf(stream, "Using %s\n", isvd_arg2@x@AlgNameO(alg_o_)); - fprintf(stream, "Using %s\n", isvd_arg2@x@AlgNameI(alg_i_)); - fprintf(stream, "Using %s\n", isvd_arg2@x@AlgNameP(alg_p_)); - fprintf(stream, "\n"); - } - // ====================================================================================================================== // // Create parameters @@ -167,65 +164,112 @@ void isvd_@x@Isvd( // ====================================================================================================================== // // Gets arguments and return values of stages - const @xtype@ *argv_s = (argv != nullptr) ? argv[0] : nullptr; - const @xtype@ *argv_o = (argv != nullptr) ? argv[1] : nullptr; - const @xtype@ *argv_i = (argv != nullptr) ? argv[2] : nullptr; - const @xtype@ *argv_p = (argv != nullptr) ? argv[3] : nullptr; + const @xtype_____@ *argv_s = (argv != NULL) ? argv[0] : NULL; + const @xtype_____@ *argv_o = (argv != NULL) ? 
argv[1] : NULL; + const @xtype_____@ *argv_i = (argv != NULL) ? argv[2] : NULL; + const @xtype_____@ *argv_p = (argv != NULL) ? argv[3] : NULL; + + const isvd_int_t argc_s = (argv != NULL) ? argc[0] : 0; + const isvd_int_t argc_o = (argv != NULL) ? argc[1] : 0; + const isvd_int_t argc_i = (argv != NULL) ? argc[2] : 0; + const isvd_int_t argc_p = (argv != NULL) ? argc[3] : 0; - const isvd_int_t argc_s = (argv != nullptr) ? argc[0] : 0; - const isvd_int_t argc_o = (argv != nullptr) ? argc[1] : 0; - const isvd_int_t argc_i = (argv != nullptr) ? argc[2] : 0; - const isvd_int_t argc_p = (argv != nullptr) ? argc[3] : 0; + @xtype_____@ *retv_s = (retv != NULL) ? retv[0] : NULL; + @xtype_____@ *retv_o = (retv != NULL) ? retv[1] : NULL; + @xtype_____@ *retv_i = (retv != NULL) ? retv[2] : NULL; + @xtype_____@ *retv_p = (retv != NULL) ? retv[3] : NULL; - @xtype@ *retv_s = (retv != nullptr) ? retv[0] : nullptr; - @xtype@ *retv_o = (retv != nullptr) ? retv[1] : nullptr; - @xtype@ *retv_i = (retv != nullptr) ? retv[2] : nullptr; - @xtype@ *retv_p = (retv != nullptr) ? retv[3] : nullptr; + const isvd_int_t retc_s = (retv != NULL) ? retc[0] : 0; + const isvd_int_t retc_o = (retv != NULL) ? retc[1] : 0; + const isvd_int_t retc_i = (retv != NULL) ? retc[2] : 0; + const isvd_int_t retc_p = (retv != NULL) ? 
retc[3] : 0; + + // ====================================================================================================================== // + // Check stage arguments + + if ( argc_s > 0 ) { isvd_assert_ne(argv_s, nullptr); } + if ( argc_o > 0 ) { isvd_assert_ne(argv_o, nullptr); } + if ( argc_i > 0 ) { isvd_assert_ne(argv_i, nullptr); } + if ( argc_p > 0 ) { isvd_assert_ne(argv_p, nullptr); } + if ( retc_s > 0 ) { isvd_assert_ne(retv_s, nullptr); } + if ( retc_o > 0 ) { isvd_assert_ne(retv_o, nullptr); } + if ( retc_i > 0 ) { isvd_assert_ne(retv_i, nullptr); } + if ( retc_p > 0 ) { isvd_assert_ne(retv_p, nullptr); } + + // ====================================================================================================================== // + // Query stage arguments - const isvd_int_t retc_s = (retv != nullptr) ? retc[0] : 0; - const isvd_int_t retc_o = (retv != nullptr) ? retc[1] : 0; - const isvd_int_t retc_i = (retv != nullptr) ? retc[2] : 0; - const isvd_int_t retc_p = (retv != nullptr) ? 
retc[3] : 0; + bool query = false; + if ( argc_s < 0 ) { + fun_s(param, argv_s, argc_s, retv_s, retc_s, dista, ordera, NULL, 1, NULL, 1, seed, mpi_root); + query = true; + } + if ( argc_o < 0 ) { + fun_o(param, argv_o, argc_o, retv_o, retc_o, NULL, 1); + query = true; + } + if ( argc_i < 0 ) { + fun_i(param, argv_i, argc_i, retv_i, retc_i, NULL, 1, NULL, 1); + query = true; + } + if ( argc_p < 0 ) { + fun_p(param, argv_p, argc_p, retv_p, retc_p, dista, ordera, NULL, 1, NULL, 1, NULL, NULL, 1, NULL, 1, ut_root, vt_root); + query = true; + } + if ( query ) { + return; + } + + // ====================================================================================================================== // + // Print arguments + + if ( stream != NULL && mpi_rank == mpi_root ) { + fprintf(stream, "Using %s\n", isvd_arg2@x@AlgNameS(alg_s_)); + fprintf(stream, "Using %s\n", isvd_arg2@x@AlgNameO(alg_o_)); + fprintf(stream, "Using %s\n", isvd_arg2@x@AlgNameI(alg_i_)); + fprintf(stream, "Using %s\n", isvd_arg2@x@AlgNameP(alg_p_)); + fprintf(stream, "\n"); + } // ====================================================================================================================== // // Allocate memory - @xtype@ *yst = isvd_@x@malloc(Nl * mb); + @xtype_____@ *yst = isvd_@x@malloc(Nl * mb); isvd_int_t ldyst = Nl; - @xtype@ *qt = isvd_@x@malloc(l * mb); + @xtype_____@ *qt = isvd_@x@malloc(l * mb); isvd_int_t ldqt = l; // ====================================================================================================================== // // Run - if ( stream != nullptr && mpi_rank == mpi_root ) { fprintf(stream, "Sketching ...................... "); fflush(stream); } + if ( stream != NULL && mpi_rank == mpi_root ) { fprintf(stream, "Sketching ...................... 
"); fflush(stream); } double time_s = MPI_Wtime(); fun_s(param, argv_s, argc_s, retv_s, retc_s, dista, ordera, a, lda, yst, ldyst, seed, mpi_root); time_s = MPI_Wtime() - time_s; - if ( stream != nullptr && mpi_rank == mpi_root ) { fprintf(stream, "done\n"); fflush(stream); } + if ( stream != NULL && mpi_rank == mpi_root ) { fprintf(stream, "done\n"); fflush(stream); } - if ( stream != nullptr && mpi_rank == mpi_root ) { fprintf(stream, "Orthogonalizing ................ "); fflush(stream); } + if ( stream != NULL && mpi_rank == mpi_root ) { fprintf(stream, "Orthogonalizing ................ "); fflush(stream); } double time_i = MPI_Wtime(); fun_o(param, argv_o, argc_o, retv_o, retc_o, yst, ldyst); time_i = MPI_Wtime() - time_i; - if ( stream != nullptr && mpi_rank == mpi_root ) { fprintf(stream, "done\n"); fflush(stream); } + if ( stream != NULL && mpi_rank == mpi_root ) { fprintf(stream, "done\n"); fflush(stream); } - if ( stream != nullptr && mpi_rank == mpi_root ) { fprintf(stream, "Integrating .................... "); fflush(stream); } + if ( stream != NULL && mpi_rank == mpi_root ) { fprintf(stream, "Integrating .................... "); fflush(stream); } double time_o = MPI_Wtime(); fun_i(param, argv_i, argc_i, retv_i, retc_i, yst, ldyst, qt, ldqt); time_o = MPI_Wtime() - time_o; - if ( stream != nullptr && mpi_rank == mpi_root ) { fprintf(stream, "done\n"); fflush(stream); } + if ( stream != NULL && mpi_rank == mpi_root ) { fprintf(stream, "done\n"); fflush(stream); } - if ( stream != nullptr && mpi_rank == mpi_root ) { fprintf(stream, "Postprocessing ................. "); fflush(stream); } + if ( stream != NULL && mpi_rank == mpi_root ) { fprintf(stream, "Postprocessing ................. 
"); fflush(stream); } double time_p = MPI_Wtime(); fun_p(param, argv_p, argc_p, retv_p, retc_p, dista, ordera, a, lda, qt, ldqt, s, ut, ldut, vt, ldvt, ut_root, vt_root); time_p = MPI_Wtime() - time_p; - if ( stream != nullptr && mpi_rank == mpi_root ) { fprintf(stream, "done\n"); fflush(stream); } + if ( stream != NULL && mpi_rank == mpi_root ) { fprintf(stream, "done\n"); fflush(stream); } // ====================================================================================================================== // // Gets executing times - if ( time != nullptr ) { + if ( time != NULL ) { time[0] = time_s; time[1] = time_i; time[2] = time_o; diff --git a/src/libisvd/core/env.c b/src/lib/libisvd/core/env.c similarity index 86% rename from src/libisvd/core/env.c rename to src/lib/libisvd/core/env.c index fe1f5d4..40101f2 100644 --- a/src/libisvd/core/env.c +++ b/src/lib/libisvd/core/env.c @@ -1,5 +1,5 @@ //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -/// \file src/libisvd/core/env.c +/// \file lib/libisvd/core/env.c /// \brief The iSVD environment routines (CPU only). /// /// \author Mu Yang <> @@ -39,12 +39,9 @@ void isvd_finalize_cpu( void ) { /// \note This routines displays the MPI environment. 
/// void isvd_printEnvironment_cpu( const isvd_MpiComm mpi_comm ) { - mpi_int_t mpi_size = isvd_getMpiSize(mpi_comm), omp_size; - ISVD_OMP_PARALLEL - { - omp_size = isvd_getOmpSize(); - } + mpi_int_t mpi_size = isvd_getMpiSize(mpi_comm); + omp_int_t omp_size = isvd_getOmpMaxSize(); printf("iSVD %s, %lu-bit isvd_int_t, %lu-bit pointer\n", ISVD_VERSION, sizeof(isvd_int_t) * 8, sizeof(void*) * 8); - printf("%d MPI nodes, %d OpenMP threads per node\n\n", mpi_size, omp_size); + printf("%d MPI processors, %d OpenMP threads per process\n\n", mpi_size, omp_size); } diff --git a/src/libisvd/core/param.c b/src/lib/libisvd/core/param.c similarity index 98% rename from src/libisvd/core/param.c rename to src/lib/libisvd/core/param.c index e58f70f..503e274 100644 --- a/src/libisvd/core/param.c +++ b/src/lib/libisvd/core/param.c @@ -1,5 +1,5 @@ //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -/// \file src/libisvd/core/param.c +/// \file lib/libisvd/core/param.c /// \brief The parameter structure. 
/// /// \author Mu Yang <> diff --git a/src/libisvd/core/stage/@x@_integrate_hierarchical_reduction.c b/src/lib/libisvd/core/stage/@x@_integrate_hierarchical_reduction.c similarity index 79% rename from src/libisvd/core/stage/@x@_integrate_hierarchical_reduction.c rename to src/lib/libisvd/core/stage/@x@_integrate_hierarchical_reduction.c index ca78300..9488538 100644 --- a/src/libisvd/core/stage/@x@_integrate_hierarchical_reduction.c +++ b/src/lib/libisvd/core/stage/@x@_integrate_hierarchical_reduction.c @@ -1,6 +1,6 @@ //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -/// \file src/libisvd/core/stage/@x@_integrate_hierarchical_reduction.c -/// \brief The Hierarchical Reduction Integration (@xname@ precision) +/// \file lib/libisvd/core/stage/@x@_integrate_hierarchical_reduction.c +/// \brief The Hierarchical Reduction Integration (@xname@ precision). /// /// \author Mu Yang <> /// \copyright MIT License @@ -10,10 +10,11 @@ #include #include #include +#include //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// /// \ingroup c_core_@x@_stage_module -/// \brief Hierarchical Reduction Integration (@xname@ precision) +/// \brief Hierarchical Reduction Integration (@xname@ precision). /// /// \param[in] param The \ref isvd_Param "parameters". /// \param[in] argv, argc The arguments and its length. (not using) @@ -30,24 +31,20 @@ /// the routine only returns the first \b retc default arguments in \b retv. 
/// void isvd_@x@IntegrateHierarchicalReduction( - const isvd_Param param, - const @xtype@ *argv, - const isvd_int_t argc, - @xtype@ *retv, - const isvd_int_t retc, - @xtype@ *yst, - const isvd_int_t ldyst, - @xtype@ *qt, - const isvd_int_t ldqt + const isvd_Param param, + const @xtype_____@ *argv, + const isvd_int_t argc, + @xtype_____@ *retv, + const isvd_int_t retc, + @xtype_____@ *yst, + const isvd_int_t ldyst, + @xtype_____@ *qt, + const isvd_int_t ldqt ) { - if ( argc < 0 ) { - return; - } - - ISVD_UNUSED(argv); - ISVD_UNUSED(retv); - ISVD_UNUSED(retc); + if ( argc > 0 ) { isvd_assert_ne(argv, nullptr); } + if ( retc > 0 ) { isvd_assert_ne(retv, nullptr); } + if ( argc < 0 ) return; // ====================================================================================================================== // // Get parameters @@ -65,21 +62,21 @@ void isvd_@x@IntegrateHierarchicalReduction( // ====================================================================================================================== // // Allocate memory - @xtype@ *qst = yst; + @xtype_____@ *qst = yst; isvd_int_t ldqst = ldyst; // matrix B - @xtype@ *bs = isvd_@x@malloc(l * l * (N+1)/2); + @xtype_____@ *bs = isvd_@x@malloc(l * l * (N+1)/2); isvd_int_t ldbs = l; // matrix T - @xtype@ *tt = isvd_@x@malloc(l * l); + @xtype_____@ *tt = isvd_@x@malloc(l * l); isvd_int_t ldtt = l; // vector s - @xtype@ *s = isvd_@x@malloc(l); + @xtype_____@ *s = isvd_@x@malloc(l); - @xtype@ *tmpt = qt; + @xtype_____@ *tmpt = qt; isvd_int_t ldtmpt = ldqt; // ====================================================================================================================== // @@ -91,24 +88,24 @@ void isvd_@x@IntegrateHierarchicalReduction( for ( isvd_int_t i = 0; i < h; ++i ) { isvd_@x@Gemm('N', 'T', l, l, mj, 1.0, qst + i*l, ldqst, qst + (i+h)*l, ldqst, 0.0, bs + i*ldbs*l, ldbs); } - MPI_Allreduce(MPI_IN_PLACE, bs, ldbs*l*h, MPI_@X_TYPE@, MPI_SUM, param.mpi_comm); + MPI_Allreduce(MPI_IN_PLACE, bs, 
ldbs*l*h, MPI_@XTYPE@, MPI_SUM, param.mpi_comm); for ( isvd_int_t i = 0; i < h; ++i ) { // matrix W - @xtype@ *w = bs + i*ldbs*l; + @xtype_____@ *w = bs + i*ldbs*l; isvd_int_t ldw = ldbs; // matrix Q(i) - @xtype@ *qit = qst + i*l; + @xtype_____@ *qit = qst + i*l; isvd_int_t ldqit = ldqst; // matrix Q(i+h) - @xtype@ *qiht = qst + (i+h)*l; + @xtype_____@ *qiht = qst + (i+h)*l; isvd_int_t ldqiht = ldqst; // svd(B(i)) = W * S * T' - isvd_@x@Gesvd('O', 'S', l, l, w, ldw, s, nullptr, 1, tt, ldtt); + isvd_@x@Gesvd('O', 'S', l, l, w, ldw, s, NULL, 1, tt, ldtt); // Q(i) := Q(i) * W + Q(i+h) * T (Q(i)' := W' * Q(i)' + T' * Q(i+h)') isvd_@x@Omatcopy('N', l, mj, 1.0, qit, ldqit, tmpt, ldtmpt); diff --git a/src/libisvd/core/stage/@x@_integrate_kolmogorov_nagumo.c b/src/lib/libisvd/core/stage/@x@_integrate_kolmogorov_nagumo.c similarity index 83% rename from src/libisvd/core/stage/@x@_integrate_kolmogorov_nagumo.c rename to src/lib/libisvd/core/stage/@x@_integrate_kolmogorov_nagumo.c index 94e665d..c5a1a0a 100644 --- a/src/libisvd/core/stage/@x@_integrate_kolmogorov_nagumo.c +++ b/src/lib/libisvd/core/stage/@x@_integrate_kolmogorov_nagumo.c @@ -1,6 +1,6 @@ //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -/// \file src/libisvd/core/stage/@x@_integrate_kolmogorov_nagumo.c -/// \brief The Kolmogorov-Nagumo Integration (@xname@ precision) +/// \file lib/libisvd/core/stage/@x@_integrate_kolmogorov_nagumo.c +/// \brief The Kolmogorov-Nagumo Integration (@xname@ precision). /// /// \author Mu Yang <> /// \copyright MIT License @@ -10,13 +10,14 @@ #include #include #include +#include #define kMaxit 256 #define kTol 1e-4 //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// /// \ingroup c_core_@x@_stage_module -/// \brief Kolmogorov-Nagumo Integration (@xname@ precision) +/// \brief Kolmogorov-Nagumo Integration (@xname@ precision). 
/// /// \param[in] param The \ref isvd_Param "parameters". /// \param[in] argv, argc The arguments and its length.
@@ -36,17 +37,20 @@ /// the routine only returns the first \b retc default arguments in \b retv. /// void isvd_@x@IntegrateKolmogorovNagumo( - const isvd_Param param, - const @xtype@ *argv, - const isvd_int_t argc, - @xtype@ *retv, - const isvd_int_t retc, - const @xtype@ *yst, - const isvd_int_t ldyst, - @xtype@ *qt, - const isvd_int_t ldqt + const isvd_Param param, + const @xtype_____@ *argv, + const isvd_int_t argc, + @xtype_____@ *retv, + const isvd_int_t retc, + const @xtype_____@ *yst, + const isvd_int_t ldyst, + @xtype_____@ *qt, + const isvd_int_t ldqt ) { + if ( argc > 0 ) { isvd_assert_ne(argv, nullptr); } + if ( retc > 0 ) { isvd_assert_ne(retv, nullptr); } + // ====================================================================================================================== // // Query arguments @@ -62,7 +66,7 @@ void isvd_@x@IntegrateKolmogorovNagumo( isvd_int_t argi = -1; const isvd_int_t maxit = ( argc > ++argi ) ? argv[argi] : kMaxit; - const @xtype@ tol = ( argc > ++argi ) ? argv[argi] : kTol; + const @xtype_____@ tol = ( argc > ++argi ) ? 
argv[argi] : kTol; // ====================================================================================================================== // // Get parameters @@ -82,64 +86,64 @@ void isvd_@x@IntegrateKolmogorovNagumo( // ====================================================================================================================== // // Allocate memory - const @xtype@ *qst = yst; + const @xtype_____@ *qst = yst; isvd_int_t ldqst = ldyst; // matrix Qc' - @xtype@ *qct = isvd_@x@malloc(l * mj); + @xtype_____@ *qct = isvd_@x@malloc(l * mj); isvd_int_t ldqct = l; // matrix Q+' - @xtype@ *qpt = isvd_@x@malloc(l * mj); + @xtype_____@ *qpt = isvd_@x@malloc(l * mj); isvd_int_t ldqpt = l; // matrix Gc' - @xtype@ *gct = isvd_@x@malloc(l * mj); + @xtype_____@ *gct = isvd_@x@malloc(l * mj); isvd_int_t ldgct = l; // matrix Bc - @xtype@ *bc = isvd_@x@malloc(Nl * l); + @xtype_____@ *bc = isvd_@x@malloc(Nl * l); isvd_int_t ldbc = Nl; // matrix B+ - @xtype@ *bp = isvd_@x@malloc(Nl * l); + @xtype_____@ *bp = isvd_@x@malloc(Nl * l); isvd_int_t ldbp = Nl; // matrix Bgc - @xtype@ *bgc = isvd_@x@malloc(Nl * l); + @xtype_____@ *bgc = isvd_@x@malloc(Nl * l); isvd_int_t ldbgc = Nl; // matrix Dc - @xtype@ *dc = isvd_@x@malloc(l * l); + @xtype_____@ *dc = isvd_@x@malloc(l * l); isvd_int_t lddc = l; // matrix Z - @xtype@ *z = isvd_@x@malloc(l * l); + @xtype_____@ *z = isvd_@x@malloc(l * l); isvd_int_t ldz = l; // matrix C - @xtype@ *c = isvd_@x@malloc(l * l); + @xtype_____@ *c = isvd_@x@malloc(l * l); isvd_int_t ldc = l; // matrix inv(C) - @xtype@ *cinv = isvd_@x@malloc(l * l); + @xtype_____@ *cinv = isvd_@x@malloc(l * l); isvd_int_t ldcinv = l; // vector s - @xtype@ *s = isvd_@x@malloc(l * 2); + @xtype_____@ *s = isvd_@x@malloc(l * 2); // matrix Z * sqrt(S) - @xtype@ *zs = cinv; - isvd_int_t ldzs = ldcinv; + @xtype_____@ *zs = cinv; + isvd_int_t ldzs = ldcinv; // matrix Z / sqrt(S) - @xtype@ *zinvs = z; + @xtype_____@ *zinvs = z; isvd_int_t ldzinvs = ldz; // matrix sqrt(S) - 
@xtype@ *ss = s + l; + @xtype_____@ *ss = s + l; - @xtype@ *tmp; + @xtype_____@ *tmp; // ====================================================================================================================== // // Initializing @@ -149,13 +153,13 @@ void isvd_@x@IntegrateKolmogorovNagumo( // Bc := Qs' * Qc isvd_@x@Gemm('N', 'T', Nl, l, mj, 1.0, qst, ldqst, qct, ldqct, 0.0, bc, ldbc); - MPI_Allreduce(MPI_IN_PLACE, bc, ldbc*l, MPI_@X_TYPE@, MPI_SUM, param.mpi_comm); + MPI_Allreduce(MPI_IN_PLACE, bc, ldbc*l, MPI_@XTYPE@, MPI_SUM, param.mpi_comm); // ====================================================================================================================== // // Iterating isvd_int_t iter; - @xtype@ error = -1.0/0.0; + @xtype_____@ error = -1.0/0.0; for ( iter = 1; ; ++iter ) { @@ -167,7 +171,7 @@ void isvd_@x@IntegrateKolmogorovNagumo( // Bgc := Qs' * Gc isvd_@x@Gemm('N', 'T', Nl, l, mj, 1.0, qst, ldqst, gct, ldgct, 0.0, bgc, ldbgc); - MPI_Allreduce(MPI_IN_PLACE, bgc, ldbgc*l, MPI_@X_TYPE@, MPI_SUM, param.mpi_comm); + MPI_Allreduce(MPI_IN_PLACE, bgc, ldbgc*l, MPI_@XTYPE@, MPI_SUM, param.mpi_comm); // Dc := 1/N * Bc' * Bc isvd_@x@Gemm('T', 'N', l, l, Nl, 1.0/N, bc, ldbc, bc, ldbc, 0.0, dc, lddc); diff --git a/src/libisvd/core/stage/@x@_integrate_wen_yin.c b/src/lib/libisvd/core/stage/@x@_integrate_wen_yin.c similarity index 76% rename from src/libisvd/core/stage/@x@_integrate_wen_yin.c rename to src/lib/libisvd/core/stage/@x@_integrate_wen_yin.c index e2fae04..6af8cce 100644 --- a/src/libisvd/core/stage/@x@_integrate_wen_yin.c +++ b/src/lib/libisvd/core/stage/@x@_integrate_wen_yin.c @@ -1,6 +1,6 @@ //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -/// \file src/libisvd/core/stage/@x@_integrate_wen_yin.c -/// \brief The Wen-Yin Integration (@xname@ precision) +/// \file lib/libisvd/core/stage/@x@_integrate_wen_yin.c +/// \brief The Wen-Yin Integration (@xname@ precision). 
/// /// \author Mu Yang <> /// \copyright MIT License @@ -10,6 +10,7 @@ #include #include #include +#include #define kMaxit 256 #define kTol 1e-3 @@ -23,12 +24,19 @@ //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// /// \ingroup c_core_@x@_stage_module -/// \brief Wen-Yin Integration (@xname@ precision) +/// \brief Wen-Yin Integration (@xname@ precision). /// /// \param[in] param The \ref isvd_Param "parameters". /// \param[in] argv, argc The arguments and its length.
/// \b argv[0]: The maximum number of iteration.
-/// \b argv[1]: The tolerance of convergence condition. +/// \b argv[1]: The tolerance of convergence condition.
+/// \b argv[2]: The initial step size \f$ \tau_0 \f$.
+/// \b argv[3]: The maximum predicting step size \f$ \tau_\max \f$.
+/// \b argv[4]: The minimum predicting step size \f$ \tau_\min \f$.
+/// \b argv[5]: The maximum number of iterations in predicting step size.
+/// \b argv[6]: The scaling parameter for step size searching \f$ \beta \f$.
+/// \b argv[7]: The parameter for step size searching \f$ \sigma \f$.
+/// \b argv[8]: The parameter for next step searching \f$ \eta \f$. /// \param[in] retv, retc The return values and its length. ///
/// \param[in] yst, ldyst The row-block 𝕼 (\f$ m_b \times Nl \f$, row-major) and its leading dimension. @@ -43,17 +51,20 @@ /// the routine only returns the first \b retc default arguments in \b retv. /// void isvd_@x@IntegrateWenYin( - const isvd_Param param, - const @xtype@ *argv, - const isvd_int_t argc, - @xtype@ *retv, - const isvd_int_t retc, - const @xtype@ *yst, - const isvd_int_t ldyst, - @xtype@ *qt, - const isvd_int_t ldqt + const isvd_Param param, + const @xtype_____@ *argv, + const isvd_int_t argc, + @xtype_____@ *retv, + const isvd_int_t retc, + const @xtype_____@ *yst, + const isvd_int_t ldyst, + @xtype_____@ *qt, + const isvd_int_t ldqt ) { + if ( argc > 0 ) { isvd_assert_ne(argv, nullptr); } + if ( retc > 0 ) { isvd_assert_ne(retv, nullptr); } + // ====================================================================================================================== // // Query arguments @@ -75,15 +86,15 @@ void isvd_@x@IntegrateWenYin( // Get arguments isvd_int_t argi = -1; - const isvd_int_t maxit = ( argc > ++argi ) ? argv[argi] : kMaxit; - const @xtype@ tol = ( argc > ++argi ) ? argv[argi] : kTol; - const @xtype@ tau0 = ( argc > ++argi ) ? argv[argi] : kTau0; - const @xtype@ taumax = ( argc > ++argi ) ? argv[argi] : kTaumax; - const @xtype@ taumin = ( argc > ++argi ) ? argv[argi] : kTaumin; - const isvd_int_t taumaxit = ( argc > ++argi ) ? argv[argi] : kTauMaxit; - const @xtype@ beta = ( argc > ++argi ) ? argv[argi] : kBeta; - const @xtype@ sigma = ( argc > ++argi ) ? argv[argi] : kSigma; - const @xtype@ eta = ( argc > ++argi ) ? argv[argi] : kEta; + const isvd_int_t maxit = ( argc > ++argi ) ? argv[argi] : kMaxit; + const @xtype_____@ tol = ( argc > ++argi ) ? argv[argi] : kTol; + const @xtype_____@ tau0 = ( argc > ++argi ) ? argv[argi] : kTau0; + const @xtype_____@ taumax = ( argc > ++argi ) ? argv[argi] : kTaumax; + const @xtype_____@ taumin = ( argc > ++argi ) ? argv[argi] : kTaumin; + const isvd_int_t taumaxit = ( argc > ++argi ) ? 
argv[argi] : kTauMaxit; + const @xtype_____@ beta = ( argc > ++argi ) ? argv[argi] : kBeta; + const @xtype_____@ sigma = ( argc > ++argi ) ? argv[argi] : kSigma; + const @xtype_____@ eta = ( argc > ++argi ) ? argv[argi] : kEta; // ====================================================================================================================== // // Get parameters @@ -111,74 +122,74 @@ void isvd_@x@IntegrateWenYin( // ====================================================================================================================== // // Allocate memory - const @xtype@ *qst = yst; + const @xtype_____@ *qst = yst; isvd_int_t ldqst = ldyst; // matrix Qc' - @xtype@ *qct = isvd_@x@malloc(l * mj); + @xtype_____@ *qct = isvd_@x@malloc(l * mj); isvd_int_t ldqct = l; // matrix Q+' - @xtype@ *qpt = isvd_@x@malloc(l * mj); + @xtype_____@ *qpt = isvd_@x@malloc(l * mj); isvd_int_t ldqpt = l; // matrix Gc' - @xtype@ *gct = isvd_@x@malloc(l * mj); + @xtype_____@ *gct = isvd_@x@malloc(l * mj); isvd_int_t ldgct = l; // matrix Xc' - @xtype@ *xct = isvd_@x@malloc(l * mj); + @xtype_____@ *xct = isvd_@x@malloc(l * mj); isvd_int_t ldxct = l; // matrix X+' - @xtype@ *xpt = isvd_@x@malloc(l * mj); + @xtype_____@ *xpt = isvd_@x@malloc(l * mj); isvd_int_t ldxpt = l; // matrix Bc - @xtype@ *bc = isvd_@x@malloc(Nl * l); + @xtype_____@ *bc = isvd_@x@malloc(Nl * l); isvd_int_t ldbc = Nl; // matrix B+ - @xtype@ *bp = isvd_@x@malloc(Nl * l); + @xtype_____@ *bp = isvd_@x@malloc(Nl * l); isvd_int_t ldbp = Nl; // matrix Bgc - @xtype@ *bgc = isvd_@x@malloc(Nl * l); + @xtype_____@ *bgc = isvd_@x@malloc(Nl * l); isvd_int_t ldbgc = Nl; // matrix Dc - @xtype@ *dc = isvd_@x@malloc(l * l); + @xtype_____@ *dc = isvd_@x@malloc(l * l); isvd_int_t lddc = l; // matrix Dgc - @xtype@ *dgc = isvd_@x@malloc(l * l); + @xtype_____@ *dgc = isvd_@x@malloc(l * l); isvd_int_t lddgc = l; // matrix C & inv(C) - @xtype@ *c = isvd_@x@malloc(l2 * l2); + @xtype_____@ *c = isvd_@x@malloc(l2 * l2); isvd_int_t ldc = l2; 
// matrix inv(C)_?? - @xtype@ *c11 = c; - @xtype@ *c21 = c + l; - @xtype@ *c12 = c + ldc*l; - @xtype@ *c22 = c + l + ldc*l; + @xtype_____@ *c11 = c; + @xtype_____@ *c21 = c + l; + @xtype_____@ *c12 = c + ldc*l; + @xtype_____@ *c22 = c + l + ldc*l; // matrix inv(C)_#? - @xtype@ *cs1 = c11; - @xtype@ *cs2 = c12; + @xtype_____@ *cs1 = c11; + @xtype_____@ *cs2 = c12; // matrix Fc - @xtype@ *fc = c21; + @xtype_____@ *fc = c21; isvd_int_t ldfc = ldc; // matrix Fgc - @xtype@ *fgc = c11; + @xtype_____@ *fgc = c11; isvd_int_t ldfgc = ldc; - @xtype@ *tmp; + @xtype_____@ *tmp; - @xtype@ taug, zeta, phi, mu; + @xtype_____@ taug, zeta, phi, mu; // ====================================================================================================================== // // Initializing @@ -188,7 +199,7 @@ void isvd_@x@IntegrateWenYin( // Bc := Qs' * Qc isvd_@x@Gemm('N', 'T', Nl, l, mj, 1.0, qst, ldqst, qct, ldqct, 0.0, bc, ldbc); - MPI_Allreduce(MPI_IN_PLACE, bc, ldbc*l, MPI_@X_TYPE@, MPI_SUM, param.mpi_comm); + MPI_Allreduce(MPI_IN_PLACE, bc, ldbc*l, MPI_@XTYPE@, MPI_SUM, param.mpi_comm); // Dc := 1/N * Bc' * Bc isvd_@x@Gemm('T', 'N', l, l, Nl, 1.0/N, bc, ldbc, bc, ldbc, 0.0, dc, lddc); @@ -198,7 +209,7 @@ void isvd_@x@IntegrateWenYin( // Bgc := Qs' * Gc isvd_@x@Gemm('N', 'T', Nl, l, mj, 1.0, qst, ldqst, gct, ldgct, 0.0, bgc, ldbgc); - MPI_Allreduce(MPI_IN_PLACE, bgc, ldbgc*l, MPI_@X_TYPE@, MPI_SUM, param.mpi_comm); + MPI_Allreduce(MPI_IN_PLACE, bgc, ldbgc*l, MPI_@XTYPE@, MPI_SUM, param.mpi_comm); // Dgc := 1/N * Bc' * Bgc isvd_@x@Gemm('T', 'N', l, l, Nl, 1.0/N, bc, ldbc, bgc, ldbgc, 0.0, dgc, lddgc); @@ -224,7 +235,7 @@ void isvd_@x@IntegrateWenYin( // ================================================================================================================== // // Find step size - @xtype@ tau = taug, phit = phi; + @xtype_____@ tau = taug, phit = phi; for ( isvd_int_t tauiter = 1; tauiter <= taumaxit; ++tauiter, tau *= beta ) { // C := [ Dc/2 - I/tau , I/2 ; @@ -285,7 
+296,7 @@ void isvd_@x@IntegrateWenYin( // Bg+ [in Bgc] := Qs' * G+ [in Gc] isvd_@x@Gemm('N', 'T', Nl, l, mj, 1.0, qst, ldqst, gct, ldgct, 0.0, bgc, ldbgc); - MPI_Allreduce(MPI_IN_PLACE, bgc, ldbgc*l, MPI_@X_TYPE@, MPI_SUM, param.mpi_comm); + MPI_Allreduce(MPI_IN_PLACE, bgc, ldbgc*l, MPI_@XTYPE@, MPI_SUM, param.mpi_comm); // Dg+ [in Dgc] := 1/N * B+' * Bg+ [in Bgc] isvd_@x@Gemm('T', 'N', l, l, Nl, 1.0/N, bp, ldbp, bgc, ldbgc, 0.0, dgc, lddgc); @@ -317,7 +328,7 @@ void isvd_@x@IntegrateWenYin( isvd_v@x@Sub(mj*ldxct, xct, xpt, xct); // Update taug - @xtype@ t[2]; + @xtype_____@ t[2]; if ( iter % 2 ) { t[0] = isvd_@x@Dot(mj*ldqct, qct, 1, qct, 1); @@ -327,7 +338,7 @@ void isvd_@x@IntegrateWenYin( t[1] = isvd_@x@Dot(mj*ldxct, xct, 1, xct, 1); } - MPI_Allreduce(MPI_IN_PLACE, t, 2, MPI_@X_TYPE@, MPI_SUM, param.mpi_comm); + MPI_Allreduce(MPI_IN_PLACE, t, 2, MPI_@XTYPE@, MPI_SUM, param.mpi_comm); taug = fabs(t[0]/t[1]); if ( taug < taumin ) { taug = taumin; } diff --git a/src/libisvd/core/stage/@x@_orthogonalize_gramian.c b/src/lib/libisvd/core/stage/@x@_orthogonalize_gramian.c similarity index 78% rename from src/libisvd/core/stage/@x@_orthogonalize_gramian.c rename to src/lib/libisvd/core/stage/@x@_orthogonalize_gramian.c index 64ae42f..07d8591 100644 --- a/src/libisvd/core/stage/@x@_orthogonalize_gramian.c +++ b/src/lib/libisvd/core/stage/@x@_orthogonalize_gramian.c @@ -1,6 +1,6 @@ //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -/// \file src/libisvd/core/stage/@x@_orthogonalize_gramian.c -/// \brief The Gramian Orthogonalization (@xname@ precision) +/// \file lib/libisvd/core/stage/@x@_orthogonalize_gramian.c +/// \brief The Gramian Orthogonalization (@xname@ precision). 
/// /// \author Mu Yang <> /// \copyright MIT License @@ -10,10 +10,11 @@ #include #include #include +#include //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// /// \ingroup c_core_@x@_stage_module -/// \brief Gramian Orthogonalization (@xname@ precision) +/// \brief Gramian Orthogonalization (@xname@ precision). /// /// \param[in] param The \ref isvd_Param "parameters". /// \param[in] argv, argc The arguments and its length. (not using) @@ -27,22 +28,18 @@ /// the routine only returns the first \b retc default arguments in \b retv. /// void isvd_@x@OrthogonalizeGramian( - const isvd_Param param, - const @xtype@ *argv, - const isvd_int_t argc, - @xtype@ *retv, - const isvd_int_t retc, - @xtype@ *yst, - const isvd_int_t ldyst + const isvd_Param param, + const @xtype_____@ *argv, + const isvd_int_t argc, + @xtype_____@ *retv, + const isvd_int_t retc, + @xtype_____@ *yst, + const isvd_int_t ldyst ) { - if ( argc < 0 ) { - return; - } - - ISVD_UNUSED(argv); - ISVD_UNUSED(retv); - ISVD_UNUSED(retc); + if ( argc > 0 ) { isvd_assert_ne(argv, nullptr); } + if ( retc > 0 ) { isvd_assert_ne(retv, nullptr); } + if ( argc < 0 ) return; // ====================================================================================================================== // // Get parameters @@ -60,13 +57,13 @@ void isvd_@x@OrthogonalizeGramian( // ====================================================================================================================== // // Allocate memory - @xtype@ *yst_ = isvd_@x@malloc(ldyst * mj); + @xtype_____@ *yst_ = isvd_@x@malloc(ldyst * mj); isvd_int_t ldyst_ = ldyst; - @xtype@ *w = isvd_@x@malloc(l * Nl); + @xtype_____@ *w = isvd_@x@malloc(l * Nl); isvd_int_t ldw = l; - @xtype@ *s = isvd_@x@malloc(l * N); + @xtype_____@ *s = isvd_@x@malloc(l * N); isvd_int_t lds = l; // 
====================================================================================================================== // @@ -76,11 +73,11 @@ void isvd_@x@OrthogonalizeGramian( for ( isvd_int_t i = 0; i < N; ++i ) { isvd_@x@Gemm('N', 'T', l, l, mj, 1.0, yst + i*l, ldyst, yst + i*l, ldyst, 0.0, w + i*ldw*l, ldw); } - MPI_Allreduce(MPI_IN_PLACE, w, ldw*Nl, MPI_@X_TYPE@, MPI_SUM, param.mpi_comm); + MPI_Allreduce(MPI_IN_PLACE, w, ldw*Nl, MPI_@XTYPE@, MPI_SUM, param.mpi_comm); // eig(Wi) = Wi * Si^2 * Wi' for ( isvd_int_t i = 0; i < N; ++i ) { - isvd_@x@Gesvd('O', 'N', l, l, w + i*ldw*l, ldw, s + i*lds, nullptr, 1, nullptr, 1); + isvd_@x@Gesvd('O', 'N', l, l, w + i*ldw*l, ldw, s + i*lds, NULL, 1, NULL, 1); } isvd_v@x@Sqrt(lds*N, s, s); diff --git a/src/libisvd/core/stage/@x@_orthogonalize_tall_skinny_qr.c b/src/lib/libisvd/core/stage/@x@_orthogonalize_tall_skinny_qr.c similarity index 69% rename from src/libisvd/core/stage/@x@_orthogonalize_tall_skinny_qr.c rename to src/lib/libisvd/core/stage/@x@_orthogonalize_tall_skinny_qr.c index 6a065ea..6df2d46 100644 --- a/src/libisvd/core/stage/@x@_orthogonalize_tall_skinny_qr.c +++ b/src/lib/libisvd/core/stage/@x@_orthogonalize_tall_skinny_qr.c @@ -1,6 +1,6 @@ //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -/// \file src/libisvd/core/stage/@x@_orthogonalize_tall_skinny_qr.c -/// \brief The Tall-Skinny QR Orthogonalization (@xname@ precision) +/// \file lib/libisvd/core/stage/@x@_orthogonalize_tall_skinny_qr.c +/// \brief The Tall-Skinny QR Orthogonalization (@xname@ precision). 
/// /// \author Mu Yang <> /// \copyright MIT License @@ -10,10 +10,11 @@ #include #include #include +#include //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// /// \ingroup c_core_@x@_stage_module -/// \brief Tall-Skinny QR Orthogonalization (@xname@ precision) +/// \brief Tall-Skinny QR Orthogonalization (@xname@ precision). /// /// \attention Not implemented! /// @@ -21,25 +22,22 @@ /// the routine only returns the first \b retc default arguments in \b retv. /// void isvd_@x@OrthogonalizeTallSkinnyQr( - const isvd_Param param, - const @xtype@ *argv, - const isvd_int_t argc, - @xtype@ *retv, - const isvd_int_t retc, - @xtype@ *yst, - const isvd_int_t ldyst + const isvd_Param param, + const @xtype_____@ *argv, + const isvd_int_t argc, + @xtype_____@ *retv, + const isvd_int_t retc, + @xtype_____@ *yst, + const isvd_int_t ldyst ) { fprintf(stderr, "Tall-Skinny QR Orthogonalization is not implemented!\n"); - if ( argc < 0 ) { - return; - } + if ( argc > 0 ) { isvd_assert_ne(argv, nullptr); } + if ( retc > 0 ) { isvd_assert_ne(retv, nullptr); } + if ( argc < 0 ) return; ISVD_UNUSED(param); - ISVD_UNUSED(argv); - ISVD_UNUSED(retv); - ISVD_UNUSED(retc); ISVD_UNUSED(yst); ISVD_UNUSED(ldyst); diff --git a/src/libisvd/core/stage/@x@_postprocess.h b/src/lib/libisvd/core/stage/@x@_postprocess.h similarity index 80% rename from src/libisvd/core/stage/@x@_postprocess.h rename to src/lib/libisvd/core/stage/@x@_postprocess.h index a42cb74..ffc9229 100644 --- a/src/libisvd/core/stage/@x@_postprocess.h +++ b/src/lib/libisvd/core/stage/@x@_postprocess.h @@ -1,6 +1,6 @@ //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -/// \file src/libisvd/core/stage/@x@_postprocess.h -/// \brief The Postprocessing utilities (@xname@ precision) +/// \file lib/libisvd/core/stage/@x@_postprocess.h +/// \brief The Postprocessing utilities 
(@xname@ precision). /// /// \author Mu Yang <> /// \copyright MIT License @@ -12,21 +12,22 @@ #include #include #include +#include #if !defined(DOXYGEN_SHOULD_SKIP_THIS) static void projectBlockCol( - const isvd_Param param, - const char ordera, - const @xtype@ *a, - const isvd_int_t lda, - const @xtype@ *qt, - const isvd_int_t ldqt, - @xtype@ *zt, - const isvd_int_t ldzt, - @xtype@ *s, - @xtype@ *ut, - const isvd_int_t ldut, - const mpi_int_t ut_root + const isvd_Param param, + const char ordera, + const @xtype_____@ *a, + const isvd_int_t lda, + const @xtype_____@ *qt, + const isvd_int_t ldqt, + @xtype_____@ *zt, + const isvd_int_t ldzt, + @xtype_____@ *s, + @xtype_____@ *ut, + const isvd_int_t ldut, + const mpi_int_t ut_root ) { ISVD_UNUSED(s); @@ -61,7 +62,7 @@ static void projectBlockCol( // ====================================================================================================================== // // Allocate memory - @xtype@ *qt_; + @xtype_____@ *qt_; if ( use_ut ) { qt_ = ut; } else { @@ -72,7 +73,7 @@ static void projectBlockCol( // ====================================================================================================================== // // Rearrange - MPI_Allgather(qt, mb*ldqt, MPI_@X_TYPE@, qt_, mb*ldqt, MPI_@X_TYPE@, param.mpi_comm); + MPI_Allgather(qt, mb*ldqt, MPI_@XTYPE@, qt_, mb*ldqt, MPI_@XTYPE@, param.mpi_comm); // ====================================================================================================================== // // Project @@ -91,18 +92,18 @@ static void projectBlockCol( } static void projectBlockRow( - const isvd_Param param, - const char ordera, - const @xtype@ *a, - const isvd_int_t lda, - const @xtype@ *qt, - const isvd_int_t ldqt, - @xtype@ *zt, - const isvd_int_t ldzt, - @xtype@ *s, - @xtype@ *vt, - const isvd_int_t ldvt, - const mpi_int_t vt_root + const isvd_Param param, + const char ordera, + const @xtype_____@ *a, + const isvd_int_t lda, + const @xtype_____@ *qt, + const isvd_int_t 
ldqt, + @xtype_____@ *zt, + const isvd_int_t ldzt, + @xtype_____@ *s, + @xtype_____@ *vt, + const isvd_int_t ldvt, + const mpi_int_t vt_root ) { ISVD_UNUSED(s); @@ -137,7 +138,7 @@ static void projectBlockRow( // ====================================================================================================================== // // Allocate memory - @xtype@ *zt_; + @xtype_____@ *zt_; if ( use_vt ) { zt_ = vt; } else { @@ -155,7 +156,7 @@ static void projectBlockRow( // ====================================================================================================================== // // Rearrange - MPI_Reduce_scatter_block(zt_, zt, nb*ldzt, MPI_@X_TYPE@, MPI_SUM, param.mpi_comm); + MPI_Reduce_scatter_block(zt_, zt, nb*ldzt, MPI_@XTYPE@, MPI_SUM, param.mpi_comm); // ====================================================================================================================== // // Deallocate memory diff --git a/src/libisvd/core/stage/@x@_postprocess_gramian.c b/src/lib/libisvd/core/stage/@x@_postprocess_gramian.c similarity index 77% rename from src/libisvd/core/stage/@x@_postprocess_gramian.c rename to src/lib/libisvd/core/stage/@x@_postprocess_gramian.c index 70c0528..f3ebb3d 100644 --- a/src/libisvd/core/stage/@x@_postprocess_gramian.c +++ b/src/lib/libisvd/core/stage/@x@_postprocess_gramian.c @@ -1,6 +1,6 @@ //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -/// \file src/libisvd/core/stage/@x@_postprocess_gramian.c -/// \brief The Gramian Postprocessing (@xname@ precision) +/// \file lib/libisvd/core/stage/@x@_postprocess_gramian.c +/// \brief The Gramian Postprocessing (@xname@ precision). 
/// /// \author Mu Yang <> /// \copyright MIT License @@ -15,7 +15,7 @@ //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// /// \ingroup c_core_@x@_stage_module -/// \brief Gramian Postprocessing (@xname@ precision) +/// \brief Gramian Postprocessing (@xname@ precision). /// /// \param[in] param The \ref isvd_Param "parameters". /// \param[in] argv, argc The arguments and its length. (not using) @@ -27,9 +27,9 @@ /// \param[in] ordera The storage ordering of 𝑨.
/// `'C'`: column-major ordering.
/// `'R'`: row-major ordering. -/// \param[in] a, lda The column/row-block 𝑨 (\f$m \times n_j\f$) and its leading dimension.
-/// \b dista = `'C'`: the size must be \f$m \times n_j\f$.
-/// \b dista = `'R'`: the size must be \f$m_j \times n\f$. +/// \param[in] a, lda The column/row-block 𝑨 (\f$m \times n^{(j)}\f$) and its leading dimension.
+/// \b dista = `'C'`: the size must be \f$m \times n^{(j)}\f$.
+/// \b dista = `'R'`: the size must be \f$m^{(j)} \times n\f$. /// \param[in] qt, ldqt The row-block 𝑸 (\f$ m_b \times l \f$, row-major) and its leading dimension. /// \param[in] s The vector 𝝈 (\f$k \times 1\f$). /// \param[in] ut, ldut The matrix 𝑼 (row-major) and its leading dimension.
@@ -57,33 +57,29 @@ /// the routine only returns the first \b retc default arguments in \b retv. /// void isvd_@x@PostprocessGramian( - const isvd_Param param, - const @xtype@ *argv, - const isvd_int_t argc, - @xtype@ *retv, - const isvd_int_t retc, - const char dista, - const char ordera, - const @xtype@ *a, - const isvd_int_t lda, - const @xtype@ *qt, - const isvd_int_t ldqt, - @xtype@ *s, - @xtype@ *ut, - const isvd_int_t ldut, - @xtype@ *vt, - const isvd_int_t ldvt, - const mpi_int_t ut_root, - const mpi_int_t vt_root + const isvd_Param param, + const @xtype_____@ *argv, + const isvd_int_t argc, + @xtype_____@ *retv, + const isvd_int_t retc, + const char dista, + const char ordera, + const @xtype_____@ *a, + const isvd_int_t lda, + const @xtype_____@ *qt, + const isvd_int_t ldqt, + @xtype_____@ *s, + @xtype_____@ *ut, + const isvd_int_t ldut, + @xtype_____@ *vt, + const isvd_int_t ldvt, + const mpi_int_t ut_root, + const mpi_int_t vt_root ) { - if ( argc < 0 ) { - return; - } - - ISVD_UNUSED(argv); - ISVD_UNUSED(retv); - ISVD_UNUSED(retc); + if ( argc > 0 ) { isvd_assert_ne(argv, nullptr); } + if ( retc > 0 ) { isvd_assert_ne(retv, nullptr); } + if ( argc < 0 ) return; // ====================================================================================================================== // // Get parameters @@ -98,8 +94,8 @@ void isvd_@x@PostprocessGramian( // ====================================================================================================================== // // Check arguments - const char dista_ = isvd_arg2char("DISTA", dista, "CR", nullptr); - const char ordera_ = isvd_arg2char("ORDERA", ordera, "CR", nullptr); + const char dista_ = isvd_arg2char("DISTA", dista, "CR", NULL); + const char ordera_ = isvd_arg2char("ORDERA", ordera, "CR", NULL); if ( !dista_ || !ordera_ ) return; if ( ut_root >= 0 ) { @@ -117,10 +113,10 @@ void isvd_@x@PostprocessGramian( // 
====================================================================================================================== // // Allocate memory - @xtype@ *zt = isvd_@x@malloc(l * nb); + @xtype_____@ *zt = isvd_@x@malloc(l * nb); isvd_int_t ldzt = l; - @xtype@ *w = isvd_@x@malloc(l * l); + @xtype_____@ *w = isvd_@x@malloc(l * l); isvd_int_t ldw = l; // ====================================================================================================================== // @@ -137,11 +133,11 @@ void isvd_@x@PostprocessGramian( // W := Z' * Z isvd_@x@Gemm('N', 'T', l, l, nj, 1.0, zt, ldzt, zt, ldzt, 0.0, w, ldw); - MPI_Allreduce(MPI_IN_PLACE, w, ldw*l, MPI_@X_TYPE@, MPI_SUM, param.mpi_comm); + MPI_Allreduce(MPI_IN_PLACE, w, ldw*l, MPI_@XTYPE@, MPI_SUM, param.mpi_comm); // eig(W) = W * S^2 * W' const char jobw_ = (ut_root >= -1 || vt_root >= -1) ? 'O' : 'N'; - isvd_@x@Gesvd(jobw_, 'N', l, l, w, ldw, s, nullptr, 1, nullptr, 1); + isvd_@x@Gesvd(jobw_, 'N', l, l, w, ldw, s, NULL, 1, NULL, 1); isvd_v@x@Sqrt(l, s, s); // ====================================================================================================================== // @@ -153,9 +149,9 @@ void isvd_@x@PostprocessGramian( if ( ut_root >= 0 ) { if ( param.mpi_rank == ut_root ) { - MPI_Gather(MPI_IN_PLACE, mb*ldut, MPI_@X_TYPE@, ut, mb*ldut, MPI_@X_TYPE@, ut_root, param.mpi_comm); + MPI_Gather(MPI_IN_PLACE, mb*ldut, MPI_@XTYPE@, ut, mb*ldut, MPI_@XTYPE@, ut_root, param.mpi_comm); } else { - MPI_Gather(ut, mb*ldut, MPI_@X_TYPE@, nullptr, mb*ldut, MPI_@X_TYPE@, ut_root, param.mpi_comm); + MPI_Gather(ut, mb*ldut, MPI_@XTYPE@, NULL, mb*ldut, MPI_@XTYPE@, ut_root, param.mpi_comm); } } } @@ -167,9 +163,9 @@ void isvd_@x@PostprocessGramian( if ( vt_root >= 0 ) { if ( param.mpi_rank == vt_root ) { - MPI_Gather(MPI_IN_PLACE, nb*ldvt, MPI_@X_TYPE@, vt, nb*ldvt, MPI_@X_TYPE@, vt_root, param.mpi_comm); + MPI_Gather(MPI_IN_PLACE, nb*ldvt, MPI_@XTYPE@, vt, nb*ldvt, MPI_@XTYPE@, vt_root, param.mpi_comm); } else { - 
MPI_Gather(vt, nb*ldvt, MPI_@X_TYPE@, nullptr, nb*ldvt, MPI_@X_TYPE@, vt_root, param.mpi_comm); + MPI_Gather(vt, nb*ldvt, MPI_@XTYPE@, NULL, nb*ldvt, MPI_@XTYPE@, vt_root, param.mpi_comm); } } } diff --git a/src/libisvd/core/stage/@x@_postprocess_symmetric.c b/src/lib/libisvd/core/stage/@x@_postprocess_symmetric.c similarity index 80% rename from src/libisvd/core/stage/@x@_postprocess_symmetric.c rename to src/lib/libisvd/core/stage/@x@_postprocess_symmetric.c index a862634..c34828c 100644 --- a/src/libisvd/core/stage/@x@_postprocess_symmetric.c +++ b/src/lib/libisvd/core/stage/@x@_postprocess_symmetric.c @@ -1,6 +1,6 @@ //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -/// \file src/libisvd/core/stage/@x@_postprocess_symmetric.c -/// \brief The Symmetric Postprocessing (@xname@ precision) +/// \file lib/libisvd/core/stage/@x@_postprocess_symmetric.c +/// \brief The Symmetric Postprocessing (@xname@ precision). /// /// \author Mu Yang <> /// \copyright MIT License @@ -15,7 +15,7 @@ //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// /// \ingroup c_core_@x@_stage_module -/// \brief Symmetric Postprocessing (@xname@ precision) +/// \brief Symmetric Postprocessing (@xname@ precision). /// /// \param[in] param The \ref isvd_Param "parameters". /// \param[in] argv, argc The arguments and its length. (not using) @@ -27,9 +27,9 @@ /// \param[in] ordera The storage ordering of 𝑨.
/// `'C'`: column-major ordering.
/// `'R'`: row-major ordering. -/// \param[in] a, lda The column/row-block 𝑨 (\f$m \times n_j\f$) and its leading dimension.
-/// \b dista = `'C'`: the size must be \f$m \times n_j\f$.
-/// \b dista = `'R'`: the size must be \f$m_j \times n\f$. +/// \param[in] a, lda The column/row-block 𝑨 (\f$m \times n^{(j)}\f$) and its leading dimension.
+/// \b dista = `'C'`: the size must be \f$m \times n^{(j)}\f$.
+/// \b dista = `'R'`: the size must be \f$m^{(j)} \times n\f$. /// \param[in] qt, ldqt The row-block 𝑸 (\f$ m_b \times l \f$, row-major) and its leading dimension. /// \param[in] s The vector 𝝈 (\f$k \times 1\f$). /// \param[in] ut, ldut The matrix 𝑼 (row-major) and its leading dimension.
@@ -57,33 +57,30 @@ /// \note The result of 𝑼 and 𝑽 are the same. /// void isvd_@x@PostprocessSymmetric( - const isvd_Param param, - const @xtype@ *argv, - const isvd_int_t argc, - @xtype@ *retv, - const isvd_int_t retc, - const char dista, - const char ordera, - const @xtype@ *a, - const isvd_int_t lda, - const @xtype@ *qt, - const isvd_int_t ldqt, - @xtype@ *s, - @xtype@ *ut, - const isvd_int_t ldut, - @xtype@ *vt, - const isvd_int_t ldvt, - const mpi_int_t ut_root, - const mpi_int_t vt_root + const isvd_Param param, + const @xtype_____@ *argv, + const isvd_int_t argc, + @xtype_____@ *retv, + const isvd_int_t retc, + const char dista, + const char ordera, + const @xtype_____@ *a, + const isvd_int_t lda, + const @xtype_____@ *qt, + const isvd_int_t ldqt, + @xtype_____@ *s, + @xtype_____@ *ut, + const isvd_int_t ldut, + @xtype_____@ *vt, + const isvd_int_t ldvt, + const mpi_int_t ut_root, + const mpi_int_t vt_root ) { - if ( argc < 0 ) { - return; - } + if ( argc > 0 ) { isvd_assert_ne(argv, nullptr); } + if ( retc > 0 ) { isvd_assert_ne(retv, nullptr); } + if ( argc < 0 ) return; - ISVD_UNUSED(argv); - ISVD_UNUSED(retv); - ISVD_UNUSED(retc); ISVD_UNUSED(vt); ISVD_UNUSED(ldvt); @@ -100,8 +97,8 @@ void isvd_@x@PostprocessSymmetric( // ====================================================================================================================== // // Check arguments - const char dista_ = isvd_arg2char("DISTA", dista, "CR", nullptr); - const char ordera_ = isvd_arg2char("ORDERA", ordera, "CR", nullptr); + const char dista_ = isvd_arg2char("DISTA", dista, "CR", NULL); + const char ordera_ = isvd_arg2char("ORDERA", ordera, "CR", NULL); if ( vt_root >= -1 ) { fprintf(stderr, "VT_ROOT must not be set!"); return; @@ -120,10 +117,10 @@ void isvd_@x@PostprocessSymmetric( // ====================================================================================================================== // // Allocate memory - @xtype@ *zt = isvd_@x@malloc(l * nb); + @xtype_____@ 
*zt = isvd_@x@malloc(l * nb); isvd_int_t ldzt = l; - @xtype@ *w = isvd_@x@malloc(l * l); + @xtype_____@ *w = isvd_@x@malloc(l * l); isvd_int_t ldw = l; // ====================================================================================================================== // @@ -140,7 +137,7 @@ void isvd_@x@PostprocessSymmetric( // W := Z' * Q isvd_@x@Gemmt('U', 'N', 'T', l, nj, 1.0, zt, ldzt, qt, ldqt, 0.0, w, ldw); - MPI_Allreduce(MPI_IN_PLACE, w, ldw*l, MPI_@X_TYPE@, MPI_SUM, param.mpi_comm); + MPI_Allreduce(MPI_IN_PLACE, w, ldw*l, MPI_@XTYPE@, MPI_SUM, param.mpi_comm); // eig(W) = W * S * W' const char jobw_ = (ut_root >= -1) ? 'V' : 'N'; @@ -155,9 +152,9 @@ void isvd_@x@PostprocessSymmetric( if ( ut_root >= 0 ) { if ( param.mpi_rank == ut_root ) { - MPI_Gather(MPI_IN_PLACE, mb*ldut, MPI_@X_TYPE@, ut, mb*ldut, MPI_@X_TYPE@, ut_root, param.mpi_comm); + MPI_Gather(MPI_IN_PLACE, mb*ldut, MPI_@XTYPE@, ut, mb*ldut, MPI_@XTYPE@, ut_root, param.mpi_comm); } else { - MPI_Gather(ut, mb*ldut, MPI_@X_TYPE@, nullptr, mb*ldut, MPI_@X_TYPE@, ut_root, param.mpi_comm); + MPI_Gather(ut, mb*ldut, MPI_@XTYPE@, NULL, mb*ldut, MPI_@XTYPE@, ut_root, param.mpi_comm); } } } diff --git a/src/libisvd/core/stage/@x@_postprocess_tall_skinny_qr.c b/src/lib/libisvd/core/stage/@x@_postprocess_tall_skinny_qr.c similarity index 62% rename from src/libisvd/core/stage/@x@_postprocess_tall_skinny_qr.c rename to src/lib/libisvd/core/stage/@x@_postprocess_tall_skinny_qr.c index da5bbe9..3276561 100644 --- a/src/libisvd/core/stage/@x@_postprocess_tall_skinny_qr.c +++ b/src/lib/libisvd/core/stage/@x@_postprocess_tall_skinny_qr.c @@ -1,6 +1,6 @@ //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -/// \file src/libisvd/core/stage/@x@_postprocess_tall_skinny_qr.c -/// \brief The Tall-Skinny QR Postprocessing (@xname@ precision) +/// \file lib/libisvd/core/stage/@x@_postprocess_tall_skinny_qr.c +/// \brief The Tall-Skinny QR 
Postprocessing (@xname@ precision). /// /// \author Mu Yang <> /// \copyright MIT License @@ -15,7 +15,7 @@ //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// /// \ingroup c_core_@x@_stage_module -/// \brief Tall-Skinny QR Postprocessing (@xname@ precision) +/// \brief Tall-Skinny QR Postprocessing (@xname@ precision). /// /// \attention Not implemented! /// @@ -23,36 +23,33 @@ /// the routine only returns the first \b retc default arguments in \b retv. /// void isvd_@x@PostprocessTallSkinnyQr( - const isvd_Param param, - const @xtype@ *argv, - const isvd_int_t argc, - @xtype@ *retv, - const isvd_int_t retc, - const char dista, - const char ordera, - const @xtype@ *a, - const isvd_int_t lda, - const @xtype@ *qt, - const isvd_int_t ldqt, - @xtype@ *s, - @xtype@ *ut, - const isvd_int_t ldut, - @xtype@ *vt, - const isvd_int_t ldvt, - const mpi_int_t ut_root, - const mpi_int_t vt_root + const isvd_Param param, + const @xtype_____@ *argv, + const isvd_int_t argc, + @xtype_____@ *retv, + const isvd_int_t retc, + const char dista, + const char ordera, + const @xtype_____@ *a, + const isvd_int_t lda, + const @xtype_____@ *qt, + const isvd_int_t ldqt, + @xtype_____@ *s, + @xtype_____@ *ut, + const isvd_int_t ldut, + @xtype_____@ *vt, + const isvd_int_t ldvt, + const mpi_int_t ut_root, + const mpi_int_t vt_root ) { fprintf(stderr, "Tall-Skinny QR Postprocessing is not implemented!\n"); - if ( argc < 0 ) { - return; - } + if ( argc > 0 ) { isvd_assert_ne(argv, nullptr); } + if ( retc > 0 ) { isvd_assert_ne(retv, nullptr); } + if ( argc < 0 ) return; ISVD_UNUSED(param); - ISVD_UNUSED(argv); - ISVD_UNUSED(retv); - ISVD_UNUSED(retc); ISVD_UNUSED(dista); ISVD_UNUSED(ordera); ISVD_UNUSED(a); diff --git a/src/libisvd/core/stage/@x@_sketch.h b/src/lib/libisvd/core/stage/@x@_sketch.h similarity index 69% rename from src/libisvd/core/stage/@x@_sketch.h rename to src/lib/libisvd/core/stage/@x@_sketch.h index 
d594b76..a8a61ba 100644 --- a/src/libisvd/core/stage/@x@_sketch.h +++ b/src/lib/libisvd/core/stage/@x@_sketch.h @@ -1,6 +1,6 @@ //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -/// \file src/libisvd/core/stage/@x@_sketch.h -/// \brief The Sketching utilities (@xname@ precision) +/// \file lib/libisvd/core/stage/@x@_sketch.h +/// \brief The Sketching utilities (@xname@ precision). /// /// \author Mu Yang <> /// \copyright MIT License @@ -13,18 +13,19 @@ #include #include #include +#include #include #if !defined(DOXYGEN_SHOULD_SKIP_THIS) static void sketchBlockCol( - const isvd_Param param, - const char ordera, - const @xtype@ *a, - const isvd_int_t lda, - @xtype@ *yst, - const isvd_int_t ldyst, - const isvd_int_t seed, - const mpi_int_t mpi_root + const isvd_Param param, + const char ordera, + const @xtype_____@ *a, + const isvd_int_t lda, + @xtype_____@ *yst, + const isvd_int_t ldyst, + const isvd_int_t seed, + const mpi_int_t mpi_root ) { ISVD_UNUSED(ldyst); @@ -52,10 +53,10 @@ static void sketchBlockCol( // ====================================================================================================================== // // Allocate memory - @xtype@ *omegat = isvd_@x@malloc(Nl * nj); + @xtype_____@ *omegat = isvd_@x@malloc(Nl * nj); isvd_int_t ldomegat = Nl; - @xtype@ *yst_ = isvd_@x@malloc(Nl * Pmb); + @xtype_____@ *yst_ = isvd_@x@malloc(Nl * Pmb); isvd_int_t ldyst_ = Nl; // ====================================================================================================================== // @@ -63,26 +64,7 @@ static void sketchBlockCol( isvd_int_t seed_ = seed; MPI_Bcast(&seed_, sizeof(seed_), MPI_BYTE, mpi_root, param.mpi_comm); - - - ISVD_OMP_PARALLEL - { - omp_int_t omp_size = isvd_getOmpSize(); - omp_int_t omp_rank = isvd_getOmpRank(); - - isvd_int_t len = nj * Nl / omp_size; - isvd_int_t start = len * omp_rank; - if ( omp_rank == omp_size-1 ) { - len = nj * Nl - 
start; - } - - isvd_VSLStreamStatePtr stream; - isvd_vslNewStream(&stream, seed_); - isvd_vslSkipAheadStream(stream, (Nl * nb * param.mpi_rank + start) * 2); - isvd_v@x@RngGaussian(stream, len, omegat + start, 0.0, 1.0); - - isvd_vslDeleteStream(&stream); - } + isvd_v@x@RngGaussianDriver(seed_, Nl * nb * param.mpi_rank, nj * Nl, omegat, 0.0, 1.0); // ====================================================================================================================== // // Project @@ -94,7 +76,7 @@ static void sketchBlockCol( // ====================================================================================================================== // // Rearrange - MPI_Reduce_scatter_block(yst_, yst, mb*ldyst_, MPI_@X_TYPE@, MPI_SUM, param.mpi_comm); + MPI_Reduce_scatter_block(yst_, yst, mb*ldyst_, MPI_@XTYPE@, MPI_SUM, param.mpi_comm); // ====================================================================================================================== // // Deallocate memory @@ -105,14 +87,14 @@ static void sketchBlockCol( } static void sketchBlockRow( - const isvd_Param param, - const char ordera, - const @xtype@ *a, - const isvd_int_t lda, - @xtype@ *yst, - const isvd_int_t ldyst, - const isvd_int_t seed, - const mpi_int_t mpi_root + const isvd_Param param, + const char ordera, + const @xtype_____@ *a, + const isvd_int_t lda, + @xtype_____@ *yst, + const isvd_int_t ldyst, + const isvd_int_t seed, + const mpi_int_t mpi_root ) { // ====================================================================================================================== // @@ -135,7 +117,7 @@ static void sketchBlockRow( // ====================================================================================================================== // // Allocate memory - @xtype@ *omegat = isvd_@x@malloc(Nl * n); + @xtype_____@ *omegat = isvd_@x@malloc(Nl * n); isvd_int_t ldomegat = Nl; // 
====================================================================================================================== // @@ -143,25 +125,7 @@ static void sketchBlockRow( isvd_int_t seed_ = seed; MPI_Bcast(&seed_, sizeof(isvd_VSLStreamStatePtr), MPI_BYTE, mpi_root, param.mpi_comm); - - ISVD_OMP_PARALLEL - { - omp_int_t omp_size = isvd_getOmpSize(); - omp_int_t omp_rank = isvd_getOmpRank(); - - isvd_int_t len = n * Nl / omp_size; - isvd_int_t start = len * omp_rank; - if ( omp_rank == omp_size-1 ) { - len = n * Nl - start; - } - - isvd_VSLStreamStatePtr stream; - isvd_vslNewStream(&stream, seed_); - isvd_vslSkipAheadStream(stream, start * 2); - isvd_v@x@RngGaussian(stream, len, omegat + start, 0.0, 1.0); - - isvd_vslDeleteStream(&stream); - } + isvd_v@x@RngGaussianDriver(seed_, 0, n * Nl, omegat, 0.0, 1.0); // ====================================================================================================================== // // Project diff --git a/src/libisvd/core/stage/@x@_sketch_gaussian_projection.c b/src/lib/libisvd/core/stage/@x@_sketch_gaussian_projection.c similarity index 74% rename from src/libisvd/core/stage/@x@_sketch_gaussian_projection.c rename to src/lib/libisvd/core/stage/@x@_sketch_gaussian_projection.c index 44feacf..8e9ffc1 100644 --- a/src/libisvd/core/stage/@x@_sketch_gaussian_projection.c +++ b/src/lib/libisvd/core/stage/@x@_sketch_gaussian_projection.c @@ -1,6 +1,6 @@ //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -/// \file src/libisvd/core/stage/@x@_sketch_gaussian_projection.c -/// \brief The Gaussian Projection Sketching (@xname@ precision) +/// \file lib/libisvd/core/stage/@x@_sketch_gaussian_projection.c +/// \brief The Gaussian Projection Sketching (@xname@ precision). 
/// /// \author Mu Yang <> /// \copyright MIT License @@ -14,7 +14,7 @@ //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// /// \ingroup c_core_@x@_stage_module -/// \brief Gaussian Projection Sketching (@xname@ precision) +/// \brief Gaussian Projection Sketching (@xname@ precision). /// /// \param[in] param The \ref isvd_Param "parameters". /// \param[in] argv, argc The arguments and its length. (not using) @@ -26,9 +26,9 @@ /// \param[in] ordera The storage ordering of 𝑨.
/// `'C'`: column-major ordering.
/// `'R'`: row-major ordering. -/// \param[in] a, lda The column/row-block 𝑨 (\f$m \times n_j\f$) and its leading dimension.
-/// \b dista = `'C'`: the size must be \f$m \times n_j\f$.
-/// \b dista = `'R'`: the size must be \f$m_j \times n\f$. +/// \param[in] a, lda The column/row-block 𝑨 (\f$m \times n^{(j)}\f$) and its leading dimension.
+/// \b dista = `'C'`: the size must be \f$m \times n^{(j)}\f$.
+/// \b dista = `'R'`: the size must be \f$m^{(j)} \times n\f$. /// \param[in] yst, ldyst The row-block 𝖄 (\f$m_b \times Nl\f$, row-major) and its leading dimension.
/// \b dista = `'C'`: \b ldyst must be \f$Nl\f$.
/// \b dista = `'R'`: no condition. @@ -41,34 +41,30 @@ /// the routine only returns the first \b retc default arguments in \b retv. /// void isvd_@x@SketchGaussianProjection( - const isvd_Param param, - const @xtype@ *argv, - const isvd_int_t argc, - @xtype@ *retv, - const isvd_int_t retc, - const char dista, - const char ordera, - const @xtype@ *a, - const isvd_int_t lda, - @xtype@ *yst, - const isvd_int_t ldyst, - const isvd_int_t seed, - const mpi_int_t mpi_root + const isvd_Param param, + const @xtype_____@ *argv, + const isvd_int_t argc, + @xtype_____@ *retv, + const isvd_int_t retc, + const char dista, + const char ordera, + const @xtype_____@ *a, + const isvd_int_t lda, + @xtype_____@ *yst, + const isvd_int_t ldyst, + const isvd_int_t seed, + const mpi_int_t mpi_root ) { - if ( argc < 0 ) { - return; - } - - ISVD_UNUSED(argv); - ISVD_UNUSED(retv); - ISVD_UNUSED(retc); + if ( argc > 0 ) { isvd_assert_ne(argv, nullptr); } + if ( retc > 0 ) { isvd_assert_ne(retv, nullptr); } + if ( argc < 0 ) return; // ====================================================================================================================== // // Check arguments - const char dista_ = isvd_arg2char("DISTA", dista, "CR", nullptr); - const char ordera_ = isvd_arg2char("ORDERA", ordera, "CR", nullptr); + const char dista_ = isvd_arg2char("DISTA", dista, "CR", NULL); + const char ordera_ = isvd_arg2char("ORDERA", ordera, "CR", NULL); if ( !dista_ || !ordera_ ) return; // ====================================================================================================================== // diff --git a/src/libisvd/def.h b/src/lib/libisvd/def.h similarity index 81% rename from src/libisvd/def.h rename to src/lib/libisvd/def.h index 1333429..cc33244 100644 --- a/src/libisvd/def.h +++ b/src/lib/libisvd/def.h @@ -1,5 +1,5 @@ //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -/// \file src/libisvd/def.h +/// 
\file lib/libisvd/def.h /// \brief The definitions. /// /// \author Mu Yang <> @@ -10,55 +10,18 @@ #define LIBISVD_DEF_H_ #include - -#if defined(ISVD_USE_ILP64) && !defined(MKL_ILP64) - #define MKL_ILP64 -#else // ISVD_USE_ILP64 - #undef MKL_ILP64 -#endif // ISVD_USE_ILP64 - -#if defined(_OPENMP) - #if !defined(ISVD_USE_OMP) - #define ISVD_USE_OMP - #endif // ISVD_USE_OMP -#else // _OPENMP - #undef ISVD_USE_OMP -#endif // _OPENMP - -#if defined(ISVD_USE_OMP) - #include - #define ISVD_OMP_PARALLEL _Pragma("omp parallel") -#else - #define ISVD_OMP_PARALLEL -#endif // ISVD_USE_OMP - #include #if defined(ISVD_USE_GTEST) && defined(__cplusplus) #include #endif // ISVD_USE_GTEST - #define ISVD_UNUSED( x ) (void)(x) #if !defined(__cplusplus) || (__cplusplus < 201103L) - #define nullptr NULL + #define nullptr (void*)(NULL) #endif -#if !defined(DOXYGEN_SHOULD_SKIP_THIS) -#if defined(ISVD_USE_MKL) - #define MKL_INT isvd_int_t - #define MKL_UINT isvd_uint_t - #define MKL_Complex8 _Complex float - #define MKL_Complex16 _Complex double -#endif // ISVD_USE_MKL -#endif // DOXYGEN_SHOULD_SKIP_THIS - -#if defined(ISVD_USE_MKL) - #include -#endif // ISVD_USE_MKL - - #if defined(__cplusplus) extern "C" { #endif // __cplusplus diff --git a/src/libisvd/gpu.h b/src/lib/libisvd/gpu.h similarity index 83% rename from src/libisvd/gpu.h rename to src/lib/libisvd/gpu.h index a7e8367..7e137a4 100644 --- a/src/libisvd/gpu.h +++ b/src/lib/libisvd/gpu.h @@ -1,5 +1,5 @@ //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -/// \file src/libisvd/gpu.h +/// \file lib/libisvd/gpu.h /// \brief The GPU header. 
/// /// \author Mu Yang <> @@ -9,6 +9,4 @@ #ifndef LIBISVD_GPU_H_ #define LIBISVD_GPU_H_ -#include - #endif // LIBISVD_GPU_H_ diff --git a/src/libisvd/gpu/def.h b/src/lib/libisvd/gpu/def.h similarity index 94% rename from src/libisvd/gpu/def.h rename to src/lib/libisvd/gpu/def.h index 48b33cd..e6efe21 100644 --- a/src/libisvd/gpu/def.h +++ b/src/lib/libisvd/gpu/def.h @@ -1,5 +1,5 @@ //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -/// \file src/libisvd/gpu/def.h +/// \file lib/libisvd/gpu/def.h /// \brief The GPU definitions. /// /// \author Mu Yang <> diff --git a/src/libisvd/gpu/env.c b/src/lib/libisvd/gpu/env.c similarity index 98% rename from src/libisvd/gpu/env.c rename to src/lib/libisvd/gpu/env.c index 71d8337..687b1b0 100644 --- a/src/libisvd/gpu/env.c +++ b/src/lib/libisvd/gpu/env.c @@ -1,5 +1,5 @@ //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -/// \file src/libisvd/gpu/env.c +/// \file lib/libisvd/gpu/env.c /// \brief The iSVD environment routines (GPU only). /// /// \author Mu Yang <> diff --git a/src/libisvd/gpu/stage/@x@_postprocess_gpu.h b/src/lib/libisvd/gpu/stage/@x@_postprocess_gpu.h similarity index 82% rename from src/libisvd/gpu/stage/@x@_postprocess_gpu.h rename to src/lib/libisvd/gpu/stage/@x@_postprocess_gpu.h index ce11b3a..9120fd6 100644 --- a/src/libisvd/gpu/stage/@x@_postprocess_gpu.h +++ b/src/lib/libisvd/gpu/stage/@x@_postprocess_gpu.h @@ -1,6 +1,6 @@ //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -/// \file src/libisvd/gpu/stage/@x@_postprocess_gpu.h -/// \brief The GPU Postprocessing utilities (@xname@ precision) +/// \file lib/libisvd/gpu/stage/@x@_postprocess_gpu.h +/// \brief The GPU Postprocessing utilities (@xname@ precision). 
/// /// \author Mu Yang <> /// \copyright MIT License @@ -21,21 +21,22 @@ #include #include #include +#include #if !defined(DOXYGEN_SHOULD_SKIP_THIS) static void projectBlockCol( - const isvd_Param param, - const char ordera, - const @xtype@ *a, - const isvd_int_t lda, - const @xtype@ *qt, - const isvd_int_t ldqt, - @xtype@ *zt, - const isvd_int_t ldzt, - @xtype@ *s, - @xtype@ *ut, - const isvd_int_t ldut, - const mpi_int_t ut_root + const isvd_Param param, + const char ordera, + const @xtype_____@ *a, + const isvd_int_t lda, + const @xtype_____@ *qt, + const isvd_int_t ldqt, + @xtype_____@ *zt, + const isvd_int_t ldzt, + @xtype_____@ *s, + @xtype_____@ *ut, + const isvd_int_t ldut, + const mpi_int_t ut_root ) { ISVD_UNUSED(s); @@ -73,21 +74,21 @@ static void projectBlockCol( size_t free_byte, total_byte; cudaMemGetInfo(&free_byte, &total_byte); if ( isvd_gpu_memory_limit > 0 ) free_byte = minl(free_byte, isvd_gpu_memory_limit); - size_t melem = free_byte / sizeof(@xtype@); + size_t melem = free_byte / sizeof(@xtype_____@); size_t nelem_used = m * l; isvd_int_t n_gpu = (melem - nelem_used) / (m + l); if ( n_gpu > (isvd_int_t)isvd_kBlockSizeGpu ) n_gpu = (n_gpu / isvd_kBlockSizeGpu) * isvd_kBlockSizeGpu; n_gpu = min(n_gpu, nj); if ( n_gpu <= 0 ) { fprintf(stderr, "No enough GPU memory. (Request at least %" PRId64 " bytes. Only %" PRId64 " bytes free.", - nelem_used * sizeof(@xtype@), melem * sizeof(@xtype@)); + nelem_used * sizeof(@xtype_____@), melem * sizeof(@xtype_____@)); isvd_assert_fail(); } // ====================================================================================================================== // // Allocate memory - @xtype@ *qt_; + @xtype_____@ *qt_; if ( use_ut ) { qt_ = ut; } else { @@ -95,22 +96,22 @@ static void projectBlockCol( } isvd_int_t ldqt_ = l; - @xtype@ *a_gpu; + @xtype_____@ *a_gpu; magma_@x@malloc(&a_gpu, m * n_gpu); isvd_int_t lda_gpu = (ordera == 'C') ? 
m : n_gpu; - @xtype@ *qt_gpu; + @xtype_____@ *qt_gpu; magma_@x@malloc(&qt_gpu, l * m); isvd_int_t ldqt_gpu = l; - @xtype@ *zt_gpu; + @xtype_____@ *zt_gpu; magma_@x@malloc(&zt_gpu, l * n_gpu); isvd_int_t ldzt_gpu = l; // ====================================================================================================================== // // Rearrange - MPI_Allgather(qt, mb*ldqt, MPI_@X_TYPE@, qt_, mb*ldqt, MPI_@X_TYPE@, param.mpi_comm); + MPI_Allgather(qt, mb*ldqt, MPI_@XTYPE@, qt_, mb*ldqt, MPI_@XTYPE@, param.mpi_comm); // ====================================================================================================================== // // Send data @@ -124,8 +125,8 @@ static void projectBlockCol( isvd_int_t idx; for ( idx = 0; idx < nj; idx += n_gpu ) { - const @xtype@ *a_tmp = (ordera == 'C') ? (a + lda*idx) : (a+idx); - @xtype@ *zt_tmp = zt + ldzt*idx; + const @xtype_____@ *a_tmp = (ordera == 'C') ? (a + lda*idx) : (a+idx); + @xtype_____@ *zt_tmp = zt + ldzt*idx; const isvd_int_t n_tmp = min(n_gpu, nj-idx); // Send A @@ -157,18 +158,18 @@ static void projectBlockCol( } static void projectBlockRow( - const isvd_Param param, - const char ordera, - const @xtype@ *a, - const isvd_int_t lda, - const @xtype@ *qt, - const isvd_int_t ldqt, - @xtype@ *zt, - const isvd_int_t ldzt, - @xtype@ *s, - @xtype@ *vt, - const isvd_int_t ldvt, - const mpi_int_t vt_root + const isvd_Param param, + const char ordera, + const @xtype_____@ *a, + const isvd_int_t lda, + const @xtype_____@ *qt, + const isvd_int_t ldqt, + @xtype_____@ *zt, + const isvd_int_t ldzt, + @xtype_____@ *s, + @xtype_____@ *vt, + const isvd_int_t ldvt, + const mpi_int_t vt_root ) { ISVD_UNUSED(s); @@ -205,21 +206,21 @@ static void projectBlockRow( size_t free_byte, total_byte; cudaMemGetInfo(&free_byte, &total_byte); - size_t melem = free_byte / sizeof(@xtype@); + size_t melem = free_byte / sizeof(@xtype_____@); size_t nelem_used = mj * l; isvd_int_t n_gpu = (melem - nelem_used) / (mj + l); if ( n_gpu > 
(isvd_int_t)isvd_kBlockSizeGpu ) n_gpu = (n_gpu / isvd_kBlockSizeGpu) * isvd_kBlockSizeGpu; n_gpu = min(n_gpu, n); if ( n_gpu <= 0 ) { fprintf(stderr, "No enough GPU memory. (Request at least %" PRId64 " bytes. Only %" PRId64 " bytes free.", - nelem_used * sizeof(@xtype@), melem * sizeof(@xtype@)); + nelem_used * sizeof(@xtype_____@), melem * sizeof(@xtype_____@)); isvd_assert_fail(); } // ====================================================================================================================== // // Allocate memory - @xtype@ *zt_; + @xtype_____@ *zt_; if ( use_vt ) { zt_ = vt; } else { @@ -227,15 +228,15 @@ static void projectBlockRow( } isvd_int_t ldzt_ = l; - @xtype@ *a_gpu; + @xtype_____@ *a_gpu; magma_@x@malloc(&a_gpu, mj * n_gpu); isvd_int_t lda_gpu = (ordera == 'C') ? mj : n_gpu; - @xtype@ *qt_gpu; + @xtype_____@ *qt_gpu; magma_@x@malloc(&qt_gpu, l * mj); isvd_int_t ldqt_gpu = l; - @xtype@ *zt_gpu; + @xtype_____@ *zt_gpu; magma_@x@malloc(&zt_gpu, l * n_gpu); isvd_int_t ldzt_gpu = l; @@ -251,8 +252,8 @@ static void projectBlockRow( isvd_int_t idx; for ( idx = 0; idx < n; idx += n_gpu ) { - const @xtype@ *a_tmp = (ordera == 'C') ? (a + lda*idx) : (a+idx); - @xtype@ *zt_tmp = zt_ + ldzt*idx; + const @xtype_____@ *a_tmp = (ordera == 'C') ? 
(a + lda*idx) : (a+idx); + @xtype_____@ *zt_tmp = zt_ + ldzt*idx; const isvd_int_t n_tmp = min(n_gpu, n-idx); // Send A @@ -273,7 +274,7 @@ static void projectBlockRow( // ====================================================================================================================== // // Rearrange - MPI_Reduce_scatter_block(zt_, zt, nb*ldzt, MPI_@X_TYPE@, MPI_SUM, param.mpi_comm); + MPI_Reduce_scatter_block(zt_, zt, nb*ldzt, MPI_@XTYPE@, MPI_SUM, param.mpi_comm); // ====================================================================================================================== // // Deallocate memory diff --git a/src/libisvd/gpu/stage/@x@_postprocess_gramian_gpu.c b/src/lib/libisvd/gpu/stage/@x@_postprocess_gramian_gpu.c similarity index 89% rename from src/libisvd/gpu/stage/@x@_postprocess_gramian_gpu.c rename to src/lib/libisvd/gpu/stage/@x@_postprocess_gramian_gpu.c index 1126374..a8ff4b3 100644 --- a/src/libisvd/gpu/stage/@x@_postprocess_gramian_gpu.c +++ b/src/lib/libisvd/gpu/stage/@x@_postprocess_gramian_gpu.c @@ -1,6 +1,6 @@ //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -/// \file src/libisvd/gpu/stage/@x@_postprocess_gramian_gpu.c -/// \brief The GPU Gramian Postprocessing (@xname@ precision) +/// \file lib/libisvd/gpu/stage/@x@_postprocess_gramian_gpu.c +/// \brief The GPU Gramian Postprocessing (@xname@ precision). 
/// /// \author Mu Yang <> /// \copyright MIT License diff --git a/src/libisvd/gpu/stage/@x@_postprocess_symmetric_gpu.c b/src/lib/libisvd/gpu/stage/@x@_postprocess_symmetric_gpu.c similarity index 89% rename from src/libisvd/gpu/stage/@x@_postprocess_symmetric_gpu.c rename to src/lib/libisvd/gpu/stage/@x@_postprocess_symmetric_gpu.c index b2fe752..05fdd82 100644 --- a/src/libisvd/gpu/stage/@x@_postprocess_symmetric_gpu.c +++ b/src/lib/libisvd/gpu/stage/@x@_postprocess_symmetric_gpu.c @@ -1,6 +1,6 @@ //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -/// \file src/libisvd/gpu/stage/@x@_postprocess_symmetric_gpu.c -/// \brief The GPU Symmetric Postprocessing (@xname@ precision) +/// \file lib/libisvd/gpu/stage/@x@_postprocess_symmetric_gpu.c +/// \brief The GPU Symmetric Postprocessing (@xname@ precision). /// /// \author Mu Yang <> /// \copyright MIT License diff --git a/src/libisvd/gpu/stage/@x@_postprocess_tall_skinny_qr_gpu.c b/src/lib/libisvd/gpu/stage/@x@_postprocess_tall_skinny_qr_gpu.c similarity index 89% rename from src/libisvd/gpu/stage/@x@_postprocess_tall_skinny_qr_gpu.c rename to src/lib/libisvd/gpu/stage/@x@_postprocess_tall_skinny_qr_gpu.c index adbdbda..82f68d7 100644 --- a/src/libisvd/gpu/stage/@x@_postprocess_tall_skinny_qr_gpu.c +++ b/src/lib/libisvd/gpu/stage/@x@_postprocess_tall_skinny_qr_gpu.c @@ -1,6 +1,6 @@ //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -/// \file src/libisvd/gpu/stage/@x@_postprocess_tall_skinny_qr_gpu.c -/// \brief The GPU Tall-Skinny QR Postprocessing (@xname@ precision) +/// \file lib/libisvd/gpu/stage/@x@_postprocess_tall_skinny_qr_gpu.c +/// \brief The GPU Tall-Skinny QR Postprocessing (@xname@ precision). 
/// /// \author Mu Yang <> /// \copyright MIT License diff --git a/src/libisvd/gpu/stage/@x@_sketch_gaussian_projection_gpu.c b/src/lib/libisvd/gpu/stage/@x@_sketch_gaussian_projection_gpu.c similarity index 89% rename from src/libisvd/gpu/stage/@x@_sketch_gaussian_projection_gpu.c rename to src/lib/libisvd/gpu/stage/@x@_sketch_gaussian_projection_gpu.c index 8f476da..81b51ac 100644 --- a/src/libisvd/gpu/stage/@x@_sketch_gaussian_projection_gpu.c +++ b/src/lib/libisvd/gpu/stage/@x@_sketch_gaussian_projection_gpu.c @@ -1,6 +1,6 @@ //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -/// \file src/libisvd/gpu/stage/@x@_sketch_gaussian_projection_gpu.c -/// \brief The GPU Gaussian Projection Sketching (@xname@ precision) +/// \file lib/libisvd/gpu/stage/@x@_sketch_gaussian_projection_gpu.c +/// \brief The GPU Gaussian Projection Sketching (@xname@ precision). /// /// \author Mu Yang <> /// \copyright MIT License diff --git a/src/libisvd/gpu/stage/@x@_sketch_gpu.h b/src/lib/libisvd/gpu/stage/@x@_sketch_gpu.h similarity index 74% rename from src/libisvd/gpu/stage/@x@_sketch_gpu.h rename to src/lib/libisvd/gpu/stage/@x@_sketch_gpu.h index 199ce27..10bc0a4 100644 --- a/src/libisvd/gpu/stage/@x@_sketch_gpu.h +++ b/src/lib/libisvd/gpu/stage/@x@_sketch_gpu.h @@ -1,6 +1,6 @@ //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -/// \file src/libisvd/gpu/stage/@x@_sketch_gpu.h -/// \brief The GPU Sketching utilities (@xname@ precision) +/// \file lib/libisvd/gpu/stage/@x@_sketch_gpu.h +/// \brief The GPU Sketching utilities (@xname@ precision). 
/// /// \author Mu Yang <> /// \copyright MIT License @@ -21,18 +21,19 @@ #include #include #include +#include #include #if !defined(DOXYGEN_SHOULD_SKIP_THIS) static void sketchBlockCol( - const isvd_Param param, - const char ordera, - const @xtype@ *a, - const isvd_int_t lda, - @xtype@ *yst, - const isvd_int_t ldyst, - const isvd_int_t seed, - const mpi_int_t mpi_root + const isvd_Param param, + const char ordera, + const @xtype_____@ *a, + const isvd_int_t lda, + @xtype_____@ *yst, + const isvd_int_t ldyst, + const isvd_int_t seed, + const mpi_int_t mpi_root ) { ISVD_UNUSED(ldyst); @@ -63,35 +64,35 @@ static void sketchBlockCol( size_t free_byte, total_byte; cudaMemGetInfo(&free_byte, &total_byte); if ( isvd_gpu_memory_limit > 0 ) free_byte = minl(free_byte, isvd_gpu_memory_limit); - size_t melem = free_byte / sizeof(@xtype@); + size_t melem = free_byte / sizeof(@xtype_____@); size_t nelem_used = m * Nl; isvd_int_t n_gpu = (melem - nelem_used) / (m + Nl); if ( n_gpu > (isvd_int_t)isvd_kBlockSizeGpu ) n_gpu = (n_gpu / isvd_kBlockSizeGpu) * isvd_kBlockSizeGpu; n_gpu = min(n_gpu, nj); if ( n_gpu <= 0 ) { fprintf(stderr, "No enough GPU memory. (Request at least %" PRId64 " bytes. Only %" PRId64 " bytes free.", - nelem_used * sizeof(@xtype@), melem * sizeof(@xtype@)); + nelem_used * sizeof(@xtype_____@), melem * sizeof(@xtype_____@)); isvd_assert_fail(); } // ====================================================================================================================== // // Allocate memory - @xtype@ *omegat = isvd_@x@malloc(Nl * nj); + @xtype_____@ *omegat = isvd_@x@malloc(Nl * nj); isvd_int_t ldomegat = Nl; - @xtype@ *yst_ = isvd_@x@malloc(Nl * Pmb); + @xtype_____@ *yst_ = isvd_@x@malloc(Nl * Pmb); isvd_int_t ldyst_ = Nl; - @xtype@ *a_gpu; + @xtype_____@ *a_gpu; magma_@x@malloc(&a_gpu, m * n_gpu); isvd_int_t lda_gpu = (ordera == 'C') ? 
m : n_gpu; - @xtype@ *omegat_gpu; + @xtype_____@ *omegat_gpu; magma_@x@malloc(&omegat_gpu, Nl * n_gpu); isvd_int_t ldomegat_gpu = Nl; - @xtype@ *yst_gpu; + @xtype_____@ *yst_gpu; magma_@x@malloc(&yst_gpu, Nl * m); isvd_int_t ldyst_gpu = Nl; @@ -100,36 +101,18 @@ static void sketchBlockCol( isvd_int_t seed_ = seed; MPI_Bcast(&seed_, sizeof(seed_), MPI_BYTE, mpi_root, param.mpi_comm); - - ISVD_OMP_PARALLEL - { - omp_int_t omp_size = isvd_getOmpSize(); - omp_int_t omp_rank = isvd_getOmpRank(); - - isvd_int_t len = nj * Nl / omp_size; - isvd_int_t start = len * omp_rank; - if ( omp_rank == omp_size-1 ) { - len = nj * Nl - start; - } - - isvd_VSLStreamStatePtr stream; - isvd_vslNewStream(&stream, seed_); - isvd_vslSkipAheadStream(stream, (Nl * nb * param.mpi_rank + start) * 2); - isvd_v@x@RngGaussian(stream, len, omegat + start, 0.0, 1.0); - - isvd_vslDeleteStream(&stream); - } + isvd_v@x@RngGaussianDriver(seed_, Nl * nb * param.mpi_rank, nj * Nl, omegat, 0.0, 1.0); // ====================================================================================================================== // // Project - cudaMemset(yst_gpu, 0, ldyst_gpu * m * sizeof(@xtype@)); + cudaMemset(yst_gpu, 0, ldyst_gpu * m * sizeof(@xtype_____@)); char transa_ = (ordera == 'C') ? 'T' : 'N'; isvd_int_t idx; for ( idx = 0; idx < nj; idx += n_gpu ) { - const @xtype@ *a_tmp = (ordera == 'C') ? (a + lda*idx) : (a+idx); - const @xtype@ *omegat_tmp = omegat + ldomegat*idx; + const @xtype_____@ *a_tmp = (ordera == 'C') ? 
(a + lda*idx) : (a+idx); + const @xtype_____@ *omegat_tmp = omegat + ldomegat*idx; const isvd_int_t n_tmp = min(n_gpu, nj-idx); // Send A and Omega @@ -153,12 +136,13 @@ static void sketchBlockCol( // ====================================================================================================================== // // Rearrange - MPI_Reduce_scatter_block(yst_, yst, mb*ldyst_, MPI_@X_TYPE@, MPI_SUM, param.mpi_comm); + MPI_Reduce_scatter_block(yst_, yst, mb*ldyst_, MPI_@XTYPE@, MPI_SUM, param.mpi_comm); // ====================================================================================================================== // // Deallocate memory isvd_free(omegat); + isvd_free(yst_); magma_free(a_gpu); magma_free(omegat_gpu); magma_free(yst_gpu); @@ -166,14 +150,14 @@ static void sketchBlockCol( } static void sketchBlockRow( - const isvd_Param param, - const char ordera, - const @xtype@ *a, - const isvd_int_t lda, - @xtype@ *yst, - const isvd_int_t ldyst, - const isvd_int_t seed, - const mpi_int_t mpi_root + const isvd_Param param, + const char ordera, + const @xtype_____@ *a, + const isvd_int_t lda, + @xtype_____@ *yst, + const isvd_int_t ldyst, + const isvd_int_t seed, + const mpi_int_t mpi_root ) { // ====================================================================================================================== // @@ -198,32 +182,32 @@ static void sketchBlockRow( size_t free_byte, total_byte; cudaMemGetInfo(&free_byte, &total_byte); - size_t melem = free_byte / sizeof(@xtype@); + size_t melem = free_byte / sizeof(@xtype_____@); size_t nelem_used = mj * Nl; isvd_int_t n_gpu = (melem - nelem_used) / (mj + Nl); if ( n_gpu > (isvd_int_t)isvd_kBlockSizeGpu ) n_gpu = (n_gpu / isvd_kBlockSizeGpu) * isvd_kBlockSizeGpu; n_gpu = min(n_gpu, n); if ( n_gpu <= 0 ) { fprintf(stderr, "No enough GPU memory. (Request at least %" PRId64 " bytes. 
Only %" PRId64 " bytes free.", - nelem_used * sizeof(@xtype@), melem * sizeof(@xtype@)); + nelem_used * sizeof(@xtype_____@), melem * sizeof(@xtype_____@)); isvd_assert_fail(); } // ====================================================================================================================== // // Allocate memory - @xtype@ *omegat = isvd_@x@malloc(n * Nl); + @xtype_____@ *omegat = isvd_@x@malloc(n * Nl); isvd_int_t ldomegat = Nl; - @xtype@ *a_gpu; + @xtype_____@ *a_gpu; magma_@x@malloc(&a_gpu, mj * n_gpu); isvd_int_t lda_gpu = (ordera == 'C') ? mj : n_gpu; - @xtype@ *omegat_gpu; + @xtype_____@ *omegat_gpu; magma_@x@malloc(&omegat_gpu, Nl * n_gpu); isvd_int_t ldomegat_gpu = Nl; - @xtype@ *yst_gpu; + @xtype_____@ *yst_gpu; magma_@x@malloc(&yst_gpu, Nl * mj); isvd_int_t ldyst_gpu = Nl; @@ -232,36 +216,18 @@ static void sketchBlockRow( isvd_int_t seed_ = seed; MPI_Bcast(&seed_, sizeof(isvd_VSLStreamStatePtr), MPI_BYTE, mpi_root, param.mpi_comm); - - ISVD_OMP_PARALLEL - { - omp_int_t omp_size = isvd_getOmpSize(); - omp_int_t omp_rank = isvd_getOmpRank(); - - isvd_int_t len = n * Nl / omp_size; - isvd_int_t start = len * omp_rank; - if ( omp_rank == omp_size-1 ) { - len = n * Nl - start; - } - - isvd_VSLStreamStatePtr stream; - isvd_vslNewStream(&stream, seed_); - isvd_vslSkipAheadStream(stream, start * 2); - isvd_v@x@RngGaussian(stream, len, omegat + start, 0.0, 1.0); - - isvd_vslDeleteStream(&stream); - } + isvd_v@x@RngGaussianDriver(seed_, 0, n * Nl, omegat, 0.0, 1.0); // ====================================================================================================================== // // Project - cudaMemset(yst_gpu, 0, ldyst_gpu * mj * sizeof(@xtype@)); + cudaMemset(yst_gpu, 0, ldyst_gpu * mj * sizeof(@xtype_____@)); char transa_ = (ordera == 'C') ? 'T' : 'N'; isvd_int_t idx; for ( idx = 0; idx < n; idx += n_gpu ) { - const @xtype@ *a_tmp = (ordera == 'C') ? 
(a + lda*idx) : (a+idx); - const @xtype@ *omegat_tmp = omegat + ldomegat*idx; + const @xtype_____@ *a_tmp = (ordera == 'C') ? (a + lda*idx) : (a+idx); + const @xtype_____@ *omegat_tmp = omegat + ldomegat*idx; const isvd_int_t n_tmp = min(n_gpu, n-idx); // Send A and Omega diff --git a/src/lib/libisvd/la.h b/src/lib/libisvd/la.h new file mode 100644 index 0000000..6d0e7f9 --- /dev/null +++ b/src/lib/libisvd/la.h @@ -0,0 +1,12 @@ +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +/// \file lib/libisvd/la.h +/// \brief The linear algebra header. +/// +/// \author Mu Yang <> +/// \copyright MIT License +/// + +#ifndef LIBISVD_LA_H_ +#define LIBISVD_LA_H_ + +#endif // LIBISVD_LA_H_ diff --git a/src/libisvd/la/blas/dimm.c b/src/lib/libisvd/la/blas/dimm.c similarity index 89% rename from src/libisvd/la/blas/dimm.c rename to src/lib/libisvd/la/blas/dimm.c index 9c149e3..027b903 100644 --- a/src/libisvd/la/blas/dimm.c +++ b/src/lib/libisvd/la/blas/dimm.c @@ -1,5 +1,5 @@ //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -/// \file src/libisvd/la/blas/dimm.c +/// \file lib/libisvd/la/blas/dimm.c /// \brief The BLAS-Like Dimm routine. 
/// /// \author Mu Yang <> @@ -7,14 +7,14 @@ /// #include -#include +#include #include #include -@ISVD_LA_BLAS_TYPE_DEFINE@ +@ISVD_TYPE_MACRO_DEFINE@ #define isvd_xDimm( side, m, n, alpha, a, b, ldb, xScal, xMul ) \ - const char side_ = isvd_arg2char("SIDE", side, "LR", nullptr); \ + const char side_ = isvd_arg2char("SIDE", side, "LR", NULL); \ if ( !side_ ) return; \ switch ( side_ ) { \ case 'L': { \ @@ -48,4 +48,4 @@ void isvd_zDimm( const CHAR1 side, const INT m, const INT n, const COMP8 alpha, const COMP8 *a, COMP8 *b, const INT ldb ) { isvd_xDimm(side, m, n, alpha, a, b, ldb, isvd_zScal, isvd_vzMul); } -@ISVD_LA_BLAS_TYPE_UNDEF@ +@ISVD_TYPE_MACRO_UNDEF@ diff --git a/src/libisvd/la/blas/dism.c b/src/lib/libisvd/la/blas/dism.c similarity index 89% rename from src/libisvd/la/blas/dism.c rename to src/lib/libisvd/la/blas/dism.c index 8edca0e..36c34d6 100644 --- a/src/libisvd/la/blas/dism.c +++ b/src/lib/libisvd/la/blas/dism.c @@ -1,5 +1,5 @@ //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -/// \file src/libisvd/la/blas/dism.c +/// \file lib/libisvd/la/blas/dism.c /// \brief The BLAS-Like Dism routine. 
/// /// \author Mu Yang <> @@ -7,14 +7,14 @@ /// #include -#include +#include #include #include -@ISVD_LA_BLAS_TYPE_DEFINE@ +@ISVD_TYPE_MACRO_DEFINE@ #define isvd_xDism( side, m, n, alpha, a, b, ldb, xScal, xDiv ) \ - const char side_ = isvd_arg2char("SIDE", side, "LR", nullptr); \ + const char side_ = isvd_arg2char("SIDE", side, "LR", NULL); \ if ( !side_ ) return; \ switch ( side_ ) { \ case 'L': { \ @@ -48,4 +48,4 @@ void isvd_zDism( const CHAR1 side, const INT m, const INT n, const COMP8 alpha, const COMP8 *a, COMP8 *b, const INT ldb ) { isvd_xDism(side, m, n, alpha, a, b, ldb, isvd_zScal, isvd_vzDiv); } -@ISVD_LA_BLAS_TYPE_UNDEF@ +@ISVD_TYPE_MACRO_UNDEF@ diff --git a/src/libisvd/la/blas/gemmt.c b/src/lib/libisvd/la/blas/gemmt.c similarity index 96% rename from src/libisvd/la/blas/gemmt.c rename to src/lib/libisvd/la/blas/gemmt.c index b7011af..00fbec8 100644 --- a/src/libisvd/la/blas/gemmt.c +++ b/src/lib/libisvd/la/blas/gemmt.c @@ -1,5 +1,5 @@ //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -/// \file src/libisvd/la/blas/gemmt.c +/// \file lib/libisvd/la/blas/gemmt.c /// \brief The BLAS-Like Gemmt routine. 
/// /// \author Mu Yang <> @@ -7,10 +7,10 @@ /// #include -#include +#include #include -@ISVD_LA_BLAS_TYPE_DEFINE@ +@ISVD_TYPE_MACRO_DEFINE@ #if defined(ISVD_USE_MKL) @@ -71,4 +71,4 @@ void isvd_zGemmt( #endif // ISVD_USE_MKL -@ISVD_LA_BLAS_TYPE_UNDEF@ +@ISVD_TYPE_MACRO_UNDEF@ diff --git a/src/libisvd/la/blas/iamax.c b/src/lib/libisvd/la/blas/iamax.c similarity index 92% rename from src/libisvd/la/blas/iamax.c rename to src/lib/libisvd/la/blas/iamax.c index eb49c68..254ed08 100644 --- a/src/libisvd/la/blas/iamax.c +++ b/src/lib/libisvd/la/blas/iamax.c @@ -1,5 +1,5 @@ //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -/// \file src/libisvd/la/blas/iamax.c +/// \file lib/libisvd/la/blas/iamax.c /// \brief The BLAS-1 iAmax routine. /// /// \author Mu Yang <> @@ -7,9 +7,9 @@ /// #include -#include +#include -@ISVD_LA_BLAS_TYPE_DEFINE@ +@ISVD_TYPE_MACRO_DEFINE@ #if defined(__cplusplus) extern "C" { @@ -38,4 +38,4 @@ REAL8 isvd_dAmax( const INT n, const REAL8 *x, const INT incx ) { INT i = isvd_i COMP4 isvd_cAmax( const INT n, const COMP4 *x, const INT incx ) { INT i = isvd_icAmax(n, x, incx); return cabsf(x[i]); } COMP8 isvd_zAmax( const INT n, const COMP8 *x, const INT incx ) { INT i = isvd_izAmax(n, x, incx); return cabs(x[i]); } -@ISVD_LA_BLAS_TYPE_UNDEF@ +@ISVD_TYPE_MACRO_UNDEF@ diff --git a/src/libisvd/la/blas/iamin.c b/src/lib/libisvd/la/blas/iamin.c similarity index 95% rename from src/libisvd/la/blas/iamin.c rename to src/lib/libisvd/la/blas/iamin.c index 26a8994..158543c 100644 --- a/src/libisvd/la/blas/iamin.c +++ b/src/lib/libisvd/la/blas/iamin.c @@ -1,5 +1,5 @@ //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -/// \file src/libisvd/la/blas/iamin.c +/// \file lib/libisvd/la/blas/iamin.c /// \brief The BLAS-1 iAmin routine. 
/// /// \author Mu Yang <> @@ -7,9 +7,9 @@ /// #include -#include +#include -@ISVD_LA_BLAS_TYPE_DEFINE@ +@ISVD_TYPE_MACRO_DEFINE@ #if defined(ISVD_USE_MKL) @@ -62,4 +62,4 @@ REAL8 isvd_dAmin( const INT n, const REAL8 *x, const INT incx ) { INT i = isvd_i COMP4 isvd_cAmin( const INT n, const COMP4 *x, const INT incx ) { INT i = isvd_icAmin(n, x, incx); return cabsf(x[i]); } COMP8 isvd_zAmin( const INT n, const COMP8 *x, const INT incx ) { INT i = isvd_izAmin(n, x, incx); return cabs(x[i]); } -@ISVD_LA_BLAS_TYPE_UNDEF@ +@ISVD_TYPE_MACRO_UNDEF@ diff --git a/src/libisvd/la/blas/omatcopy.c b/src/lib/libisvd/la/blas/omatcopy.c similarity index 91% rename from src/libisvd/la/blas/omatcopy.c rename to src/lib/libisvd/la/blas/omatcopy.c index 502ebfe..9d07df0 100644 --- a/src/libisvd/la/blas/omatcopy.c +++ b/src/lib/libisvd/la/blas/omatcopy.c @@ -1,5 +1,5 @@ //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -/// \file src/libisvd/la/blas/omatcopy.c +/// \file lib/libisvd/la/blas/omatcopy.c /// \brief The BLAS-Like Omatcopy routine. 
/// /// \author Mu Yang <> @@ -7,10 +7,10 @@ /// #include -#include +#include #include -@ISVD_LA_BLAS_TYPE_DEFINE@ +@ISVD_TYPE_MACRO_DEFINE@ #if defined(ISVD_USE_MKL) @@ -46,11 +46,11 @@ void isvd_zOmatcopy( #else // ISVD_USE_MKL -static inline float rconjf( const float z ) { return z; } -static inline double rconj( const double z ) { return z; } +static inline REAL4 rconjf( const REAL4 z ) { return z; } +static inline REAL8 rconj( const REAL8 z ) { return z; } #define isvd_xOmatcopy( trans, m, n, alpha, a, lda, b, ldb, conj ) \ - const char trans_ = isvd_arg2char("TRANS", trans, "NTRC", nullptr); \ + const char trans_ = isvd_arg2char("TRANS", trans, "NTRC", NULL); \ if ( !trans_ ) return; \ switch ( trans_ ) { \ case 'N': { \ @@ -105,4 +105,4 @@ void isvd_zOmatcopy( #endif // ISVD_USE_MKL -@ISVD_LA_BLAS_TYPE_UNDEF@ +@ISVD_TYPE_MACRO_UNDEF@ diff --git a/src/lib/libisvd/la/def.h b/src/lib/libisvd/la/def.h new file mode 100644 index 0000000..917a707 --- /dev/null +++ b/src/lib/libisvd/la/def.h @@ -0,0 +1,58 @@ +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +/// \file lib/libisvd/la/def.h +/// \brief The linear algebra definitions. 
+/// +/// \author Mu Yang <> +/// \copyright MIT License +/// + +#ifndef LIBISVD_LA_DEF_H_ +#define LIBISVD_LA_DEF_H_ + +#include + +#if defined(__cplusplus) +extern "C" { +#endif // __cplusplus + +#if !defined(DOXYGEN_SHOULD_SKIP_THIS) + +#if defined(_OPENMP) + #if !defined(ISVD_USE_OMP) + #define ISVD_USE_OMP + #endif // ISVD_USE_OMP +#else // _OPENMP + #undef ISVD_USE_OMP +#endif // _OPENMP + +#if defined(ISVD_USE_OMP) + #include + #define ISVD_OMP_PARALLEL _Pragma("omp parallel") +#else + #define ISVD_OMP_PARALLEL +#endif // ISVD_USE_OMP + +#if defined(ISVD_USE_ILP64) && !defined(MKL_ILP64) + #define MKL_ILP64 +#else // ISVD_USE_ILP64 + #undef MKL_ILP64 +#endif // ISVD_USE_ILP64 + +#if defined(ISVD_USE_MKL) + #define MKL_INT isvd_int_t + #define MKL_UINT isvd_uint_t + #define MKL_Complex8 isvd_c_val_t + #define MKL_Complex16 isvd_z_val_t +#endif // ISVD_USE_MKL + +#endif // DOXYGEN_SHOULD_SKIP_THIS + +#if defined(ISVD_USE_MKL) + #include +#endif // ISVD_USE_MKL + +#if defined(__cplusplus) +} +#endif // __cplusplus + +#endif // LIBISVD_LA_DEF_H_ diff --git a/src/libisvd/la/lapack/geinv.c b/src/lib/libisvd/la/lapack/geinv.c similarity index 84% rename from src/libisvd/la/lapack/geinv.c rename to src/lib/libisvd/la/lapack/geinv.c index 0b1f670..5050a11 100644 --- a/src/libisvd/la/lapack/geinv.c +++ b/src/lib/libisvd/la/lapack/geinv.c @@ -1,5 +1,5 @@ //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -/// \file src/libisvd/la/lapack/geinv.c +/// \file lib/libisvd/la/lapack/geinv.c /// \brief The LAPACK Getrf+Getri routine. 
/// /// \author Mu Yang <> @@ -7,10 +7,10 @@ /// #include -#include +#include #include -@ISVD_LA_BLAS_TYPE_DEFINE@ +@ISVD_TYPE_MACRO_DEFINE@ #if !defined(DOXYGEN_SHOULD_SKIP_THIS) @@ -42,7 +42,7 @@ void isvd_sGeinv( const INT n, REAL4 *a, const INT lda ) { REAL4 qwork; INT lwork = -1, info; - sgetri_(&n, a, &lda, nullptr, &qwork, &lwork, &info); isvd_assert_pass(info); + sgetri_(&n, a, &lda, NULL, &qwork, &lwork, &info); isvd_assert_pass(info); lwork = qwork; INT *ipiv = isvd_imalloc(n); REAL4 *work = isvd_smalloc(lwork); @@ -55,7 +55,7 @@ void isvd_dGeinv( const INT n, REAL8 *a, const INT lda ) { REAL8 qwork; INT lwork = -1, info; - dgetri_(&n, a, &lda, nullptr, &qwork, &lwork, &info); isvd_assert_pass(info); + dgetri_(&n, a, &lda, NULL, &qwork, &lwork, &info); isvd_assert_pass(info); lwork = qwork; INT *ipiv = isvd_imalloc(n); REAL8 *work = isvd_dmalloc(lwork); @@ -68,7 +68,7 @@ void isvd_cGeinv( const INT n, COMP4 *a, const INT lda ) { COMP4 qwork; INT lwork = -1, info; - cgetri_(&n, a, &lda, nullptr, &qwork, &lwork, &info); isvd_assert_pass(info); + cgetri_(&n, a, &lda, NULL, &qwork, &lwork, &info); isvd_assert_pass(info); lwork = creal(qwork); INT *ipiv = isvd_imalloc(n); COMP4 *work = isvd_cmalloc(lwork); @@ -81,7 +81,7 @@ void isvd_zGeinv( const INT n, COMP8 *a, const INT lda ) { COMP8 qwork; INT lwork = -1, info; - zgetri_(&n, a, &lda, nullptr, &qwork, &lwork, &info); isvd_assert_pass(info); + zgetri_(&n, a, &lda, NULL, &qwork, &lwork, &info); isvd_assert_pass(info); lwork = creal(qwork); INT *ipiv = isvd_imalloc(n); COMP8 *work = isvd_zmalloc(lwork); @@ -91,4 +91,4 @@ void isvd_zGeinv( isvd_free(work); } -@ISVD_LA_BLAS_TYPE_UNDEF@ +@ISVD_TYPE_MACRO_UNDEF@ diff --git a/src/libisvd/la/lapack/gesvd.c b/src/lib/libisvd/la/lapack/gesvd.c similarity index 92% rename from src/libisvd/la/lapack/gesvd.c rename to src/lib/libisvd/la/lapack/gesvd.c index b44c16f..11dad35 100644 --- a/src/libisvd/la/lapack/gesvd.c +++ b/src/lib/libisvd/la/lapack/gesvd.c @@ -1,5 +1,5 
@@ //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -/// \file src/libisvd/la/lapack/gesvd.c +/// \file lib/libisvd/la/lapack/gesvd.c /// \brief The LAPACK Gesvd routine. /// /// \author Mu Yang <> @@ -7,11 +7,11 @@ /// #include -#include +#include #include #include -@ISVD_LA_BLAS_TYPE_DEFINE@ +@ISVD_TYPE_MACRO_DEFINE@ #if !defined(DOXYGEN_SHOULD_SKIP_THIS) @@ -61,7 +61,7 @@ void isvd_cGesvd( COMP4 *v, const INT ldvt ) { COMP4 qwork; INT lwork = -1, info; - cgesvd_(&jobu, &jobvt, &m, &n, a, &lda, s, u, &ldu, v, &ldvt, &qwork, &lwork, nullptr, &info); isvd_assert_pass(info); + cgesvd_(&jobu, &jobvt, &m, &n, a, &lda, s, u, &ldu, v, &ldvt, &qwork, &lwork, NULL, &info); isvd_assert_pass(info); lwork = crealf(qwork); COMP4 *work = isvd_cmalloc(lwork); REAL4 *rwork = isvd_smalloc(5*min(m, n)); @@ -74,7 +74,7 @@ void isvd_zGesvd( COMP8 *v, const INT ldvt ) { COMP8 qwork; INT lwork = -1, info; - zgesvd_(&jobu, &jobvt, &m, &n, a, &lda, s, u, &ldu, v, &ldvt, &qwork, &lwork, nullptr, &info); isvd_assert_pass(info); + zgesvd_(&jobu, &jobvt, &m, &n, a, &lda, s, u, &ldu, v, &ldvt, &qwork, &lwork, NULL, &info); isvd_assert_pass(info); lwork = creal(qwork); COMP8 *work = isvd_zmalloc(lwork); REAL8 *rwork = isvd_dmalloc(5*min(m, n)); @@ -84,4 +84,4 @@ void isvd_zGesvd( } //\} -@ISVD_LA_BLAS_TYPE_UNDEF@ +@ISVD_TYPE_MACRO_UNDEF@ diff --git a/src/libisvd/la/lapack/lsame.c b/src/lib/libisvd/la/lapack/lsame.c similarity index 88% rename from src/libisvd/la/lapack/lsame.c rename to src/lib/libisvd/la/lapack/lsame.c index 169b964..af68d7b 100644 --- a/src/libisvd/la/lapack/lsame.c +++ b/src/lib/libisvd/la/lapack/lsame.c @@ -1,5 +1,5 @@ //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -/// \file src/libisvd/la/lapack/lsame.c +/// \file lib/libisvd/la/lapack/lsame.c /// \brief The LAPACK Lsame routine. 
/// /// \author Mu Yang <> @@ -7,9 +7,9 @@ /// #include -#include +#include -@ISVD_LA_BLAS_TYPE_DEFINE@ +@ISVD_TYPE_MACRO_DEFINE@ #if !defined(DOXYGEN_SHOULD_SKIP_THIS) @@ -49,4 +49,4 @@ bool isvd_Lsamen( return true; } -@ISVD_LA_BLAS_TYPE_UNDEF@ +@ISVD_TYPE_MACRO_UNDEF@ diff --git a/src/libisvd/la/lapack/syev.c b/src/lib/libisvd/la/lapack/syev.c similarity index 87% rename from src/libisvd/la/lapack/syev.c rename to src/lib/libisvd/la/lapack/syev.c index 4ba8573..2ed9691 100644 --- a/src/libisvd/la/lapack/syev.c +++ b/src/lib/libisvd/la/lapack/syev.c @@ -1,5 +1,5 @@ //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -/// \file src/libisvd/la/lapack/syev.c +/// \file lib/libisvd/la/lapack/syev.c /// \brief The LAPACK Syev routine. /// /// \author Mu Yang <> @@ -7,10 +7,10 @@ /// #include -#include +#include #include -@ISVD_LA_BLAS_TYPE_DEFINE@ +@ISVD_TYPE_MACRO_DEFINE@ #if !defined(DOXYGEN_SHOULD_SKIP_THIS) @@ -57,7 +57,7 @@ void isvd_cSyev( const CHAR1 jobz, const CHAR1 uplo, const INT n, COMP4 *a, const INT lda, REAL4 *w ) { COMP4 qwork; INT lwork = -1, info; - cheev_(&jobz, &uplo, &n, a, &lda, w, &qwork, &lwork, nullptr, &info); isvd_assert_pass(info); + cheev_(&jobz, &uplo, &n, a, &lda, w, &qwork, &lwork, NULL, &info); isvd_assert_pass(info); lwork = creal(qwork); COMP4 *work = isvd_cmalloc(lwork); REAL4 *rwork = isvd_smalloc(5*n-2); @@ -69,7 +69,7 @@ void isvd_zSyev( const CHAR1 jobz, const CHAR1 uplo, const INT n, COMP8 *a, const INT lda, REAL8 *w ) { COMP8 qwork; INT lwork = -1, info; - zheev_(&jobz, &uplo, &n, a, &lda, w, &qwork, &lwork, nullptr, &info); isvd_assert_pass(info); + zheev_(&jobz, &uplo, &n, a, &lda, w, &qwork, &lwork, NULL, &info); isvd_assert_pass(info); lwork = creal(qwork); COMP8 *work = isvd_zmalloc(lwork); REAL8 *rwork = isvd_dmalloc(5*n-2); @@ -78,4 +78,4 @@ void isvd_zSyev( isvd_free(rwork); } -@ISVD_LA_BLAS_TYPE_UNDEF@ +@ISVD_TYPE_MACRO_UNDEF@ diff --git 
a/src/libisvd/util/memory.c b/src/lib/libisvd/la/util/memory.c similarity index 57% rename from src/libisvd/util/memory.c rename to src/lib/libisvd/la/util/memory.c index a8f8eea..bdba571 100644 --- a/src/libisvd/util/memory.c +++ b/src/lib/libisvd/la/util/memory.c @@ -1,5 +1,5 @@ //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -/// \file src/libisvd/util/memory.c +/// \file lib/libisvd/la/util/memory.c /// \brief The memory utilities. /// /// \author Mu Yang <> @@ -7,10 +7,12 @@ /// #include -#include +#include #include #include +@ISVD_TYPE_MACRO_DEFINE@ + #if defined(ISVD_USE_MKL) #define isvd_xmalloc( num, type ) (type*)(mkl_malloc(num * sizeof(type), 64)); #else // ISVD_USE_MKL @@ -26,24 +28,24 @@ /// \return The pointer to the array. /// //\{ -isvd_int_t* isvd_imalloc( const size_t num ) { - return isvd_xmalloc(num, isvd_int_t); +INT* isvd_imalloc( const size_t num ) { + return isvd_xmalloc(num, INT); } -float* isvd_smalloc( const size_t num ) { - return isvd_xmalloc(num, float); +REAL4* isvd_smalloc( const size_t num ) { + return isvd_xmalloc(num, REAL4); } -double* isvd_dmalloc( const size_t num ) { - return isvd_xmalloc(num, double); +REAL8* isvd_dmalloc( const size_t num ) { + return isvd_xmalloc(num, REAL8); } -_Complex float* isvd_cmalloc( const size_t num ) { - return isvd_xmalloc(num, _Complex float); +COMP4* isvd_cmalloc( const size_t num ) { + return isvd_xmalloc(num, COMP4); } -_Complex double* isvd_zmalloc( const size_t num ) { - return isvd_xmalloc(num, _Complex double); +COMP8* isvd_zmalloc( const size_t num ) { + return isvd_xmalloc(num, COMP8); } //\} @@ -73,24 +75,24 @@ void isvd_free( void *ptr ) { /// \param num The number of objects. 
/// //\{ -void isvd_imemset0( isvd_int_t *ptr, const size_t num ) { - isvd_xmemset0(ptr, num, isvd_int_t); +void isvd_imemset0( INT *ptr, const size_t num ) { + isvd_xmemset0(ptr, num, INT); } -void isvd_smemset0( float *ptr, const size_t num ) { - isvd_xmemset0(ptr, num, float); +void isvd_smemset0( REAL4 *ptr, const size_t num ) { + isvd_xmemset0(ptr, num, REAL4); } -void isvd_dmemset0( double *ptr, const size_t num ) { - isvd_xmemset0(ptr, num, double); +void isvd_dmemset0( REAL8 *ptr, const size_t num ) { + isvd_xmemset0(ptr, num, REAL8); } -void isvd_cmemset0( _Complex float *ptr, const size_t num ) { - isvd_xmemset0(ptr, num, _Complex float); +void isvd_cmemset0( COMP4 *ptr, const size_t num ) { + isvd_xmemset0(ptr, num, COMP4); } -void isvd_zmemset0( _Complex double *ptr, const size_t num ) { - isvd_xmemset0(ptr, num, _Complex double); +void isvd_zmemset0( COMP8 *ptr, const size_t num ) { + isvd_xmemset0(ptr, num, COMP8); } //\} @@ -105,23 +107,25 @@ void isvd_zmemset0( _Complex double *ptr, const size_t num ) { /// \param num The number of objects. 
/// //\{ -void isvd_imemcpy( isvd_int_t *dst, const isvd_int_t *src, const size_t num ) { - isvd_xmemcpy(dst, src, num, isvd_int_t); +void isvd_imemcpy( INT *dst, const INT *src, const size_t num ) { + isvd_xmemcpy(dst, src, num, INT); } -void isvd_smemcpy( float *dst, const float *src, const size_t num ) { - isvd_xmemcpy(dst, src, num, float); +void isvd_smemcpy( REAL4 *dst, const REAL4 *src, const size_t num ) { + isvd_xmemcpy(dst, src, num, REAL4); } -void isvd_dmemcpy( double *dst, const double *src, const size_t num ) { - isvd_xmemcpy(dst, src, num, double); +void isvd_dmemcpy( REAL8 *dst, const REAL8 *src, const size_t num ) { + isvd_xmemcpy(dst, src, num, REAL8); } -void isvd_cmemcpy( _Complex float *dst, const _Complex float *src, const size_t num ) { - isvd_xmemcpy(dst, src, num, _Complex float); +void isvd_cmemcpy( COMP4 *dst, const COMP4 *src, const size_t num ) { + isvd_xmemcpy(dst, src, num, COMP4); } -void isvd_zmemcpy( _Complex double *dst, const _Complex double *src, const size_t num ) { - isvd_xmemcpy(dst, src, num, _Complex double); +void isvd_zmemcpy( COMP8 *dst, const COMP8 *src, const size_t num ) { + isvd_xmemcpy(dst, src, num, COMP8); } //\} + +@ISVD_TYPE_MACRO_UNDEF@ diff --git a/src/libisvd/util/omp.c b/src/lib/libisvd/la/util/omp.c similarity index 61% rename from src/libisvd/util/omp.c rename to src/lib/libisvd/la/util/omp.c index b6cc124..0423710 100644 --- a/src/libisvd/util/omp.c +++ b/src/lib/libisvd/la/util/omp.c @@ -1,5 +1,5 @@ //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -/// \file include/c/isvd/util/omp.h +/// \file lib/libisvd/la/util/omp.c /// \brief The OpenMP utilities. 
/// /// \author Mu Yang <> @@ -7,7 +7,28 @@ /// #include -#include +#include + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +/// \ingroup c_util_omp_module +/// \brief Returns the maximum number of threads +/// +/// \return The maximum number of threads. +/// +/// \attention This routine should be called outside of an OpenMP scope. +/// +omp_int_t isvd_getOmpMaxSize( void ) { + omp_int_t omp_size = 1; + + ISVD_OMP_PARALLEL + { + if ( isvd_getOmpRank() == 0 ) { + omp_size = isvd_getOmpSize(); + } + } + + return omp_size; +} //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// /// \ingroup c_util_omp_module @@ -15,6 +36,8 @@ /// /// \return The number of threads in the current team. /// +/// \attention This routine should be called within an OpenMP scope. +/// omp_int_t isvd_getOmpSize( void ) { #if defined(ISVD_USE_OMP) return omp_get_num_threads(); @@ -29,6 +52,8 @@ omp_int_t isvd_getOmpSize( void ) { /// /// \return The thread identification number within the current team. /// +/// \attention This routine should be called within an OpenMP scope. +/// omp_int_t isvd_getOmpRank( void ) { #if defined(ISVD_USE_OMP) return omp_get_thread_num(); diff --git a/src/libisvd/la/vml/div.c b/src/lib/libisvd/la/vml/div.c similarity index 91% rename from src/libisvd/la/vml/div.c rename to src/lib/libisvd/la/vml/div.c index 02a07cd..f8bc043 100644 --- a/src/libisvd/la/vml/div.c +++ b/src/lib/libisvd/la/vml/div.c @@ -1,5 +1,5 @@ //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -/// \file src/libisvd/la/vml/div.c +/// \file lib/libisvd/la/vml/div.c /// \brief The VML Div routine. 
/// /// \author Mu Yang <> @@ -7,9 +7,9 @@ /// #include -#include +#include -@ISVD_LA_BLAS_TYPE_DEFINE@ +@ISVD_TYPE_MACRO_DEFINE@ #if defined(ISVD_USE_MKL) @@ -32,4 +32,4 @@ void isvd_vzDiv( const INT n, const COMP8 *a, const COMP8 *b, COMP8 *y ) { isvd_ #endif // ISVD_USE_MKL -@ISVD_LA_BLAS_TYPE_UNDEF@ +@ISVD_TYPE_MACRO_UNDEF@ diff --git a/src/libisvd/la/vml/mul.c b/src/lib/libisvd/la/vml/mul.c similarity index 91% rename from src/libisvd/la/vml/mul.c rename to src/lib/libisvd/la/vml/mul.c index 9d0bf89..5ab7fed 100644 --- a/src/libisvd/la/vml/mul.c +++ b/src/lib/libisvd/la/vml/mul.c @@ -1,5 +1,5 @@ //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -/// \file src/libisvd/la/vml/mul.c +/// \file lib/libisvd/la/vml/mul.c /// \brief The VML Mul routine. /// /// \author Mu Yang <> @@ -7,9 +7,9 @@ /// #include -#include +#include -@ISVD_LA_BLAS_TYPE_DEFINE@ +@ISVD_TYPE_MACRO_DEFINE@ #if defined(ISVD_USE_MKL) @@ -32,4 +32,4 @@ void isvd_vzMul( const INT n, const COMP8 *a, const COMP8 *b, COMP8 *y ) { isvd_ #endif // ISVD_USE_MKL -@ISVD_LA_BLAS_TYPE_UNDEF@ +@ISVD_TYPE_MACRO_UNDEF@ diff --git a/src/libisvd/la/vml/sqrt.c b/src/lib/libisvd/la/vml/sqrt.c similarity index 90% rename from src/libisvd/la/vml/sqrt.c rename to src/lib/libisvd/la/vml/sqrt.c index 14325a9..1d1068c 100644 --- a/src/libisvd/la/vml/sqrt.c +++ b/src/lib/libisvd/la/vml/sqrt.c @@ -1,5 +1,5 @@ //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -/// \file src/libisvd/la/vml/sqrt.c +/// \file lib/libisvd/la/vml/sqrt.c /// \brief The VML Sqrt routine. 
/// /// \author Mu Yang <> @@ -7,9 +7,9 @@ /// #include -#include +#include -@ISVD_LA_BLAS_TYPE_DEFINE@ +@ISVD_TYPE_MACRO_DEFINE@ #if defined(ISVD_USE_MKL) @@ -32,4 +32,4 @@ void isvd_vzSqrt( const INT n, const COMP8 *a, COMP8 *y ) { isvd_xSqrt(n, a, y, #endif // ISVD_USE_MKL -@ISVD_LA_BLAS_TYPE_UNDEF@ +@ISVD_TYPE_MACRO_UNDEF@ diff --git a/src/libisvd/la/vml/sub.c b/src/lib/libisvd/la/vml/sub.c similarity index 91% rename from src/libisvd/la/vml/sub.c rename to src/lib/libisvd/la/vml/sub.c index f8ae430..a0029df 100644 --- a/src/libisvd/la/vml/sub.c +++ b/src/lib/libisvd/la/vml/sub.c @@ -1,5 +1,5 @@ //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -/// \file src/libisvd/la/vml/sub.c +/// \file lib/libisvd/la/vml/sub.c /// \brief The VML Sub routine. /// /// \author Mu Yang <> @@ -7,9 +7,9 @@ /// #include -#include +#include -@ISVD_LA_BLAS_TYPE_DEFINE@ +@ISVD_TYPE_MACRO_DEFINE@ #if defined(ISVD_USE_MKL) @@ -32,4 +32,4 @@ void isvd_vzSub( const INT n, const COMP8 *a, const COMP8 *b, COMP8 *y ) { isvd_ #endif // ISVD_USE_MKL -@ISVD_LA_BLAS_TYPE_UNDEF@ +@ISVD_TYPE_MACRO_UNDEF@ diff --git a/src/lib/libisvd/la/vsl/@x@_rng_gaussian_driver.c b/src/lib/libisvd/la/vsl/@x@_rng_gaussian_driver.c new file mode 100644 index 0000000..5cc15b6 --- /dev/null +++ b/src/lib/libisvd/la/vsl/@x@_rng_gaussian_driver.c @@ -0,0 +1,40 @@ +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +/// \file lib/libisvd/la/vsl/@x@_rng_gaussian_driver.c +/// \brief The VSL RngGaussian driver (@xname@ precision). 
+/// +/// \author Mu Yang <> +/// \copyright MIT License +/// + +#include +#include +#include +#include +#include + +void isvd_v@x@RngGaussianDriver( + const isvd_int_t seed, + const isvd_int_t nskip, + const isvd_int_t n, + @xtype_____@ *r, + const @xtype_____@ a, + const @xtype_____@ sigma +) { + ISVD_OMP_PARALLEL + { + omp_int_t omp_size = isvd_getOmpSize(); + omp_int_t omp_rank = isvd_getOmpRank(); + + isvd_int_t length = n / omp_size; + isvd_int_t start = length * omp_rank; + if ( omp_rank == omp_size-1 ) { + length = n - start; + } + + isvd_VSLStreamStatePtr stream; + isvd_vslNewStream(&stream, seed); + isvd_vslSkipAheadStream(stream, (nskip + start) * 2); + isvd_v@x@RngGaussian(stream, length, r + start, a, sigma); + isvd_vslDeleteStream(&stream); + } +} diff --git a/src/libisvd/la/vsl/rng_gaussian.c b/src/lib/libisvd/la/vsl/rng_gaussian.c similarity index 90% rename from src/libisvd/la/vsl/rng_gaussian.c rename to src/lib/libisvd/la/vsl/rng_gaussian.c index 06f65f8..62ecefa 100644 --- a/src/libisvd/la/vsl/rng_gaussian.c +++ b/src/lib/libisvd/la/vsl/rng_gaussian.c @@ -1,5 +1,5 @@ //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -/// \file src/libisvd/la/vsl/rng_gaussian.c +/// \file lib/libisvd/la/vsl/rng_gaussian.c /// \brief The VSL RngGaussian routine. 
/// /// \author Mu Yang <> @@ -7,10 +7,10 @@ /// #include -#include +#include #include -@ISVD_LA_BLAS_TYPE_DEFINE@ +@ISVD_TYPE_MACRO_DEFINE@ #if defined(ISVD_USE_MKL) @@ -34,4 +34,4 @@ void isvd_vdRngGaussian( isvd_VSLStreamStatePtr stream, const INT n, REAL8 *r, c #endif // ISVD_USE_MKL -@ISVD_LA_BLAS_TYPE_UNDEF@ +@ISVD_TYPE_MACRO_UNDEF@ diff --git a/src/libisvd/la/vsl/service.c b/src/lib/libisvd/la/vsl/service.c similarity index 92% rename from src/libisvd/la/vsl/service.c rename to src/lib/libisvd/la/vsl/service.c index 07f6092..8cd93c2 100644 --- a/src/libisvd/la/vsl/service.c +++ b/src/lib/libisvd/la/vsl/service.c @@ -1,5 +1,5 @@ //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -/// \file src/libisvd/la/vsl/service.c +/// \file lib/libisvd/la/vsl/service.c /// \brief The VSL service routines. /// /// \author Mu Yang <> @@ -12,9 +12,9 @@ #endif // ISVD_USE_MKL #include -#include +#include -@ISVD_LA_BLAS_TYPE_DEFINE@ +@ISVD_TYPE_MACRO_DEFINE@ #if defined(ISVD_USE_MKL) @@ -57,4 +57,4 @@ void isvd_vslSkipAheadStream( isvd_VSLStreamStatePtr stream, const INT nskip ) { #endif // ISVD_USE_MKL -@ISVD_LA_BLAS_TYPE_UNDEF@ +@ISVD_TYPE_MACRO_UNDEF@ diff --git a/src/libisvd/nogpu/@x@_stage.c b/src/lib/libisvd/nogpu/@x@_stage.c similarity index 53% rename from src/libisvd/nogpu/@x@_stage.c rename to src/lib/libisvd/nogpu/@x@_stage.c index bcc04a4..16b746a 100644 --- a/src/libisvd/nogpu/@x@_stage.c +++ b/src/lib/libisvd/nogpu/@x@_stage.c @@ -1,5 +1,5 @@ //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -/// \file src/libisvd/nogpu/@x@_stage.c +/// \file lib/libisvd/nogpu/@x@_stage.c /// \brief The iSVD stages with NOGPU support. 
/// /// \author Mu Yang <> @@ -19,19 +19,19 @@ extern "C" { // Sketching void isvd_@x@SketchGaussianProjection_gpu( - const isvd_Param param, - const @xtype@ *argv, - const isvd_int_t argc, - @xtype@ *retv, - const isvd_int_t retc, - const char dista, - const char ordera, - const @xtype@ *a, - const isvd_int_t lda, - @xtype@ *yst, - const isvd_int_t ldyst, - const isvd_int_t seed, - const mpi_int_t mpi_root + const isvd_Param param, + const @xtype_____@ *argv, + const isvd_int_t argc, + @xtype_____@ *retv, + const isvd_int_t retc, + const char dista, + const char ordera, + const @xtype_____@ *a, + const isvd_int_t lda, + @xtype_____@ *yst, + const isvd_int_t ldyst, + const isvd_int_t seed, + const mpi_int_t mpi_root ) { ISVD_UNUSED(param); ISVD_UNUSED(argv); @@ -52,24 +52,24 @@ void isvd_@x@SketchGaussianProjection_gpu( // Postprocessing void isvd_@x@PostprocessTallSkinnyQr_gpu( - const isvd_Param param, - const @xtype@ *argv, - const isvd_int_t argc, - @xtype@ *retv, - const isvd_int_t retc, - const char dista, - const char ordera, - const @xtype@ *a, - const isvd_int_t lda, - const @xtype@ *qt, - const isvd_int_t ldqt, - @xtype@ *s, - @xtype@ *ut, - const isvd_int_t ldut, - @xtype@ *vt, - const isvd_int_t ldvt, - const mpi_int_t ut_root, - const mpi_int_t vt_root + const isvd_Param param, + const @xtype_____@ *argv, + const isvd_int_t argc, + @xtype_____@ *retv, + const isvd_int_t retc, + const char dista, + const char ordera, + const @xtype_____@ *a, + const isvd_int_t lda, + const @xtype_____@ *qt, + const isvd_int_t ldqt, + @xtype_____@ *s, + @xtype_____@ *ut, + const isvd_int_t ldut, + @xtype_____@ *vt, + const isvd_int_t ldvt, + const mpi_int_t ut_root, + const mpi_int_t vt_root ) { ISVD_UNUSED(param); ISVD_UNUSED(argv); @@ -94,24 +94,24 @@ void isvd_@x@PostprocessTallSkinnyQr_gpu( } void isvd_@x@PostprocessGramian_gpu( - const isvd_Param param, - const @xtype@ *argv, - const isvd_int_t argc, - @xtype@ *retv, - const isvd_int_t retc, - const char dista, - 
const char ordera, - const @xtype@ *a, - const isvd_int_t lda, - const @xtype@ *qt, - const isvd_int_t ldqt, - @xtype@ *s, - @xtype@ *ut, - const isvd_int_t ldut, - @xtype@ *vt, - const isvd_int_t ldvt, - const mpi_int_t ut_root, - const mpi_int_t vt_root + const isvd_Param param, + const @xtype_____@ *argv, + const isvd_int_t argc, + @xtype_____@ *retv, + const isvd_int_t retc, + const char dista, + const char ordera, + const @xtype_____@ *a, + const isvd_int_t lda, + const @xtype_____@ *qt, + const isvd_int_t ldqt, + @xtype_____@ *s, + @xtype_____@ *ut, + const isvd_int_t ldut, + @xtype_____@ *vt, + const isvd_int_t ldvt, + const mpi_int_t ut_root, + const mpi_int_t vt_root ) { ISVD_UNUSED(param); ISVD_UNUSED(argv); @@ -136,24 +136,24 @@ void isvd_@x@PostprocessGramian_gpu( } void isvd_@x@PostprocessSymmetric_gpu( - const isvd_Param param, - const @xtype@ *argv, - const isvd_int_t argc, - @xtype@ *retv, - const isvd_int_t retc, - const char dista, - const char ordera, - const @xtype@ *a, - const isvd_int_t lda, - const @xtype@ *qt, - const isvd_int_t ldqt, - @xtype@ *s, - @xtype@ *ut, - const isvd_int_t ldut, - @xtype@ *vt, - const isvd_int_t ldvt, - const mpi_int_t ut_root, - const mpi_int_t vt_root + const isvd_Param param, + const @xtype_____@ *argv, + const isvd_int_t argc, + @xtype_____@ *retv, + const isvd_int_t retc, + const char dista, + const char ordera, + const @xtype_____@ *a, + const isvd_int_t lda, + const @xtype_____@ *qt, + const isvd_int_t ldqt, + @xtype_____@ *s, + @xtype_____@ *ut, + const isvd_int_t ldut, + @xtype_____@ *vt, + const isvd_int_t ldvt, + const mpi_int_t ut_root, + const mpi_int_t vt_root ) { ISVD_UNUSED(param); ISVD_UNUSED(argv); diff --git a/src/libisvd/nogpu/env.c b/src/lib/libisvd/nogpu/env.c similarity index 94% rename from src/libisvd/nogpu/env.c rename to src/lib/libisvd/nogpu/env.c index cbec278..0605c4d 100644 --- a/src/libisvd/nogpu/env.c +++ b/src/lib/libisvd/nogpu/env.c @@ -1,5 +1,5 @@ 
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -/// \file src/libisvd/nogpu/env.c +/// \file lib/libisvd/nogpu/env.c /// \brief The iSVD environment routines (NOGPU only). /// /// \author Mu Yang <> diff --git a/src/libisvd/util.h b/src/lib/libisvd/util.h similarity index 91% rename from src/libisvd/util.h rename to src/lib/libisvd/util.h index 8ad1edb..446e509 100644 --- a/src/libisvd/util.h +++ b/src/lib/libisvd/util.h @@ -1,5 +1,5 @@ //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -/// \file src/libisvd/util.h +/// \file lib/libisvd/util.h /// \brief The utility header. /// /// \author Mu Yang <> diff --git a/src/libisvd/util/arg.h b/src/lib/libisvd/util/arg.h similarity index 95% rename from src/libisvd/util/arg.h rename to src/lib/libisvd/util/arg.h index 5eae2a7..4668477 100644 --- a/src/libisvd/util/arg.h +++ b/src/lib/libisvd/util/arg.h @@ -1,5 +1,5 @@ //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -/// \file src/libisvd/util/arg.h +/// \file lib/libisvd/util/arg.h /// \brief The argument utilities. 
/// /// \author Mu Yang <> @@ -24,7 +24,7 @@ static inline isvd_int_t isvd_arg2char( const char *opts, const char *rets ) { - if ( rets == nullptr ) { + if ( rets == NULL ) { rets = opts; } @@ -56,7 +56,7 @@ static inline const char* isvd_arg2str( const char *rets[], const size_t nopts ) { - if ( rets == nullptr ) { + if ( rets == NULL ) { rets = opts; } diff --git a/src/libisvd/util/function.h b/src/lib/libisvd/util/function.h similarity index 95% rename from src/libisvd/util/function.h rename to src/lib/libisvd/util/function.h index becaa0e..cd1ef41 100644 --- a/src/libisvd/util/function.h +++ b/src/lib/libisvd/util/function.h @@ -1,5 +1,5 @@ //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -/// \file src/libisvd/util/function.h +/// \file lib/libisvd/util/function.h /// \brief Some useful functions. /// /// \author Mu Yang <> diff --git a/src/libisvd/util/io.c b/src/lib/libisvd/util/io.c similarity index 68% rename from src/libisvd/util/io.c rename to src/lib/libisvd/util/io.c index 5bd5f6d..11306c7 100644 --- a/src/libisvd/util/io.c +++ b/src/lib/libisvd/util/io.c @@ -1,5 +1,5 @@ //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -/// \file src/libisvd/util/io.c +/// \file lib/libisvd/util/io.c /// \brief The I/O utilities. /// /// \author Mu Yang <> @@ -9,6 +9,8 @@ #include #include +@ISVD_TYPE_MACRO_DEFINE@ + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// /// \ingroup c_util_io_module /// \brief Get a variable from stream. @@ -17,25 +19,27 @@ /// \param varp Pointer to the variable. 
/// //\{ -void isvd_ifget( FILE *stream, isvd_int_t *varp ) { +void isvd_ifget( FILE *stream, INT *varp ) { #if !defined(ISVD_USE_ILP64) - isvd_int_t info = fscanf(stream, "%"PRId32, varp); + INT info = fscanf(stream, "%" PRId32, varp); #else // ISVD_USE_ILP64 - isvd_int_t info = fscanf(stream, "%" PRId64, varp); + INT info = fscanf(stream, "%" PRId64, varp); #endif // ISVD_USE_ILP64 ISVD_UNUSED(info); isvd_assert_eq(info, 1); } -void isvd_sfget( FILE *stream, float *varp ) { - isvd_int_t info = fscanf(stream, "%f", varp); +void isvd_sfget( FILE *stream, REAL4 *varp ) { + INT info = fscanf(stream, "%f", varp); ISVD_UNUSED(info); isvd_assert_eq(info, 1); } -void isvd_dfget( FILE *stream, double *varp ) { - isvd_int_t info = fscanf(stream, "%lf", varp); +void isvd_dfget( FILE *stream, REAL8 *varp ) { + INT info = fscanf(stream, "%lf", varp); ISVD_UNUSED(info); isvd_assert_eq(info, 1); } //\} + +@ISVD_TYPE_MACRO_UNDEF@ diff --git a/src/lib/libisvd/util/memory.c b/src/lib/libisvd/util/memory.c new file mode 100644 index 0000000..f4e62ec --- /dev/null +++ b/src/lib/libisvd/util/memory.c @@ -0,0 +1,11 @@ +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +/// \file lib/libisvd/util/memory.c +/// \brief The memory utilities. +/// +/// \author Mu Yang <> +/// \copyright MIT License +/// +/// \note Implement in \ref lib/libisvd/la/util/memory.c +/// + +#include \ No newline at end of file diff --git a/src/libisvd/util/mpi.c b/src/lib/libisvd/util/mpi.c similarity index 86% rename from src/libisvd/util/mpi.c rename to src/lib/libisvd/util/mpi.c index b6e615a..30af4b7 100644 --- a/src/libisvd/util/mpi.c +++ b/src/lib/libisvd/util/mpi.c @@ -1,5 +1,5 @@ //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -/// \file src/libisvd/util/mpi.c +/// \file lib/libisvd/util/mpi.c /// \brief The MPI utilities. 
/// /// \author Mu Yang <> @@ -9,11 +9,6 @@ #include #include -#if !defined(DOXYGEN_SHOULD_SKIP_THIS) -#define MPI_COMPLEX_FLOAT MPI_COMPLEX8 -#define MPI_COMPLEX_DOUBLE MPI_COMPLEX16 -#endif // DOXYGEN_SHOULD_SKIP_THIS - //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// /// \ingroup c_util_mpi_module /// \brief Returns the size of the group associated with a communicator. diff --git a/src/lib/libisvd/util/omp.c b/src/lib/libisvd/util/omp.c new file mode 100644 index 0000000..0a6a13b --- /dev/null +++ b/src/lib/libisvd/util/omp.c @@ -0,0 +1,11 @@ +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +/// \file lib/libisvd/util/omp.c +/// \brief The OpenMP utilities. +/// +/// \author Mu Yang <> +/// \copyright MIT License +/// +/// \note Implement in \ref lib/libisvd/la/util/omp.c +/// + +#include \ No newline at end of file