diff --git a/.github/workflows/ccpp.yml b/.github/workflows/ccpp.yml index a8dcf46..20a2040 100644 --- a/.github/workflows/ccpp.yml +++ b/.github/workflows/ccpp.yml @@ -63,7 +63,7 @@ jobs: run: vcpkg update && vcpkg install cgal eigen3 msmpi # Checkout v2 : https://github.com/actions/checkout - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 # Check CMake Version - name: Check CMake (${{ matrix.mpi }}) diff --git a/Complex/CMakeLists.txt b/Complex/CMakeLists.txt index 5d062e6..9e53876 100644 --- a/Complex/CMakeLists.txt +++ b/Complex/CMakeLists.txt @@ -6,27 +6,27 @@ cmake_minimum_required(VERSION 3.10) find_package(OpenMP REQUIRED) -add_library(simplexBase STATIC simplexBase.cpp simplexBase.hpp) +add_library(simplexBase STATIC simplexBase.cpp) target_link_libraries(simplexBase PUBLIC utils pipePacket simplexTree simplexArrayList alphaComplex witnessComplex betaComplex) target_include_directories(simplexBase PUBLIC ${CMAKE_CURRENT_SOURCE_DIR} ${PROJECT_SOURCE_DIR}/Utils) -add_library(simplexArrayList STATIC simplexArrayList.cpp simplexArrayList.hpp) +add_library(simplexArrayList STATIC simplexArrayList.cpp) target_link_libraries(simplexArrayList PUBLIC simplexBase) target_include_directories(simplexArrayList PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) -add_library(alphaComplex STATIC alphaComplex.cpp alphaComplex.hpp) +add_library(alphaComplex STATIC alphaComplex.cpp) target_link_libraries(alphaComplex PUBLIC utils kdTree simplexArrayList OpenMP::OpenMP_CXX) target_include_directories(alphaComplex PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) -add_library(witnessComplex STATIC witnessComplex.cpp witnessComplex.hpp) +add_library(witnessComplex STATIC witnessComplex.cpp) target_link_libraries(witnessComplex PUBLIC simplexArrayList) target_include_directories(witnessComplex PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) -add_library(simplexTree STATIC simplexTree.cpp simplexTree.hpp) +add_library(simplexTree STATIC simplexTree.cpp) target_link_libraries(simplexTree PUBLIC utils kdTree simplexBase) target_include_directories(simplexTree PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) -add_library(betaComplex STATIC betaComplex.cpp betaComplex.hpp) +add_library(betaComplex STATIC betaComplex.cpp) target_link_libraries(betaComplex PUBLIC alphaComplex) target_include_directories(betaComplex PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) diff --git a/LHFmain/CMakeLists.txt b/LHFmain/CMakeLists.txt index 57c03e6..5a1351a 100644 --- a/LHFmain/CMakeLists.txt +++ b/LHFmain/CMakeLists.txt @@ -15,7 +15,7 @@ include_directories(SYSTEM ${MPI_INCLUDE_PATH}) find_package(OpenMP REQUIRED) #Add Source to this project's executable -add_library(LHFlib SHARED "LHF.cpp" "LHF.hpp") +add_library(LHFlib SHARED LHF.cpp) set_target_properties(LHFlib PROPERTIES VERSION ${PROJECT_VERSION}) set_target_properties(LHFlib PROPERTIES SOVERSION 1) set_target_properties(LHFlib PROPERTIES POSITION_INDEPENDENT_CODE ON) diff --git a/Pipes/CMakeLists.txt b/Pipes/CMakeLists.txt index 3681614..63b75da 100644 --- a/Pipes/CMakeLists.txt +++ b/Pipes/CMakeLists.txt @@ -49,64 +49,65 @@ endif() find_package(OpenMP REQUIRED) find_package(Eigen3 REQUIRED) find_package(TBB) +find_package(MPI REQUIRED) -add_library(pipePacket STATIC "pipePacket.cpp" "pipePacket.hpp") +add_library(pipePacket STATIC pipePacket.cpp) target_link_libraries(pipePacket PUBLIC simplexBase) target_include_directories(pipePacket PUBLIC ${CMAKE_CURRENT_SOURCE_DIR} ${PROJECT_SOURCE_DIR}/Complex) -add_library(basePipe STATIC "basePipe.cpp" "basePipe.hpp") +add_library(basePipe STATIC basePipe.cpp) target_link_libraries(basePipe PUBLIC distMatrixPipe neighGraphPipe ripsPipe betaSkeletonBasedComplex betaSubSkeletonComplex upscalePipe slidingWindow fastPersistence - incrementalPersistence naiveWindow qhullPipe delaunayPipe incrementalPipe) + incrementalPersistence naiveWindow qhullPipe delaunayPipe helixPipe helixDistPipe) target_include_directories(basePipe PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) -add_library(distMatrixPipe STATIC "distMatrixPipe.cpp" "distMatrixPipe.hpp") +add_library(distMatrixPipe STATIC distMatrixPipe.cpp) target_link_libraries(distMatrixPipe PUBLIC utils basePipe) target_include_directories(distMatrixPipe PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) -add_library(neighGraphPipe STATIC "neighGraphPipe.cpp" "neighGraphPipe.hpp") +add_library(neighGraphPipe STATIC neighGraphPipe.cpp) target_link_libraries(neighGraphPipe PUBLIC basePipe) target_include_directories(neighGraphPipe PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) -add_library(ripsPipe STATIC "ripsPipe.cpp" "ripsPipe.hpp") +add_library(ripsPipe STATIC ripsPipe.cpp) target_link_libraries(ripsPipe PUBLIC utils basePipe) target_include_directories(ripsPipe PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) -add_library(betaSkeletonBasedComplex STATIC "betaSkeletonBasedComplex.cpp" "betaSkeletonBasedComplex.hpp") +add_library(betaSkeletonBasedComplex STATIC betaSkeletonBasedComplex.cpp) target_link_libraries(betaSkeletonBasedComplex PUBLIC basePipe alphaComplex qhullPipe readInput utils kdTree qhullcpp qhull_r) target_include_directories(betaSkeletonBasedComplex PUBLIC ${CMAKE_CURRENT_SOURCE_DIR} ${qhullext_SOURCE_DIR}/src) -add_library(betaSubSkeletonComplex STATIC "betaSubSkeletonComplex.cpp" "betaSubSkeletonComplex.hpp") +add_library(betaSubSkeletonComplex STATIC betaSubSkeletonComplex.cpp) target_link_libraries(betaSubSkeletonComplex PUBLIC basePipe alphaComplex readInput utils kdTree) target_include_directories(betaSubSkeletonComplex PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) -add_library(upscalePipe STATIC "upscalePipe.cpp" "upscalePipe.hpp") +add_library(upscalePipe STATIC upscalePipe.cpp) target_link_libraries(upscalePipe PUBLIC basePipe utils) target_include_directories(upscalePipe PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) -add_library(slidingWindow STATIC "slidingWindow.cpp" "slidingWindow.hpp") +add_library(slidingWindow STATIC slidingWindow.cpp) target_link_libraries(slidingWindow PUBLIC basePipe readInput utils) target_include_directories(slidingWindow PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) -add_library(fastPersistence STATIC "fastPersistence.cpp" "fastPersistence.hpp") +add_library(fastPersistence STATIC fastPersistence.cpp) target_link_libraries(fastPersistence PUBLIC basePipe utils unionFind) target_include_directories(fastPersistence PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) -add_library(incrementalPersistence STATIC "incrementalPersistence.cpp" "incrementalPersistence.hpp") +add_library(incrementalPersistence STATIC incrementalPersistence.cpp) target_link_libraries(incrementalPersistence PUBLIC basePipe simplexBase simplexArrayList utils unionFind) target_include_directories(incrementalPersistence PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) -add_library(naiveWindow STATIC "naiveWindow.cpp" "naiveWindow.hpp") +add_library(naiveWindow STATIC naiveWindow.cpp) target_link_libraries(naiveWindow PUBLIC basePipe readInput utils) target_include_directories(naiveWindow PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) -add_library(qhullPipe STATIC "qhullPipe.cpp" "qhullPipe.hpp") +add_library(qhullPipe STATIC qhullPipe.cpp) target_link_libraries(qhullPipe PUBLIC basePipe utils alphaComplex qhullcpp qhull_r) target_include_directories(qhullPipe PUBLIC ${CMAKE_CURRENT_SOURCE_DIR} ${qhullext_SOURCE_DIR}/src) -add_library(delaunayPipe STATIC "delaunayPipe.cpp" "delaunayPipe.hpp") +add_library(delaunayPipe STATIC delaunayPipe.cpp) target_link_libraries(delaunayPipe PUBLIC basePipe alphaComplex utils CGAL::CGAL OpenMP::OpenMP_CXX) target_include_directories(delaunayPipe PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) @@ -114,12 +115,17 @@ if(APPLE) add_compile_definitions(NO_PARALLEL_ALGORITHMS=1) endif() -add_library(incrementalPipe STATIC "incrementalPipe.cpp" "incrementalPipe.hpp") -target_link_libraries(incrementalPipe PUBLIC basePipe utils OpenMP::OpenMP_CXX) -target_include_directories(incrementalPipe PUBLIC ${CMAKE_CURRENT_SOURCE_DIR} ${EIGEN3_INCLUDE_DIR}) +add_library(helixPipe STATIC helixPipe.cpp) +target_link_libraries(helixPipe PUBLIC basePipe utils OpenMP::OpenMP_CXX) +target_include_directories(helixPipe PUBLIC ${CMAKE_CURRENT_SOURCE_DIR} ${EIGEN3_INCLUDE_DIR}) + +add_library(helixDistPipe STATIC helixDistPipe.cpp) +target_link_libraries(helixDistPipe PUBLIC basePipe readInput writeOutput multifileops utils OpenMP::OpenMP_CXX MPI::MPI_CXX) +target_include_directories(helixDistPipe PUBLIC ${CMAKE_CURRENT_SOURCE_DIR} ${EIGEN3_INCLUDE_DIR} ${MPI_INCLUDE_PATH}) if(TBB_FOUND) -target_link_libraries(incrementalPipe PUBLIC TBB::tbb) +target_link_libraries(helixPipe PUBLIC TBB::tbb) +target_link_libraries(helixDistPipe PUBLIC TBB::tbb) endif() diff --git a/Pipes/basePipe.cpp b/Pipes/basePipe.cpp index 36c96b1..31b1d7f 100644 --- a/Pipes/basePipe.cpp +++ b/Pipes/basePipe.cpp @@ -16,13 +16,14 @@ #include "fastPersistence.hpp" #include "ripsPipe.hpp" #include "naiveWindow.hpp" -// #include "betaSkeletonBasedComplex.hpp" -// #include "betaSubSkeletonComplex.hpp" +#include "betaSkeletonBasedComplex.hpp" +#include "betaSubSkeletonComplex.hpp" #include "upscalePipe.hpp" #include "qhullPipe.hpp" #include "slidingWindow.hpp" #include "delaunayPipe.hpp" -#include "incrementalPipe.hpp" +#include "helixPipe.hpp" +#include "helixDistPipe.hpp" template basePipe *basePipe::newPipe(const std::string &pipeType, const std::string &complexType) @@ -65,12 +66,15 @@ basePipe *basePipe::newPipe(const std::string &pipeType, con } else if (pipeType == "upscale") { - std::cout << "Building upscale" << std::endl; return new upscalePipe(); - //} else if (pipeType == "betaSkeletonBasedComplex"){ - // return new betaSkeletonBasedComplex(); - //} else if (pipeType == "betaSubSkeletonComplex"){ - // return new betaSubSkeletonComplex(); + } + else if (pipeType == "betaSkeletonBasedComplex") + { + return new betaSkeletonBasedComplex(); + } + else if (pipeType == "betaSubSkeletonComplex") + { + return new betaSubSkeletonComplex(); } else if (pipeType == "qhullPipe" || pipeType == "qhull" || pipeType == "alpha") { @@ -84,9 +88,13 @@ basePipe *basePipe::newPipe(const std::string &pipeType, con { return new delaunayPipe(); } - else if (pipeType == "incrementalPipe") + else if (pipeType == "helixPipe") + { + return new helixPipe(); + } + else if (pipeType == "helixDistPipe") { - return new incrementalPipe(); + return new helixDistPipe(); } return 0; diff --git a/Pipes/helixDistPipe.cpp b/Pipes/helixDistPipe.cpp new file mode 100644 index 0000000..0240378 --- /dev/null +++ b/Pipes/helixDistPipe.cpp @@ -0,0 +1,370 @@ +#include "helixDistPipe.hpp" +#include "utils.hpp" +#include "readInput.hpp" +#include "writeOutput.hpp" +#include "multifileops.hpp" +#include +#include +#include +#include +#include +#include +#include +#include + +#define WRITE_CSV_OUTPUTS + +template +static inline T dot(const std::vector &a, const std::vector &b) { return std::inner_product(a.begin(), a.end(), b.begin(), static_cast(0)); } // Dot product of two vectors + +template +helixDistPipe::helixDistPipe() : dim(0), data_set_size(0) +{ + this->pipeType = "helixDistPipe"; + return; +} + +// Solution to equation of a hyperplane +template +inline std::vector helixDistPipe::solvePlaneEquation(const std::vector &points) +{ + int numPoints = points.size(); + Eigen::MatrixXd A(numPoints, numPoints); + for (int i = 0; i < numPoints; i++) + for (int j = 0; j < numPoints; j++) + A(i, j) = inputData[points[i]][j]; + Eigen::VectorXd coefficients = A.completeOrthogonalDecomposition().solve(Eigen::VectorXd::Ones(numPoints)); + return std::vector(coefficients.data(), coefficients.data() + coefficients.size()); +} + +// Compute the seed simplex for initialization of algorithm +template +std::vector helixDistPipe::first_simplex() +{ + std::vector simplex; + if (data_set_size < dim + 2) + return simplex; // Not enough points + for (short i = 0; i < dim; i++) + simplex.push_back(i); // Pseudo Random initialization of Splitting Hyperplane + auto seed = std::chrono::high_resolution_clock::now().time_since_epoch().count(); + auto rng = std::default_random_engine{}; + std::vector outer_points; + do + { + simplex.insert(simplex.end(), outer_points.begin(), outer_points.end()); + std::shuffle(simplex.begin(), simplex.end(), rng); + outer_points.clear(); + simplex.erase(simplex.begin() + dim, simplex.end()); + auto equation = solvePlaneEquation(simplex); + for (unsigned i = 0; i < data_set_size; i++) + if (std::find(simplex.begin(), simplex.end(), i) == simplex.end() && dot(inputData[i], equation) > 1) + outer_points.push_back(i); + } while (!outer_points.empty()); // Converge Hyperplane to Convex Hull + double radius = 0; + std::vector center; + for (short i = 0; i < data_set_size; i++) // BruteForce to Find last point for construction of simplex. + { + if (std::find(simplex.begin(), simplex.end(), i) != simplex.end()) + continue; + simplex.push_back(i); + center = utils::circumCenter(simplex, inputData); + radius = utils::vectors_distance(center, inputData[i]); + short point; + for (point = 0; point < data_set_size; point++) + { + if (std::find(simplex.begin(), simplex.end(), point) == simplex.end() && utils::vectors_distance(center, inputData[point]) < radius) + break; + } + if (point == data_set_size) + break; + simplex.pop_back(); + } + std::sort(simplex.begin(), simplex.end()); + return simplex; +} + +// Perform DFS walk on the cospherical region +template +void helixDistPipe::cospherical_handler(std::vector &simp, int &tp, short &omission) +{ + auto triangulation_point = tp; + auto temp = simp; + temp.push_back(triangulation_point); + std::sort(temp.begin(), temp.end()); + spherical_dsimplexes.insert(temp); + for (auto i : temp) + { + auto new_face = temp; + new_face.erase(std::find(new_face.begin(), new_face.end(), i)); + if (simp == new_face) + continue; + auto new_point = expand_d_minus_1_simplex(new_face, i); + if (new_point == -1) + continue; + new_face.push_back(new_point); + std::sort(new_face.begin(), new_face.end()); + spherical_dsimplexes.insert(new_face); + } +} + +#if 1 +// Compute the P_newpoint for provided Facet, P_context Pair +template +short helixDistPipe::expand_d_minus_1_simplex(std::vector &simp, short &omission) +{ + auto normal = solvePlaneEquation(simp); + auto p1 = utils::circumCenter(simp, inputData); + bool direction = (dot(normal, inputData[omission]) > 1); + std::vector radius_vec(data_set_size, 0); + double largest_radius = 0, smallest_radius = std::numeric_limits::max() / 2, ring_radius = utils::vectors_distance(p1, inputData[simp[0]]); + int triangulation_point = -1; + bool flag = true; + std::set simplex(simp.begin(), simp.end()); + simplex.insert(omission); + for (auto &new_point : search_space) + { + if (simplex.find(new_point) == simplex.end() && (direction ^ dot(normal, inputData[new_point]) > 1)) + { + simp.push_back(new_point); + auto temp = utils::vectors_distance(p1, inputData[new_point]); + if (ring_radius > temp) + { + auto temp_radius = utils::circumRadius(simp, distMatrix); + radius_vec[new_point] = (temp_radius >= 0) ? sqrt(temp_radius) : utils::vectors_distance(utils::circumCenter(simp, inputData), inputData[new_point]); + if (largest_radius < radius_vec[new_point]) + { + largest_radius = radius_vec[new_point]; + triangulation_point = new_point; + flag = false; + } + } + else if (flag && 2 * smallest_radius > temp) // reduce search space + { + auto temp_radius = utils::circumRadius(simp, distMatrix); + radius_vec[new_point] = (temp_radius >= 0) ? sqrt(temp_radius) : utils::vectors_distance(utils::circumCenter(simp, inputData), inputData[new_point]); + if (smallest_radius > radius_vec[new_point]) + { + smallest_radius = radius_vec[new_point]; + triangulation_point = new_point; + } + } + simp.pop_back(); + } + } + int count = 0; + if (triangulation_point != -1) + { + double triangulation_radius = radius_vec[triangulation_point]; + count = std::count_if(radius_vec.begin(), radius_vec.end(), [triangulation_radius](double val) + { return std::abs(1 - (val / triangulation_radius)) <= 0.000000000001; }); + if (count != 1) + { + // cospherical_handler(simp,triangulation_point,omission,distMatrix); + return -1; + } + } + return triangulation_point; +} + +#else +// Compute the P_newpoint for provided Facet, P_context Pair +short helixDistPipe::expand_d_minus_1_simplex(std::vector &simp, short &omission) +{ + // Default values + short triangulation_point = -1; + double largest_radius = 0; + double smallest_radius = std::numeric_limits::max() / 2; + + // Modify search_spaceF + search_space.erase(omission); + for (const auto &point : simp) + search_space.erase(point); + + // Mandatory requirements + auto normal = solvePlaneEquation(simp); + bool direction = dot(normal, inputData[omission]) > 1; + + auto center = circumCenter(simp, inputData); + + double ring_radius = vectors_distance(center, inputData[simp[0]]); + + bool flag = true; + return triangulation_point; +} +#endif + +void reduce(std::set> &outer_dsimplexes, std::map, short> &inner_d_1_shell, std::vector> &dsimplexes) +{ + std::map, short> outer_d_1_shell; + for (auto &new_simplex : outer_dsimplexes) + { + dsimplexes.push_back(new_simplex); + for (short i = 0; i < new_simplex.size(); i++) + { + std::vector key = new_simplex; + key.erase(key.begin() + i); + auto it = outer_d_1_shell.try_emplace(std::move(key), new_simplex[i]); + if (!it.second) + outer_d_1_shell.erase(it.first); // Create new shell and remove collided faces max only 2 can occur. + } + } + outer_dsimplexes.clear(); +#ifndef NO_PARALLEL_ALGORITHMS + std::for_each(std::execution::par_unseq, inner_d_1_shell.begin(), inner_d_1_shell.end(), [&](const auto &simp) + { outer_d_1_shell.erase(simp.first); }); // Remove faces from previous iteration +#else + std::for_each(inner_d_1_shell.begin(), inner_d_1_shell.end(), [&](const auto &simp) + { outer_d_1_shell.erase(simp.first); }); +#endif + inner_d_1_shell.clear(); + std::swap(inner_d_1_shell, outer_d_1_shell); + return; +} + +// run -> Run the configured functions of this line segment +template +void helixDistPipe::runPipe(pipePacket &inData) +{ + inputData = inData.inputData; + distMatrix = inData.distMatrix; + dim = inputData.size() > 0 ? inputData[0].size() : 0; + data_set_size = inputData.size(); + for (unsigned i = 0; i < inputData.size(); i++) + this->search_space.insert(i); + + int numProcesses, rank; + MPI_Comm_size(MPI_COMM_WORLD, &numProcesses); + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + + // Initialization of algorithm by serial processing + if (rank == 0) + { + // Refresh the directories + std::clog << "MPI configured with " << numProcesses << " processes" << std::endl; + std::filesystem::exists("output") && std::filesystem::remove_all("output"); + std::filesystem::exists("intermediate") && std::filesystem::remove_all("intermediate"); + std::filesystem::exists("input") && std::filesystem::remove_all("input"); + std::filesystem::create_directory("output") && std::filesystem::create_directory("intermediate") && std::filesystem::create_directory("input"); + // Perform initial iteration normally + std::vector> initial_dsimplexes = {first_simplex()}; + std::map, short> inner_d_1_shell; + for (auto &new_simplex : initial_dsimplexes) + { + for (auto &i : new_simplex) + { + std::vector key = new_simplex; + key.erase(std::find(key.begin(), key.end(), i)); + inner_d_1_shell.emplace(key, i); + } + } + // Compute 10000 facets per process to reduce no of iterations + for (int i = 0; i < 3; i++) + { + std::set> outer_dsimplexes; + for (auto &[facet, point] : inner_d_1_shell) + { + std::vector first_vector = facet; + short new_point = expand_d_minus_1_simplex(first_vector, point); + if (new_point == -1) + continue; + first_vector.push_back(new_point); + std::sort(first_vector.begin(), first_vector.end()); + outer_dsimplexes.insert(first_vector); + } + reduce(outer_dsimplexes, inner_d_1_shell, initial_dsimplexes); + } + std::clog << "Iter 0 on process " << rank << " Found " << initial_dsimplexes.size() << " dsimplexes" << std::endl; +#ifdef WRITE_CSV_OUTPUTS + std::sort(initial_dsimplexes.begin(), initial_dsimplexes.end()); + writeOutput::writeBinary(initial_dsimplexes, "output/0_0.bin"); +#endif + initial_dsimplexes.clear(); + if (inner_d_1_shell.empty()) + return; + writeOutput::writeBinary(inner_d_1_shell, "input/1.dat"); + } + + // Restrict other processes from proceeding until serial task is completed + MPI_Barrier(MPI_COMM_WORLD); + + // Compute layer by layer next simplexes + int iter_counter = 1; + while (true) + { + std::map, short> local_d_1_shell_map = readInput::readBinaryMap("input/" + std::to_string(iter_counter) + ".dat", numProcesses, rank); + if (local_d_1_shell_map.empty()) + break; + std::vector> local_dsimplexes_output; + while (!local_d_1_shell_map.empty()) // Compute dsimplexes for individual process + { + auto iter = local_d_1_shell_map.begin(); + std::vector first_vector = iter->first; + short omission = iter->second; + local_d_1_shell_map.erase(iter); + short new_point = expand_d_minus_1_simplex(first_vector, omission); + if (new_point == -1) + continue; + first_vector.insert(std::lower_bound(first_vector.begin(), first_vector.end(), new_point), new_point); + for (auto &i : first_vector) + { + std::vector key = first_vector; + key.erase(std::find(key.begin(), key.end(), i)); + local_d_1_shell_map.erase(key); + } + local_dsimplexes_output.push_back(std::move(first_vector)); + } + + std::clog << "Iter " << iter_counter << " on process " << rank << " Found " << local_dsimplexes_output.size() << " dsimplexes" << std::endl; + // Commit dsimplexes to file + +#ifdef WRITE_CSV_OUTPUTS + if (!local_dsimplexes_output.empty()) + { + std::sort(local_dsimplexes_output.begin(), local_dsimplexes_output.end()); + writeOutput::writeBinary(local_dsimplexes_output, "output/" + std::to_string(iter_counter) + "_" + std::to_string(rank) + ".bin"); + } +#endif + // Compute facets from current layer and store to intermediate file + std::map, short> outer_d_1_shell; + for (auto &new_simplex : local_dsimplexes_output) + { + for (short i = 0; i < new_simplex.size(); i++) + { + std::vector key = new_simplex; + key.erase(key.begin() + i); + auto it = outer_d_1_shell.try_emplace(std::move(key), new_simplex[i]); + it.first->second = (it.second ? new_simplex[i] : -1); + } + } + local_dsimplexes_output.clear(); + writeOutput::writeBinary(outer_d_1_shell, "intermediate/" + std::to_string(rank) + ".dat"); + outer_d_1_shell.clear(); + + // Wait for all process to commit facets + MPI_Barrier(MPI_COMM_WORLD); + + if (rank == 0) + { + // Perform custom multifile sort on the intermediate simplexes also remove duplicate entries + MultiFile, short>> facets("intermediate"); + facets.compressMap("input/" + std::to_string(iter_counter + 1) + ".dat", iter_counter); + } + + MPI_Barrier(MPI_COMM_WORLD); + std::filesystem::remove("intermediate/" + std::to_string(rank) + ".dat"); + iter_counter++; + } +#ifdef WRITE_CSV_OUTPUTS + if (rank == 0) + { + MultiFile> dsimplexes("output"); + std::clog << "Found " << dsimplexes.writeCSV("dsimplexes.csv") << " dsimplexes for datset" << std::endl; + } +#endif + + return; +} + +template class helixDistPipe; +template class helixDistPipe; +template class helixDistPipe; \ No newline at end of file diff --git a/Pipes/helixDistPipe.hpp b/Pipes/helixDistPipe.hpp new file mode 100644 index 0000000..df1eeb9 --- /dev/null +++ b/Pipes/helixDistPipe.hpp @@ -0,0 +1,28 @@ +#pragma once +#include +#include +#include +#include "basePipe.hpp" + +// base constructor +template +class helixDistPipe : public basePipe +{ +private: + std::vector> inputData; + std::vector> distMatrix; + std::map, short> inner_d_1_shell_map; + std::set> dsimplexes; + std::set> spherical_dsimplexes; + std::set search_space; + unsigned dim; + unsigned data_set_size; + std::vector solvePlaneEquation(const std::vector &points); + std::vector first_simplex(); + void cospherical_handler(std::vector &simp, int &tp, short &omission); + short expand_d_minus_1_simplex(std::vector &simp_vector, short &omission); + +public: + helixDistPipe(); + void runPipe(pipePacket &inData); +}; diff --git a/Pipes/incrementalPipe.cpp b/Pipes/helixPipe.cpp similarity index 80% rename from Pipes/incrementalPipe.cpp rename to Pipes/helixPipe.cpp index cba3ad6..8a59ca2 100644 --- a/Pipes/incrementalPipe.cpp +++ b/Pipes/helixPipe.cpp @@ -1,4 +1,4 @@ -#include +#include "helixPipe.hpp" #include #include #include @@ -6,14 +6,14 @@ #include #include #include -// #define PARALLEL +#define PARALLEL template T dot(const std::vector &a, const std::vector &b) { return std::inner_product(a.begin(), a.end(), b.begin(), static_cast(0)); } // Dot product of two vectors // Solution to equation of a hyperplane template -std::vector incrementalPipe::solvePlaneEquation(const std::vector &points) +std::vector helixPipe::solvePlaneEquation(const std::vector &points) { int numPoints = points.size(); Eigen::MatrixXd A(numPoints, numPoints); @@ -26,7 +26,7 @@ std::vector incrementalPipe::solvePlaneEquation(const std::vec // Compute the seed simplex for initialization of algorithm template -std::vector incrementalPipe::first_simplex() +std::vector helixPipe::first_simplex() { std::vector simplex; if (this->data_set_size < this->dim + 2) @@ -72,13 +72,13 @@ std::vector incrementalPipe::first_simplex() // Perform DFS walk on the cospherical region template -void incrementalPipe::cospherical_handler(std::vector &simp, int &tp, short &omission, std::vector> &distMatrix) +void helixPipe::cospherical_handler(std::vector &simp, int &tp, short &omission, std::vector> &distMatrix) { auto triangulation_point = tp; auto temp = simp; temp.push_back(triangulation_point); std::sort(temp.begin(), temp.end()); - this->dsimplexes.insert(temp); + this->spherical_dsimplexes.insert(temp); for (auto i : temp) { auto new_face = temp; @@ -90,13 +90,13 @@ void incrementalPipe::cospherical_handler(std::vector &simp, in continue; new_face.push_back(new_point); std::sort(new_face.begin(), new_face.end()); - this->dsimplexes.insert(new_face); + this->spherical_dsimplexes.insert(new_face); } } // Compute the P_newpoint for provided Facet, P_context Pair template -int incrementalPipe::expand_d_minus_1_simplex(std::vector &simp, short &omission, std::vector> &distMatrix) +int helixPipe::expand_d_minus_1_simplex(std::vector &simp, short &omission, std::vector> &distMatrix) { auto normal = solvePlaneEquation(simp); auto p1 = utils::circumCenter(simp, this->inputData); @@ -109,13 +109,14 @@ int incrementalPipe::expand_d_minus_1_simplex(std::vector &simp simplex.insert(omission); for (auto &new_point : this->search_space) { - if (simplex.find(new_point) == simplex.end() && (direction ^ (dot(normal, inputData[new_point]) > 1))) + if (simplex.find(new_point) == simplex.end() && (direction ^ dot(normal, inputData[new_point]) > 1)) { simp.push_back(new_point); auto temp = utils::vectors_distance(p1, inputData[new_point]); if (ring_radius > temp) { - radius_vec[new_point] = sqrt(utils::circumRadius(simp, distMatrix)); + auto temp_radius = utils::circumRadius(simp, distMatrix); + radius_vec[new_point] = (temp_radius >= 0) ? sqrt(temp_radius) : utils::vectors_distance(utils::circumCenter(simp, this->inputData), inputData[new_point]); if (largest_radius < radius_vec[new_point]) { largest_radius = radius_vec[new_point]; @@ -125,7 +126,8 @@ int incrementalPipe::expand_d_minus_1_simplex(std::vector &simp } else if (flag && 2 * smallest_radius > temp) // reduce search space { - radius_vec[new_point] = sqrt(utils::circumRadius(simp, distMatrix)); + auto temp_radius = utils::circumRadius(simp, distMatrix); + radius_vec[new_point] = (temp_radius >= 0) ? sqrt(temp_radius) : utils::vectors_distance(utils::circumCenter(simp, this->inputData), inputData[new_point]); if (smallest_radius > radius_vec[new_point]) { smallest_radius = radius_vec[new_point]; @@ -143,14 +145,26 @@ int incrementalPipe::expand_d_minus_1_simplex(std::vector &simp { return std::abs(1 - (val / triangulation_radius)) <= 0.000000000001; }); if (count != 1) { - // cospherical_handler(simp,triangulation_point,omission,distMatrix); +#ifdef PARALLEL + { + std::cout << "Triangulation radius is " << triangulation_radius << std::endl; +#pragma omp critical + { + // cospherical_handler(simp,triangulation_point,omission,distMatrix); + } + } +#else + { + // cospherical_handler(simp,triangulation_point,omission,distMatrix); + } +#endif return -1; } } return triangulation_point; } -void reduce(std::set> &outer_dsimplexes, std::vector, short>> &inner_d_1_shell, std::set> &dsimplexes) +void reduce(std::set> &outer_dsimplexes, std::vector, short>> &inner_d_1_shell, std::vector> &dsimplexes) { std::map, short> outer_d_1_shell; for (auto &new_simplex : outer_dsimplexes) @@ -181,7 +195,7 @@ void reduce(std::set> &outer_dsimplexes, std::vector Run the configured functions of this pipeline segment template -void incrementalPipe::runPipe(pipePacket &inData) +void helixPipe::runPipe(pipePacket &inData) { this->inputData = inData.inputData; this->dim = inputData[0].size(); @@ -224,7 +238,7 @@ void incrementalPipe::runPipe(pipePacket &inData) } #else { - for (auto &new_simplex : dsimplexes) + for (auto &new_simplex : this->dsimplexes) { for (auto &i : new_simplex) { @@ -244,8 +258,7 @@ void incrementalPipe::runPipe(pipePacket &inData) continue; first_vector.push_back(new_point); std::sort(first_vector.begin(), first_vector.end()); - if (!this->dsimplexes.insert(first_vector).second) - continue; + this->dsimplexes.push_back(first_vector); for (auto &i : first_vector) { if (i == new_point) @@ -265,15 +278,15 @@ void incrementalPipe::runPipe(pipePacket &inData) // basePipe constructor template -incrementalPipe::incrementalPipe() : inputData(*(new std::vector>())) +helixPipe::helixPipe() : inputData(*(new std::vector>())) { - this->pipeType = "incrementalPipe"; + this->pipeType = "helixPipe"; return; } // configPipe -> configure the function settings of this pipeline segment template -bool incrementalPipe::configPipe(std::map &configMap) +bool helixPipe::configPipe(std::map &configMap) { std::string strDebug; @@ -290,13 +303,13 @@ bool incrementalPipe::configPipe(std::map &c this->ut = utils(strDebug, this->outputFile); this->configured = true; - this->ut.writeDebug("incrementalPipe", "Configured with parameters { eps: " + configMap["epsilon"] + " , debug: " + strDebug + ", outputFile: " + this->outputFile + " }"); + this->ut.writeDebug("helixPipe", "Configured with parameters { eps: " + configMap["epsilon"] + " , debug: " + strDebug + ", outputFile: " + this->outputFile + " }"); return true; } // outputData -> used for tracking each stage of the pipeline's data output without runtime template -void incrementalPipe::outputData(pipePacket &inData) +void helixPipe::outputData(pipePacket &inData) { std::ofstream file; file.open("output/" + this->pipeType + "_output.csv"); @@ -306,6 +319,6 @@ void incrementalPipe::outputData(pipePacket &inData) return; } -template class incrementalPipe; -template class incrementalPipe; -template class incrementalPipe; \ No newline at end of file +template class helixPipe; +template class helixPipe; +template class helixPipe; \ No newline at end of file diff --git a/Pipes/helixPipe.hpp b/Pipes/helixPipe.hpp new file mode 100644 index 0000000..134739a --- /dev/null +++ b/Pipes/helixPipe.hpp @@ -0,0 +1,26 @@ +#pragma once + +#include "basePipe.hpp" +#include "utils.hpp" + +// basePipe constructor +template +class helixPipe : public basePipe { + private: + std::vector>& inputData; + std::map, short> inner_d_1_shell; + std::vector> dsimplexes; + std::set> spherical_dsimplexes; + std::vector search_space; + unsigned dim; + unsigned data_set_size; + std::vector solvePlaneEquation(const std::vector &points); + std::vector first_simplex(); + void cospherical_handler(std::vector &simp, int &tp, short &omission, std::vector>& distMatrix); + int expand_d_minus_1_simplex(std::vector &simp_vector, short &omission, std::vector>& distMatrix); + public: + helixPipe(); + void runPipe(pipePacket& inData); + bool configPipe(std::map &configMap); + void outputData(pipePacket&); +}; diff --git a/Pipes/incrementalPipe.hpp b/Pipes/incrementalPipe.hpp deleted file mode 100644 index f5a271d..0000000 --- a/Pipes/incrementalPipe.hpp +++ /dev/null @@ -1,27 +0,0 @@ -#pragma once - -#include "basePipe.hpp" -#include "utils.hpp" - -// basePipe constructor -template -class incrementalPipe : public basePipe -{ -private: - std::vector> &inputData; - std::map, short> inner_d_1_shell; - std::set> dsimplexes; - std::vector search_space; - unsigned dim; - unsigned data_set_size; - std::vector solvePlaneEquation(const std::vector &points); - std::vector first_simplex(); - void cospherical_handler(std::vector &simp, int &tp, short &omission, std::vector> &distMatrix); - int expand_d_minus_1_simplex(std::vector &simp_vector, short &omission, std::vector> &distMatrix); - -public: - incrementalPipe(); - void runPipe(pipePacket &inData); - bool configPipe(std::map &configMap); - void outputData(pipePacket &); -}; diff --git a/Preprocessing/CMakeLists.txt b/Preprocessing/CMakeLists.txt index c38bbd9..a21c55e 100644 --- a/Preprocessing/CMakeLists.txt +++ b/Preprocessing/CMakeLists.txt @@ -4,35 +4,35 @@ cmake_minimum_required(VERSION 3.5) -add_library(cluster STATIC cluster.cpp cluster.hpp) +add_library(cluster STATIC cluster.cpp) target_link_libraries(cluster PUBLIC utils kMeansPlusPlus) target_include_directories(cluster PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) -add_library(preprocessor STATIC preprocessor.cpp preprocessor.hpp) +add_library(preprocessor STATIC preprocessor.cpp) target_link_libraries(preprocessor PUBLIC kMeansPlusPlus pipePacket) target_include_directories(preprocessor PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) -add_library(kMeansPlusPlus STATIC kMeansPlusPlus.cpp kMeansPlusPlus.hpp) +add_library(kMeansPlusPlus STATIC kMeansPlusPlus.cpp) target_link_libraries(kMeansPlusPlus PUBLIC utils preprocessor cluster) target_include_directories(kMeansPlusPlus PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) -add_library(streamingKmeans STATIC streamingKmeans.cpp streamingKmeans.hpp) +add_library(streamingKmeans STATIC streamingKmeans.cpp) target_link_libraries(streamingKmeans PUBLIC utils streamingUtils preprocessor) target_include_directories(streamingKmeans PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) -add_library(streamingUtils STATIC streamingUtils.cpp streamingUtils.hpp) +add_library(streamingUtils STATIC streamingUtils.cpp) target_link_libraries(streamingUtils PUBLIC utils preprocessor) target_include_directories(streamingUtils PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) -add_library(denStream STATIC denStream.cpp denStream.hpp) +add_library(denStream STATIC denStream.cpp) target_link_libraries(denStream PUBLIC utils dbscan preprocessor) target_include_directories(denStream PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) -add_library(dbscan STATIC dbscan.cpp dbscan.hpp) +add_library(dbscan STATIC dbscan.cpp) target_link_libraries(dbscan PUBLIC utils kdTree kDTree) target_include_directories(dbscan PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) -add_library(kdTree STATIC kdTree.cpp kdTree.hpp) +add_library(kdTree STATIC kdTree.cpp) target_link_libraries(kdTree PUBLIC utils) target_include_directories(kdTree PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) diff --git a/Utils/CMakeLists.txt b/Utils/CMakeLists.txt index 65b5a9f..0125f73 100644 --- a/Utils/CMakeLists.txt +++ b/Utils/CMakeLists.txt @@ -12,23 +12,27 @@ if(APPLE) add_compile_definitions(NO_PARALLEL_ALGORITHMS=1) endif() -add_library(utils STATIC utils.cpp utils.hpp) +add_library(utils STATIC utils.cpp) target_link_libraries(utils PUBLIC kdTree Eigen3::Eigen OpenMP::OpenMP_CXX) target_include_directories(utils PUBLIC ${CMAKE_CURRENT_SOURCE_DIR} ${EIGEN3_INCLUDE_DIRS} ${OpenMP_CXX_INCLUDE_DIRS}) -add_library(unionFind STATIC unionFind.cpp unionFind.hpp) +add_library(unionFind STATIC unionFind.cpp) target_include_directories(unionFind PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) -add_library(readInput STATIC readInput.cpp readInput.hpp) +add_library(readInput STATIC readInput.cpp) target_include_directories(readInput PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) -add_library(argParser STATIC argParser.cpp argParser.hpp) +add_library(argParser STATIC argParser.cpp) target_include_directories(argParser PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) -add_library(writeOutput STATIC writeOutput.cpp writeOutput.hpp) +add_library(writeOutput STATIC writeOutput.cpp) target_link_libraries(writeOutput PUBLIC utils) target_include_directories(writeOutput PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) +add_library(multifileops STATIC multifileops.cpp) +target_link_libraries(multifileops PUBLIC utils) +target_include_directories(multifileops PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) + INSTALL (TARGETS utils LIBRARY DESTINATION lib ARCHIVE DESTINATION lib) diff --git a/Utils/multifileops.cpp b/Utils/multifileops.cpp new file mode 100644 index 0000000..f025440 --- /dev/null +++ b/Utils/multifileops.cpp @@ -0,0 +1,302 @@ +#include +#include +#include +#include +#include +#include +#include +#include "multifileops.hpp" + +/** + * @brief Construct a new Vector File:: Vector File object + * + * @param filename + */ +VectorFile::VectorFile(const std::string &filename) : fileStream(filename, std::ios::in) +{ + if (!fileStream.is_open()) + throw std::runtime_error("Failed to open the file."); + updateCache(); +}; + +/** + * @brief + * + * @param other + * @return true + * @return false + */ +bool VectorFile::operator<(const VectorFile &other) const +{ + return cache < other.cache; +}; + +/** + * @brief + * + * @return true + * @return false + */ +bool VectorFile::updateCache() +{ + std::string line; + if (std::getline(fileStream, line)) + { + cache.clear(); + std::string cell; + std::istringstream lineStream(line); + while (std::getline(lineStream, cell, ',')) + cache.push_back(std::stoi(cell)); + return true; + } + else + { + fileStream.close(); + return false; + } +}; + +/** + * @brief Construct a new Vector Binary File:: Vector Binary File object + * + * @param filename + */ +VectorBinaryFile::VectorBinaryFile(const std::string &filename) : fileStream(filename, std::ios::in | std::ios::binary) +{ + if (!fileStream.is_open()) + throw std::runtime_error("Failed to open the file."); + fileStream.read(reinterpret_cast(&dim1), sizeof(dim1)); + fileStream.read(reinterpret_cast(&dim2), sizeof(dim2)); + cache.resize(dim2); + updateCache(); +}; + +/** + * @brief + * + * @param other + * @return true + * @return false + */ +bool VectorBinaryFile::operator<(const VectorBinaryFile &other) const +{ + return cache < other.cache; +}; + +/** + * @brief + * + * @return true + * @return false + */ +bool VectorBinaryFile::updateCache() +{ + if (dim1 > 0) + { + fileStream.read(reinterpret_cast(cache.data()), dim2 * sizeof(short)); + dim1--; + return true; + } + else + { + fileStream.close(); + return false; + } +}; + +/** + * @brief Construct a new Map Binary File:: Map Binary File object + * + * @param filename + */ +MapBinaryFile::MapBinaryFile(const std::string &filename) : fileStream(filename, std::ios::in | std::ios::binary) +{ + if (!fileStream.is_open()) + throw std::runtime_error("Failed to open the file." + filename); + fileStream.read(reinterpret_cast(&mapSize), sizeof(mapSize)); + fileStream.read(reinterpret_cast(&vectorSize), sizeof(vectorSize)); + cache.first.resize(vectorSize); + updateCache(); +}; + +/** + * @brief + * + * @param other + * @return true + * @return false + */ +bool MapBinaryFile::operator<(const MapBinaryFile &other) const +{ + return cache.first < other.cache.first; +}; + +/** + * @brief + * + * @return true + * @return false + */ +bool MapBinaryFile::updateCache() +{ + if (mapSize > 0) + { + fileStream.read(reinterpret_cast(cache.first.data()), vectorSize * sizeof(short)); + fileStream.read(reinterpret_cast(&cache.second), sizeof(short)); + mapSize--; + return true; + } + else + { + fileStream.close(); + return false; + } +}; + +/** + * @brief + * + * @tparam FileType + * @tparam baseType + * @return true + * @return false + */ +template +bool MultiFile::readValue() +{ + if (fileDataMap.empty()) + return false; + auto minElementIt = std::min_element(fileDataMap.begin(), fileDataMap.end()); + curr_element = minElementIt->cache; + if (!minElementIt->updateCache()) + fileDataMap.erase(minElementIt); + return true; +}; + +/** + * @brief + * + * @tparam FileType + * @tparam baseType + * @return true + * @return false + */ +template <> +bool MultiFile,short>>::readUnique() +{ + while (readValue()) + { + bool isUnique = true; + for (auto it = fileDataMap.begin(); it != fileDataMap.end(); ++it) + { + if (curr_element.first == it->cache.first) + { + isUnique &= curr_element.second == it->cache.second; + if (!it->updateCache()) + fileDataMap.erase(it--); + } + } + if (isUnique) + return true; + } + return false; +}; + +/** + * @brief Construct a new Multi F Ile< File Type, base Type>:: Multi F Ile object + * + * @tparam FileType + * @tparam baseType + * @param directory + */ +template +MultiFile::MultiFile(const std::string &directory) +{ + if (!std::filesystem::is_directory(directory)) + throw std::invalid_argument("Not a valid directory: " + directory); + for (const auto &entry : std::filesystem::directory_iterator(directory)) + if (entry.is_regular_file()) + fileDataMap.emplace_back(entry.path().string()); +}; + +/** + * @brief + * + * @tparam FileType + * @tparam baseType + * @param outputFileName + * @param iterationCounter + */ +template <> +void MultiFile, short>>::compressMap(const std::string &outputFileName, int iterationCounter) +{ + // Open the output file for writing + std::ofstream outputFile(outputFileName, std::ios::out | std::ios::binary); + + // Read from the previous iteration's data file + MapBinaryFile previousReader("input/" + std::to_string(iterationCounter) + ".dat"); + + // Initialize variables for map size and vector size + size_t mapSize = 0, vectorSize = previousReader.cache.first.size(); + + outputFile.write(reinterpret_cast(&mapSize), sizeof(mapSize)); // Write a placeholder for map size to the output file + outputFile.write(reinterpret_cast(&vectorSize), sizeof(vectorSize)); // Write the size of the vector to facilitate reconstruction + + // Read values and write them to the output file + while (readUnique()) + { + if (curr_element.second != -1) + { + while (previousReader.cache.first < curr_element.first && previousReader.updateCache()) + continue; + // Seek the reader from the previous iteration to the current value + if (previousReader.cache.first != curr_element.first) // Write binary to the file if curr_element was not processed in the previous iteration + { + outputFile.write(reinterpret_cast(curr_element.first.data()), vectorSize * sizeof(short)); + outputFile.write(reinterpret_cast(&curr_element.second), sizeof(short)); + mapSize++; + } + } + } + + // Seek to the beginning of the file and overwrite the mapSize binary with the original value + outputFile.seekp(0); + outputFile.write(reinterpret_cast(&mapSize), sizeof(mapSize)); + outputFile.close(); + + // Close the file streams and remove the previous iteration's data file + previousReader.fileStream.close(); + std::filesystem::remove("input/" + std::to_string(iterationCounter) + ".dat"); +}; + +/** + * @brief + * + * @tparam FileType + * @tparam baseType + * @param outputFileName + * @return size_t + */ +template <> +size_t MultiFile>::writeCSV(const std::string &outputFileName) +{ + std::ofstream outputFile(outputFileName, std::ios::out); + size_t size = 0; + while (readValue()) + { + for (auto it = fileDataMap.begin(); it != fileDataMap.end(); ++it) + { + if (curr_element == it->cache && !it->updateCache()) + fileDataMap.erase(it--); + } + for (size_t i = 0; i < curr_element.size() - 1; ++i) + outputFile << curr_element[i] << ','; // Add a comma to separate values (CSV format) + outputFile << curr_element[curr_element.size() - 1] << '\n'; // Add a newline character to separate rows + size++; + } + outputFile.close(); + return size; +}; + +template class MultiFile, short>>; +template class MultiFile>; \ No newline at end of file diff --git a/Utils/multifileops.hpp b/Utils/multifileops.hpp new file mode 100644 index 0000000..120397d --- /dev/null +++ b/Utils/multifileops.hpp @@ -0,0 +1,54 @@ +#pragma once + +#include +#include +#include + +struct VectorFile +{ + std::ifstream fileStream; + std::vector cache; + + VectorFile(const std::string &filename); + bool operator<(const VectorFile &other) const; + bool updateCache(); +}; + +struct VectorBinaryFile +{ + std::ifstream fileStream; + std::vector cache; + size_t dim1; + size_t dim2; + + VectorBinaryFile(const std::string &filename); + bool operator<(const VectorBinaryFile &other) const; + bool updateCache(); +}; + +struct MapBinaryFile +{ + std::pair, short> cache; + std::ifstream fileStream; + size_t mapSize; + size_t vectorSize; + + MapBinaryFile(const std::string &filename); + bool operator<(const MapBinaryFile &other) const; + bool updateCache(); +}; + +template +class MultiFile +{ +private: + std::vector fileDataMap; + baseType curr_element; + bool readValue(); + bool readUnique(); + +public: + MultiFile(const std::string &directory); + void compressMap(const std::string &outputFileName, int iterationCounter); + size_t writeCSV(const std::string &outputFileName); +}; \ No newline at end of file diff --git a/Utils/readInput.cpp b/Utils/readInput.cpp index c5fbcc9..b78b9b0 100644 --- a/Utils/readInput.cpp +++ b/Utils/readInput.cpp @@ -196,6 +196,90 @@ std::vector> readInput::readMAT(const std::string &filename) return result; } +/** + * @brief + * + * @param filename + * @param numProcesses + * @param rank + * @return std::map, short> + */ +std::map, short> readInput::readBinaryMap(const std::string &filename, int numProcesses, int rank) +{ + std::ifstream file(filename, std::ios::in | std::ios::binary); + if (!file.is_open()) + { + std::cerr << "Failed to open the file for reading." << std::endl; + return {}; + } + + std::map, short> data; + + size_t mapSize; + size_t vectorSize; + + // Read the size of the map and vector + file.read(reinterpret_cast(&mapSize), sizeof(mapSize)); + file.read(reinterpret_cast(&vectorSize), sizeof(vectorSize)); + size_t block_size = ceil((double) mapSize / numProcesses); + size_t end = block_size * (rank+1); + size_t start = (rank == 0) ? 0 : end - block_size; + + // Read data using standard file operations + std::vector key(vectorSize); + short value; + + // Seek to the correct position in the file + file.seekg(sizeof(size_t) * 2 + (start * (vectorSize + 1)) * sizeof(short)); + + for (size_t i = start; i < end; ++i) + { + file.read(reinterpret_cast(key.data()), vectorSize * sizeof(short)); + file.read(reinterpret_cast(&value), sizeof(short)); + data.emplace(key, value); + } + + file.close(); + return data; +} + +/** + * @brief + * + * @param filename + * @return std::vector> + */ +std::vector> readInput::readBinaryVector(const std::string &filename) +{ + std::ifstream file(filename, std::ios::in | std::ios::binary); + + if (!file.is_open()) + { + std::cerr << "Failed to open the file for reading." << std::endl; + return {}; + } + + size_t dim1; + size_t dim2; + + // Read the dimensions of the vector + file.read(reinterpret_cast(&dim1), sizeof(dim1)); + file.read(reinterpret_cast(&dim2), sizeof(dim2)); + + std::vector> data; + data.reserve(dim1); + + while (dim1-- > 0) + { + std::vector vec(dim2); + file.read(reinterpret_cast(vec.data()), dim2 * sizeof(short)); + data.emplace_back(vec); + } + + file.close(); + return data; +} + /** @brief Initializes the input stream for reading from a file. @@ -234,4 +318,4 @@ bool readInput::streamRead(std::vector &row) return true; else return false; -} +} \ No newline at end of file diff --git a/Utils/readInput.hpp b/Utils/readInput.hpp index ddcaf78..ef5c8d2 100644 --- a/Utils/readInput.hpp +++ b/Utils/readInput.hpp @@ -6,6 +6,7 @@ #include #include #include +#include // Header file for readInput class - see readInput.cpp for descriptions @@ -20,6 +21,8 @@ class readInput readInput(); static std::vector> readCSV(const std::string &filename); static std::vector> readMAT(const std::string &filename); + static std::map, short> readBinaryMap(const std::string &filename, int numProcesses = 1, int rank = 0); + static std::vector> readBinaryVector(const std::string &filename); bool streamInit(const std::string &filename); bool streamRead(std::vector &); }; diff --git a/Utils/utils.cpp b/Utils/utils.cpp index 387c6f8..0666f82 100644 --- a/Utils/utils.cpp +++ b/Utils/utils.cpp @@ -830,32 +830,25 @@ std::vector utils::circumCenter(const std::vector &simplex, const Eigen::VectorXd matC(m, 1); std::vector circumCenter(n, 0.0); const Eigen::VectorXd &Sn = Eigen::Map(inputData[simplex.back()].data(), n); - int ii = 0; - for (auto i = simplex.begin(); i != simplex.end() - 1; ++i) + for (int i = 0; i < m - 1; ++i) { - int temp = ii; - const Eigen::VectorXd &d1 = Eigen::Map(inputData[*i].data(), n) - Sn; - for (auto j = i; j != simplex.end() - 1; ++j, ++temp) + const Eigen::VectorXd &d1 = Eigen::Map(inputData[simplex[i]].data(), n) - Sn; + for (int j = i; j < m - 1; ++j) { - const double dotProduct = d1.dot(Eigen::Map(inputData[*j].data(), n) - Sn); - matA(ii, temp) = dotProduct; - matA(temp, ii) = dotProduct; - if (i == j) - matC(ii) = dotProduct / 2.0; + const double dotProduct = d1.dot(Eigen::Map(inputData[simplex[j]].data(), n) - Sn); + matA(i, j) = dotProduct; + matA(j, i) = dotProduct; } - matA(ii, m - 1) = 0; - ii++; } - matA.row(ii).setConstant(1); - matC(ii) = 1; + matA.col(m - 1).setZero(); + matA.row(m - 1).setOnes(); + matC = matA.diagonal() / 2; + matC(m - 1) = 1; // Solve the linear Eigen::VectorXd rawCircumCenter = matA.inverse() * matC; - for (int i = 0; i < n; i++) - { - auto index = simplex.begin(); - for (int j = 0; j < m; ++j, ++index) - circumCenter[i] += rawCircumCenter(j) * inputData[*index][i]; - } + for (int i = 0; i < m; ++i) + for (int j = 0; j < n; ++j) + circumCenter[j] += rawCircumCenter(i) * inputData[simplex[i]][j]; return circumCenter; } @@ -868,19 +861,12 @@ std::vector utils::circumCenter(const std::vector &simplex, const */ double utils::circumRadius(const std::vector &simplex, const std::vector> &distMatrix) { - unsigned n = simplex.size(); + const unsigned n = simplex.size(); Eigen::MatrixXd matA(n, n); Eigen::MatrixXd matACap(n + 1, n + 1); - unsigned ii = 0; - for (auto i : simplex) - { - unsigned temp = 0; - for (auto j : simplex) - { - matA(ii, temp++) = (i <= j) ? distMatrix[i][j] * distMatrix[i][j] : distMatrix[j][i] * distMatrix[j][i]; - } - ii++; - } + for (unsigned i = 0; i < n; ++i) + for (unsigned j = 0; j < n; ++j) + matA(i, j) = simplex[i] <= simplex[j] ? distMatrix[simplex[i]][simplex[j]] : distMatrix[simplex[j]][simplex[i]]; matACap.block(1, 1, n, n) = matA; matACap.col(0).setConstant(1); matACap.row(0).setConstant(1); @@ -1767,7 +1753,7 @@ std::vector utils::serialize(const std::vector> &ori * @param beta * @return std::pair>, std::vector> */ -std::pair>, std::vector> utils::calculateBetaCentersandRadius(const std::vector &dsimplex, std::vector> &inputData, const std::vector> *distMatrix, double beta) +std::pair>, std::vector> utils::calculateBetaCentersandRadius(const std::vector &dsimplex, const std::vector> &inputData, const std::vector> *distMatrix, double beta) { std::vector> betacenters; std::vector betaradii; diff --git a/Utils/utils.hpp b/Utils/utils.hpp index f9610c5..62ad7c1 100644 --- a/Utils/utils.hpp +++ b/Utils/utils.hpp @@ -117,7 +117,7 @@ class utils template static std::set extractBoundaryPoints(const std::vector &); - static std::vector mapPartitionIndexing(const std::vector &, std::vector); + static std::vector mapPartitionIndexing(const std::vector &, std::vector); // Const static void print2DVector(const std::vector> &); static void print1DVector(const std::vector &); static void print1DVector(const std::set &); @@ -140,7 +140,7 @@ class utils static bool sortBySecond(const std::pair, double> &, const std::pair, double> &); - static double determinantOfMatrix(std::vector> mat, unsigned n); + static double determinantOfMatrix(std::vector> mat, unsigned n); // Const // Alpha (delaunay) static double circumRadius(const std::set &simplex, const std::vector> *distMatrix); static double circumRadius(const std::vector &simplex, const std::vector> &distMatrix); @@ -148,13 +148,13 @@ class utils static std::vector circumCenter(const std::vector &simplex, const std::vector> &inputData); static double simplexVolume(const std::set &simplex, const std::vector> *distMatrix, int dd); static double simplexVolume(const std::vector> &mat); - static std::vector> inverseOfMatrix(std::vector> mat, int n); + static std::vector> inverseOfMatrix(std::vector> mat, int n); // Const static std::vector> matrixMultiplication(const std::vector> &matA, const std::vector> &matB); - static std::pair, std::vector>> nullSpaceOfMatrix(const std::set &simplex, const std::vector> &inputdata, std::vector cc, double radius, bool lowerdimension = false); + static std::pair, std::vector>> nullSpaceOfMatrix(const std::set &simplex, const std::vector> &inputdata, std::vector cc, double radius, bool lowerdimension = false); // Const static std::vector> betaNeighbors(const std::vector> &, double beta, const std::string &betaMode); static std::vector> betaCentersCalculation(const std::vector &hpcoff, double beta, double circumRadius, const std::vector &circumCenter); - static std::pair>, std::vector> calculateBetaCentersandRadius(const std::vector &simplex, std::vector> &inputData, const std::vector> *distMatrix, double beta); + static std::pair>, std::vector> calculateBetaCentersandRadius(const std::vector &simplex, const std::vector> &inputData, const std::vector> *distMatrix, double beta); static std::vector serialize(const std::vector> &); static std::vector> deserialize(const std::vector &, unsigned); diff --git a/Utils/writeOutput.cpp b/Utils/writeOutput.cpp index 21d4ca1..803421b 100644 --- a/Utils/writeOutput.cpp +++ b/Utils/writeOutput.cpp @@ -15,21 +15,23 @@ #include #include #include "writeOutput.hpp" + /** * @brief Construct a new write Output::write Output object - * + * */ // writeOutput constructor, currently no needed information for the class constructor writeOutput::writeOutput() { } + /** - * @brief - * - * @param stats - * @param filename - * @return true - * @return false + * @brief + * + * @param stats + * @param filename + * @return true + * @return false */ // writeStat -> write the pipeline statistics to a csv formatted file bool writeOutput::writeStats(const std::string &stats, const std::string &filename) @@ -40,13 +42,14 @@ bool writeOutput::writeStats(const std::string &stats, const std::string &filena file.close(); return true; } + /** - * @brief - * - * @param stats - * @param filename - * @return true - * @return false + * @brief + * + * @param stats + * @param filename + * @return true + * @return false */ // writeRunLog -> write the run log to a csv formatted file bool writeOutput::writeRunLog(const std::string &stats, const std::string &filename) @@ -57,39 +60,46 @@ bool writeOutput::writeRunLog(const std::string &stats, const std::string &filen file.close(); return true; } + /** - * @brief - * - * @param data - * @param filename - * @return true - * @return false + * @brief + * + * @param data + * @param filename + * @return true + * @return false */ bool writeOutput::writeBarcodes(const std::vector &data, const std::string &filename) { std::ofstream file(filename + ".csv"); file << "dimension,birth,death\n"; - for (auto& row : data) + for (auto &row : data) file << std::to_string(row.bettiDim) << "," << std::to_string(row.birth) << "," << std::to_string(row.death) << '\n'; file.close(); return true; } + /** - * @brief - * - * @param data - * @param filename - * @return true - * @return false + * @brief + * + * @param data + * @param filename + * @return true + * @return false */ // writeCSV -> write a csv formatted file of data input bool writeOutput::writeCSV(const std::vector> &data, const std::string &filename) { - std::ofstream file(filename + ".csv"); + std::ofstream file(filename, std::ios::app); + if (!file.is_open()) + { + std::cerr << "Failed to open the file for writing: " << filename << std::endl; + return false; + } - for (auto& row : data) + for (auto &row : data) { for (size_t i = 0; i < row.size() - 1; ++i) { @@ -101,13 +111,14 @@ bool writeOutput::writeCSV(const std::vector> &data, const s file.close(); return true; } + /** - * @brief - * - * @param data - * @param filename - * @return true - * @return false + * @brief + * + * @param data + * @param filename + * @return true + * @return false */ // writeCSV -> write a csv formatted file of data input bool writeOutput::writeCSV(const std::string &data, const std::string &filename) @@ -117,14 +128,15 @@ bool writeOutput::writeCSV(const std::string &data, const std::string &filename) file.close(); return true; } + /** - * @brief - * - * @param data - * @param filename - * @param header - * @return true - * @return false + * @brief + * + * @param data + * @param filename + * @param header + * @return true + * @return false */ // writeCSV -> write a csv formatted file of data input bool writeOutput::writeCSV(const std::vector> &data, const std::string &filename, const std::string &header) @@ -133,7 +145,7 @@ bool writeOutput::writeCSV(const std::vector> &data, const s file << header; - for (auto& row : data) + for (auto &row : data) { for (size_t i = 0; i < row.size() - 1; ++i) { @@ -145,14 +157,15 @@ bool writeOutput::writeCSV(const std::vector> &data, const s file.close(); return true; } + /** - * @brief - * - * @param data - * @param filename - * @param header - * @return true - * @return false + * @brief + * + * @param data + * @param filename + * @param header + * @return true + * @return false */ // writeCSV -> write a csv formatted file of data input bool writeOutput::writeCSV(const std::string &data, const std::string &filename, const std::string &header) @@ -163,13 +176,85 @@ bool writeOutput::writeCSV(const std::string &data, const std::string &filename, file.close(); return true; } + /** - * @brief - * - * @param data - * @param filename - * @return true - * @return false + * @brief + * + * @param data + * @param filename + */ +void writeOutput::writeBinary(const std::map, short> &data, const std::string &filename) +{ + if (data.empty()) + return; + std::ofstream file(filename, std::ios::out | std::ios::binary); + + if (!file.is_open()) + { + std::cerr << "Failed to open the file for writing." << std::endl; + return; + } + + // Write the size of the map + const size_t mapSize = data.size(); + const size_t vectorSize = data.begin()->first.size(); + + // Write the size of the map and vector + file.write(reinterpret_cast(&mapSize), sizeof(mapSize)); + file.write(reinterpret_cast(&vectorSize), sizeof(vectorSize)); + + // Iterate through the map and write each key-value pair + for (const auto &entry : data) + { + // Write the vector elements + file.write(reinterpret_cast(entry.first.data()), vectorSize * sizeof(short)); + // Write the short value associated with the vector + file.write(reinterpret_cast(&entry.second), sizeof(short)); + } + + file.close(); +} + +/** + * @brief + * + * @param data + * @param filename + */ +void writeOutput::writeBinary(const std::vector> &data, const std::string &filename) +{ + if (data.empty()) + return; + std::ofstream file(filename, std::ios::out | std::ios::binary); + + if (!file.is_open()) + { + std::cerr << "Failed to open the file for writing." << std::endl; + return; + } + + // Write the size of the map + const size_t dim1 = data.size(); + const size_t dim2 = data.begin()->size(); + + // Write the size of the map and vector + file.write(reinterpret_cast(&dim1), sizeof(dim1)); + file.write(reinterpret_cast(&dim2), sizeof(dim2)); + + // Iterate through the map and write each key-value pair + for (const auto &entry : data) + file.write(reinterpret_cast(entry.data()), dim2 * sizeof(short)); + + file.close(); +} + +/** + * @brief + * + * @param data + * @param filename + * @return true + * @return false */ // writeMAT -> write in a mat formatted file of data input bool writeOutput::writeMAT(const std::vector> &data, const std::string &filename) @@ -178,9 +263,9 @@ bool writeOutput::writeMAT(const std::vector> &data, const s file << std::to_string(data.size()) << "\n" << std::to_string(data[0].size()) << "\n"; - for (auto& row : data) + for (auto &row : data) { - for (auto& column : row) + for (auto &column : row) { file << std::to_string(column) << "\n"; } @@ -188,31 +273,33 @@ bool writeOutput::writeMAT(const std::vector> &data, const s file.close(); return true; } + /** - * @brief - * - * @param data - * @return true - * @return false + * @brief + * + * @param data + * @return true + * @return false */ // writeConsole -> write data input to console (hopefully pretty-print) bool writeOutput::writeConsole(const std::vector &data) { std::cout << "dimension,birth,death" << std::endl; - for (auto& row : data) + for (auto &row : data) std::cout << std::to_string(row.bettiDim) << "," << std::to_string(row.birth) << "," << std::to_string(row.death) << std::endl; std::cout << std::endl; return true; } + /** - * @brief - * - * @param args - * @param ident - * @param wdStats - * @param runtime - * @return std::string + * @brief + * + * @param args + * @param ident + * @param wdStats + * @param runtime + * @return std::string */ std::string writeOutput::logRun(const std::map &args, const std::string &ident, const std::string &wdStats, const std::string &runtime) { diff --git a/Utils/writeOutput.hpp b/Utils/writeOutput.hpp index 3cfbaac..17d8fe1 100644 --- a/Utils/writeOutput.hpp +++ b/Utils/writeOutput.hpp @@ -14,6 +14,8 @@ class writeOutput static bool writeCSV(const std::string &, const std::string &, const std::string &); static bool writeCSV(const std::vector> &, const std::string &); static bool writeCSV(const std::vector> &, const std::string &, const std::string &); + static void writeBinary(const std::map, short> &, const std::string &); + static void writeBinary(const std::vector> &, const std::string &); static bool writeMAT(const std::vector> &, const std::string &); static bool writeBarcodes(const std::vector &, const std::string &); static bool writeConsole(const std::vector &);