diff --git a/.github/workflows/PUBLICATION.yaml b/.github/workflows/PUBLICATION.yaml deleted file mode 100644 index 63959e3..0000000 --- a/.github/workflows/PUBLICATION.yaml +++ /dev/null @@ -1,98 +0,0 @@ -name: CI -run-name: ${{ github.actor }} is building the publication CI - -on: push - -jobs: - build_paper: - strategy: - fail-fast: true - matrix: - platform: [ ubuntu-latest, macos-latest ] - BUILD_TYPE: [ Release ] - cc: [ clang ] - cxx: [ clang++ ] - runs-on: ${{ matrix.platform }} - - steps: - - uses: actions/checkout@v4 - - - uses: actions/setup-node@v4 - - - name: mkdir build - run: mkdir ${{github.workspace}}/build - - - name: Configure CMake - run: cmake -DALP_BUILD_PUBLICATION=ON -S ${{github.workspace}} -B ${{github.workspace}}/build - env: - CXX: ${{ matrix.cxx }} - - - name: Build - run: cmake --build ${{github.workspace}}/build -j 16 - - - name: Test - working-directory: ${{github.workspace}}/build - run: ctest -j 4 - - - name: Check if ALP_DATASET_DIR_PATH is set and print download link - run: | - if [ -z "${ALP_DATASET_DIR_PATH}" ]; then - echo -e "\033[33mPlease download the dataset from: https://drive.google.com/drive/folders/167faTwZJjqJMKM9Yc6E7KF5LUbsitxJS?usp=sharing\033[0m" - echo -e "\033[33mWarning: ALP_DATASET_DIR_PATH is not set!\033[0m" - exit 1 # Fail the workflow - else - echo "ALP_DATASET_DIR_PATH is set to ${ALP_DATASET_DIR_PATH}" - fi - - - name: run compression ratio alp - run: ${{github.workspace}}/build/b/publication/source_code/bench_compression_ratio/bench_alp_compression_ratio - - - name: run compression ratio alp32 - run: ${{github.workspace}}/build/b/publication/source_code/bench_compression_ratio/bench_alp32_compression_ratio - - - name: run compression ratio zstd - run: ${{github.workspace}}/build/b/publication/source_code/bench_compression_ratio/bench_zstd_compression_ratio - - name: run bench_alp_cutter_decode - run: ${{github.workspace}}/build/b/publication/source_code/bench/bench_alp_cutter_decode - - - name: run 
bench_alp_cutter_encode - run: ${{github.workspace}}/build/b/publication/source_code/bench/bench_alp_cutter_encode - - - name: run bench_alp_encode - run: ${{github.workspace}}/build/b/publication/source_code/bench/bench_alp_encode - - - name: run bench_alp_without_sampling - run: ${{github.workspace}}/build/b/publication/source_code/bench/bench_alp_without_sampling - - - name: run bench_chimp - run: ${{github.workspace}}/build/b/publication/source_code/bench/bench_chimp - - - name: run bench_chimp128 - run: ${{github.workspace}}/build/b/publication/source_code/bench/bench_chimp128 - - - name: run bench_gorillas - run: ${{github.workspace}}/build/b/publication/source_code/bench/bench_gorillas - - - name: run bench_patas - run: ${{github.workspace}}/build/b/publication/source_code/bench/bench_patas - - - name: run bench_zstd - run: ${{github.workspace}}/build/b/publication/source_code/bench/bench_zstd - - - name: Set up Python - uses: actions/setup-python@v4 - with: - python-version: '3.10' - - - name: Install dependencies - run: | - python -m pip install --upgrade pip - pip install -r publication/plotter/requirements.txt - - - name: Run plotter script - run: python publication/plotter/plotter.py - - - - - diff --git a/CMakeLists.txt b/CMakeLists.txt index 97899ed..4099afb 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -25,7 +25,7 @@ if (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND ALP_ENABLE_CLANG_TIDY) else () set(CMAKE_CXX_CLANG_TIDY ${CLANG_TIDY_EXE}; - -header-filter=include/alp; + -header-filter=include/alp,data/include; -warnings-as-errors=*;) endif () diff --git a/availability_reproducibility_initiative_report.md b/availability_reproducibility_initiative_report.md index ad52bb4..ea738b0 100644 --- a/availability_reproducibility_initiative_report.md +++ b/availability_reproducibility_initiative_report.md @@ -18,7 +18,6 @@ - figure 6 : todo 5) Documentation on how to compile, deploy, run the code, and use the scripts: - - Follow the [Publication 
CI](.github/workflows/PUBLICATION.yaml) - In [BENCHMARKING.md](/BENCHMARKING.md) we detail how to replicate the experiments and benchmarks presented in our [publication](https://dl.acm.org/doi/pdf/10.1145/3626717). 6) A link to [a single master script](publication/master_script/master_script.sh) that runs the experiments, collects diff --git a/benchmarks/bench_compression_ratio/alp32.cpp b/benchmarks/bench_compression_ratio/alp32.cpp index 9fa0987..122dd9f 100644 --- a/benchmarks/bench_compression_ratio/alp32.cpp +++ b/benchmarks/bench_compression_ratio/alp32.cpp @@ -128,10 +128,10 @@ class alp32_test : public ::testing::Test { * This test will output and write a file with the estimated bits/value after compression with alp */ TEST_F(alp32_test, test_alprd32_on_whole_datasets) { - std::ofstream ofile(alp_bench::PATHS.RESULT_DIR_PATH + "alp_rd32_compression_ratio.csv", std::ios::out); + std::ofstream ofile(alp_bench::get_paths().result_dir_path + "alp_rd32_compression_ratio.csv", std::ios::out); ofile << "dataset,size,rowgroups_count,vectors_count\n"; - for (auto& dataset : alp_bench::sp_datasets) { + for (auto& dataset : alp_bench::get_sp_datasets()) { if (!dataset.suitable_for_cutting) { continue; } std::cout << dataset.name << std::endl; diff --git a/benchmarks/bench_compression_ratio/bench_alp_compression_ratio.cpp b/benchmarks/bench_compression_ratio/bench_alp_compression_ratio.cpp index 2e24684..2b2525d 100644 --- a/benchmarks/bench_compression_ratio/bench_alp_compression_ratio.cpp +++ b/benchmarks/bench_compression_ratio/bench_alp_compression_ratio.cpp @@ -73,7 +73,7 @@ void read_data(std::vector& data, const std::string& csv_file_path, cons file.seekg(0, std::ios::beg); // Ensure the file size is a multiple of the size of a double - if (fileSize % sizeof(double) != 0) { throw std::runtime_error("File size is not a multiple of double size!"); } + // if (fileSize % sizeof(double) != 0) { throw std::runtime_error("File size is not a multiple of double size!"); 
} // Calculate the number of doubles std::size_t numDoubles = fileSize / sizeof(double); @@ -327,14 +327,10 @@ class alp_test : public ::testing::Test { * This test will output and write a file with the estimated bits/value after compression with alp */ TEST_F(alp_test, test_alp_on_whole_datasets) { - if (const auto v = std::getenv("ALP_DATASET_DIR_PATH"); v == nullptr) { - throw std::runtime_error("Environment variable ALP_DATASET_DIR_PATH is not set!"); - } - - std::ofstream ofile(alp_bench::PATHS.RESULT_DIR_PATH + "alp_compression_ratio.csv", std::ios::out); + std::ofstream ofile(alp_bench::get_paths().result_dir_path + "alp_compression_ratio.csv", std::ios::out); ofile << "dataset,size,rowgroups_count,vectors_count\n"; - for (auto& dataset : alp_bench::alp_dataset) { + for (auto& dataset : alp_bench::get_alp_dataset()) { bench_alp_compression_ratio(dataset, ofile); } } @@ -344,19 +340,19 @@ TEST_F(alp_test, test_alp_on_whole_datasets) { * This test will output and write a file with the estimated bits/value after compression with alp */ TEST_F(alp_test, test_alprd_on_whole_datasets) { - std::ofstream ofile(alp_bench::PATHS.RESULT_DIR_PATH + "alp_rd_compression_ratio.csv", std::ios::out); + std::ofstream ofile(alp_bench::get_paths().result_dir_path + "alp_rd_compression_ratio.csv", std::ios::out); ofile << "dataset,size,rowgroups_count,vectors_count\n"; - for (auto& dataset : alp_bench::alp_dataset) { + for (auto& dataset : alp_bench::get_alp_dataset()) { bench_alp_rd_compression_ratio(dataset, ofile); } } TEST_F(alp_test, test_alprd_on_evalimplsts) { - std::ofstream ofile(alp_bench::PATHS.RESULT_DIR_PATH + "evalimplsts.csv", std::ios::out); + std::ofstream ofile(alp_bench::get_paths().result_dir_path + "evalimplsts.csv", std::ios::out); ofile << "dataset,size,rowgroups_count,vectors_count\n"; - for (auto& dataset : alp_bench::evalimplsts) { + for (auto& dataset : alp_bench::get_evalimplsts()) { bench_alp_rd_compression_ratio(dataset, ofile); } } diff --git 
a/benchmarks/bench_speed/bench_alp_cutter_decode.cpp b/benchmarks/bench_speed/bench_alp_cutter_decode.cpp index 712c8f5..fe25141 100644 --- a/benchmarks/bench_speed/bench_alp_cutter_decode.cpp +++ b/benchmarks/bench_speed/bench_alp_cutter_decode.cpp @@ -85,7 +85,7 @@ void benchmark_all(benchmark::Benchmark& benchmark) { unffor_left_arr = new (std::align_val_t {64}) uint16_t[VECTOR_SIZE]; glue_arr = new (std::align_val_t {64}) double[VECTOR_SIZE]; - for (auto& dataset : alp_bench::alp_dataset) { + for (auto& dataset : alp_bench::get_alp_dataset()) { std::ifstream ifile(dataset.csv_file_path, std::ios::in); if (!dataset.suitable_for_cutting) { continue; } if (dataset.name.find("bw") != std::string::npos) { continue; } diff --git a/benchmarks/bench_speed/bench_alp_cutter_encode.cpp b/benchmarks/bench_speed/bench_alp_cutter_encode.cpp index cf0a0f8..07f0d1b 100644 --- a/benchmarks/bench_speed/bench_alp_cutter_encode.cpp +++ b/benchmarks/bench_speed/bench_alp_cutter_encode.cpp @@ -83,7 +83,7 @@ void benchmark_all(benchmark::Benchmark& benchmark) { unffor_left_arr = new (std::align_val_t {64}) uint16_t[VECTOR_SIZE]; glue_arr = new (std::align_val_t {64}) double[VECTOR_SIZE]; - for (auto& dataset : alp_bench::alp_dataset) { + for (auto& dataset : alp_bench::get_alp_dataset()) { std::ifstream ifile(dataset.csv_file_path, std::ios::in); if (!dataset.suitable_for_cutting) { continue; } if (dataset.name.find("bw") != std::string::npos) { continue; } diff --git a/benchmarks/bench_speed/bench_alp_encode.cpp b/benchmarks/bench_speed/bench_alp_encode.cpp index 90ca769..fa7c507 100644 --- a/benchmarks/bench_speed/bench_alp_encode.cpp +++ b/benchmarks/bench_speed/bench_alp_encode.cpp @@ -60,7 +60,7 @@ void benchmark_all(benchmark::Benchmark& benchmark) { base_arr = new (std::align_val_t {64}) int64_t[1024]; rg_smp_arr = new (std::align_val_t {64}) double[1024]; - for (auto& dataset : alp_bench::alp_dataset) { + for (auto& dataset : alp_bench::get_alp_dataset()) { std::ifstream 
ifile(dataset.csv_file_path, std::ios::in); if (dataset.suitable_for_cutting) { continue; } if (dataset.name.find("bw") != std::string::npos) { continue; } diff --git a/benchmarks/bench_speed/bench_alp_without_sampling.cpp b/benchmarks/bench_speed/bench_alp_without_sampling.cpp index 27eba6d..a8fcd11 100644 --- a/benchmarks/bench_speed/bench_alp_without_sampling.cpp +++ b/benchmarks/bench_speed/bench_alp_without_sampling.cpp @@ -101,7 +101,7 @@ void benchmark_all(benchmark::Benchmark& benchmark) { base_arr = new (std::align_val_t {64}) int64_t[1024]; rg_smp_arr = new (std::align_val_t {64}) double[1024]; - for (auto& dataset : alp_bench::alp_dataset) { + for (auto& dataset : alp_bench::get_alp_dataset()) { std::ifstream ifile(dataset.csv_file_path, std::ios::in); if (dataset.suitable_for_cutting) { continue; } if (dataset.name.find("bw") != std::string::npos) { continue; } diff --git a/data/full_data/README.md b/data/full_data/README.md new file mode 100644 index 0000000..e69de29 diff --git a/data/include/column.hpp b/data/include/column.hpp index 8002307..7d2db6d 100644 --- a/data/include/column.hpp +++ b/data/include/column.hpp @@ -1,8 +1,6 @@ #ifndef COLUMN_HPP #define COLUMN_HPP -// NOLINTBEGIN - #include #include #include @@ -21,24 +19,24 @@ struct Column { }; struct paths { - std::string GENERATED_COLUMNS_CSV_PATH = std::string {CMAKE_SOURCE_DIR} + "/data/generated/"; - std::string ALP_DATASET_CSV_PATH = std::string {CMAKE_SOURCE_DIR} + "/data/samples/"; - std::string EDGE_DATASET_CSV_PATH = std::string {CMAKE_SOURCE_DIR} + "/data/edge_case/"; - std::string RESULT_DIR_PATH = std::string {CMAKE_SOURCE_DIR} + "/publication/"; - std::string EVALIMPLSTS_CSV_PATH = std::string {CMAKE_SOURCE_DIR} + "/data/evalimplsts/"; - - std::string ALP_DATASET_BINARY_DIR_PATH = " "; + std::string generated_columns_csv_path = std::string {CMAKE_SOURCE_DIR} + "/data/generated/"; + std::string alp_dataset_csv_path = std::string {CMAKE_SOURCE_DIR} + "/data/samples/"; + std::string 
edge_dataset_csv_path = std::string {CMAKE_SOURCE_DIR} + "/data/edge_case/"; + std::string result_dir_path = std::string {CMAKE_SOURCE_DIR} + "/publication/"; + std::string evalimplsts_csv_path = std::string {CMAKE_SOURCE_DIR} + "/data/evalimplsts/"; + std::string alp_dataset_binary_dir_path = std::string {CMAKE_SOURCE_DIR} + "/data/full_data/"; explicit paths() { - auto v = std::getenv("ALP_DATASET_DIR_PATH"); - if (v) { ALP_DATASET_BINARY_DIR_PATH = v; } + const auto v = std::getenv("ALP_DATASET_DIR_PATH"); + if (v) { alp_dataset_binary_dir_path = v; } } }; -inline paths PATHS; +inline paths get_paths() { + static paths PATHS; + return PATHS; +} } // namespace alp_bench #endif - -// NOLINTEND \ No newline at end of file diff --git a/data/include/double_columns.hpp b/data/include/double_columns.hpp index 376b850..b770f80 100644 --- a/data/include/double_columns.hpp +++ b/data/include/double_columns.hpp @@ -1,290 +1,298 @@ -#ifndef ALP_DOUBLE_COLUMNS_HPP -#define ALP_DOUBLE_COLUMNS_HPP +#ifndef DOUBLE_COLUMNS_HPP +#define DOUBLE_COLUMNS_HPP #include "column.hpp" namespace alp_bench { -inline std::array alp_dataset = {{ - - {1, - "Air-Pressure", - PATHS.ALP_DATASET_CSV_PATH + "neon_air_pressure.csv", - PATHS.ALP_DATASET_BINARY_DIR_PATH + "neon_air_pressure.bin", - 14, - 9, - 3, - 16}, - - {2, - "Arade/4", - PATHS.ALP_DATASET_CSV_PATH + "arade4.csv", - PATHS.ALP_DATASET_BINARY_DIR_PATH + "arade4.bin", - 14, - 10, - 8, - 24}, - - {3, - "Basel-Temp", - PATHS.ALP_DATASET_CSV_PATH + "basel_temp_f.csv", - PATHS.ALP_DATASET_BINARY_DIR_PATH + "basel_temp_f.bin", - 14, - 7, - 47, - 28}, - - {4, - "Basel-Wind", - PATHS.ALP_DATASET_CSV_PATH + "basel_wind_f.csv", - PATHS.ALP_DATASET_BINARY_DIR_PATH + "basel_wind_f.bin", - 14, - 7, - 9, - 29}, - - {5, - "Bird-Mig", - PATHS.ALP_DATASET_CSV_PATH + "bird_migration_f.csv", - PATHS.ALP_DATASET_BINARY_DIR_PATH + "bird_migration_f.bin", - 14, - 9, - 2, - 17}, - - {6, - "Btc-Price", - PATHS.ALP_DATASET_CSV_PATH + "bitcoin_f.csv", - 
PATHS.ALP_DATASET_BINARY_DIR_PATH + "bitcoin_f.bin", - 14, - 10, - 10, - 25}, - - {7, - "Blockchain", - PATHS.ALP_DATASET_CSV_PATH + "bitcoin_transactions_f.csv", - PATHS.ALP_DATASET_BINARY_DIR_PATH + "bitcoin_transactions_f.bin", - 14, - 10, - 11, - 30}, - - {8, - "City-Temp", - PATHS.ALP_DATASET_CSV_PATH + "city_temperature_f.csv", - PATHS.ALP_DATASET_BINARY_DIR_PATH + "city_temperature_f.bin", - 14, - 13, - 0, - 11}, - - {9, - "CMS/1", - PATHS.ALP_DATASET_CSV_PATH + "cms1.csv", - PATHS.ALP_DATASET_BINARY_DIR_PATH + "cms1.bin", - 14, - 5, - 10, - 41}, - - {10, - "CMS/9", - PATHS.ALP_DATASET_CSV_PATH + "cms9.csv", - PATHS.ALP_DATASET_BINARY_DIR_PATH + "cms9.bin", - 16, - 16, - 2, - 10}, - - {11, - "CMS/25", - PATHS.ALP_DATASET_CSV_PATH + "cms25.csv", - PATHS.ALP_DATASET_BINARY_DIR_PATH + "cms25.bin", - 14, - 4, - 6, - 42}, - - {12, - "Dew-Temp", - PATHS.ALP_DATASET_CSV_PATH + "neon_dew_point_temp.csv", - PATHS.ALP_DATASET_BINARY_DIR_PATH + "neon_dew_point_temp.bin", - 14, - 11, - 6, - 13}, - - {13, - "Bio-Temp", - PATHS.ALP_DATASET_CSV_PATH + "neon_bio_temp_c.csv", - PATHS.ALP_DATASET_BINARY_DIR_PATH + "neon_bio_temp_c.bin", - 14, - 12, - 0, - 10}, - - {14, - "Food-prices", - PATHS.ALP_DATASET_CSV_PATH + "food_prices.csv", - PATHS.ALP_DATASET_BINARY_DIR_PATH + "food_prices.bin", - 16, - 12, - 46, - 20}, - - {15, - "Gov/10", - PATHS.ALP_DATASET_CSV_PATH + "gov10.csv", - PATHS.ALP_DATASET_BINARY_DIR_PATH + "gov10.bin", - 3, - 1, - 72, - 27}, - - {16, - "Gov/26", - PATHS.ALP_DATASET_CSV_PATH + "gov26.csv", - PATHS.ALP_DATASET_BINARY_DIR_PATH + "gov26.bin", - 18, - 18, - 0, - 0}, - - {17, - "Gov/30", - PATHS.ALP_DATASET_CSV_PATH + "gov30.csv", - PATHS.ALP_DATASET_BINARY_DIR_PATH + "gov30.bin", - 18, - 18, - 4, - 0}, - - {18, - "Gov/31", - PATHS.ALP_DATASET_CSV_PATH + "gov31.csv", - PATHS.ALP_DATASET_BINARY_DIR_PATH + "gov31.bin", - 18, - 18, - 1, - 0}, - - {19, - "Gov/40", - PATHS.ALP_DATASET_CSV_PATH + "gov40.csv", - PATHS.ALP_DATASET_BINARY_DIR_PATH + "gov40.bin", - 
18, - 18, - 3, - 0}, - - {20, - "Medicare/1", - PATHS.ALP_DATASET_CSV_PATH + "medicare1.csv", - PATHS.ALP_DATASET_BINARY_DIR_PATH + "medicare1.bin", - 14, - 5, - 37, - 38}, - - {21, - "Medicare/9", - PATHS.ALP_DATASET_CSV_PATH + "medicare9.csv", - PATHS.ALP_DATASET_BINARY_DIR_PATH + "medicare9.bin", - 16, - 16, - 3, - 10}, - - {22, - "PM10-dust", - PATHS.ALP_DATASET_CSV_PATH + "neon_pm10_dust.csv", - PATHS.ALP_DATASET_BINARY_DIR_PATH + "neon_pm10_dust.bin", - 14, - 11, - 0, - 8}, - - {23, - "NYC/29", - PATHS.ALP_DATASET_CSV_PATH + "nyc29.csv", - PATHS.ALP_DATASET_BINARY_DIR_PATH + "nyc29.bin", - 14, - 1, - 5, - 42}, - - {24, - "POI-lat", - PATHS.ALP_DATASET_CSV_PATH + "poi_lat.csv", - PATHS.ALP_DATASET_BINARY_DIR_PATH + "poi_lat.bin", - 16, - 0, - 157, - 55, - true}, - - {25, - "POI-lon", - PATHS.ALP_DATASET_CSV_PATH + "poi_lon.csv", - PATHS.ALP_DATASET_BINARY_DIR_PATH + "poi_lon.bin", - 16, - 0, - 199, - 56, - true}, - - {26, - "SD-bench", - PATHS.ALP_DATASET_CSV_PATH + "ssd_hdd_benchmarks_f.csv", - PATHS.ALP_DATASET_BINARY_DIR_PATH + "ssd_hdd_benchmarks_f.bin", - 14, - 13, - 0, - 17}, - - {27, - "Stocks-DE", - PATHS.ALP_DATASET_CSV_PATH + "stocks_de.csv", - PATHS.ALP_DATASET_BINARY_DIR_PATH + "stocks_de.bin", - 14, - 11, - 5, - 10 - - }, - - {28, - "Stocks-UK", - PATHS.ALP_DATASET_CSV_PATH + "stocks_uk.csv", - PATHS.ALP_DATASET_BINARY_DIR_PATH + "stocks_uk.bin", - 14, - 13, - 0, - 9}, - - {29, - "Stocks-USA", - PATHS.ALP_DATASET_CSV_PATH + "stocks_usa_c.csv", - PATHS.ALP_DATASET_BINARY_DIR_PATH + "stocks_usa_c.bin", - 14, - 12, - 0, - 7}, - - {30, - "Wind-dir", - PATHS.ALP_DATASET_CSV_PATH + "neon_wind_dir.csv", - PATHS.ALP_DATASET_BINARY_DIR_PATH + "neon_wind_dir.bin", - 14, - 12, - 0, - 16}, - -}}; - -inline std::array double_test_dataset = {{ - {0, "test_0", CMAKE_SOURCE_DIR "/data/double/test_0.csv", "", 0, 0, 0, 0}, -}}; +inline std::array get_alp_dataset() { + static std::array ALP_DATASET = {{ + + {1, + "Air-Pressure", + get_paths().alp_dataset_csv_path + 
"neon_air_pressure.csv", + get_paths().alp_dataset_binary_dir_path + "neon_air_pressure.bin", + 14, + 9, + 3, + 16}, + + {2, + "Arade/4", + get_paths().alp_dataset_csv_path + "arade4.csv", + get_paths().alp_dataset_binary_dir_path + "arade4.bin", + 14, + 10, + 8, + 24}, + + {3, + "Basel-Temp", + get_paths().alp_dataset_csv_path + "basel_temp_f.csv", + get_paths().alp_dataset_binary_dir_path + "basel_temp_f.bin", + 14, + 7, + 47, + 28}, + + {4, + "Basel-Wind", + get_paths().alp_dataset_csv_path + "basel_wind_f.csv", + get_paths().alp_dataset_binary_dir_path + "basel_wind_f.bin", + 14, + 7, + 9, + 29}, + + {5, + "Bird-Mig", + get_paths().alp_dataset_csv_path + "bird_migration_f.csv", + get_paths().alp_dataset_binary_dir_path + "bird_migration_f.bin", + 14, + 9, + 2, + 17}, + + {6, + "Btc-Price", + get_paths().alp_dataset_csv_path + "bitcoin_f.csv", + get_paths().alp_dataset_binary_dir_path + "bitcoin_f.bin", + 14, + 10, + 10, + 25}, + + {7, + "Blockchain", + get_paths().alp_dataset_csv_path + "bitcoin_transactions_f.csv", + get_paths().alp_dataset_binary_dir_path + "bitcoin_transactions_f.bin", + 14, + 10, + 11, + 30}, + + {8, + "City-Temp", + get_paths().alp_dataset_csv_path + "city_temperature_f.csv", + get_paths().alp_dataset_binary_dir_path + "city_temperature_f.bin", + 14, + 13, + 0, + 11}, + + {9, + "CMS/1", + get_paths().alp_dataset_csv_path + "cms1.csv", + get_paths().alp_dataset_binary_dir_path + "cms1.bin", + 14, + 5, + 10, + 41}, + + {10, + "CMS/9", + get_paths().alp_dataset_csv_path + "cms9.csv", + get_paths().alp_dataset_binary_dir_path + "cms9.bin", + 16, + 16, + 2, + 10}, + + {11, + "CMS/25", + get_paths().alp_dataset_csv_path + "cms25.csv", + get_paths().alp_dataset_binary_dir_path + "cms25.bin", + 14, + 4, + 6, + 42}, + + {12, + "Dew-Temp", + get_paths().alp_dataset_csv_path + "neon_dew_point_temp.csv", + get_paths().alp_dataset_binary_dir_path + "neon_dew_point_temp.bin", + 14, + 11, + 6, + 13}, + + {13, + "Bio-Temp", + 
get_paths().alp_dataset_csv_path + "neon_bio_temp_c.csv", + get_paths().alp_dataset_binary_dir_path + "neon_bio_temp_c.bin", + 14, + 12, + 0, + 10}, + + {14, + "Food-prices", + get_paths().alp_dataset_csv_path + "food_prices.csv", + get_paths().alp_dataset_binary_dir_path + "food_prices.bin", + 16, + 12, + 46, + 20}, + + {15, + "Gov/10", + get_paths().alp_dataset_csv_path + "gov10.csv", + get_paths().alp_dataset_binary_dir_path + "gov10.bin", + 3, + 1, + 72, + 27}, + + {16, + "Gov/26", + get_paths().alp_dataset_csv_path + "gov26.csv", + get_paths().alp_dataset_binary_dir_path + "gov26.bin", + 18, + 18, + 0, + 0}, + + {17, + "Gov/30", + get_paths().alp_dataset_csv_path + "gov30.csv", + get_paths().alp_dataset_binary_dir_path + "gov30.bin", + 18, + 18, + 4, + 0}, + + {18, + "Gov/31", + get_paths().alp_dataset_csv_path + "gov31.csv", + get_paths().alp_dataset_binary_dir_path + "gov31.bin", + 18, + 18, + 1, + 0}, + + {19, + "Gov/40", + get_paths().alp_dataset_csv_path + "gov40.csv", + get_paths().alp_dataset_binary_dir_path + "gov40.bin", + 18, + 18, + 3, + 0}, + + {20, + "Medicare/1", + get_paths().alp_dataset_csv_path + "medicare1.csv", + get_paths().alp_dataset_binary_dir_path + "medicare1.bin", + 14, + 5, + 37, + 38}, + + {21, + "Medicare/9", + get_paths().alp_dataset_csv_path + "medicare9.csv", + get_paths().alp_dataset_binary_dir_path + "medicare9.bin", + 16, + 16, + 3, + 10}, + + {22, + "PM10-dust", + get_paths().alp_dataset_csv_path + "neon_pm10_dust.csv", + get_paths().alp_dataset_binary_dir_path + "neon_pm10_dust.bin", + 14, + 11, + 0, + 8}, + + {23, + "NYC/29", + get_paths().alp_dataset_csv_path + "nyc29.csv", + get_paths().alp_dataset_binary_dir_path + "nyc29.bin", + 14, + 1, + 5, + 42}, + + {24, + "POI-lat", + get_paths().alp_dataset_csv_path + "poi_lat.csv", + get_paths().alp_dataset_binary_dir_path + "poi_lat.bin", + 16, + 0, + 157, + 55, + true}, + + {25, + "POI-lon", + get_paths().alp_dataset_csv_path + "poi_lon.csv", + 
get_paths().alp_dataset_binary_dir_path + "poi_lon.bin", + 16, + 0, + 199, + 56, + true}, + + {26, + "SD-bench", + get_paths().alp_dataset_csv_path + "ssd_hdd_benchmarks_f.csv", + get_paths().alp_dataset_binary_dir_path + "ssd_hdd_benchmarks_f.bin", + 14, + 13, + 0, + 17}, + + {27, + "Stocks-DE", + get_paths().alp_dataset_csv_path + "stocks_de.csv", + get_paths().alp_dataset_binary_dir_path + "stocks_de.bin", + 14, + 11, + 5, + 10 + + }, + + {28, + "Stocks-UK", + get_paths().alp_dataset_csv_path + "stocks_uk.csv", + get_paths().alp_dataset_binary_dir_path + "stocks_uk.bin", + 14, + 13, + 0, + 9}, + + {29, + "Stocks-USA", + get_paths().alp_dataset_csv_path + "stocks_usa_c.csv", + get_paths().alp_dataset_binary_dir_path + "stocks_usa_c.bin", + 14, + 12, + 0, + 7}, + + {30, + "Wind-dir", + get_paths().alp_dataset_csv_path + "neon_wind_dir.csv", + get_paths().alp_dataset_binary_dir_path + "neon_wind_dir.bin", + 14, + 12, + 0, + 16}, + + }}; + return ALP_DATASET; +}; + +inline std::array get_double_test_dataset() { + static std::array DOUBLE_TEST_DATASET = {{ + {0, "test_0", CMAKE_SOURCE_DIR "/data/double/test_0.csv", "", 0, 0, 0, 0}, + }}; + + return DOUBLE_TEST_DATASET; +} + } // namespace alp_bench #endif \ No newline at end of file diff --git a/data/include/edge_case.hpp b/data/include/edge_case.hpp index 6996634..25d869a 100644 --- a/data/include/edge_case.hpp +++ b/data/include/edge_case.hpp @@ -1,16 +1,17 @@ #ifndef EDGE_CASE_HPP #define EDGE_CASE_HPP -// NOLINTBEGIN - #include "column.hpp" namespace alp_bench { -inline std::array edge_case = {{ - {1, "edge_case", PATHS.EDGE_DATASET_CSV_PATH + "edge_case.csv", "", 0, 0, 12, 0, true}, -}}; +inline auto get_edge_case() { + static std::array EDGE_CASE = {{ + {1, "edge_case", get_paths().edge_dataset_csv_path + "edge_case.csv", "", 0, 0, 12, 0, true}, + + }}; + return EDGE_CASE; +} + } // namespace alp_bench #endif - -// NOLINTEND diff --git a/data/include/evalimplsts.hpp b/data/include/evalimplsts.hpp index 
dc8860f..b5f4669 100644 --- a/data/include/evalimplsts.hpp +++ b/data/include/evalimplsts.hpp @@ -1,14 +1,19 @@ -#ifndef ALP_DOUBLE_EVALIMPLSTS_HPP -#define ALP_DOUBLE_EVALIMPLSTS_HPP +#ifndef EVALIMPLSTS_HPP +#define EVALIMPLSTS_HPP #include "column.hpp" namespace alp_bench { -inline std::array evalimplsts = {{ - // prev issue_8 - {0, "active_power", PATHS.EVALIMPLSTS_CSV_PATH + "active_power.csv", "", 0, 0, 0, 0, true}, +inline auto get_evalimplsts() { + static std::array EVALIMPLSTS = {{ + // prev issue_8 + {0, "active_power", get_paths().evalimplsts_csv_path + "active_power.csv", "", 0, 0, 0, 0, true}, + + }}; + + return EVALIMPLSTS; +} -}}; } // namespace alp_bench #endif \ No newline at end of file diff --git a/data/include/float_columns.hpp b/data/include/float_columns.hpp index 1f5c11c..0263ca0 100644 --- a/data/include/float_columns.hpp +++ b/data/include/float_columns.hpp @@ -1,28 +1,40 @@ #ifndef FLOAT_COLUMNS_HPP #define FLOAT_COLUMNS_HPP -// NOLINTBEGIN #include "column.hpp" namespace alp_bench { -inline std::array sp_datasets = {{ - {1, "Dino-Vitb16", "", PATHS.ALP_DATASET_BINARY_DIR_PATH + "sp_dino_vitb16.bin", 0, 0, 0, 0, true}, - {2, "GPT2", "", PATHS.ALP_DATASET_BINARY_DIR_PATH + "sp_gpt2.bin", 0, 0, 0, 0, true}, - {3, "Grammarly-lg", "", PATHS.ALP_DATASET_BINARY_DIR_PATH + "sp_grammarly_coedit_lg.bin", 0, 0, 0, 0, true}, - {4, "WAV2VEC", "", PATHS.ALP_DATASET_BINARY_DIR_PATH + "sp_wav2vec2_base_960h.bin", 0, 0, 0, 0, true}, +inline auto get_sp_datasets() { + static std::array SP_DATASETS = {{ + {1, "Dino-Vitb16", "", get_paths().alp_dataset_binary_dir_path + "sp_dino_vitb16.bin", 0, 0, 0, 0, true}, + {2, "GPT2", "", get_paths().alp_dataset_binary_dir_path + "sp_gpt2.bin", 0, 0, 0, 0, true}, + {3, + "Grammarly-lg", + "", + get_paths().alp_dataset_binary_dir_path + "sp_grammarly_coedit_lg.bin", + 0, + 0, + 0, + 0, + true}, + {4, "WAV2VEC", "", get_paths().alp_dataset_binary_dir_path + "sp_wav2vec2_base_960h.bin", 0, 0, 0, 0, true}, -}}; + }}; 
-inline std::array float_test_dataset = {{ - {0, "Arade/4", PATHS.ALP_DATASET_CSV_PATH + "arade4.csv", "", 0, 0, 0, 0}, - {1, "test_0", CMAKE_SOURCE_DIR "/data/float/test_0.csv", "", 0, 0, 0, 4}, - {2, "test_1", CMAKE_SOURCE_DIR "/data/float/test_1.csv", "", 0, 0, 0, 10}, - {3, "test_2", CMAKE_SOURCE_DIR "/data/float/test_2.csv", "", 0, 0, 0, 17}, - {4, "test_3", CMAKE_SOURCE_DIR "/data/float/test_3.csv", "", 0, 0, 0, 0}, + return SP_DATASETS; +} -}}; +inline auto get_float_test_dataset() { + static std::array FLOAT_TEST_DATASET = {{ + {0, "Arade/4", get_paths().alp_dataset_csv_path + "arade4.csv", "", 0, 0, 0, 0}, + {1, "test_0", CMAKE_SOURCE_DIR "/data/float/test_0.csv", "", 0, 0, 0, 4}, + {2, "test_1", CMAKE_SOURCE_DIR "/data/float/test_1.csv", "", 0, 0, 0, 10}, + {3, "test_2", CMAKE_SOURCE_DIR "/data/float/test_2.csv", "", 0, 0, 0, 17}, + {4, "test_3", CMAKE_SOURCE_DIR "/data/float/test_3.csv", "", 0, 0, 0, 0}, + + }}; + return FLOAT_TEST_DATASET; +} } // namespace alp_bench #endif - -// NOLINTEND \ No newline at end of file diff --git a/data/include/generated_columns.hpp b/data/include/generated_columns.hpp index 868a6fc..9dbbfa9 100644 --- a/data/include/generated_columns.hpp +++ b/data/include/generated_columns.hpp @@ -1,84 +1,85 @@ #ifndef GENERATED_COLUMNS_HPP #define GENERATED_COLUMNS_HPP -// NOLINTBEGIN - #include "column.hpp" namespace alp_bench { -inline std::array generated_cols = { - { - // - {0, "bw0", PATHS.GENERATED_COLUMNS_CSV_PATH + "generated_doubles_bw0.csv", "", 0, 0, 0, 0}, - {1, "bw1", PATHS.GENERATED_COLUMNS_CSV_PATH + "generated_doubles_bw1.csv", "", 0, 0, 0, 1}, - {2, "bw2", PATHS.GENERATED_COLUMNS_CSV_PATH + "generated_doubles_bw2.csv", "", 0, 0, 0, 2}, - {3, "bw3", PATHS.GENERATED_COLUMNS_CSV_PATH + "generated_doubles_bw3.csv", "", 0, 0, 0, 3}, - {4, "bw4", PATHS.GENERATED_COLUMNS_CSV_PATH + "generated_doubles_bw4.csv", "", 0, 0, 0, 4}, - {5, "bw5", PATHS.GENERATED_COLUMNS_CSV_PATH + "generated_doubles_bw5.csv", "", 0, 0, 0, 5}, - {6, 
"bw6", PATHS.GENERATED_COLUMNS_CSV_PATH + "generated_doubles_bw6.csv", "", 0, 0, 0, 6}, - {7, "bw7", PATHS.GENERATED_COLUMNS_CSV_PATH + "generated_doubles_bw7.csv", "", 0, 0, 0, 7}, - {8, "bw8", PATHS.GENERATED_COLUMNS_CSV_PATH + "generated_doubles_bw8.csv", "", 0, 0, 0, 8}, - {9, "bw9", PATHS.GENERATED_COLUMNS_CSV_PATH + "generated_doubles_bw9.csv", "", 0, 0, 0, 9}, - {10, "bw10", PATHS.GENERATED_COLUMNS_CSV_PATH + "generated_doubles_bw10.csv", "", 0, 0, 0, 10}, - {11, "bw11", PATHS.GENERATED_COLUMNS_CSV_PATH + "generated_doubles_bw11.csv", "", 0, 0, 0, 11}, - {12, "bw12", PATHS.GENERATED_COLUMNS_CSV_PATH + "generated_doubles_bw12.csv", "", 0, 0, 0, 12}, - {13, "bw13", PATHS.GENERATED_COLUMNS_CSV_PATH + "generated_doubles_bw13.csv", "", 0, 0, 0, 13}, - {14, "bw14", PATHS.GENERATED_COLUMNS_CSV_PATH + "generated_doubles_bw14.csv", "", 0, 0, 0, 14}, - {15, "bw15", PATHS.GENERATED_COLUMNS_CSV_PATH + "generated_doubles_bw15.csv", "", 0, 0, 0, 15}, - {16, "bw16", PATHS.GENERATED_COLUMNS_CSV_PATH + "generated_doubles_bw16.csv", "", 0, 0, 0, 16}, - {17, "bw17", PATHS.GENERATED_COLUMNS_CSV_PATH + "generated_doubles_bw17.csv", "", 0, 0, 0, 17}, - {18, "bw18", PATHS.GENERATED_COLUMNS_CSV_PATH + "generated_doubles_bw18.csv", "", 0, 0, 0, 18}, - {19, "bw19", PATHS.GENERATED_COLUMNS_CSV_PATH + "generated_doubles_bw19.csv", "", 0, 0, 0, 19}, - {20, "bw20", PATHS.GENERATED_COLUMNS_CSV_PATH + "generated_doubles_bw20.csv", "", 0, 0, 0, 20}, - {21, "bw21", PATHS.GENERATED_COLUMNS_CSV_PATH + "generated_doubles_bw21.csv", "", 0, 0, 0, 21}, - {22, "bw22", PATHS.GENERATED_COLUMNS_CSV_PATH + "generated_doubles_bw22.csv", "", 0, 0, 0, 22}, - {23, "bw23", PATHS.GENERATED_COLUMNS_CSV_PATH + "generated_doubles_bw23.csv", "", 0, 0, 0, 23}, - {24, "bw24", PATHS.GENERATED_COLUMNS_CSV_PATH + "generated_doubles_bw24.csv", "", 0, 0, 0, 24}, - {25, "bw25", PATHS.GENERATED_COLUMNS_CSV_PATH + "generated_doubles_bw25.csv", "", 0, 0, 0, 25}, - {26, "bw26", PATHS.GENERATED_COLUMNS_CSV_PATH + 
"generated_doubles_bw26.csv", "", 0, 0, 0, 26}, - {27, "bw27", PATHS.GENERATED_COLUMNS_CSV_PATH + "generated_doubles_bw27.csv", "", 0, 0, 0, 27}, - {28, "bw28", PATHS.GENERATED_COLUMNS_CSV_PATH + "generated_doubles_bw28.csv", "", 0, 0, 0, 28}, - {29, "bw29", PATHS.GENERATED_COLUMNS_CSV_PATH + "generated_doubles_bw29.csv", "", 0, 0, 0, 29}, - {30, "bw30", PATHS.GENERATED_COLUMNS_CSV_PATH + "generated_doubles_bw30.csv", "", 0, 0, 0, 30}, - {31, "bw31", PATHS.GENERATED_COLUMNS_CSV_PATH + "generated_doubles_bw31.csv", "", 0, 0, 0, 31}, - {32, "bw32", PATHS.GENERATED_COLUMNS_CSV_PATH + "generated_doubles_bw32.csv", "", 0, 0, 0, 32}, - {33, "bw33", PATHS.GENERATED_COLUMNS_CSV_PATH + "generated_doubles_bw33.csv", "", 0, 0, 0, 33}, - {34, "bw34", PATHS.GENERATED_COLUMNS_CSV_PATH + "generated_doubles_bw34.csv", "", 0, 0, 0, 34}, - {35, "bw35", PATHS.GENERATED_COLUMNS_CSV_PATH + "generated_doubles_bw35.csv", "", 0, 0, 0, 35}, - {36, "bw36", PATHS.GENERATED_COLUMNS_CSV_PATH + "generated_doubles_bw36.csv", "", 0, 0, 0, 36}, - {37, "bw37", PATHS.GENERATED_COLUMNS_CSV_PATH + "generated_doubles_bw37.csv", "", 0, 0, 0, 37}, - {38, "bw38", PATHS.GENERATED_COLUMNS_CSV_PATH + "generated_doubles_bw38.csv", "", 0, 0, 0, 38}, - {39, "bw39", PATHS.GENERATED_COLUMNS_CSV_PATH + "generated_doubles_bw39.csv", "", 0, 0, 0, 39}, - {40, "bw40", PATHS.GENERATED_COLUMNS_CSV_PATH + "generated_doubles_bw40.csv", "", 0, 0, 0, 40}, - {41, "bw41", PATHS.GENERATED_COLUMNS_CSV_PATH + "generated_doubles_bw41.csv", "", 0, 0, 0, 41}, - {42, "bw42", PATHS.GENERATED_COLUMNS_CSV_PATH + "generated_doubles_bw42.csv", "", 0, 0, 0, 42}, - {43, "bw43", PATHS.GENERATED_COLUMNS_CSV_PATH + "generated_doubles_bw43.csv", "", 0, 0, 0, 60}, - {44, "bw44", PATHS.GENERATED_COLUMNS_CSV_PATH + "generated_doubles_bw44.csv", "", 0, 0, 0, 44}, - {45, "bw45", PATHS.GENERATED_COLUMNS_CSV_PATH + "generated_doubles_bw45.csv", "", 0, 0, 0, 45}, - {46, "bw46", PATHS.GENERATED_COLUMNS_CSV_PATH + "generated_doubles_bw46.csv", "", 0, 0, 
0, 46}, - {47, "bw47", PATHS.GENERATED_COLUMNS_CSV_PATH + "generated_doubles_bw47.csv", "", 0, 0, 0, 47}, - {48, "bw48", PATHS.GENERATED_COLUMNS_CSV_PATH + "generated_doubles_bw48.csv", "", 0, 0, 0, 48}, - {49, "bw49", PATHS.GENERATED_COLUMNS_CSV_PATH + "generated_doubles_bw49.csv", "", 0, 0, 0, 49}, - {50, "bw50", PATHS.GENERATED_COLUMNS_CSV_PATH + "generated_doubles_bw50.csv", "", 0, 0, 0, 50}, - {51, "bw51", PATHS.GENERATED_COLUMNS_CSV_PATH + "generated_doubles_bw51.csv", "", 0, 0, 0, 51}, - // // todo exp fac does not match - {52, "bw52", PATHS.GENERATED_COLUMNS_CSV_PATH + "generated_doubles_bw52.csv", "", 0, 0, 0, 56}, - {53, "bw53", PATHS.GENERATED_COLUMNS_CSV_PATH + "generated_doubles_bw53.csv", "", 0, 0, 0, 63}, - {54, "bw54", PATHS.GENERATED_COLUMNS_CSV_PATH + "generated_doubles_bw54.csv", "", 0, 0, 0, 55}, - {55, "bw55", PATHS.GENERATED_COLUMNS_CSV_PATH + "generated_doubles_bw55.csv", "", 0, 0, 0, 56}, - {56, "bw56", PATHS.GENERATED_COLUMNS_CSV_PATH + "generated_doubles_bw56.csv", "", 0, 0, 0, 57}, - {57, "bw57", PATHS.GENERATED_COLUMNS_CSV_PATH + "generated_doubles_bw57.csv", "", 0, 0, 0, 58}, - {58, "bw58", PATHS.GENERATED_COLUMNS_CSV_PATH + "generated_doubles_bw58.csv", "", 0, 0, 0, 59}, - {59, "bw59", PATHS.GENERATED_COLUMNS_CSV_PATH + "generated_doubles_bw59.csv", "", 0, 0, 0, 60}, - {60, "bw60", PATHS.GENERATED_COLUMNS_CSV_PATH + "generated_doubles_bw60.csv", "", 0, 0, 0, 61}, - {61, "bw61", PATHS.GENERATED_COLUMNS_CSV_PATH + "generated_doubles_bw61.csv", "", 0, 0, 0, 62}, - {62, "bw62", PATHS.GENERATED_COLUMNS_CSV_PATH + "generated_doubles_bw62.csv", "", 0, 0, 0, 63}, - {63, "bw63", PATHS.GENERATED_COLUMNS_CSV_PATH + "generated_doubles_bw63.csv", "", 0, 0, 0, 63}, - {64, "bw64", PATHS.GENERATED_COLUMNS_CSV_PATH + "generated_doubles_bw64.csv", "", 0, 0, 0, 64}, - // // - } // -}; +inline auto get_generated_cols() { + static std::array GENERATED_COLS = { + { + // + {0, "bw0", get_paths().generated_columns_csv_path + "generated_doubles_bw0.csv", "", 
0, 0, 0, 0}, + {1, "bw1", get_paths().generated_columns_csv_path + "generated_doubles_bw1.csv", "", 0, 0, 0, 1}, + {2, "bw2", get_paths().generated_columns_csv_path + "generated_doubles_bw2.csv", "", 0, 0, 0, 2}, + {3, "bw3", get_paths().generated_columns_csv_path + "generated_doubles_bw3.csv", "", 0, 0, 0, 3}, + {4, "bw4", get_paths().generated_columns_csv_path + "generated_doubles_bw4.csv", "", 0, 0, 0, 4}, + {5, "bw5", get_paths().generated_columns_csv_path + "generated_doubles_bw5.csv", "", 0, 0, 0, 5}, + {6, "bw6", get_paths().generated_columns_csv_path + "generated_doubles_bw6.csv", "", 0, 0, 0, 6}, + {7, "bw7", get_paths().generated_columns_csv_path + "generated_doubles_bw7.csv", "", 0, 0, 0, 7}, + {8, "bw8", get_paths().generated_columns_csv_path + "generated_doubles_bw8.csv", "", 0, 0, 0, 8}, + {9, "bw9", get_paths().generated_columns_csv_path + "generated_doubles_bw9.csv", "", 0, 0, 0, 9}, + {10, "bw10", get_paths().generated_columns_csv_path + "generated_doubles_bw10.csv", "", 0, 0, 0, 10}, + {11, "bw11", get_paths().generated_columns_csv_path + "generated_doubles_bw11.csv", "", 0, 0, 0, 11}, + {12, "bw12", get_paths().generated_columns_csv_path + "generated_doubles_bw12.csv", "", 0, 0, 0, 12}, + {13, "bw13", get_paths().generated_columns_csv_path + "generated_doubles_bw13.csv", "", 0, 0, 0, 13}, + {14, "bw14", get_paths().generated_columns_csv_path + "generated_doubles_bw14.csv", "", 0, 0, 0, 14}, + {15, "bw15", get_paths().generated_columns_csv_path + "generated_doubles_bw15.csv", "", 0, 0, 0, 15}, + {16, "bw16", get_paths().generated_columns_csv_path + "generated_doubles_bw16.csv", "", 0, 0, 0, 16}, + {17, "bw17", get_paths().generated_columns_csv_path + "generated_doubles_bw17.csv", "", 0, 0, 0, 17}, + {18, "bw18", get_paths().generated_columns_csv_path + "generated_doubles_bw18.csv", "", 0, 0, 0, 18}, + {19, "bw19", get_paths().generated_columns_csv_path + "generated_doubles_bw19.csv", "", 0, 0, 0, 19}, + {20, "bw20", 
get_paths().generated_columns_csv_path + "generated_doubles_bw20.csv", "", 0, 0, 0, 20}, + {21, "bw21", get_paths().generated_columns_csv_path + "generated_doubles_bw21.csv", "", 0, 0, 0, 21}, + {22, "bw22", get_paths().generated_columns_csv_path + "generated_doubles_bw22.csv", "", 0, 0, 0, 22}, + {23, "bw23", get_paths().generated_columns_csv_path + "generated_doubles_bw23.csv", "", 0, 0, 0, 23}, + {24, "bw24", get_paths().generated_columns_csv_path + "generated_doubles_bw24.csv", "", 0, 0, 0, 24}, + {25, "bw25", get_paths().generated_columns_csv_path + "generated_doubles_bw25.csv", "", 0, 0, 0, 25}, + {26, "bw26", get_paths().generated_columns_csv_path + "generated_doubles_bw26.csv", "", 0, 0, 0, 26}, + {27, "bw27", get_paths().generated_columns_csv_path + "generated_doubles_bw27.csv", "", 0, 0, 0, 27}, + {28, "bw28", get_paths().generated_columns_csv_path + "generated_doubles_bw28.csv", "", 0, 0, 0, 28}, + {29, "bw29", get_paths().generated_columns_csv_path + "generated_doubles_bw29.csv", "", 0, 0, 0, 29}, + {30, "bw30", get_paths().generated_columns_csv_path + "generated_doubles_bw30.csv", "", 0, 0, 0, 30}, + {31, "bw31", get_paths().generated_columns_csv_path + "generated_doubles_bw31.csv", "", 0, 0, 0, 31}, + {32, "bw32", get_paths().generated_columns_csv_path + "generated_doubles_bw32.csv", "", 0, 0, 0, 32}, + {33, "bw33", get_paths().generated_columns_csv_path + "generated_doubles_bw33.csv", "", 0, 0, 0, 33}, + {34, "bw34", get_paths().generated_columns_csv_path + "generated_doubles_bw34.csv", "", 0, 0, 0, 34}, + {35, "bw35", get_paths().generated_columns_csv_path + "generated_doubles_bw35.csv", "", 0, 0, 0, 35}, + {36, "bw36", get_paths().generated_columns_csv_path + "generated_doubles_bw36.csv", "", 0, 0, 0, 36}, + {37, "bw37", get_paths().generated_columns_csv_path + "generated_doubles_bw37.csv", "", 0, 0, 0, 37}, + {38, "bw38", get_paths().generated_columns_csv_path + "generated_doubles_bw38.csv", "", 0, 0, 0, 38}, + {39, "bw39", 
get_paths().generated_columns_csv_path + "generated_doubles_bw39.csv", "", 0, 0, 0, 39}, + {40, "bw40", get_paths().generated_columns_csv_path + "generated_doubles_bw40.csv", "", 0, 0, 0, 40}, + {41, "bw41", get_paths().generated_columns_csv_path + "generated_doubles_bw41.csv", "", 0, 0, 0, 41}, + {42, "bw42", get_paths().generated_columns_csv_path + "generated_doubles_bw42.csv", "", 0, 0, 0, 42}, + {43, "bw43", get_paths().generated_columns_csv_path + "generated_doubles_bw43.csv", "", 0, 0, 0, 60}, + {44, "bw44", get_paths().generated_columns_csv_path + "generated_doubles_bw44.csv", "", 0, 0, 0, 44}, + {45, "bw45", get_paths().generated_columns_csv_path + "generated_doubles_bw45.csv", "", 0, 0, 0, 45}, + {46, "bw46", get_paths().generated_columns_csv_path + "generated_doubles_bw46.csv", "", 0, 0, 0, 46}, + {47, "bw47", get_paths().generated_columns_csv_path + "generated_doubles_bw47.csv", "", 0, 0, 0, 47}, + {48, "bw48", get_paths().generated_columns_csv_path + "generated_doubles_bw48.csv", "", 0, 0, 0, 48}, + {49, "bw49", get_paths().generated_columns_csv_path + "generated_doubles_bw49.csv", "", 0, 0, 0, 49}, + {50, "bw50", get_paths().generated_columns_csv_path + "generated_doubles_bw50.csv", "", 0, 0, 0, 50}, + {51, "bw51", get_paths().generated_columns_csv_path + "generated_doubles_bw51.csv", "", 0, 0, 0, 51}, + // // todo exp fac does not match + {52, "bw52", get_paths().generated_columns_csv_path + "generated_doubles_bw52.csv", "", 0, 0, 0, 56}, + {53, "bw53", get_paths().generated_columns_csv_path + "generated_doubles_bw53.csv", "", 0, 0, 0, 63}, + {54, "bw54", get_paths().generated_columns_csv_path + "generated_doubles_bw54.csv", "", 0, 0, 0, 55}, + {55, "bw55", get_paths().generated_columns_csv_path + "generated_doubles_bw55.csv", "", 0, 0, 0, 56}, + {56, "bw56", get_paths().generated_columns_csv_path + "generated_doubles_bw56.csv", "", 0, 0, 0, 57}, + {57, "bw57", get_paths().generated_columns_csv_path + "generated_doubles_bw57.csv", "", 0, 0, 0, 58}, + 
{58, "bw58", get_paths().generated_columns_csv_path + "generated_doubles_bw58.csv", "", 0, 0, 0, 59}, + {59, "bw59", get_paths().generated_columns_csv_path + "generated_doubles_bw59.csv", "", 0, 0, 0, 60}, + {60, "bw60", get_paths().generated_columns_csv_path + "generated_doubles_bw60.csv", "", 0, 0, 0, 61}, + {61, "bw61", get_paths().generated_columns_csv_path + "generated_doubles_bw61.csv", "", 0, 0, 0, 62}, + {62, "bw62", get_paths().generated_columns_csv_path + "generated_doubles_bw62.csv", "", 0, 0, 0, 63}, + {63, "bw63", get_paths().generated_columns_csv_path + "generated_doubles_bw63.csv", "", 0, 0, 0, 63}, + {64, "bw64", get_paths().generated_columns_csv_path + "generated_doubles_bw64.csv", "", 0, 0, 0, 64}, + // // + } // + }; + + return GENERATED_COLS; } -#endif // GENERATED_COLUMNS_HPP -// NOLINTEND \ No newline at end of file +} // namespace alp_bench +#endif // GENERATED_COLUMNS_HPP diff --git a/publication/CMakeLists.txt b/publication/CMakeLists.txt index 9d58c4b..8170672 100644 --- a/publication/CMakeLists.txt +++ b/publication/CMakeLists.txt @@ -1,5 +1,5 @@ if (NOT DEFINED ENV{ALP_DATASET_DIR_PATH}) - message(WARNING "You must set ALP_DATASET_DIR_PATH environment variable") + message(FATAL_ERROR "You must set ALP_DATASET_DIR_PATH environment variable") else () add_subdirectory(source_code) endif () diff --git a/publication/master_script/master_script.sh b/publication/master_script/master_script.sh index f5dccd3..21ee222 100755 --- a/publication/master_script/master_script.sh +++ b/publication/master_script/master_script.sh @@ -1,34 +1,40 @@ #!/bin/bash +# check BENCHMARKING.md for more details. 
+ WORKSPACE=$(pwd) # Assuming this is the workspace directory -REPO_URL="https://github.com/cwida/ALP.git" +REPO_URL="https://github.com/azimafroozeh/_ALP.git" TARGET_DIR="$WORKSPACE/ALP" # Define target directory for the clone +BRANCH="2_ari" # Branch to clone # Clone the repository if it doesn't already exist if [ -d "$TARGET_DIR" ]; then - echo "Repository already exists, pulling the latest changes..." - cd "$TARGET_DIR" && git pull origin main + echo "Repository already exists, pulling the latest changes from branch $BRANCH..." + cd "$TARGET_DIR" && git pull origin "$BRANCH" else - echo "Cloning the repository..." - git clone "$REPO_URL" "$TARGET_DIR" + echo "Cloning the repository and checking out branch $BRANCH..." + git clone --branch "$BRANCH" "$REPO_URL" "$TARGET_DIR" fi # Move to the cloned repository +# shellcheck disable=SC2164 cd "$TARGET_DIR" # Create build directory mkdir -p "$TARGET_DIR/build" # Configure CMake -cmake -DALP_BUILD_PUBLICATION=ON -S "$TARGET_DIR" -B "$TARGET_DIR/build" -DCMAKE_BUILD_TYPE=Release -DCXX=clang++ +cmake -DALP_BUILD_PUBLICATION=ON -DCMAKE_TOOLCHAIN_FILE="$TARGET_DIR/toolchain/example.cmake" -S "$TARGET_DIR" -B "$TARGET_DIR/build" -DCMAKE_BUILD_TYPE=Release -DCXX=clang++ # Build the project cmake --build "$TARGET_DIR/build" -j 16 # Run tests -cd "$TARGET_DIR/build" && ctest -j 4 +#cd "$TARGET_DIR/build" && ctest -j 4 todo # Check if ALP_DATASET_DIR_PATH is set +# Set the environment variable `ALP_DATASET_DIR_PATH` with the path to the directory in which the complete +# binary datasets are located if [ -z "$ALP_DATASET_DIR_PATH" ]; then echo -e "\033[33mPlease download the dataset from: https://drive.google.com/drive/folders/167faTwZJjqJMKM9Yc6E7KF5LUbsitxJS?usp=sharing\033[0m" echo -e "\033[33mWarning: ALP_DATASET_DIR_PATH is not set!\033[0m" @@ -38,18 +44,22 @@ else fi # Run benchmarks -"$TARGET_DIR/build/b/publication/source_code/bench_compression_ratio/bench_alp_compression_ratio" 
-"$TARGET_DIR/build/b/publication/source_code/bench_compression_ratio/bench_alp32_compression_ratio" -"$TARGET_DIR/build/b/publication/source_code/bench_compression_ratio/bench_zstd_compression_ratio" -"$TARGET_DIR/build/b/publication/source_code/bench/bench_alp_cutter_decode" -"$TARGET_DIR/build/b/publication/source_code/bench/bench_alp_cutter_encode" -"$TARGET_DIR/build/b/publication/source_code/bench/bench_alp_encode" -"$TARGET_DIR/build/b/publication/source_code/bench/bench_alp_without_sampling" -"$TARGET_DIR/build/b/publication/source_code/bench/bench_chimp" -"$TARGET_DIR/build/b/publication/source_code/bench/bench_chimp128" -"$TARGET_DIR/build/b/publication/source_code/bench/bench_gorillas" -"$TARGET_DIR/build/b/publication/source_code/bench/bench_patas" -"$TARGET_DIR/build/b/publication/source_code/bench/bench_zstd" +#"$TARGET_DIR/build/publication/source_code/bench_compression_ratio/bench_alp_compression_ratio" +#"$TARGET_DIR/build/publication/source_code/bench_compression_ratio/bench_alp32_compression_ratio" +#"$TARGET_DIR/build/publication/source_code/bench_compression_ratio/bench_zstd_compression_ratio" +#"$TARGET_DIR/build/publication/source_code/bench_compression_ratio/bench_chimp_compression_ratio" +#"$TARGET_DIR/build/publication/source_code/bench_compression_ratio/bench_chimp128_compression_ratio" +#"$TARGET_DIR/build/publication/source_code/bench_compression_ratio/bench_gorillas_compression_ratio" +#"$TARGET_DIR/build/publication/source_code/bench_compression_ratio/bench_patas_compression_ratio" +"$TARGET_DIR/build/publication/source_code/bench_speed/bench_alp_cutter_decode" +"$TARGET_DIR/build/publication/source_code/bench_speed/bench_alp_cutter_encode" +"$TARGET_DIR/build/publication/source_code/bench_speed/bench_alp_encode" +"$TARGET_DIR/build/publication/source_code/bench_speed/bench_alp_without_sampling" +"$TARGET_DIR/build/publication/source_code/bench_speed/bench_chimp" +"$TARGET_DIR/build/publication/source_code/bench_speed/bench_chimp128" 
+"$TARGET_DIR/build/publication/source_code/bench_speed/bench_gorillas" +"$TARGET_DIR/build/publication/source_code/bench_speed/bench_patas" +"$TARGET_DIR/build/publication/source_code/bench_speed/bench_zstd" # Set up Python and install dependencies python -m pip install --upgrade pip diff --git a/publication/source_code/bench_compression_ratio/alp32.cpp b/publication/source_code/bench_compression_ratio/alp32.cpp index 8cf43bf..cfd1cc1 100644 --- a/publication/source_code/bench_compression_ratio/alp32.cpp +++ b/publication/source_code/bench_compression_ratio/alp32.cpp @@ -128,10 +128,10 @@ class alp32_test : public ::testing::Test { * This test will output and write a file with the estimated bits/value after compression with alp */ TEST_F(alp32_test, test_alprd32_on_whole_datasets) { - std::ofstream ofile(alp_bench::PATHS.RESULT_DIR_PATH + "alp_rd32_compression_ratio.csv", std::ios::out); + std::ofstream ofile(alp_bench::get_paths().result_dir_path + "alp_rd32_compression_ratio.csv", std::ios::out); ofile << "dataset,size,rowgroups_count,vectors_count\n"; - for (auto& dataset : alp_bench::sp_datasets) { + for (auto& dataset : alp_bench::get_sp_datasets()) { if (!dataset.suitable_for_cutting) { continue; } std::cout << dataset.name << std::endl; diff --git a/publication/source_code/bench_compression_ratio/bench_alp_compression_ratio.cpp b/publication/source_code/bench_compression_ratio/bench_alp_compression_ratio.cpp index 5374364..feb0a1b 100644 --- a/publication/source_code/bench_compression_ratio/bench_alp_compression_ratio.cpp +++ b/publication/source_code/bench_compression_ratio/bench_alp_compression_ratio.cpp @@ -65,7 +65,7 @@ void read_data(std::vector& data, const std::string& csv_file_path, cons // Open the binary file in input mode std::ifstream file(bin_file_path, std::ios::binary | std::ios::in); - if (!file) { throw std::runtime_error("Failed to open file: " + bin_file_path); } + if (!file) { throw std::runtime_error("Failed to open file: " + 
bin_file_path + " - " + strerror(errno)); } // Get the size of the file file.seekg(0, std::ios::end); @@ -240,6 +240,7 @@ class alp_test : public ::testing::Test { void bench_alp_rd_compression_ratio(const alp_bench::Column& dataset, std::ofstream& ofile) { if (!dataset.suitable_for_cutting) { return; } + std::cout << dataset.name << std::endl; std::vector compression_metadata; @@ -327,14 +328,10 @@ class alp_test : public ::testing::Test { * This test will output and write a file with the estimated bits/value after compression with alp */ TEST_F(alp_test, test_alp_on_whole_datasets) { - if (const auto v = std::getenv("ALP_DATASET_DIR_PATH"); v == nullptr) { - throw std::runtime_error("Environment variable ALP_DATASET_DIR_PATH is not set!"); - } - - std::ofstream ofile(alp_bench::PATHS.RESULT_DIR_PATH + "alp_compression_ratio.csv", std::ios::out); + std::ofstream ofile(alp_bench::get_paths().result_dir_path + "alp_compression_ratio.csv", std::ios::out); ofile << "dataset,size,rowgroups_count,vectors_count\n"; - for (auto& dataset : alp_bench::alp_dataset) { + for (auto& dataset : alp_bench::get_alp_dataset()) { bench_alp_compression_ratio(dataset, ofile); } } @@ -344,19 +341,19 @@ TEST_F(alp_test, test_alp_on_whole_datasets) { * This test will output and write a file with the estimated bits/value after compression with alp */ TEST_F(alp_test, test_alprd_on_whole_datasets) { - std::ofstream ofile(alp_bench::PATHS.RESULT_DIR_PATH + "alp_rd_compression_ratio.csv", std::ios::out); + std::ofstream ofile(alp_bench::get_paths().result_dir_path + "alp_rd_compression_ratio.csv", std::ios::out); ofile << "dataset,size,rowgroups_count,vectors_count\n"; - for (auto& dataset : alp_bench::alp_dataset) { + for (auto& dataset : alp_bench::get_alp_dataset()) { bench_alp_rd_compression_ratio(dataset, ofile); } } TEST_F(alp_test, test_alprd_on_evalimplsts) { - std::ofstream ofile(alp_bench::PATHS.RESULT_DIR_PATH + "evalimplsts.csv", std::ios::out); + std::ofstream 
ofile(alp_bench::get_paths().result_dir_path + "evalimplsts.csv", std::ios::out); ofile << "dataset,size,rowgroups_count,vectors_count\n"; - for (auto& dataset : alp_bench::evalimplsts) { + for (auto& dataset : alp_bench::get_evalimplsts()) { bench_alp_rd_compression_ratio(dataset, ofile); } } diff --git a/publication/source_code/bench_compression_ratio/chimp.cpp b/publication/source_code/bench_compression_ratio/chimp.cpp index a877c62..cb82e8e 100644 --- a/publication/source_code/bench_compression_ratio/chimp.cpp +++ b/publication/source_code/bench_compression_ratio/chimp.cpp @@ -52,15 +52,14 @@ class chimp_test : public ::testing::Test { double chimp_overhead_per_vector {static_cast(8 + 16 + 16)}; TEST_F(chimp_test, test_chimp_on_whole_datasets) { - - if (const auto v = std::getenv("ALP_DATASET_DIR_PATH"); v == nullptr) { - throw std::runtime_error("Environment variable ALP_DATASET_DIR_PATH is not set!"); + if (const auto v = std::getenv("ALP_DATASET_DIR_PATH"); v != nullptr) { + alp_bench::get_paths().alp_dataset_binary_dir_path = v; // v is the C string from getenv: assign all of it, not just its first char } - std::ofstream ofile(alp_bench::PATHS.RESULT_DIR_PATH + "chimp_compression_ratio.csv", std::ios::out); + std::ofstream ofile(alp_bench::get_paths().result_dir_path + "chimp_compression_ratio.csv", std::ios::out); ofile << "dataset,size,vectors_count\n"; - for (auto& dataset : alp_bench::alp_dataset) { + for (auto& dataset : alp_bench::get_alp_dataset()) { std::cout << dataset.name << std::endl; diff --git a/publication/source_code/bench_compression_ratio/chimp128.cpp b/publication/source_code/bench_compression_ratio/chimp128.cpp index a67e5fb..a4c0aba 100644 --- a/publication/source_code/bench_compression_ratio/chimp128.cpp +++ b/publication/source_code/bench_compression_ratio/chimp128.cpp @@ -63,10 +63,10 @@ TEST_F(chimp128_test, test_chimp128_on_whole_datasets) { throw std::runtime_error("Environment variable ALP_DATASET_DIR_PATH is not set!"); } - std::ofstream ofile(alp_bench::PATHS.RESULT_DIR_PATH +
"chimp128_compression_ratio.csv", std::ios::out); + std::ofstream ofile(alp_bench::get_paths().result_dir_path + "chimp128_compression_ratio.csv", std::ios::out); ofile << "dataset,size,vectors_count\n"; - for (auto& dataset : alp_bench::alp_dataset) { + for (auto& dataset : alp_bench::get_alp_dataset()) { std::cout << dataset.name << std::endl; diff --git a/publication/source_code/bench_compression_ratio/gorillas.cpp b/publication/source_code/bench_compression_ratio/gorillas.cpp index 175deed..6539efc 100644 --- a/publication/source_code/bench_compression_ratio/gorillas.cpp +++ b/publication/source_code/bench_compression_ratio/gorillas.cpp @@ -46,10 +46,10 @@ TEST_F(gorillas_test, test_gorillas_on_whole_datasets) { throw std::runtime_error("Environment variable ALP_DATASET_DIR_PATH is not set!"); } - std::ofstream ofile(alp_bench::PATHS.RESULT_DIR_PATH + "gorillas_compression_ratio.csv", std::ios::out); + std::ofstream ofile(alp_bench::get_paths().result_dir_path + "gorillas_compression_ratio.csv", std::ios::out); ofile << "dataset,size,vectors_count\n"; - for (auto& dataset : alp_bench::alp_dataset) { + for (auto& dataset : alp_bench::get_alp_dataset()) { std::cout << dataset.name << std::endl; diff --git a/publication/source_code/bench_compression_ratio/patas.cpp b/publication/source_code/bench_compression_ratio/patas.cpp index 9d09316..46c4464 100644 --- a/publication/source_code/bench_compression_ratio/patas.cpp +++ b/publication/source_code/bench_compression_ratio/patas.cpp @@ -49,15 +49,10 @@ class patas_test : public ::testing::Test { double patas_overhead_per_vector {static_cast(16)}; TEST_F(patas_test, test_patas_on_whole_datasets) { - - if (const auto v = std::getenv("ALP_DATASET_DIR_PATH"); v == nullptr) { - throw std::runtime_error("Environment variable ALP_DATASET_DIR_PATH is not set!"); - } - - std::ofstream ofile(alp_bench::PATHS.RESULT_DIR_PATH + "patas_compression_ratio.csv", std::ios::out); + std::ofstream 
ofile(alp_bench::get_paths().result_dir_path + "patas_compression_ratio.csv", std::ios::out); ofile << "dataset,size,vectors_count\n"; - for (auto& dataset : alp_bench::alp_dataset) { + for (auto& dataset : alp_bench::get_alp_dataset()) { std::cout << dataset.name << std::endl; diff --git a/publication/source_code/bench_compression_ratio/zstd.cpp b/publication/source_code/bench_compression_ratio/zstd.cpp index ed9a72c..747c962 100644 --- a/publication/source_code/bench_compression_ratio/zstd.cpp +++ b/publication/source_code/bench_compression_ratio/zstd.cpp @@ -4,27 +4,21 @@ #include "mapper.hpp" #include "gtest/gtest.h" -// NOLINTBEGIN - class zstd_test : public ::testing::Test { public: double* dbl_arr; void* enc_dbl_arr; void* dec_dbl_arr; - size_t ZSTD_VECTOR_SIZE = + size_t zstd_vector_size = alp::config::ROWGROUP_SIZE; // For Zstd we compress rowgroups since it would be unfair to compress small vectors - size_t ENC_SIZE_UPPER_BOUND = ZSTD_VECTOR_SIZE * 8; - size_t INPUT_SIZE = ZSTD_VECTOR_SIZE * 8; - size_t DEC_SIZE = INPUT_SIZE; + size_t enc_size_upper_bound = zstd_vector_size * 8; + size_t input_size = zstd_vector_size * 8; + size_t dec_size = input_size; void SetUp() override { - dbl_arr = new double[ZSTD_VECTOR_SIZE]; - enc_dbl_arr = malloc(INPUT_SIZE); - dec_dbl_arr = malloc(INPUT_SIZE); - - const auto v = std::getenv("ALP_DATASET_DIR_PATH"); - if (v == nullptr) { throw std::runtime_error("Environment variable ALP_DATASET_DIR_PATH is not set!"); } - alp_bench::PATHS.ALP_DATASET_BINARY_DIR_PATH = v; + dbl_arr = new double[zstd_vector_size]; + enc_dbl_arr = malloc(input_size); + dec_dbl_arr = malloc(input_size); } ~zstd_test() override { @@ -35,10 +29,10 @@ class zstd_test : public ::testing::Test { }; TEST_F(zstd_test, test_zstd_on_whole_datasets) { - std::ofstream ofile(alp_bench::PATHS.RESULT_DIR_PATH + "zstd_compression_ratio.csv", std::ios::out); + std::ofstream ofile(alp_bench::get_paths().result_dir_path + "zstd_compression_ratio.csv", 
std::ios::out); ofile << "dataset,size\n"; - for (auto& dataset : alp_bench::alp_dataset) { + for (auto& dataset : alp_bench::get_alp_dataset()) { if (dataset.name.find("bw") != std::string::npos) { continue; } size_t tuples_count; @@ -51,10 +45,10 @@ TEST_F(zstd_test, test_zstd_on_whole_datasets) { std::cout << dataset.name << "\n"; - if (tuples_count < ZSTD_VECTOR_SIZE) { - ZSTD_VECTOR_SIZE = tuples_count; - INPUT_SIZE = ZSTD_VECTOR_SIZE * 8; - ENC_SIZE_UPPER_BOUND = ZSTD_VECTOR_SIZE * 8; + if (tuples_count < zstd_vector_size) { + zstd_vector_size = tuples_count; + input_size = zstd_vector_size * 8; + enc_size_upper_bound = zstd_vector_size * 8; } /* Encode - Decode - Validate. */ @@ -63,21 +57,21 @@ TEST_F(zstd_test, test_zstd_on_whole_datasets) { dbl_arr[vector_idx] = value_to_encode; vector_idx = vector_idx + 1; - if (vector_idx != ZSTD_VECTOR_SIZE) { continue; } + if (vector_idx != zstd_vector_size) { continue; } - processed_tuples += ZSTD_VECTOR_SIZE; + processed_tuples += zstd_vector_size; // Encode - size_t const ENC_SIZE = ZSTD_compress(enc_dbl_arr, ENC_SIZE_UPPER_BOUND, dbl_arr, INPUT_SIZE, 3); // Level 3 + size_t const ENC_SIZE = ZSTD_compress(enc_dbl_arr, enc_size_upper_bound, dbl_arr, input_size, 3); // Level 3 // SUM COMPRESSED SIZE compressed_data_size += ENC_SIZE * 8; // Decode - ZSTD_decompress(dec_dbl_arr, DEC_SIZE, enc_dbl_arr, ENC_SIZE); + ZSTD_decompress(dec_dbl_arr, dec_size, enc_dbl_arr, ENC_SIZE); const auto* dec_dbl_arr_tmp = static_cast(dec_dbl_arr); - for (size_t j = 0; j < ZSTD_VECTOR_SIZE; ++j) { + for (size_t j = 0; j < zstd_vector_size; ++j) { const auto l = dbl_arr[j]; if (const auto r = dec_dbl_arr_tmp[j]; l != r) { std::cerr << j << ", " << dataset.name << "\n"; } ASSERT_EQ(dbl_arr[j], dec_dbl_arr_tmp[j]); @@ -87,8 +81,6 @@ TEST_F(zstd_test, test_zstd_on_whole_datasets) { auto compression_ratio = (double)compressed_data_size / processed_tuples; - ofile << std::fixed << std::setprecision(2) << dataset.name << "," << 
compression_ratio << std::endl; + ofile << std::fixed << std::setprecision(2) << dataset.name << "," << compression_ratio << "\n"; } } - -// NOLINTEND diff --git a/publication/source_code/bench_speed/bench_alp_cutter_decode.cpp b/publication/source_code/bench_speed/bench_alp_cutter_decode.cpp index 712c8f5..47164a5 100644 --- a/publication/source_code/bench_speed/bench_alp_cutter_decode.cpp +++ b/publication/source_code/bench_speed/bench_alp_cutter_decode.cpp @@ -85,7 +85,7 @@ void benchmark_all(benchmark::Benchmark& benchmark) { unffor_left_arr = new (std::align_val_t {64}) uint16_t[VECTOR_SIZE]; glue_arr = new (std::align_val_t {64}) double[VECTOR_SIZE]; - for (auto& dataset : alp_bench::alp_dataset) { + for (auto& dataset : alp_bench::get_alp_dataset()) { std::ifstream ifile(dataset.csv_file_path, std::ios::in); if (!dataset.suitable_for_cutting) { continue; } if (dataset.name.find("bw") != std::string::npos) { continue; } @@ -163,7 +163,7 @@ int main() { benchmark::Benchmark benchmark = benchmark::create("alp_decode_cutter") .save() - .at(std::string(SOURCE_DIR) + "/alp_pub/results/" + benchmark::CmakeInfo::getCmakeToolchainFile()) + .at(std::string(SOURCE_DIR) + "/publication/results/" + benchmark::CmakeInfo::getCmakeToolchainFile()) .print() .add_extra_info(benchmark::CmakeInfo::getCmakeInfo()); benchmark_all(benchmark); diff --git a/publication/source_code/bench_speed/bench_alp_cutter_encode.cpp b/publication/source_code/bench_speed/bench_alp_cutter_encode.cpp index cf0a0f8..364768a 100644 --- a/publication/source_code/bench_speed/bench_alp_cutter_encode.cpp +++ b/publication/source_code/bench_speed/bench_alp_cutter_encode.cpp @@ -83,7 +83,7 @@ void benchmark_all(benchmark::Benchmark& benchmark) { unffor_left_arr = new (std::align_val_t {64}) uint16_t[VECTOR_SIZE]; glue_arr = new (std::align_val_t {64}) double[VECTOR_SIZE]; - for (auto& dataset : alp_bench::alp_dataset) { + for (auto& dataset : alp_bench::get_alp_dataset()) { std::ifstream 
ifile(dataset.csv_file_path, std::ios::in); if (!dataset.suitable_for_cutting) { continue; } if (dataset.name.find("bw") != std::string::npos) { continue; } @@ -159,7 +159,7 @@ int main() { benchmark::Benchmark benchmark = benchmark::create("alp_encode_cutter") .save() - .at(std::string(SOURCE_DIR) + "/alp_pub/results/" + benchmark::CmakeInfo::getCmakeToolchainFile()) + .at(std::string(SOURCE_DIR) + "/publication/results/" + benchmark::CmakeInfo::getCmakeToolchainFile()) .print() .add_extra_info(benchmark::CmakeInfo::getCmakeInfo()); benchmark_all(benchmark); diff --git a/publication/source_code/bench_speed/bench_alp_encode.cpp b/publication/source_code/bench_speed/bench_alp_encode.cpp index 90ca769..7dd0066 100644 --- a/publication/source_code/bench_speed/bench_alp_encode.cpp +++ b/publication/source_code/bench_speed/bench_alp_encode.cpp @@ -60,7 +60,7 @@ void benchmark_all(benchmark::Benchmark& benchmark) { base_arr = new (std::align_val_t {64}) int64_t[1024]; rg_smp_arr = new (std::align_val_t {64}) double[1024]; - for (auto& dataset : alp_bench::alp_dataset) { + for (auto& dataset : alp_bench::get_alp_dataset()) { std::ifstream ifile(dataset.csv_file_path, std::ios::in); if (dataset.suitable_for_cutting) { continue; } if (dataset.name.find("bw") != std::string::npos) { continue; } @@ -125,7 +125,7 @@ int main() { benchmark::Benchmark benchmark = benchmark::create("alp_encode_pde") .save() - .at(std::string(SOURCE_DIR) + "/alp_pub/results/" + benchmark::CmakeInfo::getCmakeToolchainFile()) + .at(std::string(SOURCE_DIR) + "/publication/results/" + benchmark::CmakeInfo::getCmakeToolchainFile()) .print() .add_extra_info(benchmark::CmakeInfo::getCmakeInfo()); benchmark_all(benchmark); diff --git a/publication/source_code/bench_speed/bench_alp_without_sampling.cpp b/publication/source_code/bench_speed/bench_alp_without_sampling.cpp index 27eba6d..c46ecee 100644 --- a/publication/source_code/bench_speed/bench_alp_without_sampling.cpp +++ 
b/publication/source_code/bench_speed/bench_alp_without_sampling.cpp @@ -101,7 +101,7 @@ void benchmark_all(benchmark::Benchmark& benchmark) { base_arr = new (std::align_val_t {64}) int64_t[1024]; rg_smp_arr = new (std::align_val_t {64}) double[1024]; - for (auto& dataset : alp_bench::alp_dataset) { + for (auto& dataset : alp_bench::get_alp_dataset()) { std::ifstream ifile(dataset.csv_file_path, std::ios::in); if (dataset.suitable_for_cutting) { continue; } if (dataset.name.find("bw") != std::string::npos) { continue; } @@ -172,7 +172,7 @@ int main() { benchmark::Benchmark benchmark = benchmark::create("alp_encode_without_sampling") .save() - .at(std::string(SOURCE_DIR) + "/alp_pub/results/" + benchmark::CmakeInfo::getCmakeToolchainFile()) + .at(std::string(SOURCE_DIR) + "/publication/results/" + benchmark::CmakeInfo::getCmakeToolchainFile()) .print() .add_extra_info(benchmark::CmakeInfo::getCmakeInfo()); benchmark_all(benchmark); diff --git a/publication/source_code/bench_speed/bench_chimp.cpp b/publication/source_code/bench_speed/bench_chimp.cpp index 50275be..751fe97 100644 --- a/publication/source_code/bench_speed/bench_chimp.cpp +++ b/publication/source_code/bench_speed/bench_chimp.cpp @@ -131,7 +131,7 @@ void benchmark_all(benchmark::Benchmark& benchmark) { flags = new (std::align_val_t {64}) alp_bench::ChimpConstants::Flags[1024]; leading_zero_unpacked = new (std::align_val_t {64}) uint8_t[1024]; - for (auto& dataset : alp_bench::alp_dataset) { + for (auto& dataset : alp_bench::get_alp_dataset()) { std::ifstream ifile(dataset.csv_file_path, std::ios::in); // check to see that the file was opened correctly: @@ -202,7 +202,7 @@ int main() { benchmark::Benchmark benchmark = benchmark::create("chimp") .save() - .at(std::string(SOURCE_DIR) + "/alp_pub/results/" + benchmark::CmakeInfo::getCmakeToolchainFile()) + .at(std::string(SOURCE_DIR) + "/publication/results/" + benchmark::CmakeInfo::getCmakeToolchainFile()) .print() 
.add_extra_info(benchmark::CmakeInfo::getCmakeInfo()); benchmark_all(benchmark); diff --git a/publication/source_code/bench_speed/bench_chimp128.cpp b/publication/source_code/bench_speed/bench_chimp128.cpp index da57133..08b70a9 100644 --- a/publication/source_code/bench_speed/bench_chimp128.cpp +++ b/publication/source_code/bench_speed/bench_chimp128.cpp @@ -216,7 +216,7 @@ void benchmark_all(benchmark::Benchmark& benchmark) { leading_zero_unpacked = new (std::align_val_t {64}) uint8_t[1024]; unpacked_data_arr = new (std::align_val_t {64}) alp_bench::UnpackedData[1024]; - for (auto& dataset : alp_bench::alp_dataset) { + for (auto& dataset : alp_bench::get_alp_dataset()) { std::ifstream ifile(dataset.csv_file_path, std::ios::in); if (dataset.name.find("bw") != std::string::npos) { continue; } @@ -300,7 +300,7 @@ int main() { benchmark::Benchmark benchmark = benchmark::create("chimp128") .save() - .at(std::string(SOURCE_DIR) + "/alp_pub/results/" + benchmark::CmakeInfo::getCmakeToolchainFile()) + .at(std::string(SOURCE_DIR) + "/publication/results/" + benchmark::CmakeInfo::getCmakeToolchainFile()) .print() .add_extra_info(benchmark::CmakeInfo::getCmakeInfo()); benchmark_all(benchmark); diff --git a/publication/source_code/bench_speed/bench_gorillas.cpp b/publication/source_code/bench_speed/bench_gorillas.cpp index 117df40..1f4de28 100644 --- a/publication/source_code/bench_speed/bench_gorillas.cpp +++ b/publication/source_code/bench_speed/bench_gorillas.cpp @@ -105,7 +105,7 @@ void benchmark_all(benchmark::Benchmark& benchmark) { dec_arr = new (std::align_val_t {64}) uint64_t[1024]; flags = new (std::align_val_t {64}) alp_bench::GorillasConstants::Flags[1024]; - for (auto& dataset : alp_bench::alp_dataset) { + for (auto& dataset : alp_bench::get_alp_dataset()) { std::ifstream ifile(dataset.csv_file_path, std::ios::in); if (dataset.name.find("bw") != std::string::npos) { continue; } @@ -160,7 +160,7 @@ int main() { benchmark::Benchmark benchmark = 
benchmark::create("gorillas") .save() - .at(std::string(SOURCE_DIR) + "/alp_pub/results/" + benchmark::CmakeInfo::getCmakeToolchainFile()) + .at(std::string(SOURCE_DIR) + "/publication/results/" + benchmark::CmakeInfo::getCmakeToolchainFile()) .print() .add_extra_info(benchmark::CmakeInfo::getCmakeInfo()); benchmark_all(benchmark); diff --git a/publication/source_code/bench_speed/bench_patas.cpp b/publication/source_code/bench_speed/bench_patas.cpp index 310b7b9..bec0652 100644 --- a/publication/source_code/bench_speed/bench_patas.cpp +++ b/publication/source_code/bench_speed/bench_patas.cpp @@ -100,7 +100,7 @@ void benchmark_all(benchmark::Benchmark& benchmark) { unpacked_data = new (std::align_val_t {64}) alp_bench::patas::PatasUnpackedValueStats[1024]; dbl_arr = new (std::align_val_t {64}) double[1024]; - for (auto& dataset : alp_bench::alp_dataset) { + for (auto& dataset : alp_bench::get_alp_dataset()) { std::ifstream ifile(dataset.csv_file_path, std::ios::in); // check to see that the file was opened correctly: @@ -147,7 +147,7 @@ int main() { benchmark::Benchmark benchmark = benchmark::create("patas") .save() - .at(std::string(SOURCE_DIR) + "/alp_pub/results/" + benchmark::CmakeInfo::getCmakeToolchainFile()) + .at(std::string(SOURCE_DIR) + "/publication/results/" + benchmark::CmakeInfo::getCmakeToolchainFile()) .print() .add_extra_info(benchmark::CmakeInfo::getCmakeInfo()); benchmark_all(benchmark); diff --git a/publication/source_code/bench_speed/bench_zstd.cpp b/publication/source_code/bench_speed/bench_zstd.cpp index 5b941aa..ac64617 100644 --- a/publication/source_code/bench_speed/bench_zstd.cpp +++ b/publication/source_code/bench_speed/bench_zstd.cpp @@ -67,7 +67,7 @@ void benchmark_all(benchmark::Benchmark& benchmark) { enc_arr = (void*)new (std::align_val_t {64}) double[131072]; dec_arr = (void*)new (std::align_val_t {64}) double[131072]; - for (auto& dataset : alp_bench::alp_dataset) { + for (auto& dataset : alp_bench::get_alp_dataset()) { size_t 
tup_c; @@ -103,7 +103,7 @@ int main() { benchmark::Benchmark benchmark = benchmark::create("zstd") .save() - .at(std::string(SOURCE_DIR) + "/alp_pub/results/" + benchmark::CmakeInfo::getCmakeToolchainFile()) + .at(std::string(SOURCE_DIR) + "/publication/results/" + benchmark::CmakeInfo::getCmakeToolchainFile()) .print() .add_extra_info(benchmark::CmakeInfo::getCmakeInfo()); benchmark_all(benchmark); diff --git a/publication/source_code/generated/arm64v8/neon_intrinsic_uf1/arm64v8_neon_intrinsic_1024_uf1_falp_test.cpp b/publication/source_code/generated/arm64v8/neon_intrinsic_uf1/arm64v8_neon_intrinsic_1024_uf1_falp_test.cpp index 4c81864..fb3cf99 100644 --- a/publication/source_code/generated/arm64v8/neon_intrinsic_uf1/arm64v8_neon_intrinsic_1024_uf1_falp_test.cpp +++ b/publication/source_code/generated/arm64v8/neon_intrinsic_uf1/arm64v8_neon_intrinsic_1024_uf1_falp_test.cpp @@ -3,37 +3,34 @@ #include "gtest/gtest.h" #include -class arm64v8_neon_intrinsic_1024_uf1_falp: public ::testing::Test -{ - public: - double * dbl_arr; - double * exc_arr; - uint16_t * pos_arr; - uint16_t * exc_c_arr; - int64_t * ffor_arr; - int64_t * unffor_arr; - int64_t * base_arr; - int64_t * dig_arr; - double * dec_dbl_arr; - uint8_t bw; - uint8_t factor; - uint8_t exponent; - double * smp_arr; - void SetUp() override - { - dbl_arr = new double[1024]; - exc_arr = new double[1024]; - pos_arr = new uint16_t[1024]; - dig_arr = new int64_t[1024]; - dec_dbl_arr = new double[1024]; - exc_c_arr = new uint16_t[1024]; - ffor_arr = new int64_t[1024]; - unffor_arr = new int64_t[1024]; - base_arr = new int64_t[1024]; - smp_arr = new double[1024]; +class arm64v8_neon_intrinsic_1024_uf1_falp : public ::testing::Test { +public: + double* dbl_arr; + double* exc_arr; + uint16_t* pos_arr; + uint16_t* exc_c_arr; + int64_t* ffor_arr; + int64_t* unffor_arr; + int64_t* base_arr; + int64_t* dig_arr; + double* dec_dbl_arr; + uint8_t bw; + uint8_t factor; + uint8_t exponent; + double* smp_arr; + void SetUp() 
override { + dbl_arr = new double[1024]; + exc_arr = new double[1024]; + pos_arr = new uint16_t[1024]; + dig_arr = new int64_t[1024]; + dec_dbl_arr = new double[1024]; + exc_c_arr = new uint16_t[1024]; + ffor_arr = new int64_t[1024]; + unffor_arr = new int64_t[1024]; + base_arr = new int64_t[1024]; + smp_arr = new double[1024]; } - ~arm64v8_neon_intrinsic_1024_uf1_falp () override - { + ~arm64v8_neon_intrinsic_1024_uf1_falp() override { delete[] dbl_arr; delete[] exc_arr; delete[] pos_arr; @@ -45,21 +42,17 @@ class arm64v8_neon_intrinsic_1024_uf1_falp: public ::testing::Test delete[] base_arr; delete[] smp_arr; } -} -; -TEST_F(arm64v8_neon_intrinsic_1024_uf1_falp, fused) -{ - for (auto & dataset : alp_bench::alp_dataset) - { - std:: ifstream ifile(dataset.csv_file_path, std::ios:: in ); +}; +TEST_F(arm64v8_neon_intrinsic_1024_uf1_falp, fused) { + for (auto& dataset : alp_bench::get_alp_dataset()) { + std::ifstream ifile(dataset.csv_file_path, std::ios::in); ASSERT_EQ(ifile.fail(), false); - alp::state stt; + alp::state stt; if (dataset.suitable_for_cutting) { continue; } if (dataset.name.find("bw") != std::string::npos) { continue; } double num = 0.0; size_t c {0}; - while (ifile >> num) - { + while (ifile >> num) { dbl_arr[c] = num; c = c + 1; } @@ -70,10 +63,14 @@ TEST_F(arm64v8_neon_intrinsic_1024_uf1_falp, fused) alp::encoder::analyze_ffor(dig_arr, bw, base_arr); fastlanes::generated::ffor::fallback::scalar::ffor(dig_arr, ffor_arr, bw, base_arr); // Decode - generated::falp::arm64v8::neon::falp(reinterpret_cast < uint64_t * > (ffor_arr), dec_dbl_arr, bw, reinterpret_cast < uint64_t * > (base_arr),stt.fac, stt.exp); + generated::falp::arm64v8::neon::falp(reinterpret_cast(ffor_arr), + dec_dbl_arr, + bw, + reinterpret_cast(base_arr), + stt.fac, + stt.exp); alp::decoder::patch_exceptions(dec_dbl_arr, exc_arr, pos_arr, exc_c_arr); - for (size_t i = 0; i < 1024; ++i) - { + for (size_t i = 0; i < 1024; ++i) { ASSERT_EQ(dbl_arr[i], dec_dbl_arr[i]); } 
ASSERT_EQ(dataset.exceptions_count, exc_c_arr[0]); @@ -82,19 +79,16 @@ TEST_F(arm64v8_neon_intrinsic_1024_uf1_falp, fused) } } -TEST_F(arm64v8_neon_intrinsic_1024_uf1_falp, unfused) -{ - for (auto & dataset : alp_bench::alp_dataset) - { - std:: ifstream ifile(dataset.csv_file_path, std::ios:: in ); +TEST_F(arm64v8_neon_intrinsic_1024_uf1_falp, unfused) { + for (auto& dataset : alp_bench::get_alp_dataset()) { + std::ifstream ifile(dataset.csv_file_path, std::ios::in); ASSERT_EQ(ifile.fail(), false); - alp::state stt; + alp::state stt; if (dataset.suitable_for_cutting) { continue; } if (dataset.name.find("bw") != std::string::npos) { continue; } double num = 0.0; size_t c {0}; - while (ifile >> num) - { + while (ifile >> num) { dbl_arr[c] = num; c = c + 1; } @@ -108,8 +102,7 @@ TEST_F(arm64v8_neon_intrinsic_1024_uf1_falp, unfused) fastlanes::generated::unffor::fallback::scalar::unffor(ffor_arr, unffor_arr, bw, base_arr); alp::decoder::decode(unffor_arr, stt.fac, stt.exp, dec_dbl_arr); alp::decoder::patch_exceptions(dec_dbl_arr, exc_arr, pos_arr, exc_c_arr); - for (size_t i = 0; i < 1024; ++i) - { + for (size_t i = 0; i < 1024; ++i) { ASSERT_EQ(dbl_arr[i], dec_dbl_arr[i]); } ASSERT_EQ(dataset.exceptions_count, exc_c_arr[0]); diff --git a/publication/source_code/test/test_chimp.cpp b/publication/source_code/test/test_chimp.cpp index 64daec5..2e97943 100644 --- a/publication/source_code/test/test_chimp.cpp +++ b/publication/source_code/test/test_chimp.cpp @@ -42,7 +42,7 @@ class chimp_test : public ::testing::Test { }; TEST_F(chimp_test, test_chimp) { - for (auto& dataset : alp_bench::alp_dataset) { + for (auto& dataset : alp_bench::get_alp_dataset()) { std::ifstream ifile(dataset.csv_file_path, std::ios::in); ASSERT_EQ(ifile.fail(), false); diff --git a/publication/source_code/test/test_chimp128.cpp b/publication/source_code/test/test_chimp128.cpp index 178d19d..926e0cb 100644 --- a/publication/source_code/test/test_chimp128.cpp +++ 
b/publication/source_code/test/test_chimp128.cpp @@ -56,7 +56,7 @@ class chimp128_test : public ::testing::Test { }; TEST_F(chimp128_test, test_chimp) { - for (auto& dataset : alp_bench::alp_dataset) { + for (auto& dataset : alp_bench::get_alp_dataset()) { std::ifstream ifile(dataset.csv_file_path, std::ios::in); ASSERT_EQ(ifile.fail(), false); diff --git a/publication/source_code/test/test_gorillas.cpp b/publication/source_code/test/test_gorillas.cpp index c581700..8fa3840 100644 --- a/publication/source_code/test/test_gorillas.cpp +++ b/publication/source_code/test/test_gorillas.cpp @@ -33,7 +33,7 @@ class gorillas_test : public ::testing::Test { }; TEST_F(gorillas_test, test_gorillas) { - for (auto& dataset : alp_bench::alp_dataset) { + for (auto& dataset : alp_bench::get_alp_dataset()) { std::ifstream ifile(dataset.csv_file_path, std::ios::in); ASSERT_EQ(ifile.fail(), false); diff --git a/publication/source_code/test/test_patas.cpp b/publication/source_code/test/test_patas.cpp index d81d7b3..3ad5ca8 100644 --- a/publication/source_code/test/test_patas.cpp +++ b/publication/source_code/test/test_patas.cpp @@ -42,7 +42,7 @@ class patas_test : public ::testing::Test { }; TEST_F(patas_test, one_vec) { - for (auto& dataset : alp_bench::alp_dataset) { + for (auto& dataset : alp_bench::get_alp_dataset()) { std::ifstream ifile(dataset.csv_file_path, std::ios::in); ASSERT_EQ(ifile.fail(), false); diff --git a/test/test_alp_sample.cpp b/test/test_alp_sample.cpp index 55dc591..3f2c2d8 100644 --- a/test/test_alp_sample.cpp +++ b/test/test_alp_sample.cpp @@ -189,33 +189,33 @@ class alp_test : public ::testing::Test { /// Test used for correctness of bitwidth and exceptions on the first vector of each dataset TEST_F(alp_test, test_alp_double) { - for (const auto& col : alp_bench::alp_dataset) { + for (const auto& col : alp_bench::get_alp_dataset()) { test_column(col); } } /// Test used for correctness of bitwidth and exceptions on the first vector of generated data 
TEST_F(alp_test, test_alp_on_generated) { - for (const auto& col : alp_bench::generated_cols) { + for (const auto& col : alp_bench::get_generated_cols()) { test_column(col); } } // Test used for correctness of bitwidth and exceptions on the first vector of edge_case data TEST_F(alp_test, test_alp_on_edge_case) { - for (const auto& col : alp_bench::edge_case) { + for (const auto& col : alp_bench::get_edge_case()) { test_column(col); } } TEST_F(alp_test, alp_float_test_dataset) { - for (const auto& col : alp_bench::float_test_dataset) { + for (const auto& col : alp_bench::get_float_test_dataset()) { test_column(col); } } TEST_F(alp_test, alp_double_test_dataset) { - for (const auto& col : alp_bench::double_test_dataset) { + for (const auto& col : alp_bench::get_double_test_dataset()) { test_column(col); } } \ No newline at end of file