Skip to content

Commit

Permalink
Availability & Reproducibility (#16)
Browse files Browse the repository at this point in the history
- no CI for publication, instead single master_script
  • Loading branch information
azimafroozeh authored Sep 23, 2024
1 parent 0e90f0b commit e9c0488
Show file tree
Hide file tree
Showing 40 changed files with 583 additions and 675 deletions.
98 changes: 0 additions & 98 deletions .github/workflows/PUBLICATION.yaml

This file was deleted.

2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ if (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND ALP_ENABLE_CLANG_TIDY)
else ()
set(CMAKE_CXX_CLANG_TIDY
${CLANG_TIDY_EXE};
-header-filter=include/alp;
-header-filter=include/alp,data/include;
-warnings-as-errors=*;)

endif ()
Expand Down
1 change: 0 additions & 1 deletion availability_reproducibility_initiative_report.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@
- figure 6 : todo

5) Documentation on how to compile, deploy, run the code, and use the scripts:
- Follow the [Publication CI](.github/workflows/PUBLICATION.yaml)
- In [BENCHMARKING.md](/BENCHMARKING.md) we detail how to replicate the experiments and benchmarks presented in
our [publication](https://dl.acm.org/doi/pdf/10.1145/3626717).
6) A link to [a single master script](publication/master_script/master_script.sh) that runs the experiments, collects
Expand Down
4 changes: 2 additions & 2 deletions benchmarks/bench_compression_ratio/alp32.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -128,10 +128,10 @@ class alp32_test : public ::testing::Test {
* This test will output and write a file with the estimated bits/value after compression with alp
*/
TEST_F(alp32_test, test_alprd32_on_whole_datasets) {
std::ofstream ofile(alp_bench::PATHS.RESULT_DIR_PATH + "alp_rd32_compression_ratio.csv", std::ios::out);
std::ofstream ofile(alp_bench::get_paths().result_dir_path + "alp_rd32_compression_ratio.csv", std::ios::out);
ofile << "dataset,size,rowgroups_count,vectors_count\n";

for (auto& dataset : alp_bench::sp_datasets) {
for (auto& dataset : alp_bench::get_sp_datasets()) {
if (!dataset.suitable_for_cutting) { continue; }

std::cout << dataset.name << std::endl;
Expand Down
18 changes: 7 additions & 11 deletions benchmarks/bench_compression_ratio/bench_alp_compression_ratio.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ void read_data(std::vector<double>& data, const std::string& csv_file_path, cons
file.seekg(0, std::ios::beg);

// Ensure the file size is a multiple of the size of a double
if (fileSize % sizeof(double) != 0) { throw std::runtime_error("File size is not a multiple of double size!"); }
// if (fileSize % sizeof(double) != 0) { throw std::runtime_error("File size is not a multiple of double size!"); }
// Calculate the number of doubles
std::size_t numDoubles = fileSize / sizeof(double);

Expand Down Expand Up @@ -327,14 +327,10 @@ class alp_test : public ::testing::Test {
* This test will output and write a file with the estimated bits/value after compression with alp
*/
TEST_F(alp_test, test_alp_on_whole_datasets) {
if (const auto v = std::getenv("ALP_DATASET_DIR_PATH"); v == nullptr) {
throw std::runtime_error("Environment variable ALP_DATASET_DIR_PATH is not set!");
}

std::ofstream ofile(alp_bench::PATHS.RESULT_DIR_PATH + "alp_compression_ratio.csv", std::ios::out);
std::ofstream ofile(alp_bench::get_paths().result_dir_path + "alp_compression_ratio.csv", std::ios::out);
ofile << "dataset,size,rowgroups_count,vectors_count\n";

for (auto& dataset : alp_bench::alp_dataset) {
for (auto& dataset : alp_bench::get_alp_dataset()) {
bench_alp_compression_ratio(dataset, ofile);
}
}
Expand All @@ -344,19 +340,19 @@ TEST_F(alp_test, test_alp_on_whole_datasets) {
* This test will output and write a file with the estimated bits/value after compression with alp
*/
TEST_F(alp_test, test_alprd_on_whole_datasets) {
std::ofstream ofile(alp_bench::PATHS.RESULT_DIR_PATH + "alp_rd_compression_ratio.csv", std::ios::out);
std::ofstream ofile(alp_bench::get_paths().result_dir_path + "alp_rd_compression_ratio.csv", std::ios::out);
ofile << "dataset,size,rowgroups_count,vectors_count\n";

for (auto& dataset : alp_bench::alp_dataset) {
for (auto& dataset : alp_bench::get_alp_dataset()) {
bench_alp_rd_compression_ratio(dataset, ofile);
}
}

TEST_F(alp_test, test_alprd_on_evalimplsts) {
std::ofstream ofile(alp_bench::PATHS.RESULT_DIR_PATH + "evalimplsts.csv", std::ios::out);
std::ofstream ofile(alp_bench::get_paths().result_dir_path + "evalimplsts.csv", std::ios::out);
ofile << "dataset,size,rowgroups_count,vectors_count\n";

for (auto& dataset : alp_bench::evalimplsts) {
for (auto& dataset : alp_bench::get_evalimplsts()) {
bench_alp_rd_compression_ratio(dataset, ofile);
}
}
Expand Down
2 changes: 1 addition & 1 deletion benchmarks/bench_speed/bench_alp_cutter_decode.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ void benchmark_all(benchmark::Benchmark& benchmark) {
unffor_left_arr = new (std::align_val_t {64}) uint16_t[VECTOR_SIZE];
glue_arr = new (std::align_val_t {64}) double[VECTOR_SIZE];

for (auto& dataset : alp_bench::alp_dataset) {
for (auto& dataset : alp_bench::get_alp_dataset()) {
std::ifstream ifile(dataset.csv_file_path, std::ios::in);
if (!dataset.suitable_for_cutting) { continue; }
if (dataset.name.find("bw") != std::string::npos) { continue; }
Expand Down
2 changes: 1 addition & 1 deletion benchmarks/bench_speed/bench_alp_cutter_encode.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ void benchmark_all(benchmark::Benchmark& benchmark) {
unffor_left_arr = new (std::align_val_t {64}) uint16_t[VECTOR_SIZE];
glue_arr = new (std::align_val_t {64}) double[VECTOR_SIZE];

for (auto& dataset : alp_bench::alp_dataset) {
for (auto& dataset : alp_bench::get_alp_dataset()) {
std::ifstream ifile(dataset.csv_file_path, std::ios::in);
if (!dataset.suitable_for_cutting) { continue; }
if (dataset.name.find("bw") != std::string::npos) { continue; }
Expand Down
2 changes: 1 addition & 1 deletion benchmarks/bench_speed/bench_alp_encode.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ void benchmark_all(benchmark::Benchmark& benchmark) {
base_arr = new (std::align_val_t {64}) int64_t[1024];
rg_smp_arr = new (std::align_val_t {64}) double[1024];

for (auto& dataset : alp_bench::alp_dataset) {
for (auto& dataset : alp_bench::get_alp_dataset()) {
std::ifstream ifile(dataset.csv_file_path, std::ios::in);
if (dataset.suitable_for_cutting) { continue; }
if (dataset.name.find("bw") != std::string::npos) { continue; }
Expand Down
2 changes: 1 addition & 1 deletion benchmarks/bench_speed/bench_alp_without_sampling.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ void benchmark_all(benchmark::Benchmark& benchmark) {
base_arr = new (std::align_val_t {64}) int64_t[1024];
rg_smp_arr = new (std::align_val_t {64}) double[1024];

for (auto& dataset : alp_bench::alp_dataset) {
for (auto& dataset : alp_bench::get_alp_dataset()) {
std::ifstream ifile(dataset.csv_file_path, std::ios::in);
if (dataset.suitable_for_cutting) { continue; }
if (dataset.name.find("bw") != std::string::npos) { continue; }
Expand Down
Empty file added data/full_data/README.md
Empty file.
26 changes: 12 additions & 14 deletions data/include/column.hpp
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
#ifndef COLUMN_HPP
#define COLUMN_HPP

// NOLINTBEGIN

#include <array>
#include <cstdint>
#include <string>
Expand All @@ -21,24 +19,24 @@ struct Column {
};

struct paths {
std::string GENERATED_COLUMNS_CSV_PATH = std::string {CMAKE_SOURCE_DIR} + "/data/generated/";
std::string ALP_DATASET_CSV_PATH = std::string {CMAKE_SOURCE_DIR} + "/data/samples/";
std::string EDGE_DATASET_CSV_PATH = std::string {CMAKE_SOURCE_DIR} + "/data/edge_case/";
std::string RESULT_DIR_PATH = std::string {CMAKE_SOURCE_DIR} + "/publication/";
std::string EVALIMPLSTS_CSV_PATH = std::string {CMAKE_SOURCE_DIR} + "/data/evalimplsts/";

std::string ALP_DATASET_BINARY_DIR_PATH = " ";
std::string generated_columns_csv_path = std::string {CMAKE_SOURCE_DIR} + "/data/generated/";
std::string alp_dataset_csv_path = std::string {CMAKE_SOURCE_DIR} + "/data/samples/";
std::string edge_dataset_csv_path = std::string {CMAKE_SOURCE_DIR} + "/data/edge_case/";
std::string result_dir_path = std::string {CMAKE_SOURCE_DIR} + "/publication/";
std::string evalimplsts_csv_path = std::string {CMAKE_SOURCE_DIR} + "/data/evalimplsts/";
std::string alp_dataset_binary_dir_path = std::string {CMAKE_SOURCE_DIR} + "/data/full_data/";

explicit paths() {
auto v = std::getenv("ALP_DATASET_DIR_PATH");
if (v) { ALP_DATASET_BINARY_DIR_PATH = v; }
const auto v = std::getenv("ALP_DATASET_DIR_PATH");
if (v) { alp_dataset_binary_dir_path = v; }
}
};

inline paths PATHS;
/// Returns the shared `paths` instance used to locate datasets and result directories.
///
/// The instance is a function-local static: it is constructed on the first call (which is
/// when the `paths` constructor consults the ALP_DATASET_DIR_PATH environment variable) and
/// the same object is reused by every later call.
///
/// It is returned by const reference rather than by value so that callers such as
/// `get_paths().result_dir_path + "file.csv"` do not copy every path string on each call.
inline const paths& get_paths() {
    static const paths instance;
    return instance;
}

} // namespace alp_bench

#endif

// NOLINTEND
Loading

0 comments on commit e9c0488

Please sign in to comment.