diff --git a/benchmark/self_delimiting_codes/Makefile b/benchmark/self_delimiting_codes/Makefile new file mode 100644 index 000000000..0292393b7 --- /dev/null +++ b/benchmark/self_delimiting_codes/Makefile @@ -0,0 +1,83 @@ +include ../../Make.helper +SRC_DIR = src +BIN_DIR = bin +LIBS = -lsdsl +RES_FILE = results/result.csv #result file of benchmark +VAT_FILE = results/vat.csv #vector assignment table (vector name -> sdsl type) +TC_FILE = results/tc.csv #test case table (contains only test case names) + +#utility +empty:= +space:= $(empty) $(empty) +comma:= , + +#load test cases +TC_IDS := $(call config_ids,test_case.config) +TC_SRC := $(foreach TC_ID,$(TC_IDS),\ + $(call config_select,test_case.config,$(TC_ID),2)) +TC_FILES := $(foreach TC_ID,$(TC_IDS),\ + $(if $(findstring BWT_MTF,$(call config_select,test_case.config,$(TC_ID),6)),\ + ../tmp/BWT_MTF.$(TC_ID),\ + $(call config_select,test_case.config,$(TC_ID),2))) + +all: $(RES_FILE) + +timing: $(RES_FILE) #builds the PDF report via the visualize sub-make + @cd visualize && $(MAKE) + +#compilation of bwt - mtf - transform algorithm +$(BIN_DIR)/gen_bwt_mtf: $(SRC_DIR)/gen_bwt_mtf.cpp + @$(MY_CXX) $(MY_CXX_FLAGS) $(C_OPTIONS) -L$(LIB_DIR) "$(SRC_DIR)/gen_bwt_mtf.cpp"\ + -I$(INC_DIR) -o "$(BIN_DIR)/gen_bwt_mtf" $(LIBS) -ldivsufsort -ldivsufsort64 + +#generation of MTF of BWT +../tmp/BWT_MTF.%: $(TC_SRC) $(BIN_DIR)/gen_bwt_mtf + $(eval TC_ID:=$*) + $(eval TC_PATH:=$(call config_select,test_case.config,$(TC_ID),2)) + $(eval NUM_BYTE:=$(call config_select,test_case.config,$(TC_ID),5)) + @$(BIN_DIR)/gen_bwt_mtf $(TC_PATH) ../tmp/BWT_MTF.$(TC_ID) ../tmp $(NUM_BYTE) + +#compilation and creation of vector assignment table +$(BIN_DIR)/sdcbenchmark: $(SRC_DIR)/sdc_benchmark.cpp vectors.config compile_options.config + $(eval VTYPES := $(subst $(space),$(comma),$(strip $(call config_column,vectors.config,2)))) + $(eval VNAMES := $(subst $(space),\"$(comma)\",$(strip $(call config_column,vectors.config,3)))) + $(eval VNAMES := $(addprefix {\",$(VNAMES))) + $(eval VNAMES := 
$(addsuffix \"},$(VNAMES))) + $(eval C_OPTIONS:=$(call config_ids,compile_options.config)) + @echo "Compiling build for vectors $(VNAMES)" + @$(MY_CXX) $(MY_CXX_FLAGS) $(C_OPTIONS) -DVTYPES="$(VTYPES)" -DVNAMES="$(VNAMES)" -L$(LIB_DIR)\ + "$(SRC_DIR)/sdc_benchmark.cpp" -I$(INC_DIR) -o "$(BIN_DIR)/sdcbenchmark" $(LIBS) + $(eval V_IDS := $(call config_ids,vectors.config)) + $(eval V_ASSIGNMENTTABLE := $(subst $(space),\n,$(strip $(foreach V_ID,$(V_IDS),\ + $(call config_select,vectors.config,$(V_ID),3);$(call config_select,vectors.config,$(V_ID),2))))) + @echo "Writing Vector Assignment Table" + @echo "vector;sdsltype" > $(VAT_FILE) + @echo "$(V_ASSIGNMENTTABLE)" >> $(VAT_FILE) + +#execution and creation of test case table +$(RES_FILE): test_case.config $(TC_FILES) $(BIN_DIR)/sdcbenchmark + $(eval ARGS := $(foreach TC_ID,$(TC_IDS),\ + $(call config_select,test_case.config,$(TC_ID),3) $(space) \ + $(if $(findstring BWT_MTF,$(call config_select,test_case.config,$(TC_ID),6)),\ + ../tmp/BWT_MTF.$(TC_ID),\ + $(call config_select,test_case.config,$(TC_ID),2)) $(space) \ + $(call config_select,test_case.config,$(TC_ID),5) ) ) + @echo "Executing Benchmark" + @$(BIN_DIR)/sdcbenchmark $(ARGS) | tee $(RES_FILE) + $(eval TC_TABLE := $(subst $(space),\n,$(strip $(call config_column,test_case.config,3)))) + @echo "Writing Test Case file" + @echo "testcase\\nOverall" > $(TC_FILE) + @echo "$(TC_TABLE)" >> $(TC_FILE) + +include ../Make.download + +clean-build: + @echo "Remove executables" + rm -f $(BIN_DIR)/* + +clean-result: + @echo "Remove results" + rm -f results/* + +cleanall: clean-build clean-result #also cleans the visualize sub-directory + @cd visualize && $(MAKE) cleanall diff --git a/benchmark/self_delimiting_codes/README.md b/benchmark/self_delimiting_codes/README.md new file mode 100644 index 000000000..643c61762 --- /dev/null +++ b/benchmark/self_delimiting_codes/README.md @@ -0,0 +1,55 @@ +# Benchmarking self delimiting codes + +## Methodology + +Explored dimensions: + + * self - delimiting code implementations + * 
test cases + * methods (`encoding`, `decoding`) + +## Directory structure + + * [bin](./bin): Contains the executables of the project. + * [results](./results): Contains the results of the experiments. + * [src](./src): Contains the source code of the benchmark. + * [visualize](./visualize): Contains LaTeX files and a Makefile for generating a report. + +## Prerequisites + + * To run the test on larger test cases (>= 200 MB), you should have at least 2 GB + of free memory (some vectors have very poor compression). + * For the visualization you need the following software: + - [pdflatex][LT] to generate the pdf reports. + - [pgfplots][PGFP] version 1.10 installed in [LT] to generate plots in pdf reports. + +## Usage + + * `make timing` compiles the programs, downloads or generates + the test instances, builds the compression vectors, + runs the performance tests and generates a report located at + `visualize/self_delimiting_codes.pdf`. The raw numbers of the encoding / decoding + rates and compression can be found in the file `results/result.csv`. + The test cases used can be found in the file `results/tc.csv`. + The tested vectors can be found in the file `results/vat.csv`. + The default benchmark took about 6 hours on my machine (Asus P50IJ + Pentium(R) Dual-Core CPU T4500 @ 2.30GHz 2GB). + * All created binaries and test results can be deleted + by calling `make cleanall`. + +## Customization of the benchmark + +The project contains several configuration files: + + * [vectors.config][VCONFIG]: Specify different compression vectors and their used coders. + * [test_case.config][TCCONFIG]: Specify test instances by ID, path, LaTeX-name + for the report, download URL, file type, and input type (`PLAIN` or `BWT_MTF`). + * [compile_options.config][CCONFIG]: Specify compile options by option string. + +Note that the benchmark will execute every combination of vectors and test cases. 
+ +[LT]: http://www.tug.org/applications/pdftex/ "pdflatex" +[PGFP]: http://www.ctan.org/pkg/pgfplots "pgfplots" +[VCONFIG]: ./vectors.config "vectors.config" +[TCCONFIG]: ./test_case.config "test_case.config" +[CCONFIG]: ./compile_options.config "compile_options.config" diff --git a/benchmark/self_delimiting_codes/bin/.gitignore b/benchmark/self_delimiting_codes/bin/.gitignore new file mode 100644 index 000000000..d6b7ef32c --- /dev/null +++ b/benchmark/self_delimiting_codes/bin/.gitignore @@ -0,0 +1,2 @@ +* +!.gitignore diff --git a/benchmark/self_delimiting_codes/compile_options.config b/benchmark/self_delimiting_codes/compile_options.config new file mode 100644 index 000000000..cebc3e9f2 --- /dev/null +++ b/benchmark/self_delimiting_codes/compile_options.config @@ -0,0 +1,2 @@ +# Compile options +-O3 -funroll-loops -fomit-frame-pointer -ffast-math -DNDEBUG diff --git a/benchmark/self_delimiting_codes/results/.gitignore b/benchmark/self_delimiting_codes/results/.gitignore new file mode 100644 index 000000000..d6b7ef32c --- /dev/null +++ b/benchmark/self_delimiting_codes/results/.gitignore @@ -0,0 +1,2 @@ +* +!.gitignore diff --git a/benchmark/self_delimiting_codes/src/gen_bwt_mtf.cpp b/benchmark/self_delimiting_codes/src/gen_bwt_mtf.cpp new file mode 100644 index 000000000..7de1b777b --- /dev/null +++ b/benchmark/self_delimiting_codes/src/gen_bwt_mtf.cpp @@ -0,0 +1,119 @@ +#include +#include +#include +#include +#include + +using namespace sdsl; + +//routine to save a vector in different formats, see lower implementations +template +void saveVector(const INT_VECTOR &v, const char *dest); + +//main function to generate MTF of BWT of an integer vector. 
+// CSA_WT: used wavelet - tree - based suffix array implementation +// INT_VECTOR: used integer vector for extracting BWT +// num_byte: value indicating how result has to be opened / saved +// srcfile: file from which to generate +// destfile: file where to save result +// tmpdir: directory used for temporary results +// conf_bwt_key: key used to fetch the bwt from the cache after suffix array construction +template +void gen_bwt_mtf(const char *srcfile, const char *destfile, const char *tmpdir, + const char *conf_bwt_key) { + //utility for CSA generation + cache_config cc(false, tmpdir, "gen_bwt_mtf_"); + INT_VECTOR bwt; + + //create suffix array + CSA_WT wt; + construct(wt, srcfile, cc, num_byte); + + //compute alphabet table from suffix array (table is pre-sized to sigma, so fill it by index) + std::vector alph_tbl( wt.sigma ); + for (uint64_t i = 0; i < wt.sigma; i++) { + alph_tbl[i] = wt.comp2char[i]; + } + + //fetch bwt + load_from_file(bwt, cache_file_name(conf_bwt_key, cc)); + + //create mtf + for (uint64_t i = 0; i < bwt.size(); i++) { + uint64_t c = bwt[i]; + //find c in alphabet table and move it to front + uint64_t j = 0; + do { + uint64_t tmp = alph_tbl[j]; + alph_tbl[j++] = c; + c = tmp; + } while (c != alph_tbl.front()); + //and write its index to mtf transform of bwt + bwt[i] = j-1; + } + + //save everything + saveVector( bwt, destfile ); + + //and free resources + util::delete_all_files(cc.file_map); +} + +//functions for saving an integer vector in different formats +//generic version (raw output) +template +void saveVector(const INT_VECTOR &v, const char *dest) { + std::ofstream out(dest); + out.write((char *)v.data(), num_byte * v.size()); +} +//serialization of integer vector +template<> +void saveVector, 0>(const int_vector<> &v, const char *dest) { + store_to_file(v, dest); +} +//decimal digits +template<> +void saveVector, 'd'>(const int_vector<> &v, const char *dest) { + std::ofstream out(dest); + if (v.size()) out << v[0]; + for (uint64_t i = 1; i < v.size(); i++) { + out << " " << v[i]; + 
} +} + +//main function +int main(int argc, char* argv[]) { + if (argc != 5) { + std::cout<<"Usage: input_file output_file temp_dir num_byte" << std::endl; + return 1; + } + std::cout << "Calculate MTF Transform of BWT of " << argv[1] + << " and store it to " << argv[2] << std::endl; + + typedef csa_wt<> csa_wt_byte; + typedef csa_wt, 64, 64, sa_order_sa_sampling<>, int_vector<>, int_alphabet<>> csa_wt_int; + + switch (argv[4][0]) { + case 'd': //decimal digits + gen_bwt_mtf, 'd'>(argv[1], argv[2], argv[3], conf::KEY_BWT_INT); + return 0; + case '0': //serialized integer vector + gen_bwt_mtf, 0>(argv[1], argv[2], argv[3], conf::KEY_BWT_INT); + return 0; + case '1': //byte integer vector + gen_bwt_mtf, 1>(argv[1], argv[2], argv[3], conf::KEY_BWT); + return 0; + case '2': //2 byte integer vector + gen_bwt_mtf, 2>(argv[1], argv[2], argv[3], conf::KEY_BWT_INT); + return 0; + case '4': //4 byte integer vector + gen_bwt_mtf, 4>(argv[1], argv[2], argv[3], conf::KEY_BWT_INT); + return 0; + case '8': //8 byte integer vector + gen_bwt_mtf, 8>(argv[1], argv[2], argv[3], conf::KEY_BWT_INT); + return 0; + default: + std::cout << "Illegal num_byte, allowed are 'd', 0, 1, 2, 4, 8" << std::endl; + return 1; + } +} diff --git a/benchmark/self_delimiting_codes/src/sdc_benchmark.cpp b/benchmark/self_delimiting_codes/src/sdc_benchmark.cpp new file mode 100644 index 000000000..2eaf40315 --- /dev/null +++ b/benchmark/self_delimiting_codes/src/sdc_benchmark.cpp @@ -0,0 +1,235 @@ +#include +#include +#include +#include + +/**** Benchmark for self - delimiting codes *********************************** +For information about usage of this benchmark, see displayUsage - function. + +To compile this benchmark, the following macros have to be defined +(e.g. by passing them to compiler): +- VTYPES: a comma - separated list of sdsl vector types to be testet, + e.g. 
vlc_vector,vlc_vector +- VNAMES: symbolic names of the corresponding vector types, in same order + as in macro VTYPES, defined as a character array. + According to the upper sample on macro VTYPES, VNAMES could be defined as + {"VLC Vector with Elias Gamma Coder","VLC Vector with Elias Delta Coder"} +*/ + + +//assert that needed macros are defined +#ifndef VTYPES +#error "Macro VTYPES with comma - separated list of vector types has to be \ + defined for compiling benchmark" +#endif + +#ifndef VNAMES +#error "Macro VNAMES with an array of characters has to be \ + defined for compiling benchmark" +#endif + +using namespace std; +using namespace sdsl; +using namespace std::chrono; +using timer = std::chrono::high_resolution_clock; + +const char *(vectornames[]) = VNAMES; +const size_t vectorcount = sizeof(vectornames) / sizeof(vectornames[0]); + +struct iv_testresult { //testcase for one defined int vector + double enc_MBperSec; //encoding rate: megabytes per second + double dec_MBperSec; //decoding rate: megabytes per second + double comp_percent; //compression rate: needed space in percentage compared + //to original integer vector +}; + +//benchmark method declaration +template //used vectors for benchmark +bool runTestcase( const int_vector<> &iv, iv_testresult *result ); + +//stuff for nice printing +void displayUsage(const char *pname); +void displayHeading(); +void displayResult( const char *testcase, const iv_testresult *result ); + +int main(const int argc, const char **argv) +{ + //check args + if ((argc - 1) % 3 != 0) { + displayUsage(argv[0]); + return 1; + } + + //set up needed structures + const size_t testcasecount = (argc - 1) / 3; + iv_testresult overallresult[vectorcount]; + + //prepare overall result + for (size_t i = 0; i < vectorcount; i++) { + overallresult[i].enc_MBperSec = 0.0; + overallresult[i].dec_MBperSec = 0.0; + overallresult[i].comp_percent = 0.0; + } + + //start fetching test cases and run benchmark + displayHeading(); + for (size_t i = 
0; i < testcasecount; i++) { + const char *testcase = argv[3*i + 1]; + const char *file = argv[3*i + 2]; //file of saved vector + const char *type = argv[3*i + 3]; //type of saved vector + uint8_t v_type = type[0]=='d' ? 'd' : type[0] - '0'; + + //load vector + int_vector<> iv; + if (!load_vector_from_file(iv, file, v_type)) { + cerr << "ERROR: vector from file " << file + << " with type " << type << " could not be loaded" + << endl; + displayUsage(argv[0]); + return 1; + } + + //run test + iv_testresult result[vectorcount]; + if (!runTestcase( iv, result )) { + cerr << "Testcase " << testcase << "failed" << endl; + return 1; + } + + //print result + displayResult( testcase, result ); + + //and sum up results for overall result + for (size_t j = 0; j < vectorcount; j++) { + overallresult[j].enc_MBperSec += result[j].enc_MBperSec; + overallresult[j].dec_MBperSec += result[j].dec_MBperSec; + overallresult[j].comp_percent += result[j].comp_percent; + } + } + + //build average for overall result + for (size_t i = 0; i < vectorcount; i++) { + overallresult[i].enc_MBperSec /= testcasecount; + overallresult[i].dec_MBperSec /= testcasecount; + overallresult[i].comp_percent /= testcasecount; + } + + //and display overall results + displayResult( "Overall", overallresult ); + return 0; +} + +//// BENCHMARK METHODS //////////////////////////////////////////////////////// +template //used compression vector type +bool runSingleTest( const int_vector<> &testcase, iv_testresult &result ) { + //test encoding rate by constructing Vector + auto start = timer::now(); + Vector test( testcase ); + auto stop = timer::now(); + result.enc_MBperSec = size_in_mega_bytes( testcase ) + / duration_cast(stop-start).count() * 1000.0; + + //care for compression rate + result.comp_percent = size_in_mega_bytes(test) + / size_in_mega_bytes(testcase) * 100.0; + + //and finally for decoding rate + //use a trick to decode all values: since (currently) all vectors are + //using sample tables, access 
the element right before the next sampling + //entry, so everything between 2 samples has to be decoded. + size_t sample_dens = test.get_sample_dens(); + start = timer::now(); + size_t i = sample_dens - 1; + for (; i < test.size(); i += sample_dens) { + test[i]; //acess element right before next sample entry + } + //and finally access last element if not done yet + if (i != test.size() + sample_dens - 1) + test[test.size() - 1]; + stop = timer::now(); + result.dec_MBperSec = size_in_mega_bytes( testcase ) + / duration_cast(stop-start).count() * 1000.0; + + return true; //may use this return type for error detection in future +} + +template //used vectors for benchmark +bool runTestcase( const int_vector<> &testcase, iv_testresult *result ) { + size_t i = 0; + //do variadic template pack expansion + bool testfine[] = { runSingleTest( testcase, result[i++] )... }; + bool testsfine = true; + for (i = 0; i < vectorcount; i++) { + if (!testfine[i]) { + cerr << "Test on Vector " << vectornames[i] + << "failed" << endl; + testsfine = false; + } + } + return testsfine; +} + +//// DISPLAYING OF RESULTS //////////////////////////////////////////////////// + +void displayUsage(const char *pname) { + cerr << "USAGE: " << pname << " [testcase file vectortype]*" + << endl; + cerr << "DESCRIPTION:" << endl; + cerr << "\tThis Program runs a benchmark on self-delimiting " + << "Codes." << endl; + cerr << "\tProgram needs triples of parameters " + << "for each test case, see Parameter section." << endl; + cerr << "\tProgram will test a couple of compression vectors " + << endl + << "\ton measured encoding and decoding rates," << endl + << "\tplus the compression rate in percent " + << "(compared to the original integer vector)" << endl + << "\tfor each testcase." + << endl + << "\tAdditionally, an overall result on different " + << endl << "\tcompression vectors is printed." 
<< endl; + cerr << "\tThe generated output uses a CSV format, so " + << "you may save it to a csv file for better visability" + << endl << "\tand other utilites." << endl; + cerr << "PARAMETERS: The parameters have to be passed as " + << " triples for each test case." << endl + << "\tA Triple consist of " << endl + << "\t\t- testcase: A name for the test case" << endl + << "\t\t- file: a path to the file where the test case" << endl + << "\t\t\t(an integer vector) is contained" << endl + << "\t\t- vectortype: type of saved integer vector" << endl + << "\t\t\t0: serialized int_vector<>" << endl + << "\t\t\t1: byte sequence" << endl + << "\t\t\t2: 16-bit word sequence" << endl + << "\t\t\t4: 32-bit word sequence" << endl + << "\t\t\t8: 64-bit word sequence" << endl + << "\t\t\td: Parse decimal numbers" << endl; + cerr << "TESTET COMPRESSION VECTORS:" << endl; + for (size_t i = 0; i < vectorcount; i++) { + cerr << "\t- " << vectornames[i] << endl; + } +} +void displayHeading() { + cout << left; //left justify + //add a comment how to read values + cout << "# encoding / decoding rate unit: MB/s" << endl; + cout << "# compression : percentage of needed space " + << " compared to original vector" << endl; + //and print a header for csv output + cout << setw(20) << "testcase" + << setw(1) << ";" << setw(20) << "vector" + << setw(1) << ";" << setw(20) << "encodingrate" + << setw(1) << ";" << setw(20) << "decodingrate" + << setw(1) << ";" << "compressionrate" << endl; +} + +void displayResult( const char *testcase, const iv_testresult *result ) { + cout << left << fixed; //prepare cout + for (size_t i = 0; i < vectorcount; i++) { + cout << setw(20) << testcase + << setw(1) << ";" << setw(20) << vectornames[i] + << setw(1) << ";" << setw(20) << result[i].enc_MBperSec + << setw(1) << ";" << setw(20) << result[i].dec_MBperSec + << setw(1) << ";" << result[i].comp_percent << endl; + } +} diff --git a/benchmark/self_delimiting_codes/test_case.config 
b/benchmark/self_delimiting_codes/test_case.config new file mode 100644 index 000000000..be8c7fcda --- /dev/null +++ b/benchmark/self_delimiting_codes/test_case.config @@ -0,0 +1,33 @@ +# Configuration for test files +# (1) Identifier for test file (consisting of letters, no `.`) +# (2) Path to the test file +# (3) LaTeX name +# (4) Download link (if the test is available online) +# (5) Test file type(0: serialized int_vector<>, 1: byte sequence, 2: 16-bit word sequence, 4: 32-bit word sequence, 8: 64-bit word sequence, d: Parse decimal numbers) +# (6) Input type (PLAIN: (2) is used as input, BWT_MTF: the MTF of BWT of (2) will be constructed and used as input) + +ENGLISH;../data/english.200MB;english.200MB;http://pizzachili.di.unipi.it/texts/nlang/english.200MB.gz;1;PLAIN +DBLPXML;../data/dblp.xml.200MB;dblp.xml.200MB;http://pizzachili.di.unipi.it/texts/xml/dblp.xml.200MB.gz;1;PLAIN +DNA;../data/dna.200MB;dna.200MB;http://pizzachili.di.unipi.it/texts/dna/dna.200MB.gz;1;PLAIN +PROTEINS;../data/proteins.200MB;proteins.200MB;http://pizzachili.di.unipi.it/texts/protein/proteins.200MB.gz;1;PLAIN +SOURCES;../data/sources.200MB;sources.200MB;http://pizzachili.di.unipi.it/texts/code/sources.200MB.gz;1;PLAIN +#INFLUENZA;../data/influenza;influenza;http://pizzachili.dcc.uchile.cl/repcorpus/real/influenza.gz;1;PLAIN +#EINSTEIN-de;../data/einstein.de.txt;einstein-de;http://pizzachili.dcc.uchile.cl/repcorpus/real/einstein.de.txt.gz;1;PLAIN +#EINSTEIN-en;../data/einstein.en.txt;einstein-en;http://pizzachili.dcc.uchile.cl/repcorpus/real/einstein.en.txt.gz;1;PLAIN +#PARA;../data/para;para;http://pizzachili.dcc.uchile.cl/repcorpus/real/para.gz;1;PLAIN +WORLDLEADER;../data/world_leaders;world-leaders;http://pizzachili.dcc.uchile.cl/repcorpus/real/world_leaders.gz;1;PLAIN +#E_COLI;../data/Escherichia_Coli;E.coli;http://pizzachili.dcc.uchile.cl/repcorpus/real/Escherichia_Coli.gz;1;PLAIN 
+#ENWIKISMLINT;../data/enwiki-20130805-pages-articles1.int.sdsl;enwiki-sml-int;http://people.eng.unimelb.edu.au/sgog/data/enwiki-20130805-pages-articles1.int.sdsl.gz;0;PLAIN + +ENGLISH_BWT_MTF;../data/english.200MB;english.200MB-bwt-mtf;http://pizzachili.di.unipi.it/texts/nlang/english.200MB.gz;1;BWT_MTF +DBLPXML_BWT_MTF;../data/dblp.xml.200MB;dblp.xml.200MB-bwt-mtf;http://pizzachili.di.unipi.it/texts/xml/dblp.xml.200MB.gz;1;BWT_MTF +DNA_BWT_MTF;../data/dna.200MB;dna.200MB-bwt-mtf;http://pizzachili.di.unipi.it/texts/dna/dna.200MB.gz;1;BWT_MTF +PROTEINS_BWT_MTF;../data/proteins.200MB;proteins.200MB-bwt-mtf;http://pizzachili.di.unipi.it/texts/protein/proteins.200MB.gz;1;BWT_MTF +SOURCES_BWT_MTF;../data/sources.200MB;sources.200MB-bwt-mtf;http://pizzachili.di.unipi.it/texts/code/sources.200MB.gz;1;BWT_MTF +#INFLUENZA_BWT_MTF;../data/influenza;influenza-bwt-mtf;http://pizzachili.dcc.uchile.cl/repcorpus/real/influenza.gz;1;BWT_MTF +#EINSTEIN-de_BWT_MTF;../data/einstein.de.txt;einstein-de-bwt-mtf;http://pizzachili.dcc.uchile.cl/repcorpus/real/einstein.de.txt.gz;1;BWT_MTF +#EINSTEIN-en_BWT_MTF;../data/einstein.en.txt;einstein-en-bwt-mtf;http://pizzachili.dcc.uchile.cl/repcorpus/real/einstein.en.txt.gz;1;BWT_MTF +#PARA_BWT_MTF;../data/para;para-bwt-mtf;http://pizzachili.dcc.uchile.cl/repcorpus/real/para.gz;1;BWT_MTF +#WORLDLEADER_BWT_MTF;../data/world_leaders;world-leaders-bwt-mtf;http://pizzachili.dcc.uchile.cl/repcorpus/real/world_leaders.gz;1;BWT_MTF +#E_COLI_BWT_MTF;../data/Escherichia_Coli;E.coli-bwt-mtf;http://pizzachili.dcc.uchile.cl/repcorpus/real/Escherichia_Coli.gz;1;BWT_MTF +#ENWIKISMLINT_BWT_MTF;../data/enwiki-20130805-pages-articles1.int.sdsl;enwiki-sml-int-bwt-mtf;http://people.eng.unimelb.edu.au/sgog/data/enwiki-20130805-pages-articles1.int.sdsl.gz;0;BWT_MTF diff --git a/benchmark/self_delimiting_codes/vectors.config b/benchmark/self_delimiting_codes/vectors.config new file mode 100644 index 000000000..2c90e62c9 --- /dev/null +++ 
b/benchmark/self_delimiting_codes/vectors.config @@ -0,0 +1,26 @@ +# This file specifies integer vectors that are used in the benchmark. +# (1) Identifier for test file (consisting of letters, no `.`) +# (2) Integer Vector sdsl-type (no whitespaces) +# (3) LaTeX name of testet Vector (no whitespaces) +# VLC Vectors +VLC_EG;vlc_vector;VLC-Elias-Gamma +VLC_ED;vlc_vector;VLC-Elias-Delta +VLC_FIB;vlc_vector;VLC-Fibonacci +VLC_C2;vlc_vector>;VLC-Comma-Base3 +#VLC_C3;vlc_vector>;VLC-Comma-Base7 +#VLC_C4;vlc_vector>;VLC-Comma-Base15 +#VLC_C5;vlc_vector>;VLC-Comma-Base31 +#VLC_C6;vlc_vector>;VLC-Comma-Base63 +#VLC_C7;vlc_vector>;VLC-Comma-Base127 +#VLC_C8;vlc_vector>;VLC-Comma-Base255 +# ENC Vectors +ENC_EG;enc_vector;ENC-Elias-Gamma +ENC_ED;enc_vector;ENC-Elias-Delta +ENC_FIB;enc_vector;ENC-Fibonacci +ENC_C2;enc_vector>;ENC-Comma-Base3 +#ENC_C3;enc_vector>;ENC-Comma-Base7 +#ENC_C4;enc_vector>;ENC-Comma-Base15 +#ENC_C5;enc_vector>;ENC-Comma-Base31 +#ENC_C6;enc_vector>;ENC-Comma-Base63 +#ENC_C7;enc_vector>;ENC-Comma-Base127 +#ENC_C8;enc_vector>;ENC-Comma-Base255 diff --git a/benchmark/self_delimiting_codes/visualize/Makefile b/benchmark/self_delimiting_codes/visualize/Makefile new file mode 100644 index 000000000..a0d3aa7bb --- /dev/null +++ b/benchmark/self_delimiting_codes/visualize/Makefile @@ -0,0 +1,31 @@ +# Makefile will also automatically generate a tex file with system information + +#utility +empty:= +space:= $(empty) $(empty) + +auto: self_delimiting_codes.pdf + +self_delimiting_codes.pdf: ../results/result.csv ../results/tc.csv ../results/vat.csv + $(eval CPUINFO := $(strip $(patsubst \\%, $(space),\ + $(shell cat /proc/cpuinfo | grep "model name.*" | uniq | cut -d':' -f 2)))) + $(eval MEMINFO := $(strip \ + $(firstword $(shell free -k | grep "Mem.*" | uniq | cut -d':' -f 2)))) + $(eval MEMINFO := $(addsuffix " KB", $(MEMINFO))) + $(eval DISTINFO := $(strip $(patsubst \\%, $(space),\ + $(shell cat /etc/issue)))) + @echo "Creating system information file" + @echo 
"\\\\begin{tabular}{ll}" > sysinfo.tex + @echo "\\\\toprule" >> sysinfo.tex + @echo "CPU & \\\\verb\\\\$(CPUINFO)\\\\ \\\\\\\\" >> sysinfo.tex + @echo "Total Memory & \\\\verb\\\\$(MEMINFO)\\\\ \\\\\\\\" >> sysinfo.tex + @echo "Distribution & \\\\verb\\\\$(DISTINFO)\\\\ \\\\\\\\" >> sysinfo.tex + @echo "\\\\bottomrule" >> sysinfo.tex + @echo "\\\\end{tabular}" >> sysinfo.tex + @echo "Use pdflatex to generate self_delimiting_codes.pdf" + @pdflatex self_delimiting_codes.tex >> LaTeX.log 2>&1 + +cleanall: + rm -f self_delimiting_codes.pdf self_delimiting_codes.aux \ + self_delimiting_codes.log LaTeX.log sysinfo.tex + diff --git a/benchmark/self_delimiting_codes/visualize/self_delimiting_codes.tex b/benchmark/self_delimiting_codes/visualize/self_delimiting_codes.tex new file mode 100644 index 000000000..59d6020a8 --- /dev/null +++ b/benchmark/self_delimiting_codes/visualize/self_delimiting_codes.tex @@ -0,0 +1,92 @@ +\documentclass{article} + +\usepackage{pgfplots} +\usepackage{pgfplotstable} +\usepackage{color} +\usepackage{booktabs} +\usepackage[section]{placeins} +\pgfplotsset{compat=1.10} + +%load necessary data from result and configuration files +\pgfplotstableread[col sep=semicolon,trim cells]{../results/tc.csv}\testcasetable +\pgfplotstableread[col sep=semicolon,trim cells]{../results/vat.csv}\vectortable + +%some pgf macros and setups +\pgfplotsset{ + discard if not/.style 2 args={ + x filter/.code={ + \edef\tempa{\thisrow{#1}} + \edef\tempb{#2} + \ifx\tempa\tempb + \else + \def\pgfmathresult{inf} + \fi + } + }, + small +} +\pgfkeys{/pgf/number format/.cd,fixed,precision=2} + +%background +\usetikzlibrary{backgrounds} +\definecolor{graphicbackground}{HTML}{F3F3F3} +\pgfkeys{/tikz/.cd, + background color/.initial=graphicbackground, + background color/.get=\backcol, + background color/.store in=\backcol, +} +\tikzset{background rectangle/.style={ + fill=\backcol, + }, + use background/.style={ + show background rectangle + } +} + +\begin{document} +\title{Self 
-- Delimiting Codes Benchmark} + +\maketitle + +This file contains the results of a benchmark on self delimiting codes +using \texttt{SDSL} Library. Benchmark tested a list of integer vectors +for encoding and decoding rates (both measured in Megabytes per second), +and the achieved compression rate (measured in percentage, compared to +the uncompressed integer vector) on different self -- delimiting codes +and compression vectors. + +\input{testcase.tex} +% iterate over each test case and output its result +\pgfplotstabletypeset[ + begin table=, + end table=, + typeset cell/.style={/pgfplots/table/@cell content={\showTestcaseResult{#1}}}, + before row=, + after row=, + skip coltypes, + TeX comment=, + every head row/.style={output empty row}, + string type, + ]{\testcasetable} + +%write some appendix with additional information +\appendix + +\section{Vector Assignment Table} +\pgfplotstabletypeset[columns={vector,sdsltype}, + columns/vector/.style={column name=Vector,string type}, + columns/sdsltype/.style={column name=SDSL Type, + string replace*={_}{\_}, + assign cell content/.code={ + \pgfkeyssetvalue{/pgfplots/table/@cell content}{\texttt{##1}} + }}, + column type={ll}, + every head row/.style={before row=\toprule,after row=\midrule}, + every last row/.style={after row=\bottomrule}, + row sep=\\,col sep=& + ]{\vectortable} + +\section{System Information} +\input{sysinfo.tex} + +\end{document} diff --git a/benchmark/self_delimiting_codes/visualize/testcase.tex b/benchmark/self_delimiting_codes/visualize/testcase.tex new file mode 100644 index 000000000..675f78fe6 --- /dev/null +++ b/benchmark/self_delimiting_codes/visualize/testcase.tex @@ -0,0 +1,63 @@ +% This file declares a macro for showing the results of one test case. +% macro expects testcase name as parameter. 
+\newcommand{\showTestcaseResult}[1]{ +\section{#1 Result} + +%draw first picture containing coding rates +\begin{figure}[htb] +\begin{tikzpicture}[use background] +\begin{axis}[ + width=.75\textwidth, + xbar, + xmin=0, + xlabel= {Rate (MB/s)}, + xmajorgrids=true, + enlarge x limits={value=0.2, upper}, + y=1cm, + enlarge y limits={true, abs value=0.75}, + ytick=data, + yticklabels from table={\vectortable}{vector}, + y tick label style={major tick length=0pt}, + y dir=reverse, + legend style={cells={anchor=east},legend pos=outer north east}, + legend reversed=true, + nodes near coords, + nodes near coords align={horizontal}] +\addplot[draw=black,fill=red,discard if not={testcase}{#1}] table + [x=decodingrate,y expr=\coordindex,col sep=semicolon,trim cells] + {../results/result.csv}; +\addplot[draw=black,fill=blue,discard if not={testcase}{#1}] table + [x=encodingrate,y expr=\coordindex,col sep=semicolon,trim cells] + {../results/result.csv}; +\legend{Decoding,Encoding}; +\end{axis} +\end{tikzpicture} +\caption{Coding Rates on Testcase #1} +\end{figure} + +%and second picture containing compression rate +\begin{figure}[htb] +\begin{tikzpicture}[use background] +\begin{axis}[ + width=.75\textwidth, + xbar, + xmin=0, + xlabel= {Compression (in percentage)}, + xmajorgrids=true, + enlarge x limits={value=0.2, upper}, + y=0.6cm, + enlarge y limits={true, abs value=0.75}, + ytick=data, + yticklabels from table={\vectortable}{vector}, + y tick label style={major tick length=0pt}, + y dir=reverse, + nodes near coords, + nodes near coords align={horizontal}] +\addplot[draw=black,fill=green,discard if not={testcase}{#1}] table + [x=compressionrate,y expr=\coordindex,col sep=semicolon,trim cells] + {../results/result.csv}; +\end{axis} +\end{tikzpicture} +\caption{Compression Rate on Testcase #1} +\end{figure} +} diff --git a/include/sdsl/coder.hpp b/include/sdsl/coder.hpp index 65a717e3f..7f46df825 100644 --- a/include/sdsl/coder.hpp +++ b/include/sdsl/coder.hpp @@ -25,6 +25,7 
@@ #include "coder_fibonacci.hpp" #include "coder_elias_delta.hpp" #include "coder_elias_gamma.hpp" +#include "coder_comma.hpp" namespace sdsl { diff --git a/include/sdsl/coder_comma.hpp b/include/sdsl/coder_comma.hpp new file mode 100644 index 000000000..afd095c63 --- /dev/null +++ b/include/sdsl/coder_comma.hpp @@ -0,0 +1,319 @@ +/* sdsl - succinct data structures library + Copyright (C) 2009 Simon Gog + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see http://www.gnu.org/licenses/ . +*/ +/*! \file coder_comma.hpp + \brief coder_comma.hpp contains the class sdsl::coder::comma + \author Uwe Baier + */ +#ifndef SDSL_CODER_COMMA_INCLUDED +#define SDSL_CODER_COMMA_INCLUDED + +#include +#include +#include + +namespace sdsl { + +namespace coder { + +//! A class to encode and decode between comma code and binary code. +/*! \author Uwe Baier + * + * Comma coding works as follows: + * First of all, comma coding needs a parameter t_width which indicates + * how wide an encoded digit is (in bits), let's say t_width = 2. + * From t_width one can calculate the base used for encoding, in detail + * this means + * base = 2^t_width - 1 + * Now, any given number is encoded as follows: the number is written + * in the calculated base, and each digit of the number is stored with t_width bits. + * To indicate the end of the number, a termination digit is used (namely this is + * the value base).
+ * Example: + * t_width = 2 => base = 2^2 - 1 = 3 + * Value to be encoded: 15 + * 15 (base 10) = 120 (base 3) + * Encoded value: 120 (plus termination digit) = 01 10 00 11 in binary + * (last digit is termination digit) + * + * \tparam t_width Width of one digit used in comma code + */ +template +class comma { + private: + static_assert(t_width > 1 && t_width <= 32, + "comma coder: Width must be in interval [2,32]"); + + //base in which numbers are coded (2^t_width - 1); shifted in 64 bit so that t_width = 31 and 32 are well defined + static const uint32_t base = (uint32_t)((1ULL << t_width) - 1); + + //table needed for computation of encoding lengths. + //table contains entries of the kind (index, base^index) + //to know how many digits a number needs to be encoded. + static const size_t codelentbllen = ceil(64 / log2(base)); + static std::array codelentbl; + + //utility function to set up codelen table + static std::array createCodeLenTbl(); + + //helper function to encode a single number without + //termination digit + static void encode_in_base(uint64_t x, uint64_t *& z, + uint8_t& offset); + public: + typedef uint64_t size_type; + static const uint8_t min_codeword_length = + t_width; //0 needs t_width bits as termination + + //// ENCODING ///////////////////////////////////////////////// + + //! Get the number of bits that are necessary to encode + // the value w in comma code. + /*! \param w 64bit int to get the length of its comma encoding. + */ + static uint8_t encoding_length(uint64_t w); + + //! Encode one positive integer x to an int_vector + // at bit position offset. + /* \param x Positive integer to encode. + \param z Raw data of vector to write the encoded form of x. + \param offset Bit offset in z at which to write the encoded form of x. + */ + static void encode(uint64_t x, uint64_t*& z, uint8_t& offset); + + //!
Encode integers contained in vector v into vector z + /* \param v vector containing positive integer values + \param z vector to put the encoded values + */ + template + static bool encode(const int_vector& v, int_vector& z); + + //// DECODING ///////////////////////////////////////////////// + + //! Decode n comma encoded values beginning at start_idx + // in the bitstring "data" + /* \param data Bitstring + \param start_idx Starting index of the decoding. + \param n Number of values to decode from the bitstring. + \param it Iterator to store the values. + */ + template + static uint64_t decode(const uint64_t* data, + const size_type start_idx, size_type n, + t_iter it=(t_iter)nullptr); + + //! Decode n comma encoded integers + // beginning at start_idx in the bitstring "data" + // and return the sum of these values. + /*! \param data Pointer to the beginning + of the comma encoded bitstring. + \param start_idx Index of the first bit + to decode the values from. + \param n Number of values to decode from the bitstring. + Attention: There have to be at least n encoded + values in the bitstring. + */ + static uint64_t decode_prefix_sum(const uint64_t* data, + const size_type start_idx, size_type n); + + //! Decode n comma encoded integers + // beginning at start_idx ending at end_idx (exclusive) + // in the bitstring "data" + // and return the sum of these values. + /*! \param data Pointer to the beginning + of the comma encoded bitstring. + \param start_idx Index of the first bit + to decode the values from. + \param end_idx Index of the last bit + to decode the values from. + \param n Number of values to decode from the bitstring. + Attention: There have to be at least n encoded + values in the bitstring. + */ + static uint64_t decode_prefix_sum(const uint64_t* data, + const size_type start_idx, const size_type end_idx, + size_type n); + + //! Decode vector z containing comma encoded integers + // and store them in vector v. + /*!
\param z vector that contains encoded integers. + \param v vector to store the decoded integers + */ + template + static bool decode(const int_vector& z, int_vector& v); + + //returns the raw data pointer of v; required by the coder interface + template + static uint64_t* raw_data(int_vector& v) { + return v.m_data; + } +}; + +//// IMPLEMENTATION /////////////////////////////////////////////////////////// + +//// CODELENGTH TABLE SETUP /////////////////////////////// + +template +std::array::codelentbllen> comma::codelentbl = + createCodeLenTbl(); + +template +std::array::codelentbllen> comma::createCodeLenTbl() { + std::array tbl; + uint64_t n = 1; + for (size_t i = 0; i < codelentbllen; i++) { + tbl[i] = n; + n = (n << t_width) - n; //n = n * base + } + return tbl; +} + +//// Encoding ///////////////////////////////////////////// + +template +inline uint8_t comma::encoding_length(uint64_t w) { + //use the code length table and binary search to determine the number of digits + //needed to encode w in given base. + uint8_t numdigits = + std::upper_bound(codelentbl.begin(), codelentbl.end(), w) + - codelentbl.begin(); + //finally calculate length.
+ //Don't forget termination character on calculations ;) + return (numdigits + 1) * t_width; +} + +template +void comma::encode_in_base(uint64_t x, uint64_t *& z, + uint8_t& offset) { + if (x) { + uint32_t digit = x % base; //get next digit + //encode digits with higher order + encode_in_base(x / base, z, offset); + //and write own digit + bits::write_int_and_move(z, digit, offset, t_width); + } +} + +template +inline void comma::encode(uint64_t x, uint64_t*& z, uint8_t& offset) { + //encode x itself + encode_in_base(x, z, offset); + //and append the termination digit + bits::write_int_and_move(z, base, offset, t_width); +} + +template +template +bool comma::encode(const int_vector& v, int_vector& z) { + //first, find out how many bits vector z needs to save values + typedef typename int_vector::size_type size_type; + size_type z_bit_size = 0; + for (typename int_vector::const_iterator it = v.begin(), end = v.end(); + it != end; ++it) { + z_bit_size += encoding_length(*it); + } + + //resize vector z to the required size + z.width(v.width()); + z.bit_resize(z_bit_size); //TODO: check whether resizing succeeded + + //iterate again and save values in z + uint64_t* z_data = z.m_data; + uint8_t offset = 0; + for (typename int_vector::const_iterator it = v.begin(), end = v.end(); + it != end; ++it) { + encode(*it, z_data, offset); + } + return true; +} + +//// DECODING ///////////////////////////////////////////// + +template +template +inline uint64_t comma::decode(const uint64_t* data, + const size_type start_idx, size_type n, t_iter it) { + data += (start_idx >> 6); //jump to the 64-bit word containing start_idx + uint8_t offset = start_idx & 0x3F; //and calculate bit offset within that word + uint64_t value = 0; + for (size_type i = 0; i < n; i++) { + //read next value + uint64_t v = 0; + for (uint32_t digit = (uint32_t)bits::read_int_and_move(data, offset, t_width); //read first digit + digit != base; //while digit is not the terminating digit + v = (v << t_width) - v + digit, //v = v * base + digit + digit =
(uint32_t)bits::read_int_and_move(data, offset, t_width)); //and read next digit + //now decide how to handle value + value = (t_sumup) ? value + v : v; + if (t_inc) *(it++) = value; + } + return value; +} + +template +uint64_t comma::decode_prefix_sum(const uint64_t* data, + const size_type start_idx, size_type n) { + //easiest seems to be to reuse the already existing function decode... + return decode(data, start_idx, n); + //Note for above: 3rd template parameter can be any pointer type except void * +} + +template +uint64_t comma::decode_prefix_sum(const uint64_t* data, + const size_type start_idx, + SDSL_UNUSED const size_type end_idx, size_type n) { + //end index does not change anything here... + return decode_prefix_sum(data, start_idx, n); +} + +template +template +bool comma::decode(const int_vector& z, int_vector& v) { + //check if bit size is divisible by t_width. + if (z.bit_size() % t_width != 0) return false; + + //calculate num of overall digits in z (including termination digits) + uint64_t numOfDigits = z.bit_size() / t_width; + //iteration vars for z vector + const uint64_t *z_data = z.data(); + uint8_t z_offset = 0; + //utility to count number of entries in z, and last read digit + uint32_t digit = base; + typename int_vector::size_type n = 0; + + //iterate over all digits. each time a termination digit is + // detected, an encoded number in the vector ends.
+ while (numOfDigits--) { + digit = (uint32_t)bits::read_int_and_move(z_data, z_offset, t_width); + if (digit == base) n++; //termination digit detected + } + + //also, ensure last read digit was a termination digit + if (digit != base) return false; + + //resize vector v + v.width(z.width()); + v.resize(n); + + //and finally decode and save result in v + decode(z.data(), 0, n, v.begin()); + return true; +} + +} //end of namespace coder +} //end of namespace sdsl + +#endif diff --git a/include/sdsl/cst_sada.hpp b/include/sdsl/cst_sada.hpp index 6c33ad38f..4609f7159 100644 --- a/include/sdsl/cst_sada.hpp +++ b/include/sdsl/cst_sada.hpp @@ -26,11 +26,11 @@ #include "iterators.hpp" #include "lcp_support_sada.hpp" #include "select_support_mcl.hpp" +#include "sorted_stack_support.hpp" #include "bp_support.hpp" #include "bp_support_sada.hpp" #include "csa_sada.hpp" // for std initialization of cst_sada #include "cst_iterators.hpp" -#include "cst_sct3.hpp" // this CST is used in the construction #include "util.hpp" #include #include @@ -152,18 +152,76 @@ class cst_sada cst_sada(cache_config& config) { { auto event = memory_monitor::event("bps-dfs"); - cst_sct3<> temp_cst(config, true); - m_bp.resize(4*(temp_cst.bp.size()/2)); - util::set_to_value(m_bp, 0); - size_type idx=0; - for (cst_sct3<>::const_iterator it=temp_cst.begin(), end=temp_cst.end(); it!=end; ++it) { - if (1 == it.visit()) - m_bp[idx] = 1; - if (temp_cst.is_leaf(*it) and temp_cst.root()!= *it) - ++idx; - ++idx; - } - m_bp.resize(idx); + int_vector_buffer<> lcp(cache_file_name(conf::KEY_LCP, config)); + + const bool o_par = true; + const bool c_par = !o_par; + + //trim bps to maximal size of tree + m_bp.resize( 4 * lcp.size() ); + + if (lcp.size() > 0) { + //run from back to front of lcp, enumerate intervals and count + // opening parentheses per position i + sorted_stack_support stack( lcp.size()+1 ); + stack.push( 0 ); //for lcp[n+1] + size_type p = m_bp.size() - 1; + for (size_type i = lcp.size() - 1; i > 
0; --i) { + //compute number of opening parentheses at position i + size_type co = 1; //for singleton interval + size_type x = lcp[i]+1; //to indicate start and end of lcp-array + while (stack.top() > x) { + stack.pop(); ++co; + } + if (stack.top() < x) { + stack.push(x); + } + //encode number of opening parenthesis at i as unary number + m_bp[p--] = o_par; + while (--co > 0) m_bp[p--] = c_par; + } + //handle last value lcp[0] separate, since it virtually is a -1, but in real is a 0 + m_bp[p--] = o_par; //code last number of opening parenthesis + while (stack.size() > 1) { //remove all elements except the zero from stack for next run + stack.pop(); + m_bp[p--] = c_par; //move k to first bit before unary number + } + + + //run from front to back of lcp, enumerate intervals, + //write opening parentheses and leave out closing parentheses + size_type q = 0; + for (size_type i = 1; i < lcp.size(); ++i) { + //compute number of opening parentheses at position i-1 using + //the unary coding from the last step + size_type co = 0; + do { + ++co; + } while (m_bp[++p] == c_par); + + //compute number of closing parentheses at position i-1 + size_type cc = 1; //for singleton interval + size_type x = lcp[i]+1; + while (stack.top() > x) { + stack.pop(); ++cc; + } + if (stack.top() < x) { + stack.push(x); + } + //write sequence for position i-1 + while (co-- > 0) m_bp[q++] = o_par; + while (cc-- > 0) m_bp[q++] = c_par; + } + //handle last value lcp[n+1] separate + m_bp[q++] = o_par; + while (!stack.empty()) { + m_bp[q++] = c_par; + stack.pop(); + } + + //trim bps to correct size and stop + m_bp.resize(q); + } } { auto event = memory_monitor::event("bpss-dfs"); diff --git a/include/sdsl/int_vector.hpp b/include/sdsl/int_vector.hpp index cfabd93af..ff343e7d6 100644 --- a/include/sdsl/int_vector.hpp +++ b/include/sdsl/int_vector.hpp @@ -101,6 +101,7 @@ namespace coder class fibonacci; class elias_delta; class elias_gamma; +template class comma; } template @@ -272,6 +273,7 @@ class 
int_vector friend class coder::elias_delta; friend class coder::elias_gamma; friend class coder::fibonacci; + template friend class coder::comma; friend class memory_manager; friend void util::set_random_bits(int_vector& v, int); diff --git a/test/CoderTest.cpp b/test/CoderTest.cpp index 1365782cc..582251bf3 100644 --- a/test/CoderTest.cpp +++ b/test/CoderTest.cpp @@ -38,9 +38,13 @@ class CoderTest : public ::testing::Test using testing::Types; typedef Types< -coder::elias_delta, + coder::elias_delta, coder::elias_gamma, - coder::fibonacci + coder::fibonacci, + coder::comma<>, + coder::comma<4>, + coder::comma<8>, + coder::comma<16> > Implementations;