From aa9fa768dc099525a6c9677a91ba37acd78fda95 Mon Sep 17 00:00:00 2001 From: ocsmit Date: Mon, 31 Jul 2023 16:51:45 -0400 Subject: [PATCH] Update to add variable length encoding for bit array --- src/bitarr.c | 24 ++++++++ src/bitarr.h | 33 +++++++--- src/bitarr_vl.c | 102 +++++++++++++++++++++++++++++++ src/bitarr_vl.h | 33 ++++++++++ src/bitops.c | 156 +++++++++++++++++++++++++----------------------- src/bitops.h | 98 ++++-------------------------- src/common.h | 12 ++++ src/encoding.c | 23 +------ src/encoding.h | 1 + tests/tests.c | 51 ++++++++++++---- 10 files changed, 333 insertions(+), 200 deletions(-) create mode 100644 src/bitarr_vl.c create mode 100644 src/bitarr_vl.h create mode 100644 src/common.h diff --git a/src/bitarr.c b/src/bitarr.c index 0422516..660cb5e 100644 --- a/src/bitarr.c +++ b/src/bitarr.c @@ -31,3 +31,27 @@ BitArray* BitArray_init(unsigned int A[], uint32_t n, uint8_t element_size, size for (i = 0; i < n; ++i) BitArray_write(bit_arr, i, A[i]); return bit_arr; } + + +unsigned int BitArray_read(BitArray* bit_arr, unsigned int i) +{ + if (i >= bit_arr->n) { + fprintf(stderr, "%s:%d Out of bounds index\n", __FILE__, __LINE__); + exit(OUT_OF_BOUNDS); + } + return bit_read_range(bit_arr->v, bit_arr->width, i*bit_arr->element_size, + (i+1)*bit_arr->element_size-1); +} + +// -- Writing ----------------------------------------------------------------- +void BitArray_write(BitArray* bit_arr, unsigned int i, unsigned int x) +{ + if (i >= bit_arr->n) { + fprintf(stderr, "%s:%d Out of bounds index\n", __FILE__, __LINE__); + exit(OUT_OF_BOUNDS); + } + bit_write_range(bit_arr->v, bit_arr->width, i*bit_arr->element_size, + (i+1)*bit_arr->element_size-1, x); +} + + diff --git a/src/bitarr.h b/src/bitarr.h index af7f688..7cd6847 100644 --- a/src/bitarr.h +++ b/src/bitarr.h @@ -64,14 +64,10 @@ #include #include #include +#include "bitops.h" +#include "common.h" -typedef enum { - BITARR_SUCCESS, - OUT_OF_BOUNDS, // Indexing error - FILE_ERROR // I/O Error -} BITARR_ERROR; - /** * @struct BitArray @@ -120,8 +116,31 @@ void BitArray_free(BitArray *bitarr); * @param l Maximum number of bits for each element in A * @return pointer to BitArray */ -BitArray* BitArray_init(unsigned int A[], uint32_t length, uint8_t element_size, +BitArray* BitArray_init(unsigned int A[], uint32_t length, uint8_t element_size, size_t word_size); + +/** + * @brief Get value from original array at index i + * + * The array held within a BitArray is a compact version of the original. + * We can retrieve this original value by reading the bits from the range + * [i * l, (i+1)*l-1] in the compact array. + * + * @param bit_arr + * @param i + * @return Value at A[i] + */ +unsigned int BitArray_read(BitArray* bit_arr, unsigned int i); + +/** + * @brief Write value to compact bit representation of array + * + * @param bit_arr Pointer to BitArray + * @param i Index in array to write + * @param x Integer to write + */ +void BitArray_write(BitArray* bit_arr, unsigned int i, unsigned int x); + #endif // BITARR_H_ diff --git a/src/bitarr_vl.c b/src/bitarr_vl.c new file mode 100644 index 0000000..4bc58f0 --- /dev/null +++ b/src/bitarr_vl.c @@ -0,0 +1,102 @@ +#include "bitarr_vl.h" +#include "bitops.h" +#include "encoding.h" + +void VLBitArray_free(VLBitArray *bit_arr) +{ + free(bit_arr->W); + free(bit_arr); +} + +VLBitArray *VLBitArray_init(unsigned int A[], size_t length, size_t k, size_t size) +{ + + // bytes -> bits + size_t size_bits = size * 8; + + // Find length of P + int p_len = ceil_int(length, k); + // Allocate struct and pointer vla + VLBitArray *vlb = calloc(1, sizeof(VLBitArray) + sizeof(size_t) * p_len); + + size_t current_p_pos = 0; + // Create array of size the length of A + uint32_t gamma_A = 0, + g_length = 0, + g_offset = 0, + A_copy[length]; // Empty array of same size + + for (size_t i = 0, j = 0; i < length; ++i) { + // Encode value as A[i] + 1 (gamma encoding can't be zero) + // code will be (g_offset << (g_length+1) | ((1 << g_length))) + g_length = (uint32_t) log2(A[i] + 1); + g_offset = (A[i] + 1) - (1 << g_length); + + // Gamma code is of size length * 2 + size_t p_increment = g_length * 2 + 1; + + // Write gamma code of A[i] to array + bit_write_range( + A_copy, + size_bits, + current_p_pos, + current_p_pos + p_increment, + // Encode as g_offset.g_length + (g_offset << (g_length+1) | ((1 << g_length))) + ); + + // Assign current bit idx to pointer array + if (i % k == 0) vlb->P[j++] = current_p_pos; + current_p_pos += p_increment; + } + + // Maximum number of elements of word size we need to fit total number of bits + size_t max_idx = ceil_int(current_p_pos, size_bits); + // Allocate array, and copy over only the needed bits from A_copy + vlb->W = malloc((size) * max_idx); + memcpy(vlb->W, A_copy, (size) * max_idx); + + // Set struct members + vlb->k = k; + vlb->length = length; + vlb->logical_size = current_p_pos; + vlb->physical_size = max_idx; + vlb->element_size = size_bits; + + return vlb; +} + + +uint32_t VLBitArray_read(VLBitArray* bit_arr, size_t i) +{ + if (i >= bit_arr->length) { + fprintf(stderr, "%s:%d Out of bounds index\n", __FILE__, __LINE__); + exit(OUT_OF_BOUNDS); + } + + + size_t curr_idx = bit_arr->P[ceil_int(i+1, bit_arr->k)-1], + idx_diff = i - (ceil_int(i+1, bit_arr->k)-1) * bit_arr->k, + max_idx = 0; + + uint32_t g_length = 0, + chunk = 0; + + int ii = 0; + while (ii <= idx_diff) { + max_idx = curr_idx + bit_arr->element_size - 1; + chunk = bit_read_range( + bit_arr->W, + bit_arr->element_size, + curr_idx, + max_idx + + ((max_idx > bit_arr->logical_size) * (bit_arr->logical_size - max_idx)) - 1 + ); + + curr_idx += 1 + ((find_LSB(chunk)) * 2); + ii++; + } + + return (gamma_decode(chunk) - 1); +} + diff --git a/src/bitarr_vl.h b/src/bitarr_vl.h new file mode 100644 index 0000000..719774c --- /dev/null +++ b/src/bitarr_vl.h @@ -0,0 +1,33 @@ +#ifndef BITARR_VL_ +#define BITARR_VL_ + +#include +#include +#include +#include +#include + +#include "common.h" +#include "bitops.h" + +typedef struct { + size_t k; + size_t length; // Length of A + size_t logical_size; // Length of B + size_t physical_size; // Length of W + size_t element_size; // Size of each word in W + uint32_t *W; + size_t P[]; +} VLBitArray; + + +void VLBitArray_free(VLBitArray *bit_arr); + +VLBitArray *VLBitArray_init( + unsigned int A[], size_t length, size_t k, size_t size +); + + +uint32_t VLBitArray_read(VLBitArray* bit_arr, size_t i); + +#endif // !BITARR_VL_ diff --git a/src/bitops.c b/src/bitops.c index 61e9611..2408ab0 100644 --- a/src/bitops.c +++ b/src/bitops.c @@ -3,118 +3,124 @@ * @brief Operations for BitArray */ +#include "bitops.h" +#include -#include "bitarr.h" -extern inline unsigned int sig_bit_idx(unsigned int j, unsigned int word_size) +uint32_t find_LSB(uint32_t v) { - return ((j) % word_size) + 1; + // modified from: + // http://graphics.stanford.edu/~seander/bithacks.html#ZerosOnRightMultLookup + static const uint32_t Mod37BitPosition[] = { + 32, 0, 1, 26, 2, 23, 27, 0, 3, 16, 24, 30, 28, 11, 0, 13, 4, + 7, 17, 0, 25, 22, 31, 15, 29, 10, 12, 6, 0, 21, 14, 9, 5, + 20, 8, 19, 18 + }; + + // map a bit value mod 37 to its position + return Mod37BitPosition[(-v & v) % 37]; } -// -- Single bit ops ---------------------------------------------------------- -unsigned int BitArray_bitread(BitArray* bit_arr, unsigned int j) { - return (bit_arr->v[j/bit_arr->width] >> (j % bit_arr->width)) & 1; -} - - -void BitArray_bitset(BitArray* bit_arr, unsigned int j) +uint32_t find_MSB(uint32_t v) { - // Shift word left to bit idx, OR w/ 1 - bit_arr->v[j/bit_arr->width] |= 1 << (j % bit_arr->width); -} -void BitArray_bitclear(BitArray* bit_arr, unsigned int j) -{ - // Shift word left to bit idx, AND w/ NOT(1) - bit_arr->v[j/bit_arr->width] &= ~(1 << (j % bit_arr->width)); -} + static const uint32_t MultiplyDeBruijnBitPosition[32] = + { + 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, + 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 + }; + v |= v >> 1; // first round down to one less than a power of 2 + v |= v >> 2; + v |= v >> 4; + v |= v >> 8; + v |= v >> 16; -// -- Reading ----------------------------------------------------------------- + return MultiplyDeBruijnBitPosition[(uint32_t)(v * 0x07C4ACDDU) >> 27]; +} -unsigned int BitArray_bitsread( - BitArray* bit_arr, unsigned int j1, unsigned int j -) -{ - if (j1 > j) return 0; // Early return if start idx > end idx - // Confined w/in single word - if (j1 / bit_arr->width == j / bit_arr->width) { - return ( - // Shift word right - (bit_arr->v[j/bit_arr->width] >> (j1 % bit_arr->width)) & - // AND on bit vector of 1s the necessary length to extract only needed - // bits - ((1 << (j-j1+1)) - 1) - ); - } - - // Spans two words - return ( - // Get bits in the first word - (bit_arr->v[j1/bit_arr->width] >> (j1 % bit_arr->width)) | - // Bits in second word - (bit_arr->v[j/bit_arr->width] & ((1 << ((j+1) % bit_arr->width)) - 1)) << - // Shift bits from second word n bits from first word left to make - // room for concatenation - (bit_arr->width - (j1 % bit_arr->width)) - ); +// -- Writing ----------------------------------------------------------------- +void bit_set(uint32_t *bit_arr, size_t size, size_t j) +{ + // Shift word left to bit idx, OR w/ 1 + bit_arr[j/size] |= 1 << (j % size); } -unsigned int BitArray_read(BitArray* bit_arr, unsigned int i) +void bit_clear(uint32_t* bit_arr, size_t size, size_t j) { - if (i >= bit_arr->n) { - fprintf(stderr, "%s:%d Out of bounds index\n", __FILE__, __LINE__); - exit(OUT_OF_BOUNDS); - } - return BitArray_bitsread(bit_arr, i*bit_arr->element_size, - (i+1)*bit_arr->element_size-1); + // Shift word left to bit idx, AND w/ NOT(1) + bit_arr[j/size] &= ~(1 << (j % size)); } -// -- Writing ----------------------------------------------------------------- -void BitArray_bitswrite( - BitArray* bit_arr, unsigned int j1, unsigned int j, unsigned int x +void bit_write_range( + uint32_t *bit_arr, size_t w, unsigned int j1, unsigned int j, unsigned int x ) { if (j1 > j) return; // Early return if start idx > end idx - unsigned int w = bit_arr->width; // Confined w/in single word - if (j1 / bit_arr->width == j / bit_arr->width) { + if (j1 / w == j / w) { // Clear bits - bit_arr->v[j/bit_arr->width] &= ( - ~((unsigned) ((1 << (j-j1+1)) - 1) << (j1 % bit_arr->width)) + bit_arr[j/w] &= ( + ~((unsigned) ((1 << (j-j1+1)) - 1) << (j1 % w)) ); // Write x bits - bit_arr->v[j/bit_arr->width] |= x << (j1 % bit_arr->width); - } else { + bit_arr[j/w] |= x << (j1 % w); + } else { // Spans two words - bit_arr->v[j1/bit_arr->width] = ( + bit_arr[j1/w] = ( // Get bits in first word to store lower bits - (bit_arr->v[j1/bit_arr->width] & ((1 << (j1 % bit_arr->width)) - 1)) | - // Write bits - (x << (j1 % bit_arr->width)) + (bit_arr[j1/w] & ((1 << (j1 % w)) - 1)) | + // Write bits + (x << (j1 % w)) ); - // - bit_arr->v[j/bit_arr->width] = ( + + bit_arr[j/w] = ( // Get bits in second word to store lower bits - (bit_arr->v[j/bit_arr->width] & ~((1 << ((j+1) % w)) - 1)) | - // Write bits + (bit_arr[j/w] & ~((1 << ((j+1) % w)) - 1)) | + // Write bits (x >> (w - (j1 % w))) ); } } -void BitArray_write(BitArray* bit_arr, unsigned int i, unsigned int x) +// -- Reading ----------------------------------------------------------------- +unsigned bit_read(uint32_t *bit_arr, size_t size, size_t j) { - if (i >= bit_arr->n) { - fprintf(stderr, "%s:%d Out of bounds index\n", __FILE__, __LINE__); - exit(OUT_OF_BOUNDS); + return (bit_arr[j/size] >> (j % size)) & 1; +} + +unsigned int bit_read_range( + uint32_t *bit_arr, size_t width, unsigned int j1, unsigned int j +) +{ + if (j1 > j) return 0; // Early return if start idx > end idx + + // Confined w/in single word + if (j1 / width == j / width) { + return ( + // Shift word right + (bit_arr[j/width] >> (j1 % width)) & + // AND on bit vector of 1s the necessary length to extract only needed + // bits + ((1 << (j-j1+1)) - 1) + ); } - BitArray_bitswrite(bit_arr, i*bit_arr->element_size, - (i+1)*bit_arr->element_size-1, x); + + // Spans two words + return ( + // Get bits in the first word + (bit_arr[j1/width] >> (j1 % width)) | + // Bits in second word + (bit_arr[j/width] & ((1 << ((j+1) % width)) - 1)) << + // Shift bits from second word n bits from first word left to make + // room for concatenation + (width - (j1 % width)) + ); } + diff --git a/src/bitops.h b/src/bitops.h index 4579042..f1d07a6 100644 --- a/src/bitops.h +++ b/src/bitops.h @@ -2,95 +2,23 @@ #define BITOPS_H_ #include -#include "bitarr.h" +#include +#include "common.h" -/** - * @brief Find significant bit of index j in compressed entry - * - * @param j - * @param word_size - * @return bit index - */ -extern inline unsigned int sig_bit_idx(unsigned int j, unsigned int word_size); +uint32_t find_LSB(uint32_t v); +uint32_t find_MSB(uint32_t v); -/** - * @brief Read single bit at index `j` - * - * @param bit_arr Pointer to BitArray - * @param j Index - * @return bit value - */ -unsigned int BitArray_bitread(BitArray* bit_arr, unsigned int j); +void bit_set(uint32_t *bit_arr, size_t size, size_t j); +void bit_clear(uint32_t *bit_arr, size_t size, size_t j); +void bit_write_range( + uint32_t *bit_arr, size_t w, unsigned int j1, unsigned int j, unsigned int x +); -/** - * @brief Set bit at index `j` - * - * Sets the the bit = 1 at index `j`. If bit is already equal to 1 then nothing - * happens. - * - * @param bit_arr Pointer to BitArray - * @param j Index - */ -void BitArray_bitset(BitArray* bit_arr, unsigned int j); +unsigned bit_read(uint32_t *bit_arr, size_t size, size_t j); -/** - * @brief Clears bit at index `j` - * - * Sets the the bit = 0 at index `j`. If bit is already equal to 0 then nothing - * happens. - * - * @param bit_arr Pointer to BitArray - * @param j Index - */ -void BitArray_bitclear(BitArray* bit_arr, unsigned int j); - -/** - * @brief Reads range of bits B[j1, j] - * - * @param bit_arr - * @param j1 - * @param j - * @return Integer constructed from bits [j1, j] - */ -unsigned int BitArray_bitsread(BitArray* bit_arr, unsigned int j1, unsigned int j); - - -/** - * @brief Get value from original array at index i - * - * The array held within a BitArray is a compact version of the original. - * We can retrieve this original value by reading the bits from the range - * [i * l, (i+1)*l-1] in the compact array. - * - * @param bit_arr - * @param i - * @return Value at A[i] - */ -unsigned int BitArray_read(BitArray* bit_arr, unsigned int i); - - -/** - * @brief Write to range of bits - * - * Abstract function, most will want to use BitArray_write instead - * - * @param bit_arr Pointer to BitArray - * @param j1 Starting index for virtual bit array - * @param j Ending index for virtual bit array - * @param x Integer to write - */ -void BitArray_bitswrite(BitArray* bit_arr, unsigned int j1, unsigned int j, unsigned int x); - - - -/** - * @brief Write value to compact bit representation of array - * - * @param bit_arr Pointer to BitArray - * @param i Index in array to write - * @param x Integer to write - */ -void BitArray_write(BitArray* bit_arr, unsigned int i, unsigned int x); +unsigned int bit_read_range( + uint32_t *bit_arr, size_t width, unsigned int j1, unsigned int j +); #endif // !BITOPS_H_ diff --git a/src/common.h b/src/common.h new file mode 100644 index 0000000..7284e16 --- /dev/null +++ b/src/common.h @@ -0,0 +1,12 @@ +#ifndef COMMON_H_ +#define COMMON_H_ + +#define ceil_int(x, y) (1 + (x - 1) / y) + +typedef enum { + BITARR_SUCCESS, + OUT_OF_BOUNDS, // Indexing error + FILE_ERROR // I/O Error +} BITARR_ERROR; + +#endif \ No newline at end of file diff --git a/src/encoding.c b/src/encoding.c index 2a1fe56..e62f0b6 100644 --- a/src/encoding.c +++ b/src/encoding.c @@ -5,25 +5,6 @@ #include "encoding.h" -#include -#include - - -uint32_t count_trailing_zeros(unsigned int v) -{ - // modified from: - // http://graphics.stanford.edu/~seander/bithacks.html#ZerosOnRightMultLookup - static const uint32_t Mod37BitPosition[] = { - 32, 0, 1, 26, 2, 23, 27, 0, 3, 16, 24, 30, 28, 11, 0, 13, 4, - 7, 17, 0, 25, 22, 31, 15, 29, 10, 12, 6, 0, 21, 14, 9, 5, - 20, 8, 19, 18 - }; - - // map a bit value mod 37 to its position - return Mod37BitPosition[(-v & v) % 37]; -} - - unsigned int unary_encode(uint32_t k) { // code 1 . 0 k times (e.g. 3 := 1 . 000 @@ -39,7 +20,7 @@ unsigned int gamma_encode(uint32_t k) length = (uint32_t) log2(k); offset = k - (1 << length); - // Unary coded offset . length + // Unary coded offset . length // e.g. 13 (l = 3, o = 5) := 101.1000 return (offset << (length+1) | ((1 << length))); } @@ -49,7 +30,7 @@ unsigned int gamma_decode(unsigned int k) uint32_t l = 0; uint32_t o = 0; - l = count_trailing_zeros(k); + l = find_LSB(k); // idx of LSB set to 1 k >>= l; // Mask for l bits diff --git a/src/encoding.h b/src/encoding.h index fecda75..2c6cea4 100644 --- a/src/encoding.h +++ b/src/encoding.h @@ -3,6 +3,7 @@ #include #include +#include "bitops.h" diff --git a/tests/tests.c b/tests/tests.c index b5e8c7d..5d0a8f0 100644 --- a/tests/tests.c +++ b/tests/tests.c @@ -6,6 +6,7 @@ #include "../src/bitops.h" #include "../src/bitarr_io.h" #include "../src/encoding.h" +#include "../src/bitarr_vl.h" @@ -15,9 +16,10 @@ BEGIN_TESTING // -- Data -------------------------------------------------------------------- unsigned int A[10] = { 20, 18, 22, 22, 16, 21, 11, 22, 21, 21 }; +unsigned int A_vl[10] = { 0, 1, 0, 2, 5, 1, 3, 2, 8, 2 }; /* -* Both of binary representations of b have been flipped since when reading +* Both of binary representations of b have been flipped since when reading * individual bits the most from array A, the least significant bit will be read * first from each int. * @@ -61,7 +63,7 @@ TEST("single bit read") { unsigned int b; for (unsigned int i = 0; i < 64; ++i) { - b = BitArray_bitread(bit_arr, i); + b = bit_read(bit_arr->v, bit_arr->width, i); assert(b == B_sig_ordered[i]); } printf("✔ bit read passed\n"); @@ -71,26 +73,26 @@ TEST("bit set & clear") { unsigned int og_bit, nu_bit, idx; idx = 2; - og_bit = BitArray_bitread(bit_arr, 2); - + og_bit = bit_read(bit_arr->v, bit_arr->width, idx); + // 1 -> 1 - BitArray_bitset(bit_arr, idx); - nu_bit = BitArray_bitread(bit_arr, idx); + bit_set(bit_arr->v, bit_arr->width, idx); + nu_bit = bit_read(bit_arr->v, bit_arr->width, idx); assert((og_bit & nu_bit) == 1); // 1 -> 0 - BitArray_bitclear(bit_arr, idx); - nu_bit = BitArray_bitread(bit_arr, idx); + bit_clear(bit_arr->v, bit_arr->width, idx); + nu_bit = bit_read(bit_arr->v, bit_arr->width, idx); assert(nu_bit == 0); // 0 -> 0 - BitArray_bitclear(bit_arr, idx); - nu_bit = BitArray_bitread(bit_arr, idx); + bit_clear(bit_arr->v, bit_arr->width, idx); + nu_bit = bit_read(bit_arr->v, bit_arr->width, idx); assert(nu_bit == 0); // 0 -> 1 (back to original) - BitArray_bitset(bit_arr, idx); - nu_bit = BitArray_bitread(bit_arr, idx); + bit_set(bit_arr->v, bit_arr->width, idx); + nu_bit = bit_read(bit_arr->v, bit_arr->width, idx); assert((og_bit & nu_bit) == 1); printf("✔ bit set/clear passed\n"); } @@ -144,6 +146,31 @@ TEST("Gamma encoding") printf("✔ Gamma coding\n"); } +TEST("VL BitArray") +{ + + + uint32_t correct_W_vla[2] = { 415519957, 3 }; + VLBitArray *vlb = VLBitArray_init(A_vl, 10, 4, sizeof(uint32_t)); + uint32_t AA[31] = {1, 100, 200, 11, 1, 50, 1000}; + AA[20] = 20; + AA[30] = 10000; + VLBitArray *vlb1 = VLBitArray_init(AA, 31, 30, sizeof(uint32_t)); + + for (size_t i = 0; i < 2; i++) { + assert(correct_W_vla[i] == vlb->W[i]); + } + for (size_t i = 0; i < 10; ++i) assert(A_vl[i] == VLBitArray_read(vlb, i)); + + VLBitArray_free(vlb); + + for (size_t i = 0; i < 31; ++i) { + assert(AA[i] == VLBitArray_read(vlb1, i)); + } + + printf("✔ Variable Length BitArray\n"); +} +