Skip to content

Commit

Permalink
Update to add variable length encoding for bit array
Browse files Browse the repository at this point in the history
  • Loading branch information
ocsmit committed Jul 31, 2023
1 parent 7806827 commit aa9fa76
Show file tree
Hide file tree
Showing 10 changed files with 333 additions and 200 deletions.
24 changes: 24 additions & 0 deletions src/bitarr.c
Original file line number Diff line number Diff line change
Expand Up @@ -31,3 +31,27 @@ BitArray* BitArray_init(unsigned int A[], uint32_t n, uint8_t element_size, size
for (i = 0; i < n; ++i) BitArray_write(bit_arr, i, A[i]);
return bit_arr;
}


/*
 * Return element i of the packed array.
 *
 * Element i occupies the inclusive bit range
 * [i * element_size, (i + 1) * element_size - 1] of bit_arr->v.
 * An index that is not below bit_arr->n is reported on stderr and the
 * process exits with status OUT_OF_BOUNDS.
 */
unsigned int BitArray_read(BitArray* bit_arr, unsigned int i)
{
    /* In range: decode the element straight from its bit window */
    if (i < bit_arr->n) {
        return bit_read_range(bit_arr->v,
                              bit_arr->width,
                              i*bit_arr->element_size,
                              (i+1)*bit_arr->element_size-1);
    }

    /* Out of range: report and terminate */
    fprintf(stderr, "%s:%d Out of bounds index\n", __FILE__, __LINE__);
    exit(OUT_OF_BOUNDS);
}

// -- Writing -----------------------------------------------------------------
/*
 * Store x as element i of the packed array.
 *
 * The value is written into the inclusive bit range
 * [i * element_size, (i + 1) * element_size - 1] of bit_arr->v.
 * An index that is not below bit_arr->n is reported on stderr and the
 * process exits with status OUT_OF_BOUNDS.
 */
void BitArray_write(BitArray* bit_arr, unsigned int i, unsigned int x)
{
    /* In range: overwrite the element's bit window and finish */
    if (i < bit_arr->n) {
        bit_write_range(bit_arr->v,
                        bit_arr->width,
                        i*bit_arr->element_size,
                        (i+1)*bit_arr->element_size-1,
                        x);
        return;
    }

    /* Out of range: report and terminate */
    fprintf(stderr, "%s:%d Out of bounds index\n", __FILE__, __LINE__);
    exit(OUT_OF_BOUNDS);
}


33 changes: 26 additions & 7 deletions src/bitarr.h
Original file line number Diff line number Diff line change
Expand Up @@ -64,14 +64,10 @@
#include <stdlib.h>
#include <stdio.h>
#include <limits.h>
#include "bitops.h"
#include "common.h"


/**
* Status codes for the bit array routines.
*
* OUT_OF_BOUNDS is the process exit status used by the read/write functions
* when they are given an index past the end of the array.
*/
typedef enum {
BITARR_SUCCESS, // First enumerator, so its value is 0
OUT_OF_BOUNDS, // Indexing error
FILE_ERROR // I/O Error
} BITARR_ERROR;


/**
* @struct BitArray
Expand Down Expand Up @@ -120,8 +116,31 @@ void BitArray_free(BitArray *bitarr);
* @param l Maximum number of bits for each element in A
* @return pointer to BitArray
*/
BitArray* BitArray_init(unsigned int A[], uint32_t length, uint8_t element_size,
BitArray* BitArray_init(unsigned int A[], uint32_t length, uint8_t element_size,
size_t word_size);



/**
* @brief Get value from original array at index i
*
* The array held within a BitArray is a compact version of the original.
* We can retrieve this original value by reading the bits from the range
* [i * l, (i+1)*l-1] in the compact array, where l is the element size in
* bits (bit_arr->element_size).
*
* If i is not less than bit_arr->n, "Out of bounds index" is printed to
* stderr and the process exits with status OUT_OF_BOUNDS.
*
* @param bit_arr Pointer to the BitArray to read from
* @param i Index of the element to read
* @return Value at A[i]
*/
unsigned int BitArray_read(BitArray* bit_arr, unsigned int i);

/**
* @brief Write value to compact bit representation of array
*
* The value is written into the bit range [i * l, (i+1)*l-1] of the compact
* array, where l is the element size in bits (bit_arr->element_size).
*
* If i is not less than bit_arr->n, "Out of bounds index" is printed to
* stderr and the process exits with status OUT_OF_BOUNDS.
*
* @param bit_arr Pointer to BitArray
* @param i Index in array to write
* @param x Integer to write
*/
void BitArray_write(BitArray* bit_arr, unsigned int i, unsigned int x);

#endif // BITARR_H_
102 changes: 102 additions & 0 deletions src/bitarr_vl.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
#include "bitarr_vl.h"
#include "bitops.h"
#include "encoding.h"

/**
 * @brief Release a VLBitArray and the word buffer it owns
 *
 * Frees the packed word buffer W first and then the struct itself. The
 * offset array P is a flexible array member, so it is released together
 * with the struct. Passing NULL is a no-op, mirroring free().
 *
 * @param bit_arr Array to destroy; may be NULL
 */
void VLBitArray_free(VLBitArray *bit_arr)
{
    /* Nothing to release; avoids dereferencing a null pointer below */
    if (bit_arr == NULL) {
        return;
    }

    free(bit_arr->W);
    free(bit_arr);
}

/* Integer floor(log2(v)); yields 0 when v is 0 or 1. */
static uint32_t vlb_floor_log2(uint32_t v)
{
    uint32_t bits = 0;
    while (v >>= 1) {
        ++bits;
    }
    return bits;
}

/**
 * @brief Build a variable length (gamma coded) bit array from A
 *
 * Every value is stored as the gamma code of A[i] + 1 (gamma coding cannot
 * represent zero). Codes are packed back to back; the starting bit offset of
 * every k-th element is recorded in P so a reader only has to decode at most
 * k codes sequentially.
 *
 * The codes are first written into a zero-initialised heap scratch buffer and
 * then the used prefix is copied into W.
 *
 * NOTE(review): the code word handed to bit_write_range is 32 bits wide, so
 * codes longer than 32 bits (A[i] >= 65535) are truncated, as before.
 * NOTE(review): the end index passed to bit_write_range is start + code
 * length, one more than the inclusive convention BitArray_write uses. It is
 * kept unchanged; confirm against bit_write_range and VLBitArray_read.
 * NOTE(review): the scratch buffer is an array of uint32_t, which presumably
 * means callers pass size == sizeof(uint32_t) -- confirm.
 *
 * @param A      Values to encode; may be NULL only when length is 0
 * @param length Number of elements in A
 * @param k      Sampling interval for the offset array P; must be non-zero
 * @param size   Word size in bytes; must be non-zero
 * @return Newly allocated VLBitArray (release with VLBitArray_free), or NULL
 *         on invalid arguments, allocation failure, or a value equal to the
 *         maximum unsigned int (A[i] + 1 would wrap to 0 and cannot be coded)
 */
VLBitArray *VLBitArray_init(unsigned int A[], size_t length, size_t k, size_t size)
{
    // k is used as a divisor and size as a word width, so zero is invalid;
    // the upper bound keeps size * 8 from overflowing
    if (k == 0 || size == 0 || size > SIZE_MAX / 8 || (A == NULL && length > 0)) {
        return NULL;
    }

    VLBitArray *vlb = NULL;
    uint32_t *scratch = NULL;

    // bytes -> bits
    size_t size_bits = size * 8;

    // Find length of P
    int p_len = ceil_int(length, k);
    // Allocate the struct together with its flexible offset array P
    vlb = calloc(1, sizeof(VLBitArray) + sizeof(size_t) * p_len);
    if (vlb == NULL) {
        goto fail;
    }

    // Worst case a code takes 63 bits (two 32-bit words). The slack covers the
    // extra end bit of the last write and the round-up to whole words of
    // `size` bytes performed by the final copy.
    const size_t slack_words = size / sizeof(uint32_t) + 2;
    if (length > (SIZE_MAX / sizeof(uint32_t) - slack_words) / 2) {
        goto fail;
    }
    const size_t scratch_words = 2 * length + slack_words;
    // Zeroed so that bits never written are deterministic
    scratch = calloc(scratch_words, sizeof *scratch);
    if (scratch == NULL) {
        goto fail;
    }

    size_t current_p_pos = 0;
    for (size_t i = 0, j = 0; i < length; ++i) {
        // Encode value as A[i] + 1 (gamma encoding can't be zero)
        uint32_t value = (uint32_t) A[i] + 1u;
        if (value == 0) {
            // A[i] + 1 wrapped around: not representable
            goto fail;
        }

        uint32_t g_length = vlb_floor_log2(value);
        uint32_t g_offset = value - (UINT32_C(1) << g_length);

        // Gamma code occupies 2 * g_length + 1 bits
        size_t p_increment = (size_t) g_length * 2 + 1;

        // Code layout is g_offset followed by a one and g_length zeros.
        // Computed in 64 bits so the shifts stay defined for g_length == 31,
        // then truncated to the 32-bit code word.
        uint32_t code = (uint32_t) (((uint64_t) g_offset << (g_length + 1))
                                    | (UINT64_C(1) << g_length));

        // Write gamma code of A[i] to the scratch buffer
        bit_write_range(
            scratch,
            size_bits,
            current_p_pos,
            current_p_pos + p_increment,
            code
        );

        // Record the starting bit of every k-th element
        if (i % k == 0) {
            vlb->P[j++] = current_p_pos;
        }
        current_p_pos += p_increment;
    }

    // Number of words of `size` bytes needed to hold all written bits
    size_t max_idx = ceil_int(current_p_pos, size_bits);
    size_t copy_bytes = size * max_idx;
    if (copy_bytes > scratch_words * sizeof *scratch) {
        // Defensive: never read past the scratch buffer
        goto fail;
    }

    // Allocate W and copy over only the needed words; always request at least
    // one byte so W is non-NULL on success
    vlb->W = malloc(copy_bytes > 0 ? copy_bytes : 1);
    if (vlb->W == NULL) {
        goto fail;
    }
    memcpy(vlb->W, scratch, copy_bytes);
    free(scratch);

    // Set struct members
    vlb->k = k;
    vlb->length = length;
    vlb->logical_size = current_p_pos;
    vlb->physical_size = max_idx;
    vlb->element_size = size_bits;

    return vlb;

fail:
    free(scratch);
    free(vlb);
    return NULL;
}


/*
 * Decode element i of a variable length bit array.
 *
 * Decoding starts at the sampled bit offset of the block that contains i
 * and walks forward one code at a time until element i has been read. The
 * value returned is gamma_decode() of that code minus one, undoing the +1
 * applied when the array was built.
 *
 * An index that is not below bit_arr->length is reported on stderr and the
 * process exits with status OUT_OF_BOUNDS.
 */
uint32_t VLBitArray_read(VLBitArray* bit_arr, size_t i)
{
    if (i >= bit_arr->length) {
        fprintf(stderr, "%s:%d Out of bounds index\n", __FILE__, __LINE__);
        exit(OUT_OF_BOUNDS);
    }

    /* Sampled block holding element i, and how far into it i lies */
    const size_t block = ceil_int(i+1, bit_arr->k)-1;
    const size_t steps = i - block * bit_arr->k;

    size_t bit_pos = bit_arr->P[block];
    uint32_t code = 0;

    for (size_t done = 0; done <= steps; ++done) {
        /* Read window starting at bit_pos, clamped to the written bits */
        const size_t window_end = bit_pos + bit_arr->element_size - 1;
        const size_t last_bit =
            (window_end > bit_arr->logical_size ? bit_arr->logical_size
                                                : window_end) - 1;

        code = bit_read_range(bit_arr->W, bit_arr->element_size, bit_pos, last_bit);

        /* Advance past this code: 2 * find_LSB(code) + 1 bits */
        bit_pos += 1 + ((find_LSB(code)) * 2);
    }

    return (gamma_decode(code) - 1);
}

33 changes: 33 additions & 0 deletions src/bitarr_vl.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
#ifndef BITARR_VL_
#define BITARR_VL_

#include <stddef.h>
#include <stdint.h>
#include <string.h>
#include <stdio.h>
#include <math.h>

#include "common.h"
#include "bitops.h"

/**
* @struct VLBitArray
* @brief Variable length bit array with sampled bit offsets
*
* The codes of all elements are packed back to back in W. P stores the
* starting bit offset of every k-th element so that a read only has to
* decode forward from the nearest sampled offset.
*/
typedef struct {
size_t k; // Sampling interval: P holds an offset for every k-th element
size_t length; // Length of A (number of encoded elements)
size_t logical_size; // Length of B: total number of bits written for all codes
size_t physical_size; // Length of W, in words
size_t element_size; // Size of each word in W, in bits
uint32_t *W; // Separately allocated buffer holding the packed codes
size_t P[]; // Flexible array member: starting bit offset of elements 0, k, 2k, ...
} VLBitArray;


/**
* @brief Free a VLBitArray
*
* Releases the word buffer W and then the struct itself.
*
* @param bit_arr Pointer to the VLBitArray to free
*/
void VLBitArray_free(VLBitArray *bit_arr);

/**
* @brief Build a VLBitArray from the values in A
*
* Each value A[i] is stored as the gamma code of A[i] + 1, and the starting
* bit offset of every k-th element is recorded for use by VLBitArray_read.
*
* @param A Values to encode
* @param length Number of elements in A
* @param k Sampling interval for the stored bit offsets
* @param size Word size in bytes (converted to bits internally)
* @return Pointer to a newly allocated VLBitArray; release with VLBitArray_free
*/
VLBitArray *VLBitArray_init(
unsigned int A[], size_t length, size_t k, size_t size
);


/**
* @brief Get value from original array at index i
*
* Starts at the sampled bit offset of the block containing i and decodes
* codes one at a time until element i is reached.
*
* If i is not less than bit_arr->length, "Out of bounds index" is printed to
* stderr and the process exits with status OUT_OF_BOUNDS.
*
* @param bit_arr Pointer to the VLBitArray to read from
* @param i Index of the element to read
* @return Value at A[i]
*/
uint32_t VLBitArray_read(VLBitArray* bit_arr, size_t i);

#endif // !BITARR_VL_
Loading

0 comments on commit aa9fa76

Please sign in to comment.