LEB128 Encoding/Decoding Library

leb128 is a lightweight, header-only C library for encoding and decoding LEB128 (Little Endian Base 128) values. It supports both signed (LEB128) and unsigned (ULEB128) integers and provides streaming, validation, and error handling.

Usage

No compilation or linking steps are required. Simply copy leb128.h to your project repository and include it as a header.

Example Program

#include "leb128.h"
#include <stdio.h>
#include <inttypes.h>

// Round-trips one value through ULEB128: encodes it, reports the encoded
// length, decodes it back and prints the result.
// Returns 0 on success, 1 if either library call reports an error.
int main(void) {
    // Encode an unsigned 64-bit integer to ULEB128
    uint64_t value = 624485;
    uint8_t buffer[ULEB128_MAX_BYTES];
    size_t bytes_written;

    int result = uleb128_encode(value, buffer, sizeof(buffer), &bytes_written);
    if (result != LEB128_SUCCESS) {
        fprintf(stderr, "Encoding failed (error %d)\n", result);
        return 1;
    }
    printf("Encoded %" PRIu64 " into %zu bytes\n", value, bytes_written);

    // Decode the ULEB128 value back to an unsigned 64-bit integer
    uint64_t decoded;
    size_t bytes_read;
    result = uleb128_decode(buffer, bytes_written, &decoded, &bytes_read);
    if (result != LEB128_SUCCESS) {
        // `decoded` has no initializer, so it must not be printed unless
        // the decode call reported success.
        fprintf(stderr, "Decoding failed (error %d)\n", result);
        return 1;
    }
    printf("Decoded: %" PRIu64 "\n", decoded);

    return 0;
}

Note

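Buffers returned by the automatic-allocation functions (e.g. uleb128_encode_alloc) must be released with free(), so include <stdlib.h> when you use them. These functions depend on the standard C library, which most toolchains link by default; pass -lc explicitly only if your build disables that.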

Testing

Build the test suite (tests/test_all.c) and the additional examples program (examples/leb128_examples.c) by running make.

API Reference

Error Codes

#define LEB128_SUCCESS          0  // Operation successful
#define LEB128_BUFFER_SMALL     1  // Buffer too small for operation
#define LEB128_OVERFLOW         2  // Value overflow during decode
#define LEB128_INVALID_INPUT    3  // Invalid input parameters
#define LEB128_INVALID_ENCODING 4  // Malformed LEB128 data

ULEB128 (Unsigned) Functions

Basic Encoding/Decoding

// Calculate bytes needed for encoding
size_t uleb128_size(uint64_t value);

// Encode value to buffer
int uleb128_encode(uint64_t value, uint8_t *buffer, 
                   size_t buffer_size, size_t *bytes_written);

// Decode value from buffer
int uleb128_decode(const uint8_t *buffer, size_t buffer_size,
                   uint64_t *value, size_t *bytes_read);

Automatic Memory Management

// Encode with automatic allocation (caller must free())
uint8_t* uleb128_encode_alloc(uint64_t value, size_t *bytes_written);

Performance

// Fast encoding for values 0-127 (single byte)
int uleb128_encode_fast_small(uint8_t value, uint8_t *buffer);

// Fast decoding check for single-byte values
int uleb128_decode_fast_small(uint8_t byte, uint8_t *value);

LEB128 (Signed) Functions

// Calculate bytes needed for signed encoding
size_t leb128_size(int64_t value);

// Encode signed value
int leb128_encode(int64_t value, uint8_t *buffer,
                  size_t buffer_size, size_t *bytes_written);

// Decode signed value
int leb128_decode(const uint8_t *buffer, size_t buffer_size,
                  int64_t *value, size_t *bytes_read);

// Encode with automatic allocation
uint8_t* leb128_encode_alloc(int64_t value, size_t *bytes_written);

Multi-Value Operations

// Decode multiple unsigned values
int uleb128_decode_multi(const uint8_t *buffer, size_t buffer_size,
                         uint64_t *values, size_t max_values,
                         size_t *values_decoded);

// Decode multiple signed values  
int leb128_decode_multi(const uint8_t *buffer, size_t buffer_size,
                        int64_t *values, size_t max_values,
                        size_t *values_decoded);

// Calculate total size for multiple values
size_t uleb128_multi_size(const uint64_t *values, size_t count);
size_t leb128_multi_size(const int64_t *values, size_t count);

Streaming Interface

// Stream structure
typedef struct leb128_stream leb128_stream_t;

// Initialize stream
int leb128_stream_init(leb128_stream_t *stream, const uint8_t *data, size_t size);

// Stream status
bool leb128_stream_eof(const leb128_stream_t *stream);
size_t leb128_stream_remaining(const leb128_stream_t *stream);
size_t leb128_stream_position(const leb128_stream_t *stream);
int leb128_stream_error(const leb128_stream_t *stream);

// Stream navigation
int leb128_stream_reset(leb128_stream_t *stream);
int leb128_stream_seek(leb128_stream_t *stream, size_t position);

// Read values from stream
int leb128_stream_read_uleb128(leb128_stream_t *stream, uint64_t *value);
int leb128_stream_read_leb128(leb128_stream_t *stream, int64_t *value);

// Utility functions
int leb128_stream_peek(const leb128_stream_t *stream, uint8_t *byte);
int leb128_stream_skip(leb128_stream_t *stream, bool is_signed);

Validation and Utilities

// Validate encoded data
bool leb128_validate(const uint8_t *buffer, size_t buffer_size, bool is_signed);

// Compare encoded values without decoding
int leb128_compare_encoded(const uint8_t *buf1, size_t size1,
                          const uint8_t *buf2, size_t size2,
                          bool is_signed, int *result);

// Find all LEB128 values in buffer
int leb128_find_all(const uint8_t *buffer, size_t buffer_size,
                   bool is_signed, size_t *positions,
                   size_t max_positions, size_t *found_count);

// Get error message
const char* leb128_strerror(int error_code);

Additional Usage Examples

Basic Encoding/Decoding

#include "leb128.h"
#include <stdio.h>
#include <inttypes.h>

// Unsigned integer example
uint64_t original = 624485;
uint8_t buffer[ULEB128_MAX_BYTES];
size_t written, read;

// Encode
uleb128_encode(original, buffer, sizeof(buffer), &written);

// Decode
uint64_t decoded;
uleb128_decode(buffer, written, &decoded, &read);

printf("Original: %" PRIu64 ", Decoded: %" PRIu64 "\n", original, decoded);

Automatic Memory Management

// Have the library allocate the output buffer; the caller owns the result.
size_t encoded_len;
uint64_t input = 1000000;
uint8_t *heap_buf = uleb128_encode_alloc(input, &encoded_len);

// A null pointer means nothing was allocated, so there is nothing to free.
if (heap_buf != NULL) {
    printf("Encoded into %zu bytes\n", encoded_len);
    // ... use encoded data ...
    free(heap_buf);  // caller-owned buffer: release it when done
}

Streaming Large Data

uint8_t data[] = {0x85, 0x80, 0x80, 0x4F, 0x7F, 0x01};
leb128_stream_t stream;

leb128_stream_init(&stream, data, sizeof(data));

uint64_t value;
while (!leb128_stream_eof(&stream)) {
    if (leb128_stream_read_uleb128(&stream, &value) == LEB128_SUCCESS) {
        printf("Read value: %" PRIu64 " at position %zu\n", 
               value, leb128_stream_position(&stream));
    } else {
        printf("Error: %s\n", leb128_strerror(leb128_stream_error(&stream)));
        break;
    }
}

Processing Multiple Values

uint64_t values[] = {100, 200, 300, 400, 500};
size_t count = sizeof(values) / sizeof(values[0]);

// Calculate total size needed
size_t total_size = uleb128_multi_size(values, count);
uint8_t *buffer = malloc(total_size);

// Encode all values
size_t pos = 0;
for (size_t i = 0; i < count; i++) {
    size_t written;
    uleb128_encode(values[i], buffer + pos, total_size - pos, &written);
    pos += written;
}

// Decode all values back
uint64_t decoded[5];
size_t decoded_count;
uleb128_decode_multi(buffer, total_size, decoded, 5, &decoded_count);

free(buffer);

Error Handling

// Demonstrates the failure path of uleb128_encode and a fallback.
uint64_t value = 12345;   // needs 2 ULEB128 bytes (128 <= 12345 < 16384)
uint8_t small_buffer[1];  // Intentionally too small: one byte cannot hold it
size_t written;

int result = uleb128_encode(value, small_buffer, sizeof(small_buffer), &written);
if (result != LEB128_SUCCESS) {
    printf("Encoding failed: %s\n", leb128_strerror(result));
    
    // Use automatic allocation instead
    uint8_t *proper_buffer = uleb128_encode_alloc(value, &written);
    if (proper_buffer) {
        printf("Successfully encoded with auto-allocation\n");
        free(proper_buffer);
    }
}

Validation

uint8_t potentially_corrupt[] = {0x80, 0x80, 0x80};  // Missing terminator

if (leb128_validate(potentially_corrupt, sizeof(potentially_corrupt), false)) {
    printf("Valid LEB128 data\n");
} else {
    printf("Invalid or corrupt LEB128 data\n");
}

Performance Considerations

Small values (0-127): Use uleb128_encode_fast_small() for single-byte encoding
Large datasets: Use streaming interface to avoid loading everything into memory
Known buffer sizes: Pre-calculate sizes with uleb128_size() / leb128_size()
Memory allocation: Use stack buffers when possible, auto-allocation for convenience

What is LEB128?

LEB128 (Little Endian Base 128) is a variable-length encoding used to store integers in a compact form. It was designed to save space compared to fixed-width integer representations (like 32-bit or 64-bit integers), especially when encoding small numbers. It is widely used in binary formats such as WebAssembly (WASM) and DWARF, and closely related variable-length integer encodings are used by Protocol Buffers and SQLite.

Advantages

Space-efficient for small integers: unsigned values 0-127 take a single byte.
The format itself supports arbitrary-size integers (not limited to 32/64 bits); this library's API works with 64-bit values (uint64_t / int64_t).
Endianness-independent (encoding is byte-oriented).

How it Works

Each integer is split into 7-bit chunks.
Each chunk is stored in a byte:
- The lowest 7 bits of the byte hold data.
- The highest bit (bit 7, or 0x80) is a continuation flag:
  - 1 → another byte follows
  - 0 → this is the last byte
Values are stored in little-endian order: the least-significant 7-bit group comes first
Signed values use two's complement with sign extension

Examples:

0 → 0x00
127 → 0x7F
128 → 0x80 0x01
624485 → 0xE5 0x8E 0x26

Types

1. ULEB128 (unsigned LEB128) encodes unsigned (non-negative) integers.

Example: Decimal 624485 -> binary 10011000011101100101 -> encoded as 0xE5 0x8E 0x26

Breakdown:

0xE5 = 11100101 (0x65 data + continuation flag 1)
0x8E = 10001110 (0x0E data + continuation flag 1)
0x26 = 00100110 (0x26 data, final chunk)

2. LEB128 (signed LEB128) encodes signed integers in two's complement with sign extension, so negative values are stored efficiently.

Example: Decimal -123456 -> encoded as 0xC0 0xBB 0x78 (as few bytes as possible while preserving the sign)

License

This library is released under the MIT License. See LICENSE file for details.

Name		Name	Last commit message	Last commit date
Latest commit History 1 Commit
examples		examples
scripts		scripts
tests		tests
.gitignore		.gitignore
LICENSE		LICENSE
Makefile		Makefile
README.md		README.md
leb128.h		leb128.h

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Repository files navigation

LEB128 Encoding/Decoding Library

Usage

Example Program

Testing

API Reference

Error Codes

ULEB128 (Unsigned) Functions

Basic Encoding/Decoding

Automatic Memory Management

Performance

LEB128 (Signed) Functions

Multi-Value Operations

Streaming Interface

Validation and Utilities

Additional Usage Examples

Basic Encoding/Decoding

Automatic Memory Management

Streaming Large Data

Processing Multiple Values

Error Handling

Validation

Performance Considerations

What is LEB128?

Advantages

How it Works

Types

License

About

Uh oh!

Contributors

Uh oh!

Languages

Folders and files

Latest commit

History

Repository files navigation

LEB128 Encoding/Decoding Library

Usage

Example Program

Testing

API Reference

Error Codes

ULEB128 (Unsigned) Functions

Basic Encoding/Decoding

Automatic Memory Management

Performance

LEB128 (Signed) Functions

Multi-Value Operations

Streaming Interface

Validation and Utilities

Additional Usage Examples

Basic Encoding/Decoding

Automatic Memory Management

Streaming Large Data

Processing Multiple Values

Error Handling

Validation

Performance Considerations

What is LEB128?

Advantages

How it Works

Types

License

About

Topics

Resources

License

Uh oh!

Stars

Watchers

Forks

Contributors

Uh oh!

Languages