Skip to content

Commit d49ef2d

Browse files
authored
Use a table-based Huffman decoder (#88)
1 parent 22f23ef commit d49ef2d

File tree

2 files changed

+151
-137
lines changed

2 files changed

+151
-137
lines changed

src/huffman.rs

Lines changed: 135 additions & 124 deletions
Original file line numberDiff line numberDiff line change
@@ -17,49 +17,45 @@ enum HuffmanTreeNode {
1717
Empty,
1818
}
1919

20-
/// Huffman tree
21-
#[derive(Clone, Debug, Default)]
22-
pub(crate) struct HuffmanTree {
23-
tree: Vec<HuffmanTreeNode>,
24-
max_nodes: usize,
25-
num_nodes: usize,
20+
#[derive(Clone, Debug)]
21+
enum HuffmanTreeInner {
22+
Single(u16),
23+
Tree {
24+
tree: Vec<HuffmanTreeNode>,
25+
table: Vec<u32>,
26+
table_mask: u16,
27+
},
2628
}
2729

28-
impl HuffmanTree {
29-
fn is_full(&self) -> bool {
30-
self.num_nodes == self.max_nodes
31-
}
32-
33-
/// Turns a node from empty into a branch and assigns its children
34-
fn assign_children(&mut self, node_index: usize) -> usize {
35-
let offset_index = self.num_nodes - node_index;
36-
self.tree[node_index] = HuffmanTreeNode::Branch(offset_index);
37-
self.num_nodes += 2;
30+
/// Huffman tree
31+
#[derive(Clone, Debug)]
32+
pub(crate) struct HuffmanTree(HuffmanTreeInner);
3833

39-
offset_index
34+
impl Default for HuffmanTree {
35+
fn default() -> Self {
36+
Self(HuffmanTreeInner::Single(0))
4037
}
38+
}
4139

42-
/// Init a huffman tree
43-
fn init(num_leaves: usize) -> Result<HuffmanTree, DecodingError> {
44-
if num_leaves == 0 {
45-
return Err(DecodingError::HuffmanError);
46-
}
40+
impl HuffmanTree {
41+
/// Builds a tree implicitly, just from code lengths
42+
pub(crate) fn build_implicit(code_lengths: Vec<u16>) -> Result<HuffmanTree, DecodingError> {
43+
let mut num_symbols = 0;
44+
let mut root_symbol = 0;
4745

48-
let max_nodes = 2 * num_leaves - 1;
49-
let tree = vec![HuffmanTreeNode::Empty; max_nodes];
50-
let num_nodes = 1;
46+
for (symbol, length) in code_lengths.iter().enumerate() {
47+
if *length > 0 {
48+
num_symbols += 1;
49+
root_symbol = symbol.try_into().unwrap();
50+
}
51+
}
5152

52-
let tree = HuffmanTree {
53-
tree,
54-
max_nodes,
55-
num_nodes,
53+
if num_symbols == 0 {
54+
return Err(DecodingError::HuffmanError);
55+
} else if num_symbols == 1 {
56+
return Ok(Self::build_single_node(root_symbol));
5657
};
5758

58-
Ok(tree)
59-
}
60-
61-
/// Converts code lengths to codes
62-
fn code_lengths_to_codes(code_lengths: &[u16]) -> Result<Vec<Option<u16>>, DecodingError> {
6359
let max_code_length = *code_lengths
6460
.iter()
6561
.reduce(|a, b| if a >= b { a } else { b })
@@ -86,129 +82,117 @@ impl HuffmanTree {
8682

8783
// Assign codes
8884
let mut curr_code = 0;
89-
let mut next_codes = [None; MAX_ALLOWED_CODE_LENGTH + 1];
85+
let mut next_codes = [0; MAX_ALLOWED_CODE_LENGTH + 1];
9086
for code_len in 1..=usize::from(max_code_length) {
9187
curr_code = (curr_code + code_length_hist[code_len - 1]) << 1;
92-
next_codes[code_len] = Some(curr_code);
88+
next_codes[code_len] = curr_code;
9389
}
94-
let mut huff_codes = vec![None; code_lengths.len()];
90+
let mut huff_codes = vec![0u16; code_lengths.len()];
9591
for (symbol, &length) in code_lengths.iter().enumerate() {
9692
let length = usize::from(length);
9793
if length > 0 {
9894
huff_codes[symbol] = next_codes[length];
99-
if let Some(value) = next_codes[length].as_mut() {
100-
*value += 1;
101-
}
102-
} else {
103-
huff_codes[symbol] = None;
95+
next_codes[length] += 1;
10496
}
10597
}
10698

107-
Ok(huff_codes)
108-
}
109-
110-
/// Adds a symbol to a huffman tree
111-
fn add_symbol(
112-
&mut self,
113-
symbol: u16,
114-
code: u16,
115-
code_length: u16,
116-
) -> Result<(), DecodingError> {
117-
let mut node_index = 0;
118-
let code = usize::from(code);
119-
120-
for length in (0..code_length).rev() {
121-
if node_index >= self.max_nodes {
122-
return Err(DecodingError::HuffmanError);
123-
}
124-
125-
let node = self.tree[node_index];
126-
127-
let offset = match node {
128-
HuffmanTreeNode::Empty => {
129-
if self.is_full() {
130-
return Err(DecodingError::HuffmanError);
131-
}
132-
self.assign_children(node_index)
99+
// Populate decoding table
100+
let table_bits = max_code_length.min(10);
101+
let table_size = (1 << table_bits) as usize;
102+
let table_mask = table_size as u16 - 1;
103+
let mut table = vec![0; table_size];
104+
for (symbol, (&code, &length)) in huff_codes.iter().zip(code_lengths.iter()).enumerate() {
105+
if length != 0 && length <= table_bits {
106+
let mut j = (u16::reverse_bits(code) >> (16 - length)) as usize;
107+
let entry = ((length as u32) << 16) | symbol as u32;
108+
while j < table_size {
109+
table[j] = entry;
110+
j += 1 << length as usize;
133111
}
134-
HuffmanTreeNode::Leaf(_) => return Err(DecodingError::HuffmanError),
135-
HuffmanTreeNode::Branch(offset) => offset,
136-
};
137-
138-
node_index += offset + ((code >> length) & 1);
139-
}
140-
141-
match self.tree[node_index] {
142-
HuffmanTreeNode::Empty => self.tree[node_index] = HuffmanTreeNode::Leaf(symbol),
143-
HuffmanTreeNode::Leaf(_) => return Err(DecodingError::HuffmanError),
144-
HuffmanTreeNode::Branch(_offset) => return Err(DecodingError::HuffmanError),
145-
}
146-
147-
Ok(())
148-
}
149-
150-
/// Builds a tree implicitly, just from code lengths
151-
pub(crate) fn build_implicit(code_lengths: Vec<u16>) -> Result<HuffmanTree, DecodingError> {
152-
let mut num_symbols = 0;
153-
let mut root_symbol = 0;
154-
155-
for (symbol, length) in code_lengths.iter().enumerate() {
156-
if *length > 0 {
157-
num_symbols += 1;
158-
root_symbol = symbol.try_into().unwrap();
159112
}
160113
}
161114

162-
let mut tree = HuffmanTree::init(num_symbols)?;
163-
164-
if num_symbols == 1 {
165-
tree.add_symbol(root_symbol, 0, 0)?;
166-
} else {
167-
let codes = HuffmanTree::code_lengths_to_codes(&code_lengths)?;
115+
// If the longest code has more bits than the table index (`table_bits`), build a tree as a fallback.
116+
let mut tree = Vec::new();
117+
if max_code_length > table_bits {
118+
tree = vec![HuffmanTreeNode::Empty; 2 * num_symbols - 1];
168119

120+
let mut num_nodes = 1;
169121
for (symbol, &length) in code_lengths.iter().enumerate() {
170-
if length > 0 && codes[symbol].is_some() {
171-
tree.add_symbol(symbol.try_into().unwrap(), codes[symbol].unwrap(), length)?;
122+
let code = huff_codes[symbol];
123+
let code_length = length;
124+
let symbol = symbol.try_into().unwrap();
125+
126+
if length > 0 {
127+
let mut node_index = 0;
128+
let code = usize::from(code);
129+
130+
for length in (0..code_length).rev() {
131+
let node = tree[node_index];
132+
133+
let offset = match node {
134+
HuffmanTreeNode::Empty => {
135+
// Turns a node from empty into a branch and assigns its children
136+
let offset_index = num_nodes - node_index;
137+
tree[node_index] = HuffmanTreeNode::Branch(offset_index);
138+
num_nodes += 2;
139+
offset_index
140+
}
141+
HuffmanTreeNode::Leaf(_) => return Err(DecodingError::HuffmanError),
142+
HuffmanTreeNode::Branch(offset) => offset,
143+
};
144+
145+
node_index += offset + ((code >> length) & 1);
146+
}
147+
148+
match tree[node_index] {
149+
HuffmanTreeNode::Empty => tree[node_index] = HuffmanTreeNode::Leaf(symbol),
150+
HuffmanTreeNode::Leaf(_) => return Err(DecodingError::HuffmanError),
151+
HuffmanTreeNode::Branch(_offset) => {
152+
return Err(DecodingError::HuffmanError)
153+
}
154+
}
172155
}
173156
}
174157
}
175158

176-
Ok(tree)
159+
Ok(Self(HuffmanTreeInner::Tree {
160+
tree,
161+
table,
162+
table_mask,
163+
}))
177164
}
178165

179-
/// Builds a tree explicitly from lengths, codes and symbols
180-
pub(crate) fn build_explicit(
181-
code_lengths: Vec<u16>,
182-
codes: Vec<u16>,
183-
symbols: Vec<u16>,
184-
) -> Result<HuffmanTree, DecodingError> {
185-
let mut tree = HuffmanTree::init(symbols.len())?;
186-
187-
for i in 0..symbols.len() {
188-
tree.add_symbol(symbols[i], codes[i], code_lengths[i])?;
189-
}
166+
pub(crate) fn build_single_node(symbol: u16) -> HuffmanTree {
167+
Self(HuffmanTreeInner::Single(symbol))
168+
}
190169

191-
Ok(tree)
170+
pub(crate) fn build_two_node(zero: u16, one: u16) -> HuffmanTree {
171+
Self(HuffmanTreeInner::Tree {
172+
tree: vec![
173+
HuffmanTreeNode::Leaf(zero),
174+
HuffmanTreeNode::Leaf(one),
175+
HuffmanTreeNode::Empty,
176+
],
177+
table: vec![1 << 16 | zero as u32, 1 << 16 | one as u32],
178+
table_mask: 0x1,
179+
})
192180
}
193181

194182
pub(crate) fn is_single_node(&self) -> bool {
195-
self.num_nodes == 1
183+
matches!(self.0, HuffmanTreeInner::Single(_))
196184
}
197185

198-
/// Reads a symbol using the bitstream.
199-
///
200-
/// You must call `bit_reader.fill()` before calling this function or it may erroneously
201-
/// detect the end of the stream and return a bitstream error.
202-
pub(crate) fn read_symbol<R: Read>(
203-
&self,
186+
#[inline(never)]
187+
fn read_symbol_slowpath<R: Read>(
188+
tree: &[HuffmanTreeNode],
189+
mut v: usize,
204190
bit_reader: &mut BitReader<R>,
205191
) -> Result<u16, DecodingError> {
206-
let mut v = bit_reader.peek(15) as usize;
207192
let mut depth = 0;
208-
209193
let mut index = 0;
210194
loop {
211-
match &self.tree[index] {
195+
match &tree[index] {
212196
HuffmanTreeNode::Branch(children_offset) => {
213197
index += children_offset + (v & 1);
214198
depth += 1;
@@ -222,4 +206,31 @@ impl HuffmanTree {
222206
}
223207
}
224208
}
209+
210+
/// Reads a symbol using the bitstream.
211+
///
212+
/// You must call `bit_reader.fill()` before calling this function or it may erroneously
213+
/// detect the end of the stream and return a bitstream error.
214+
pub(crate) fn read_symbol<R: Read>(
215+
&self,
216+
bit_reader: &mut BitReader<R>,
217+
) -> Result<u16, DecodingError> {
218+
match &self.0 {
219+
HuffmanTreeInner::Tree {
220+
tree,
221+
table,
222+
table_mask,
223+
} => {
224+
let v = bit_reader.peek_full() as u16;
225+
let entry = table[(v & table_mask) as usize];
226+
if entry != 0 {
227+
bit_reader.consume((entry >> 16) as u8)?;
228+
return Ok(entry as u16);
229+
}
230+
231+
Self::read_symbol_slowpath(tree, v as usize, bit_reader)
232+
}
233+
HuffmanTreeInner::Single(symbol) => Ok(*symbol),
234+
}
235+
}
225236
}

src/lossless.rs

Lines changed: 16 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -358,24 +358,22 @@ impl<R: Read> LosslessDecoder<R> {
358358
if simple {
359359
let num_symbols = self.bit_reader.read_bits::<u8>(1)? + 1;
360360

361-
let mut code_lengths = vec![u16::from(num_symbols - 1)];
362-
let mut codes = vec![0];
363-
let mut symbols = Vec::new();
364-
365361
let is_first_8bits = self.bit_reader.read_bits::<u8>(1)?;
366-
symbols.push(self.bit_reader.read_bits::<u16>(1 + 7 * is_first_8bits)?);
367-
368-
if num_symbols == 2 {
369-
symbols.push(self.bit_reader.read_bits::<u16>(8)?);
370-
code_lengths.push(1);
371-
codes.push(1);
372-
}
362+
let zero_symbol = self.bit_reader.read_bits::<u16>(1 + 7 * is_first_8bits)?;
373363

374-
if symbols.iter().any(|&s| s > alphabet_size) {
364+
if zero_symbol >= alphabet_size {
375365
return Err(DecodingError::BitStreamError);
376366
}
377367

378-
HuffmanTree::build_explicit(code_lengths, codes, symbols)
368+
if num_symbols == 1 {
369+
Ok(HuffmanTree::build_single_node(zero_symbol))
370+
} else {
371+
let one_symbol = self.bit_reader.read_bits::<u16>(8)?;
372+
if one_symbol >= alphabet_size {
373+
return Err(DecodingError::BitStreamError);
374+
}
375+
Ok(HuffmanTree::build_two_node(zero_symbol, one_symbol))
376+
}
379377
} else {
380378
let mut code_length_code_lengths = vec![0; CODE_LENGTH_CODES];
381379

@@ -751,6 +749,11 @@ impl<R: Read> BitReader<R> {
751749
self.buffer & ((1 << num) - 1)
752750
}
753751

752+
/// Peeks at the full buffer.
753+
pub(crate) fn peek_full(&self) -> u64 {
754+
self.buffer
755+
}
756+
754757
/// Consumes `num` bits from the buffer returning an error if there are not enough bits.
755758
pub(crate) fn consume(&mut self, num: u8) -> Result<(), DecodingError> {
756759
if self.nbits < num {

0 commit comments

Comments
 (0)