From 8b6baca16fde590b5b4be0afcda4e753a2b8dd32 Mon Sep 17 00:00:00 2001 From: mitch Date: Thu, 19 Feb 2026 12:11:08 -0500 Subject: [PATCH 1/4] Add van Emde Boas tree implementation and tests --- algorithms/data_structures/veb_tree.py | 185 +++++++++++++++++++++++++ tests/test_veb_tree.py | 51 +++++++ 2 files changed, 236 insertions(+) create mode 100644 algorithms/data_structures/veb_tree.py create mode 100644 tests/test_veb_tree.py diff --git a/algorithms/data_structures/veb_tree.py b/algorithms/data_structures/veb_tree.py new file mode 100644 index 000000000..19fa6c65c --- /dev/null +++ b/algorithms/data_structures/veb_tree.py @@ -0,0 +1,185 @@ +""" +Van Emde Boas Tree (vEB Tree) / van Emde Boas priority queue + +A van Emde Boas tree is a recursive data structure for storing integers +from a fixed universe [0, u - 1], where u is a power of 2. + +Core idea: + Recursively split the universe of size u into: + - sqrt(u) clusters of size sqrt(u) + - a summary structure tracking which clusters are non-empty + +Each node stores: + - min: smallest element + - max: largest element + - summary: vEB tree over cluster indices + - cluster[]: array of vEB trees for subranges + +Operations work by: + - Decomposing a key x into: + high(x) -> cluster index + low(x) -> position within cluster + - Recursing into the appropriate cluster + - Using the summary to find the next non-empty cluster when needed + +Time complexity: + insert : O(log log u) + delete : O(log log u) + successor : O(log log u) + member : O(log log u) + min / max : O(1) + +Space complexity: + O(u) + +Where: + u = universe size (must be a power of 2) + +Strength: + Extremely fast operations for integer keys. + +Weakness: + High memory usage proportional to universe size. +""" + + +import math + +class VEBTree: + def __init__(self, universe_size): + if not isinstance(universe_size, int): + raise TypeError("universe_size must be an integer.") + if not universe_size > 0: + raise ValueError("universe_size must be greater than 0.") + if not (universe_size & (universe_size - 1)) == 0: + raise ValueError("universe_size must be a power of 2.") + + self.u = universe_size + self.min = None + self.max = None + + if universe_size <= 2: + self.summary = None + self.cluster = None + else: + self.lower_sqrt = 2 ** (math.floor(math.log2(universe_size) / 2)) + self.upper_sqrt = 2 ** (math.ceil(math.log2(universe_size) / 2)) + + self.summary = VEBTree(self.upper_sqrt) + self.cluster = [VEBTree(self.lower_sqrt) for _ in range(self.upper_sqrt)] + + def _validate_key(self, x): + if not (0 <= x < self.u): + raise ValueError(f"Key {x} out of universe range [0, {self.u - 1}]") + + def high(self, x): + return x // self.lower_sqrt + + def low(self, x): + return x % self.lower_sqrt + + def index(self, high, low): + return high * self.lower_sqrt + low + + def empty_insert(self, x): + self.min = self.max = x + + def insert(self, x): + self._validate_key(x) + if self.min is None: + self.empty_insert(x) + return + + if x < self.min: + x, self.min = self.min, x + + if self.u > 2: + h = self.high(x) + l = self.low(x) + + if self.cluster[h].min is None: + self.summary.insert(h) + self.cluster[h].empty_insert(l) + else: + self.cluster[h].insert(l) + + if x > self.max: + self.max = x + + def member(self, x): + self._validate_key(x) + if x == self.min or x == self.max: + return True + elif self.u == 2: + return False + else: + return self.cluster[self.high(x)].member(self.low(x)) + + def successor(self, x): + self._validate_key(x) + if self.u == 2: + if x == 0 and self.max == 1: + return 1 + return None + + if self.min is not None and x < self.min: + return self.min + + h = self.high(x) + l = self.low(x) + + max_low = self.cluster[h].max + + if max_low is not None and l < max_low: + offset = self.cluster[h].successor(l) + return self.index(h, offset) + else: + succ_cluster = self.summary.successor(h) + if succ_cluster is None: + return None + offset = self.cluster[succ_cluster].min + return self.index(succ_cluster, offset) + + def delete(self, x): + self._validate_key(x) + if self.min == self.max: + self.min = self.max = None + return + + if self.u == 2: + if x == 0: + self.min = 1 + else: + self.min = 0 + self.max = self.min + return + + if x == self.min: + first_cluster = self.summary.min + x = self.index(first_cluster, self.cluster[first_cluster].min) + self.min = x + + h = self.high(x) + l = self.low(x) + self.cluster[h].delete(l) + + if self.cluster[h].min is None: + self.summary.delete(h) + + if x == self.max: + summary_max = self.summary.max + if summary_max is None: + self.max = self.min + else: + self.max = self.index( + summary_max, + self.cluster[summary_max].max + ) + elif x == self.max: + self.max = self.index(h, self.cluster[h].max) + + def minimum(self): + return self.min + + def maximum(self): + return self.max \ No newline at end of file diff --git a/tests/test_veb_tree.py b/tests/test_veb_tree.py new file mode 100644 index 000000000..80bed496c --- /dev/null +++ b/tests/test_veb_tree.py @@ -0,0 +1,51 @@ +import unittest + +from algorithms.data_structures.veb_tree import VEBTree + + +class TestVEBTree(unittest.TestCase): + + def setUp(self): + self.veb = VEBTree(16) + + def test_insert_and_member(self): + values = [2, 3, 4, 7, 14] + for v in values: + self.veb.insert(v) + + for v in values: + self.assertTrue(self.veb.member(v)) + + self.assertFalse(self.veb.member(5)) + + def test_min_max(self): + self.veb.insert(10) + self.veb.insert(2) + self.veb.insert(15) + + self.assertEqual(2, self.veb.minimum()) + self.assertEqual(15, self.veb.maximum()) + + def test_successor(self): + for v in [2, 4, 8, 12]: + self.veb.insert(v) + + self.assertEqual(4, self.veb.successor(2)) + self.assertEqual(8, self.veb.successor(4)) + self.assertIsNone(self.veb.successor(12)) + + def test_delete(self): + for v in [1, 3, 5, 7]: + self.veb.insert(v) + + self.veb.delete(3) + self.assertFalse(self.veb.member(3)) + self.assertEqual(5, self.veb.successor(1)) + + def test_invalid_universe(self): + with self.assertRaises(ValueError): + VEBTree(15) # not power of 2 + + +if __name__ == "__main__": + unittest.main() \ No newline at end of file From b737a23b47f1c1ee3a17464e21f0c446a51cf09f Mon Sep 17 00:00:00 2001 From: mitch Date: Thu, 19 Feb 2026 12:43:53 -0500 Subject: [PATCH 2/4] Add vEB tree file and README entry --- README.md | 1 + algorithms/data_structures/veb_tree.py | 2 ++ 2 files changed, 3 insertions(+) diff --git a/README.md b/README.md index f1408d7e0..84e20193f 100644 --- a/README.md +++ b/README.md @@ -172,6 +172,7 @@ All core data structures live in [`algorithms/data_structures/`](algorithms/data | Stack | `stack.py` | `ArrayStack`, `LinkedListStack` | | Trie | `trie.py` | `Trie` | | Union-Find | `union_find.py` | `Union` | +| vEB Tree | `veb_tree.py` | `VEBTree` | ## Algorithms diff --git a/algorithms/data_structures/veb_tree.py b/algorithms/data_structures/veb_tree.py index 19fa6c65c..51e7e86a6 100644 --- a/algorithms/data_structures/veb_tree.py +++ b/algorithms/data_structures/veb_tree.py @@ -1,6 +1,8 @@ """ Van Emde Boas Tree (vEB Tree) / van Emde Boas priority queue +Reference: https://en.wikipedia.org/wiki/Van_Emde_Boas_tree + A van Emde Boas tree is a recursive data structure for storing integers from a fixed universe [0, u - 1], where u is a power of 2. From 86f7fabc4009248187acfea028dfad309441bfa7 Mon Sep 17 00:00:00 2001 From: mitch Date: Thu, 19 Feb 2026 13:06:07 -0500 Subject: [PATCH 3/4] Add docstrings to VEBTree Improve documentation by adding docstrings to more closely align with other code in the project. Modified module docstring for succinctness. --- algorithms/data_structures/veb_tree.py | 151 +++++++++++++++++++------ 1 file changed, 119 insertions(+), 32 deletions(-) diff --git a/algorithms/data_structures/veb_tree.py b/algorithms/data_structures/veb_tree.py index 51e7e86a6..4cb68bac6 100644 --- a/algorithms/data_structures/veb_tree.py +++ b/algorithms/data_structures/veb_tree.py @@ -6,49 +6,39 @@ A van Emde Boas tree is a recursive data structure for storing integers from a fixed universe [0, u - 1], where u is a power of 2. -Core idea: - Recursively split the universe of size u into: - - sqrt(u) clusters of size sqrt(u) - - a summary structure tracking which clusters are non-empty - -Each node stores: - - min: smallest element - - max: largest element - - summary: vEB tree over cluster indices - - cluster[]: array of vEB trees for subranges - -Operations work by: - - Decomposing a key x into: - high(x) -> cluster index - low(x) -> position within cluster - - Recursing into the appropriate cluster - - Using the summary to find the next non-empty cluster when needed - Time complexity: - insert : O(log log u) - delete : O(log log u) - successor : O(log log u) - member : O(log log u) - min / max : O(1) + insert / delete / successor / member : O(log log u) + min / max : O(1) Space complexity: O(u) - -Where: - u = universe size (must be a power of 2) - -Strength: - Extremely fast operations for integer keys. - -Weakness: - High memory usage proportional to universe size. """ import math class VEBTree: + """ + Van Emde Boas tree supporting fast predecessor/successor queries. + + Attributes: + u (int): Universe size (power of 2) + min (int | None): Minimum element in the tree + max (int | None): Maximum element in the tree + summary (VEBTree | None): Summary tree + cluster (list[VEBTree] | None): Array of clusters + """ def __init__(self, universe_size): + """ + Initialize a Van Emde Boas tree. + + Args: + universe_size (int): Size of the universe; must be a power of 2 and > 0. + + Raises: + TypeError: If universe_size is not an integer. + ValueError: If universe_size <= 0 or not a power of 2. + """ if not isinstance(universe_size, int): raise TypeError("universe_size must be an integer.") if not universe_size > 0: @@ -71,22 +61,74 @@ def __init__(self, universe_size): self.cluster = [VEBTree(self.lower_sqrt) for _ in range(self.upper_sqrt)] def _validate_key(self, x): + """ + Check if x is within the universe range. + + Args: + x (int): Element to validate. + + Raises: + ValueError: If x is not in the range [0, u-1]. + """ if not (0 <= x < self.u): raise ValueError(f"Key {x} out of universe range [0, {self.u - 1}]") def high(self, x): + """ + Return the high part (cluster index) of element x. + + Args: + x (int): Element to split. + + Returns: + int: Cluster index corresponding to x. + """ return x // self.lower_sqrt def low(self, x): + """ + Return the low part (position within cluster) of element x. + + Args: + x (int): Element to split. + + Returns: + int: Position within cluster corresponding to x. + """ return x % self.lower_sqrt def index(self, high, low): + """ + Combine high and low parts to get original element. + + Args: + high (int): Cluster index. + low (int): Position within cluster. + + Returns: + int: Original element corresponding to high and low. + """ return high * self.lower_sqrt + low def empty_insert(self, x): + """ + Insert x into an empty vEB tree (sets min and max). + + Args: + x (int): Element to insert. + """ self.min = self.max = x def insert(self, x): + """ + Insert an element into the Van Emde Boas tree. + + Args: + x (int): Element to insert; must be in the universe [0, u-1]. + + Raises: + ValueError: If x is outside the universe. + """ self._validate_key(x) if self.min is None: self.empty_insert(x) @@ -109,6 +151,18 @@ def insert(self, x): self.max = x def member(self, x): + """ + Check whether element x exists in the tree. + + Args: + x (int): Element to check. + + Returns: + bool: True if x exists, False otherwise. + + Raises: + ValueError: If x is outside the universe. + """ self._validate_key(x) if x == self.min or x == self.max: return True @@ -118,6 +172,18 @@ def member(self, x): return self.cluster[self.high(x)].member(self.low(x)) def successor(self, x): + """ + Return the smallest element greater than x in the tree. + + Args: + x (int): Element to find successor for. + + Returns: + int | None: Successor of x if exists, otherwise None. + + Raises: + ValueError: If x is outside the universe. + """ self._validate_key(x) if self.u == 2: if x == 0 and self.max == 1: @@ -143,6 +209,15 @@ def successor(self, x): return self.index(succ_cluster, offset) def delete(self, x): + """ + Remove element x from the Van Emde Boas tree. + + Args: + x (int): Element to delete. + + Raises: + ValueError: If x is outside the universe. + """ self._validate_key(x) if self.min == self.max: self.min = self.max = None @@ -181,7 +256,19 @@ def delete(self, x): self.max = self.index(h, self.cluster[h].max) def minimum(self): + """ + Get the minimum element in the tree. + + Returns: + int | None: Minimum element, or None if tree is empty. + """ return self.min def maximum(self): + """ + Get the maximum element in the tree. + + Returns: + int | None: Maximum element, or None if tree is empty. + """ return self.max \ No newline at end of file From c8e9165c5a18602d2658b00c8c5d00f0b399910f Mon Sep 17 00:00:00 2001 From: mitch Date: Fri, 20 Feb 2026 08:22:52 -0500 Subject: [PATCH 4/4] Ensured passing ruff lint check --- algorithms/data_structures/veb_tree.py | 78 +++++++++++++------------- tests/test_veb_tree.py | 3 +- 2 files changed, 39 insertions(+), 42 deletions(-) diff --git a/algorithms/data_structures/veb_tree.py b/algorithms/data_structures/veb_tree.py index 4cb68bac6..57a286bfe 100644 --- a/algorithms/data_structures/veb_tree.py +++ b/algorithms/data_structures/veb_tree.py @@ -14,9 +14,9 @@ O(u) """ - import math + class VEBTree: """ Van Emde Boas tree supporting fast predecessor/successor queries. @@ -28,6 +28,7 @@ class VEBTree: summary (VEBTree | None): Summary tree cluster (list[VEBTree] | None): Array of clusters """ + def __init__(self, universe_size): """ Initialize a Van Emde Boas tree. @@ -49,14 +50,14 @@ def __init__(self, universe_size): self.u = universe_size self.min = None self.max = None - + if universe_size <= 2: self.summary = None self.cluster = None else: self.lower_sqrt = 2 ** (math.floor(math.log2(universe_size) / 2)) self.upper_sqrt = 2 ** (math.ceil(math.log2(universe_size) / 2)) - + self.summary = VEBTree(self.upper_sqrt) self.cluster = [VEBTree(self.lower_sqrt) for _ in range(self.upper_sqrt)] @@ -133,20 +134,20 @@ def insert(self, x): if self.min is None: self.empty_insert(x) return - + if x < self.min: x, self.min = self.min, x - + if self.u > 2: - h = self.high(x) - l = self.low(x) - - if self.cluster[h].min is None: - self.summary.insert(h) - self.cluster[h].empty_insert(l) + high = self.high(x) + low = self.low(x) + + if self.cluster[high].min is None: + self.summary.insert(high) + self.cluster[high].empty_insert(low) else: - self.cluster[h].insert(l) - + self.cluster[high].insert(low) + if x > self.max: self.max = x @@ -189,20 +190,20 @@ def successor(self, x): if x == 0 and self.max == 1: return 1 return None - + if self.min is not None and x < self.min: return self.min - - h = self.high(x) - l = self.low(x) - - max_low = self.cluster[h].max - - if max_low is not None and l < max_low: - offset = self.cluster[h].successor(l) - return self.index(h, offset) + + high = self.high(x) + low = self.low(x) + + max_low = self.cluster[high].max + + if max_low is not None and low < max_low: + offset = self.cluster[high].successor(low) + return self.index(high, offset) else: - succ_cluster = self.summary.successor(h) + succ_cluster = self.summary.successor(high) if succ_cluster is None: return None offset = self.cluster[succ_cluster].min @@ -222,7 +223,7 @@ def delete(self, x): if self.min == self.max: self.min = self.max = None return - + if self.u == 2: if x == 0: self.min = 1 @@ -230,30 +231,27 @@ def delete(self, x): self.min = 0 self.max = self.min return - + if x == self.min: first_cluster = self.summary.min x = self.index(first_cluster, self.cluster[first_cluster].min) self.min = x - - h = self.high(x) - l = self.low(x) - self.cluster[h].delete(l) - - if self.cluster[h].min is None: - self.summary.delete(h) - + + high = self.high(x) + low = self.low(x) + self.cluster[high].delete(low) + + if self.cluster[high].min is None: + self.summary.delete(high) + if x == self.max: summary_max = self.summary.max if summary_max is None: self.max = self.min else: - self.max = self.index( - summary_max, - self.cluster[summary_max].max - ) + self.max = self.index(summary_max, self.cluster[summary_max].max) elif x == self.max: - self.max = self.index(h, self.cluster[h].max) + self.max = self.index(high, self.cluster[high].max) def minimum(self): """ @@ -271,4 +269,4 @@ def maximum(self): Returns: int | None: Maximum element, or None if tree is empty. """ - return self.max \ No newline at end of file + return self.max diff --git a/tests/test_veb_tree.py b/tests/test_veb_tree.py index 80bed496c..29d95d247 100644 --- a/tests/test_veb_tree.py +++ b/tests/test_veb_tree.py @@ -4,7 +4,6 @@ class TestVEBTree(unittest.TestCase): - def setUp(self): self.veb = VEBTree(16) @@ -48,4 +47,4 @@ def test_invalid_universe(self): if __name__ == "__main__": - unittest.main() \ No newline at end of file + unittest.main()