From f9b3caa0734fb869872678474f4d1dc9c52c2a0b Mon Sep 17 00:00:00 2001
From: Pavel Kuliaka
Date: Thu, 27 Nov 2025 21:39:48 +0300
Subject: [PATCH 1/3] Implemented Huffman's algorithm for files

---
Review notes (edits made on top of the series as originally posted):
 - The line structure and Python indentation of the whole series were
   restored; the posted text had every newline collapsed.
 - generate_table(): the final guard tests `nodes` instead of `tree`, so
   input made only of 0x00 bytes (tree == 0, which is falsy) still gets
   a code instead of an empty table and a KeyError in encode().
 - encode(): the stored padding count is now 0-7. It used to be 8 for an
   already byte-aligned bit stream although no padding bits were added,
   which made decode() drop a full byte of real data.
 - decode(): a stored count of 8 is treated as "no padding", so files
   produced by the code as originally posted still decode.
 - Every fix replaces a line in place; hunk sizes and the diffstat are
   unchanged. The blob ids on the "index" lines were NOT regenerated.
 - The width of the whitespace-only lines (removed again in PATCH 3/3)
   is assumed to match the surrounding indentation.

 src/Huffman's algorithm/file.py | 93 +++++++++++++++++++++++++++++++++
 1 file changed, 93 insertions(+)
 create mode 100644 src/Huffman's algorithm/file.py

diff --git a/src/Huffman's algorithm/file.py b/src/Huffman's algorithm/file.py
new file mode 100644
index 0000000..3b18d52
--- /dev/null
+++ b/src/Huffman's algorithm/file.py
@@ -0,0 +1,93 @@
+from json import loads, dumps
+from collections import Counter
+
+
+def generate_table(data: bytes) -> dict[int, str]:
+    freq_table = Counter(data)
+    
+    nodes = [(byte, freq) for byte, freq in freq_table.items()]
+    nodes = sorted(nodes, key=lambda element: element[1])
+
+    while len(nodes) > 1:
+        element_1 = nodes[0]
+        element_2 = nodes[1]
+        new_element = ([element_1[0], element_2[0]], element_1[1] + element_2[1])
+        nodes = nodes[2:]
+        nodes += [new_element]
+        nodes = sorted(nodes, key=lambda element: element[1])
+
+    tree = nodes[0][0] if nodes else []
+
+    def traverse(node, current_code=''):
+        if isinstance(node, int):
+            yield (node, current_code if current_code else '0')
+        else:
+            yield from traverse(node[0], current_code + '0')
+            yield from traverse(node[1], current_code + '1')
+
+    return dict(traverse(tree)) if nodes else {}  # a lone 0x00 leaf is falsy
+
+
+def encode(path: str) -> None:
+    with open(path, 'rb') as file:
+        content = file.read()
+
+    table = generate_table(content)
+    
+    result_bits = ''
+    for byte in content:
+        result_bits += table[byte]
+
+    padding = -len(result_bits) % 8  # zero bits needed to byte-align (0-7)
+    if padding != 0:
+        result_bits += '0' * padding
+
+    result_bytes = bytearray()
+    for i in range(0, len(result_bits), 8):
+        byte_str = result_bits[i:i+8]
+        result_bytes.append(int(byte_str, 2))
+
+    with open(path, 'wb') as file:
+        file.write(bytes([padding]))
+        
+        table_json = dumps({str(k): v for k, v in table.items()})
+        table_bytes = table_json.encode('utf-8')
+        file.write(len(table_bytes).to_bytes(4, 'big'))
+        file.write(table_bytes)
+        
+        file.write(result_bytes)
+
+def decode(path: str) -> None:
+    with open(path, 'rb') as file:
+        padding = int.from_bytes(file.read(1), 'big')
+        
+        table_length = int.from_bytes(file.read(4), 'big')
+        table_data = file.read(table_length).decode('utf-8')
+        
+        encoded_data = file.read()
+
+    table_json = loads(table_data)
+    table = {int(key): value for key, value in table_json.items()}
+    reversed_table = {value: key for key, value in table.items()}
+
+    bits_string = ''
+    for byte in encoded_data:
+        bits_string += format(byte, '08b')
+    
+    if padding not in (0, 8):  # 8: legacy marker for an already aligned stream
+        bits_string = bits_string[:-padding]
+
+    current_code = ''
+    result_bytes = bytearray()
+    
+    for bit in bits_string:
+        current_code += bit
+        if current_code in reversed_table:
+            result_bytes.append(reversed_table[current_code])
+            current_code = ''
+
+    if current_code:
+        raise ValueError("Decoding error")
+
+    with open(path, 'wb') as file:
+        file.write(result_bytes)

From d17c75db817a1929dc4bc2f73c294c127c927a3d Mon Sep 17 00:00:00 2001
From: Pavel Kuliaka
Date: Thu, 27 Nov 2025 21:39:56 +0300
Subject: [PATCH 2/3] Implemented Huffman's algorithm for text

---
Review notes (edits made on top of the series as originally posted):
 - generate_table(): an empty message now yields {} (same guards as
   file.py) instead of raising IndexError on nodes[0]; the merge loop
   condition is `> 1` rather than `!= 1` for the same reason.
 - The blob ids on the "index" lines were NOT regenerated.

 src/Huffman's algorithm/text.py | 55 +++++++++++++++++++++++++++++++++
 1 file changed, 55 insertions(+)
 create mode 100644 src/Huffman's algorithm/text.py

diff --git a/src/Huffman's algorithm/text.py b/src/Huffman's algorithm/text.py
new file mode 100644
index 0000000..1cd8dac
--- /dev/null
+++ b/src/Huffman's algorithm/text.py
@@ -0,0 +1,55 @@
+def generate_table(message: str) -> dict[str, str]:
+    freq_table = {}
+    for symbol in message:
+        if symbol in freq_table:
+            freq_table[symbol] += 1
+        else:
+            freq_table[symbol] = 1
+
+    nodes = [(key, freq_table[key]) for key in freq_table]
+    nodes = sorted(nodes, key=lambda element: element[1])
+
+    while len(nodes) > 1:
+        element_1 = nodes[0]
+        element_2 = nodes[1]
+        new_element = ([element_1[0], element_2[0]], element_1[1] + element_2[1])
+        nodes = nodes[2:]
+        nodes += [new_element]
+        nodes = sorted(nodes, key=lambda element: element[1])
+
+    tree = nodes[0][0] if nodes else []
+
+    def traverse(node, current_code=''):
+        if isinstance(node, str):
+            yield (node, current_code if current_code else '0')
+        else:
+            yield from traverse(node[0], current_code + '0')
+            yield from traverse(node[1], current_code + '1')
+
+    return dict(traverse(tree)) if nodes else {}
+
+
+def encode(message: str) -> tuple[str, dict[str, str]]:
+    table = generate_table(message)
+    result = ''
+    for symbol in message:
+        result += table[symbol]
+    return (result, table)
+
+
+def decode(message: str, table: dict[str, str]) -> str:
+    reversed_table = {}
+    for key in table:
+        reversed_table[table[key]] = key
+
+    current_code = ''
+    result = ''
+
+    for bit in message:
+        current_code += bit
+
+        if current_code in reversed_table:
+            result += reversed_table[current_code]
+            current_code = ''
+
+    return result

From 4ac2f21ae0ecd9932473cf853d7e1130394ad984 Mon Sep 17 00:00:00 2001
From: Pavel Kuliaka
Date: Thu, 27 Nov 2025 21:41:11 +0300
Subject: [PATCH 3/3] Formatted files

---
Review notes (edits made on top of the series as originally posted):
 - Only context lines were touched here, so that they match the lines
   edited in PATCH 1/3 and PATCH 2/3; the formatting changes themselves
   are as posted.
 - The blob ids on the "index" lines were NOT regenerated.

 src/Huffman's algorithm/file.py | 57 +++++++++++++++++----------------
 src/Huffman's algorithm/text.py | 16 ++++-----
 2 files changed, 37 insertions(+), 36 deletions(-)

diff --git a/src/Huffman's algorithm/file.py b/src/Huffman's algorithm/file.py
index 3b18d52..a1cc917 100644
--- a/src/Huffman's algorithm/file.py
+++ b/src/Huffman's algorithm/file.py
@@ -4,7 +4,7 @@
 
 def generate_table(data: bytes) -> dict[int, str]:
     freq_table = Counter(data)
-    
+
     nodes = [(byte, freq) for byte, freq in freq_table.items()]
     nodes = sorted(nodes, key=lambda element: element[1])
 
@@ -18,76 +18,77 @@ def generate_table(data: bytes) -> dict[int, str]:
 
     tree = nodes[0][0] if nodes else []
 
-    def traverse(node, current_code=''):
+    def traverse(node, current_code=""):
         if isinstance(node, int):
-            yield (node, current_code if current_code else '0')
+            yield (node, current_code if current_code else "0")
         else:
-            yield from traverse(node[0], current_code + '0')
-            yield from traverse(node[1], current_code + '1')
+            yield from traverse(node[0], current_code + "0")
+            yield from traverse(node[1], current_code + "1")
 
     return dict(traverse(tree)) if nodes else {}  # a lone 0x00 leaf is falsy
 
 
 def encode(path: str) -> None:
-    with open(path, 'rb') as file:
+    with open(path, "rb") as file:
         content = file.read()
 
     table = generate_table(content)
-    
-    result_bits = ''
+
+    result_bits = ""
     for byte in content:
         result_bits += table[byte]
 
     padding = -len(result_bits) % 8  # zero bits needed to byte-align (0-7)
     if padding != 0:
-        result_bits += '0' * padding
+        result_bits += "0" * padding
 
     result_bytes = bytearray()
     for i in range(0, len(result_bits), 8):
-        byte_str = result_bits[i:i+8]
+        byte_str = result_bits[i : i + 8]
         result_bytes.append(int(byte_str, 2))
 
-    with open(path, 'wb') as file:
+    with open(path, "wb") as file:
         file.write(bytes([padding]))
-        
+
         table_json = dumps({str(k): v for k, v in table.items()})
-        table_bytes = table_json.encode('utf-8')
-        file.write(len(table_bytes).to_bytes(4, 'big'))
+        table_bytes = table_json.encode("utf-8")
+        file.write(len(table_bytes).to_bytes(4, "big"))
         file.write(table_bytes)
-        
+
         file.write(result_bytes)
 
+
 def decode(path: str) -> None:
-    with open(path, 'rb') as file:
-        padding = int.from_bytes(file.read(1), 'big')
-        
-        table_length = int.from_bytes(file.read(4), 'big')
-        table_data = file.read(table_length).decode('utf-8')
-        
+    with open(path, "rb") as file:
+        padding = int.from_bytes(file.read(1), "big")
+
+        table_length = int.from_bytes(file.read(4), "big")
+        table_data = file.read(table_length).decode("utf-8")
+
         encoded_data = file.read()
 
     table_json = loads(table_data)
     table = {int(key): value for key, value in table_json.items()}
     reversed_table = {value: key for key, value in table.items()}
 
-    bits_string = ''
+    bits_string = ""
     for byte in encoded_data:
-        bits_string += format(byte, '08b')
-    
+        bits_string += format(byte, "08b")
+
     if padding not in (0, 8):  # 8: legacy marker for an already aligned stream
         bits_string = bits_string[:-padding]
 
-    current_code = ''
+    current_code = ""
     result_bytes = bytearray()
-    
+
     for bit in bits_string:
         current_code += bit
         if current_code in reversed_table:
             result_bytes.append(reversed_table[current_code])
-            current_code = ''
+            current_code = ""
 
     if current_code:
         raise ValueError("Decoding error")
 
-    with open(path, 'wb') as file:
+    with open(path, "wb") as file:
         file.write(result_bytes)
diff --git a/src/Huffman's algorithm/text.py b/src/Huffman's algorithm/text.py
index 1cd8dac..9b8d02f 100644
--- a/src/Huffman's algorithm/text.py
+++ b/src/Huffman's algorithm/text.py
@@ -19,19 +19,19 @@ def generate_table(message: str) -> dict[str, str]:
 
     tree = nodes[0][0] if nodes else []
 
-    def traverse(node, current_code=''):
+    def traverse(node, current_code=""):
         if isinstance(node, str):
-            yield (node, current_code if current_code else '0')
+            yield (node, current_code if current_code else "0")
         else:
-            yield from traverse(node[0], current_code + '0')
-            yield from traverse(node[1], current_code + '1')
+            yield from traverse(node[0], current_code + "0")
+            yield from traverse(node[1], current_code + "1")
 
     return dict(traverse(tree)) if nodes else {}
 
 
 def encode(message: str) -> tuple[str, dict[str, str]]:
     table = generate_table(message)
-    result = ''
+    result = ""
     for symbol in message:
         result += table[symbol]
     return (result, table)
@@ -42,14 +42,14 @@ def decode(message: str, table: dict[str, str]) -> str:
     for key in table:
         reversed_table[table[key]] = key
 
-    current_code = ''
-    result = ''
+    current_code = ""
+    result = ""
 
     for bit in message:
         current_code += bit
 
         if current_code in reversed_table:
             result += reversed_table[current_code]
-            current_code = ''
+            current_code = ""
 
     return result