def bits_to_bytes(bits: str) -> tuple[bytes, int]:
    """Pack a string of "0"/"1" characters into bytes.

    The bit string is right-padded with zero bits up to a whole number of
    bytes. The number of padding bits is returned so that ``bytes_to_bits``
    can strip them again.

    Args:
        bits: Bit string to pack. May be empty.

    Returns:
        A ``(data, padding)`` tuple: the packed bytes (first bit is the most
        significant bit of the first byte) and the count of zero bits (0-7)
        appended to fill the last byte. An empty input yields ``(b"", 0)``.

    Raises:
        ValueError: If ``bits`` cannot be parsed by ``int(bits, 2)``.
    """
    if not bits:
        # int("", 2) raises ValueError, so an empty payload is handled here.
        return b"", 0

    padding = (8 - len(bits) % 8) % 8
    padded_bits = bits + "0" * padding
    data = int(padded_bits, 2).to_bytes(len(padded_bits) // 8, "big")
    return data, padding


def bytes_to_bits(data: bytes, padding: int) -> str:
    """Unpack bytes into a bit string and drop the trailing padding bits.

    Args:
        data: Packed bytes, as produced by ``bits_to_bytes``.
        padding: Number of trailing filler bits to discard (0-7).

    Returns:
        The bit string of length ``len(data) * 8 - padding``, or ``""`` when
        ``data`` is empty.

    Raises:
        ValueError: If ``padding`` is outside the range 0-7.
    """
    if not 0 <= padding < 8:
        # bits_to_bytes never emits anything else; a different value means
        # the caller read a corrupted padding count.
        raise ValueError(f"padding must be in range 0-7, got {padding}")
    if not data:
        # bin(0)[2:] is "0", which would otherwise leak a bit that was
        # never encoded.
        return ""

    # zfill restores the leading zero bits that int() does not keep.
    raw_bits = bin(int.from_bytes(data, "big"))[2:].zfill(len(data) * 8)
    if padding:
        raw_bits = raw_bits[:-padding]
    return raw_bits
import json

from .huffman_binary import bits_to_bytes, bytes_to_bits
from .huffman_encoding import decode, encode


def encode_file(input_path: str, output_path: str) -> None:
    """Compress a UTF-8 text file into a Huffman-coded binary file.

    Output layout, in order:
        * 4 bytes, big-endian: length of the JSON code table;
        * the code table as UTF-8 encoded JSON (character -> bit string);
        * 1 byte: number of padding bits appended to the payload;
        * the packed payload bytes.

    Args:
        input_path: Path of the text file to compress (read as UTF-8).
        output_path: Path of the compressed file to create or overwrite.
    """
    # newline="" disables newline translation so "\r\n" and "\r" survive
    # the round trip unchanged.
    with open(input_path, "r", encoding="utf-8", newline="") as source:
        text = source.read()

    encoded_bits, codes = encode(text)

    if encoded_bits:
        data_bytes, padding = bits_to_bytes(encoded_bits)
    else:
        # Empty input: there is nothing to pack, so write an empty payload.
        data_bytes, padding = b"", 0

    codes_json = json.dumps(codes).encode("utf-8")

    with open(output_path, "wb") as f:
        f.write(len(codes_json).to_bytes(4, "big"))  # table length
        f.write(codes_json)  # code table
        f.write(bytes([padding]))  # padding bit count
        f.write(data_bytes)  # payload


def decode_file(input_path: str, output_path: str) -> None:
    """Decompress a file written by ``encode_file`` back into UTF-8 text.

    Args:
        input_path: Path of the compressed file.
        output_path: Path of the text file to create or overwrite.

    Raises:
        ValueError: If the file ends before the table length, the code table
            or the padding byte has been read completely, or if the code
            table is not valid JSON (``json.JSONDecodeError`` is a
            ``ValueError`` subclass).
    """
    with open(input_path, "rb") as f:
        header = f.read(4)
        if len(header) != 4:
            raise ValueError("truncated file: missing code table length")
        table_size = int.from_bytes(header, "big")

        table = f.read(table_size)
        if len(table) != table_size:
            raise ValueError("truncated file: incomplete code table")
        codes = json.loads(table.decode("utf-8"))

        padding_byte = f.read(1)
        if not padding_byte:
            raise ValueError("truncated file: missing padding byte")
        padding = padding_byte[0]

        data_bytes = f.read()

    # An empty payload carries no bits at all.
    bits = bytes_to_bits(data_bytes, padding) if data_bytes else ""
    text = decode(bits, codes)

    # newline="" writes the decoded text verbatim, mirroring encode_file.
    with open(output_path, "w", encoding="utf-8", newline="") as target:
        target.write(text)
import heapq
from collections import Counter


def build_huffman_codes(text: str) -> dict[str, str]:
    """Build a Huffman code table for the characters of ``text``.

    Ties between equal frequencies are resolved by creation order:
    characters in order of first appearance, then merged subtrees in the
    order they were built. The resulting table is therefore deterministic.

    Args:
        text: Input text. May be empty.

    Returns:
        A mapping from each distinct character to its code, a string of
        "0"/"1" characters. Empty text yields ``{}``; text with a single
        distinct character maps that character to ``"0"``.
    """
    if not text:
        return {}

    # Counter keeps keys in first-seen order, which feeds the tie-breaker.
    frequency = Counter(text)

    # A one-node tree would give the only symbol an empty code.
    if len(frequency) == 1:
        only_char = next(iter(frequency))
        return {only_char: "0"}

    # Heap entries are (frequency, creation index, node). The unique index
    # breaks ties and keeps tuple comparison from ever reaching the node,
    # which may be a str (leaf) or a tuple (subtree).
    heap = [
        (freq, index, char)
        for index, (char, freq) in enumerate(frequency.items())
    ]
    heapq.heapify(heap)
    next_index = len(heap)

    # Repeatedly merge the two lightest nodes into a (left, right) subtree.
    while len(heap) > 1:
        freq1, _, node1 = heapq.heappop(heap)
        freq2, _, node2 = heapq.heappop(heap)
        heapq.heappush(heap, (freq1 + freq2, next_index, (node1, node2)))
        next_index += 1

    root = heap[0][2]
    codes: dict[str, str] = {}

    def traverse(node, code):
        # Leaves are characters; internal nodes are (left, right) tuples.
        if isinstance(node, str):
            codes[node] = code
        else:
            left, right = node
            traverse(left, code + "0")
            traverse(right, code + "1")

    traverse(root, "")
    return codes


def encode(text: str) -> tuple[str, dict[str, str]]:
    """Encode ``text`` with a Huffman code built from its own characters.

    Args:
        text: Input text. May be empty.

    Returns:
        A ``(bits, codes)`` tuple: the encoded bit string and the code table
        needed to decode it. Empty text yields ``("", {})``.
    """
    codes = build_huffman_codes(text)
    encoded_bits = "".join(codes[ch] for ch in text)
    return encoded_bits, codes


def decode(bits: str, codes: dict[str, str]) -> str:
    """Decode a bit string using a character -> code table.

    Args:
        bits: String of "0"/"1" characters to decode.
        codes: Code table as returned by ``encode``.

    Returns:
        The decoded text. Returns ``""`` when ``bits`` or ``codes`` is
        empty. Trailing bits that do not complete a code are ignored.
    """
    if not bits or not codes:
        return ""

    reverse_codes = {code: ch for ch, code in codes.items()}
    decoded = []
    current = ""

    # Grow the candidate code bit by bit and emit a character on each match.
    for bit in bits:
        current += bit
        if current in reverse_codes:
            decoded.append(reverse_codes[current])
            current = ""

    return "".join(decoded)
"output.txt" + + text = "This is a test of the Huffman codec.\n" * 10 + input_file.write_text(text, encoding="utf-8") + + encode_file(str(input_file), str(compressed_file)) + decode_file(str(compressed_file), str(output_file)) + + decoded_text = output_file.read_text(encoding="utf-8") + assert decoded_text == text From b63e1fb7b49801c13e4b4389e054042c07f538cc Mon Sep 17 00:00:00 2001 From: Tatiana Muromtseva Date: Wed, 26 Nov 2025 19:47:13 +0300 Subject: [PATCH 4/7] Empty line in huffman_binary.p --- src/huffman/huffman_binary.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/huffman/huffman_binary.py b/src/huffman/huffman_binary.py index 912cfbf..92a5ba6 100644 --- a/src/huffman/huffman_binary.py +++ b/src/huffman/huffman_binary.py @@ -11,4 +11,4 @@ def bytes_to_bits(data: bytes, padding: int) -> str: raw_bits = bin(int.from_bytes(data, "big"))[2:].zfill(len(data) * 8) if padding: raw_bits = raw_bits[:-padding] - return raw_bits \ No newline at end of file + return raw_bits From 2bc896581d8f1f0e6bf24046b4c6ecefaebbbbba Mon Sep 17 00:00:00 2001 From: Tatiana Muromtseva Date: Mon, 8 Dec 2025 04:27:38 +0300 Subject: [PATCH 5/7] Pull workflow from CI branch --- .github/workflows/main.yml | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) create mode 100644 .github/workflows/main.yml diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml new file mode 100644 index 0000000..56c2ac4 --- /dev/null +++ b/.github/workflows/main.yml @@ -0,0 +1,19 @@ +name: Ruff +on: push +jobs: + build: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - name: Install Python + uses: actions/setup-python@v5 + with: + python-version: "3.13.7" + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install ruff pytest + - name: Run Ruff + run: ruff check --output-format=github . 
+ - name: Run unit-tests + run: python -m pytest tests/* From 9822c6cfdf82f075aa6dd9d171cfb8739165cd3b Mon Sep 17 00:00:00 2001 From: Tatiana Muromtseva Date: Sat, 20 Dec 2025 03:15:18 +0300 Subject: [PATCH 6/7] finalizing test file --- tests/test_huffman.py | 52 +++++++++++++++++++++++++++++++++++-------- 1 file changed, 43 insertions(+), 9 deletions(-) diff --git a/tests/test_huffman.py b/tests/test_huffman.py index df10d16..fafe0ba 100644 --- a/tests/test_huffman.py +++ b/tests/test_huffman.py @@ -6,25 +6,60 @@ def test_encode_decode_basic_string(): text = "hello huffman" encoded, codes = encode(text) - decoded = decode(encoded, codes) - assert decoded == text + assert decode(encoded, codes) == text def test_encode_decode_single_char(): text = "aaaaaa" encoded, codes = encode(text) - decoded = decode(encoded, codes) - assert decoded == text - assert all(bit == "0" for bit in encoded) + assert codes == {"a": "0"} + assert encoded == "000000" + assert decode(encoded, codes) == text def test_encode_decode_empty_string(): text = "" encoded, codes = encode(text) - decoded = decode(encoded, codes) - assert decoded == text assert encoded == "" assert codes == {} + assert decode(encoded, codes) == text + + +def test_encode_two_chars_manual(): + text = "ab" + encoded, codes = encode(text) + + assert codes == { + "a": "0", + "b": "1", + } + assert encoded == "01" + assert decode(encoded, codes) == text + + +def test_encode_aaab_manual(): + text = "aaab" + encoded, codes = encode(text) + + assert codes == { + "b": "0", + "a": "1", + } + assert encoded == "1110" + assert decode(encoded, codes) == text + + +def test_encode_abc_manual(): + text = "abc" + encoded, codes = encode(text) + + assert codes == { + "c": "0", + "a": "10", + "b": "11", + } + assert encoded == "10110" + assert decode(encoded, codes) == text def test_encode_decode_file(tmp_path): @@ -38,5 +73,4 @@ def test_encode_decode_file(tmp_path): encode_file(str(input_file), str(compressed_file)) 
decode_file(str(compressed_file), str(output_file)) - decoded_text = output_file.read_text(encoding="utf-8") - assert decoded_text == text + assert output_file.read_text(encoding="utf-8") == text From ba7129e439811a5588d93ee4e19368351c91ac4d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=A2=D0=B0=D1=82=D1=8C=D1=8F=D0=BD=D0=B0=20=D0=9C=D1=83?= =?UTF-8?q?=D1=80=D0=BE=D0=BC=D1=86=D0=B5=D0=B2=D0=B0?= Date: Sat, 20 Dec 2025 03:16:32 +0300 Subject: [PATCH 7/7] Update test_huffman.py --- tests/test_huffman.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/test_huffman.py b/tests/test_huffman.py index fafe0ba..7eb73ec 100644 --- a/tests/test_huffman.py +++ b/tests/test_huffman.py @@ -1,4 +1,3 @@ -import pytest from src.huffman.huffman_encoding import encode, decode from src.huffman.huffman_codec import encode_file, decode_file