From 00f8b1c1f3ac4efc5f1d3aba940d97f6ed7c2e7c Mon Sep 17 00:00:00 2001 From: Andrew-Kochanov Date: Sun, 16 Nov 2025 23:57:53 +0300 Subject: [PATCH 01/10] =?UTF-8?q?=D0=9A=D0=BE=D0=B4=20=D0=A5=D0=B0=D1=84?= =?UTF-8?q?=D1=84=D0=BC=D0=B0=D0=BD=D0=B0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- haffman_code/code/decoding.py | 42 +++++++++++++ haffman_code/code/encoding.py | 80 +++++++++++++++++++++++++ haffman_code/code/tempCodeRunnerFile.py | 2 + 3 files changed, 124 insertions(+) create mode 100644 haffman_code/code/decoding.py create mode 100644 haffman_code/code/encoding.py create mode 100644 haffman_code/code/tempCodeRunnerFile.py diff --git a/haffman_code/code/decoding.py b/haffman_code/code/decoding.py new file mode 100644 index 0000000..fd9d19f --- /dev/null +++ b/haffman_code/code/decoding.py @@ -0,0 +1,42 @@ +import pickle +import struct + +def decode(encoding: str, dictionary: dict[str, str]) -> str: + decoding = '' + + i = 0 + while i < len(encoding): + for key in dictionary: + value = dictionary[key] + if value == encoding[i:len(value) + i]: + i += len(value) + decoding += key + break + return decoding + +# print(decode('001101', {'1': '00', '3': '01', '2': '1'})) + +def decode_file(input_file: str, output_file: str): + try: + with open(input_file, 'rb') as f: + padding = struct.unpack('B', f.read(1))[0] + + table_len = struct.unpack('I', f.read(4))[0] + table = f.read(table_length) + codes_table = pickle.loads(table_data) + + encoded_bytes = f.read() + + # Преобразование байтов в битовую строку + code = ''.join(f'{byte:08b}' for byte in encoded_bytes) + + # Удаление дополнения + if padding > 0: + code = code[:-padding] + + # Декодирование + decoded_text = decode(code, codes_table) + + # Сохранение декодированного текста + with open(output_file, 'w') as f: + f.write(decoded_text) diff --git a/haffman_code/code/encoding.py b/haffman_code/code/encoding.py new file mode 100644 index 0000000..1310a98 --- /dev/null +++ b/haffman_code/code/encoding.py @@ -0,0 +1,80 @@ +import pickle +import struct + +class TreeNode: + def __init__(self, value: str): + self.left: None | TreeNode = None + self.right: None | TreeNode = None + self.value = value + +def sorted_insert(lst: list, value, key): + target_idx = 0 + for i in range(len(lst)): + target_idx = i + if key(lst[i]) > key(value): + break + lst.insert(target_idx, value) + +def encode(inp: str) -> tuple[str, dict[str, str]]: + output_string = "" + dictionary = {} + + frequencies = {} + for chr in inp: + if chr in frequencies: + frequencies[chr] += 1 + else: + frequencies[chr] = 1 + srt = sorted(list(frequencies.items()), key=lambda x: x[1]) + nodes = [(TreeNode(char), value) for (char, value) in srt] + + while len(nodes) > 1: + s1, s2 = nodes.pop(0), nodes.pop(0) + node = TreeNode(s1[0].value + s2[0].value) + node.left = s1[0] + node.right = s2[0] + sorted_insert(nodes, (node, s1[1] + s2[1]), lambda x: x[1]) + + root = nodes[0][0] + + def walk(node, acc): + if node.left is None and node.right is None: + dictionary[node.value] = acc + else: + if node.left is not None: + walk(node.left, acc + "0") + if node.right is not None: + walk(node.right, acc + "1") + + walk(root, "") + + for ch in inp: + output_string += dictionary[ch] + + return (output_string, dictionary) + +# print(encode("1223")) + +def encode_file(input_file: str, output_file: str): + try: + with open(input_file) as file: + text = file.read() + + code, dictionary = encode(text) + padding = 8 - len(code) % 8 + if padding == 8: + padding = 0 + code += 0 * padding + + encoded_bytes = bytearray() + for i in range(0, len(code), 8): + byte = code[i:i+8] + encoded_bytes.append(int(byte, 2)) + + with open(output_file, "wb") as file: + file.write(struct.pack("B", padding)) + table = pickle.dumps(dictionary) + file.write("I", len(table)) + file.write(table) + f.write(encoded_bytes) + diff --git a/haffman_code/code/tempCodeRunnerFile.py b/haffman_code/code/tempCodeRunnerFile.py new file mode 100644 index 0000000..00a89b5 --- /dev/null +++ b/haffman_code/code/tempCodeRunnerFile.py @@ -0,0 +1,2 @@ + # encode = encoding[0] + # dictionary = encoding[-1] \ No newline at end of file From 350568273c42667e1077e3c448bac4db9d184855 Mon Sep 17 00:00:00 2001 From: Andrew-Kochanov Date: Mon, 17 Nov 2025 02:20:22 +0300 Subject: [PATCH 02/10] =?UTF-8?q?=D0=98=D1=81=D0=BF=D1=80=D0=B0=D0=B2?= =?UTF-8?q?=D0=B8=D0=BB=20=D0=BD=D0=B5=D0=B4=D0=BE=D1=87=D0=B5=D1=82=D1=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- haffman_code/code/decoding.py | 54 +++++++++++++++++------------------ haffman_code/code/encoding.py | 38 ++++++++++++------------ 2 files changed, 44 insertions(+), 48 deletions(-) diff --git a/haffman_code/code/decoding.py b/haffman_code/code/decoding.py index fd9d19f..0c40f9e 100644 --- a/haffman_code/code/decoding.py +++ b/haffman_code/code/decoding.py @@ -2,41 +2,39 @@ import struct def decode(encoding: str, dictionary: dict[str, str]) -> str: - decoding = '' + reverse_dict = {value: key for key, value in dictionary.items()} + + decoded_letters = [] + current_letter = "" + + for bit in encoding: + current_letter += bit + if current_letter in reverse_dict: + decoded_letters.append(reverse_dict[current_letter]) + current_letter = "" + + return ''.join(decoded_letters) - i = 0 - while i < len(encoding): - for key in dictionary: - value = dictionary[key] - if value == encoding[i:len(value) + i]: - i += len(value) - decoding += key - break - return decoding -# print(decode('001101', {'1': '00', '3': '01', '2': '1'})) def decode_file(input_file: str, output_file: str): - try: - with open(input_file, 'rb') as f: - padding = struct.unpack('B', f.read(1))[0] + with open(input_file, 'rb') as f: + padding = struct.unpack('B', f.read(1))[0] - table_len = struct.unpack('I', f.read(4))[0] - table = f.read(table_length) - codes_table = pickle.loads(table_data) + table_len = struct.unpack('I', f.read(4))[0] + table = f.read(table_len) + codes_table = pickle.loads(table) - encoded_bytes = f.read() + encoded_bytes = f.read() - # Преобразование байтов в битовую строку - code = ''.join(f'{byte:08b}' for byte in encoded_bytes) + code = ''.join(f'{byte:08b}' for byte in encoded_bytes) - # Удаление дополнения - if padding > 0: - code = code[:-padding] + if padding > 0: + code = code[:-padding] - # Декодирование - decoded_text = decode(code, codes_table) + decoded_text = decode(code, codes_table) - # Сохранение декодированного текста - with open(output_file, 'w') as f: - f.write(decoded_text) + with open(output_file, 'w', encoding="utf-8") as file: + file.write(decoded_text) + +decode_file("example_none.txt", "example_none_none.txt") \ No newline at end of file diff --git a/haffman_code/code/encoding.py b/haffman_code/code/encoding.py index 1310a98..990e926 100644 --- a/haffman_code/code/encoding.py +++ b/haffman_code/code/encoding.py @@ -53,28 +53,26 @@ def walk(node, acc): return (output_string, dictionary) -# print(encode("1223")) - def encode_file(input_file: str, output_file: str): - try: - with open(input_file) as file: - text = file.read() + with open(input_file, "r", encoding="utf-8") as file: + text = file.read() - code, dictionary = encode(text) - padding = 8 - len(code) % 8 - if padding == 8: - padding = 0 - code += 0 * padding + code, dictionary = encode(text) + padding = 8 - len(code) % 8 + if padding == 8: + padding = 0 + code += '0' * padding - encoded_bytes = bytearray() - for i in range(0, len(code), 8): - byte = code[i:i+8] - encoded_bytes.append(int(byte, 2)) + encoded_bytes = bytearray() + for i in range(0, len(code), 8): + byte = code[i:i+8] + encoded_bytes.append(int(byte, 2)) - with open(output_file, "wb") as file: - file.write(struct.pack("B", padding)) - table = pickle.dumps(dictionary) - file.write("I", len(table)) - file.write(table) - f.write(encoded_bytes) + with open(output_file, "wb") as file: + file.write(struct.pack("B", padding)) + table = pickle.dumps(dictionary) + file.write(struct.pack("I", len(table))) + file.write(table) + file.write(encoded_bytes) +encode_file("example.txt", "example_none.txt") From 4f2e6525bfc83c9f7c023568952019d48573fef2 Mon Sep 17 00:00:00 2001 From: Andrew-Kochanov Date: Mon, 17 Nov 2025 02:25:17 +0300 Subject: [PATCH 03/10] =?UTF-8?q?=D0=A3=D0=B4=D0=B0=D0=BB=D0=B8=D0=BB=20te?= =?UTF-8?q?mpCodeRunnerFile.py?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- haffman_code/code/tempCodeRunnerFile.py | 2 -- 1 file changed, 2 deletions(-) delete mode 100644 haffman_code/code/tempCodeRunnerFile.py diff --git a/haffman_code/code/tempCodeRunnerFile.py b/haffman_code/code/tempCodeRunnerFile.py deleted file mode 100644 index 00a89b5..0000000 --- a/haffman_code/code/tempCodeRunnerFile.py +++ /dev/null @@ -1,2 +0,0 @@ - # encode = encoding[0] - # dictionary = encoding[-1] \ No newline at end of file From b5a5b7dda6ad0b3d5e8ea22886e02de1623da18f Mon Sep 17 00:00:00 2001 From: Andrew-Kochanov Date: Sat, 6 Dec 2025 15:38:16 +0300 Subject: [PATCH 04/10] =?UTF-8?q?=D0=A4=D0=B0=D0=B9=D0=BB=20=D0=B4=D0=BB?= =?UTF-8?q?=D1=8F=20=D1=82=D0=B5=D1=81=D1=82=D0=BE=D0=B2?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- haffman_code/code/origin_file.txt | 1 + 1 file changed, 1 insertion(+) create mode 100644 haffman_code/code/origin_file.txt diff --git a/haffman_code/code/origin_file.txt b/haffman_code/code/origin_file.txt new file mode 100644 index 0000000..5dd01c1 --- /dev/null +++ b/haffman_code/code/origin_file.txt @@ -0,0 +1 @@ +Hello, world! \ No newline at end of file From 4b585676ec04f670e6009c6677a7d0b83bf427d5 Mon Sep 17 00:00:00 2001 From: Andrew-Kochanov Date: Sat, 6 Dec 2025 15:39:03 +0300 Subject: [PATCH 05/10] =?UTF-8?q?=D0=A3=D0=B1=D1=80=D0=B0=D0=BB=20=D0=BB?= =?UTF-8?q?=D0=B8=D1=88=D0=BD=D0=B8=D0=B5=20=D1=81=D1=82=D1=80=D0=BE=D1=87?= =?UTF-8?q?=D0=BA=D0=B8=20=D0=B2=D1=8B=D0=B7=D0=BE=D0=B2=D0=BE=D0=B2=20?= =?UTF-8?q?=D1=84=D1=83=D0=BD=D0=BA=D1=86=D0=B8=D0=B9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- haffman_code/code/decoding.py | 1 - haffman_code/code/encoding.py | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/haffman_code/code/decoding.py b/haffman_code/code/decoding.py index 0c40f9e..63c2356 100644 --- a/haffman_code/code/decoding.py +++ b/haffman_code/code/decoding.py @@ -37,4 +37,3 @@ def decode_file(input_file: str, output_file: str): with open(output_file, 'w', encoding="utf-8") as file: file.write(decoded_text) -decode_file("example_none.txt", "example_none_none.txt") \ No newline at end of file diff --git a/haffman_code/code/encoding.py b/haffman_code/code/encoding.py index 990e926..a9fdf6a 100644 --- a/haffman_code/code/encoding.py +++ b/haffman_code/code/encoding.py @@ -75,4 +75,4 @@ def encode_file(input_file: str, output_file: str): file.write(table) file.write(encoded_bytes) -encode_file("example.txt", "example_none.txt") + From f3dc0642808505e67d74b6566de19c03dfa9e116 Mon Sep 17 00:00:00 2001 From: Andrew-Kochanov Date: Sat, 6 Dec 2025 15:39:33 +0300 Subject: [PATCH 06/10] =?UTF-8?q?=D0=A2=D0=B5=D1=81=D1=82=D1=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- haffman_code/code/test_coding.py | 11 +++++++++++ 1 file changed, 11 insertions(+) create mode 100644 haffman_code/code/test_coding.py diff --git a/haffman_code/code/test_coding.py b/haffman_code/code/test_coding.py new file mode 100644 index 0000000..ce5e008 --- /dev/null +++ b/haffman_code/code/test_coding.py @@ -0,0 +1,11 @@ +from encoding import encode_file +from decoding import decode_file + +encode_file("origin_file.txt", "encoding_file.txt") +decode_file("encoding_file.txt", "decoding_file.txt") + +def test_unit(): + origin_file = open('origin_file.txt').read() + decoding_file = open('decoding_file.txt').read() + assert origin_file == decoding_file + From 23effaf12067da1c382763aac45c8b8886a82754 Mon Sep 17 00:00:00 2001 From: Andrew-Kochanov Date: Sat, 6 Dec 2025 15:40:04 +0300 Subject: [PATCH 07/10] CI --- .../workflows/.github/workflows/run_tests.yml | 29 +++++++++++++++++++ 1 file changed, 29 insertions(+) create mode 100644 .github/workflows/.github/workflows/run_tests.yml diff --git a/.github/workflows/.github/workflows/run_tests.yml b/.github/workflows/.github/workflows/run_tests.yml new file mode 100644 index 0000000..0695e93 --- /dev/null +++ b/.github/workflows/.github/workflows/run_tests.yml @@ -0,0 +1,29 @@ +name: Python application + +on: [push] + +permissions: + contents: read + +jobs: + build: + + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v4 + - name: Set up Python 3.13 + uses: actions/setup-python@v3 + with: + python-version: "3.13" + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install ruff + pip install pytest + - name: Lint with ruff + run: | + ruff check --output-format=github + - name: Test with pytest + run: | + pytest \ No newline at end of file From 8f3897f100169f943a76604a788334328e897c83 Mon Sep 17 00:00:00 2001 From: Andrew-Kochanov Date: Sat, 6 Dec 2025 15:46:47 +0300 Subject: [PATCH 08/10] =?UTF-8?q?=D0=9F=D0=B5=D1=80=D0=B5=D0=BC=D0=B5?= =?UTF-8?q?=D1=81=D1=82=D0=B8=D0=BB=20run=5Ftests.yml=20=D0=B2=20worflows?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .github/workflows/{.github/workflows => }/run_tests.yml | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename .github/workflows/{.github/workflows => }/run_tests.yml (100%) diff --git a/.github/workflows/.github/workflows/run_tests.yml b/.github/workflows/run_tests.yml similarity index 100% rename from .github/workflows/.github/workflows/run_tests.yml rename to .github/workflows/run_tests.yml From b92b9912bd71903f1b527b2091da44309e42dd32 Mon Sep 17 00:00:00 2001 From: Andrew-Kochanov Date: Sat, 20 Dec 2025 16:27:19 +0300 Subject: [PATCH 09/10] =?UTF-8?q?=D0=94=D0=BE=D0=B1=D0=B0=D0=B2=D0=B8?= =?UTF-8?q?=D0=BB=20=D1=84=D1=83=D0=BD=D1=86=D0=B8=D1=8E=20=D1=81=D0=BE?= =?UTF-8?q?=D0=B7=D0=B4=D1=84=D0=BD=D0=B8=D1=8F=20=D1=84=D0=B0=D0=B9=D0=BB?= =?UTF-8?q?=D0=B0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- haffman_code/code/test_coding.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/haffman_code/code/test_coding.py b/haffman_code/code/test_coding.py index ce5e008..607a5ac 100644 --- a/haffman_code/code/test_coding.py +++ b/haffman_code/code/test_coding.py @@ -4,7 +4,12 @@ encode_file("origin_file.txt", "encoding_file.txt") decode_file("encoding_file.txt", "decoding_file.txt") +def make_file(str): + with open(str, 'w', encoding = 'utf-8') as file: + file.write('Hello, World!') + def test_unit(): + make_file('origin_file.txt') origin_file = open('origin_file.txt').read() decoding_file = open('decoding_file.txt').read() assert origin_file == decoding_file From a8e80ee4c6e761981cf275e6d4096017249922ae Mon Sep 17 00:00:00 2001 From: Andrew-Kochanov Date: Sat, 20 Dec 2025 20:05:21 +0300 Subject: [PATCH 10/10] =?UTF-8?q?=D0=97=D0=B0=D0=BD=D1=91=D1=81=20=D1=84?= =?UTF-8?q?=D1=83=D0=BD=D0=BA=D1=86=D0=B8=D0=B8=20=D1=81=D0=BE=D0=B7=D0=B4?= =?UTF-8?q?=D0=B0=D0=BD=D0=B8=D1=8F=20=D1=84=D0=B0=D0=B9=D0=BB=D0=BE=D0=B2?= =?UTF-8?q?=20=D0=B4=D0=BB=D1=8F=20=D0=BF=D1=80=D0=BE=D0=B2=D0=B5=D1=80?= =?UTF-8?q?=D0=BA=D0=B8=20=D0=B2=20=D1=82=D0=B5=D1=81=D1=82?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- haffman_code/code/test_coding.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/haffman_code/code/test_coding.py b/haffman_code/code/test_coding.py index 607a5ac..fdc9598 100644 --- a/haffman_code/code/test_coding.py +++ b/haffman_code/code/test_coding.py @@ -1,16 +1,14 @@ from encoding import encode_file from decoding import decode_file -encode_file("origin_file.txt", "encoding_file.txt") -decode_file("encoding_file.txt", "decoding_file.txt") - def make_file(str): with open(str, 'w', encoding = 'utf-8') as file: file.write('Hello, World!') def test_unit(): make_file('origin_file.txt') + encode_file("origin_file.txt", "encoding_file.txt") + decode_file("encoding_file.txt", "decoding_file.txt") origin_file = open('origin_file.txt').read() decoding_file = open('decoding_file.txt').read() - assert origin_file == decoding_file - + assert origin_file == decoding_file \ No newline at end of file