import io
import pickle
import struct


class _TableUnpickler(pickle.Unpickler):
    """Unpickler for the code table that refuses to resolve any global.

    A ``dict[str, str]`` is pickled purely with built-in opcodes, so a
    legitimate table never needs ``find_class``. Rejecting every lookup
    blocks the arbitrary-code-execution vector of ``pickle.loads`` when the
    compressed file comes from an untrusted source.
    """

    def find_class(self, module, name):
        raise pickle.UnpicklingError(
            f"code table must not reference {module}.{name}"
        )


def decode(encoding: str, dictionary: dict[str, str]) -> str:
    """Decode a string of '0'/'1' characters using a prefix-code table.

    Args:
        encoding: The bit string to decode, one character per bit.
        dictionary: Mapping from symbol to its code word (symbol -> bits),
            i.e. the same table that the encoder returned.

    Returns:
        The decoded text.

    Raises:
        ValueError: If the bit string ends in the middle of a code word,
            which means the data or the table is corrupt.
    """
    reverse_dict = {code: symbol for symbol, code in dictionary.items()}

    decoded_letters = []
    current_code = ""
    for bit in encoding:
        current_code += bit
        if current_code in reverse_dict:
            decoded_letters.append(reverse_dict[current_code])
            current_code = ""

    # Leftover bits cannot belong to a valid stream; fail loudly instead of
    # silently returning truncated text.
    if current_code:
        raise ValueError(
            f"trailing bits {current_code!r} do not match any code word"
        )

    return "".join(decoded_letters)


def decode_file(input_file: str, output_file: str):
    """Decompress *input_file* into the UTF-8 text file *output_file*.

    The input layout is: one byte with the number of padding bits, a
    native-order unsigned int (``struct`` format ``"I"``) holding the size
    of the pickled code table, the pickled table, then the packed bits.

    Args:
        input_file: Path of the compressed file.
        output_file: Path of the text file to create or overwrite.

    Raises:
        ValueError: If the padding byte is larger than 7, the table is not
            a dict, or the bit stream does not decode cleanly.
        pickle.UnpicklingError: If the table tries to reference a global.
        struct.error: If the header is shorter than expected.
    """
    with open(input_file, "rb") as f:
        padding = struct.unpack("B", f.read(1))[0]
        table_len = struct.unpack("I", f.read(4))[0]
        table = f.read(table_len)
        encoded_bytes = f.read()

    # Padding only fills up the last byte, so it can never reach 8.
    if padding > 7:
        raise ValueError(f"invalid padding length: {padding}")

    # SECURITY: the table is pickle data read from disk. It is loaded with
    # a restricted unpickler so a crafted file cannot import or call code.
    codes_table = _TableUnpickler(io.BytesIO(table)).load()
    if not isinstance(codes_table, dict):
        raise ValueError("code table is not a dictionary")

    code = "".join(f"{byte:08b}" for byte in encoded_bytes)
    if padding > 0:
        code = code[:-padding]

    decoded_text = decode(code, codes_table)

    with open(output_file, "w", encoding="utf-8") as file:
        file.write(decoded_text)
import heapq
import pickle
import struct
from collections import Counter


class TreeNode:
    """Node of the Huffman tree.

    Leaves carry a single symbol in ``value``; internal nodes carry the
    concatenation of their children's values and have both children set.
    """

    def __init__(self, value: str):
        self.left: None | TreeNode = None
        self.right: None | TreeNode = None
        self.value = value


def sorted_insert(lst: list, value, key):
    """Insert *value* into the ascending list *lst*, keeping it sorted.

    The value is placed before the first element whose key is strictly
    greater, i.e. after any elements with an equal key. If no such element
    exists (including the empty list) the value is appended.

    Args:
        lst: List sorted ascending by ``key``; modified in place.
        value: Item to insert.
        key: One-argument function returning the sort key of an item.
    """
    value_key = key(value)
    for idx, item in enumerate(lst):
        if key(item) > value_key:
            lst.insert(idx, value)
            return
    # Nothing is larger: the value belongs at the very end.
    lst.append(value)


def encode(inp: str) -> tuple[str, dict[str, str]]:
    """Huffman-encode *inp*.

    Args:
        inp: Text to encode.

    Returns:
        A ``(bits, table)`` tuple: ``bits`` is a string of '0'/'1'
        characters and ``table`` maps each symbol to its code word.
        Empty input yields ``("", {})``. Input with a single distinct
        symbol assigns that symbol the one-bit code ``"0"`` so the length
        of the text stays recoverable.
    """
    if not inp:
        return ("", {})

    frequencies = Counter(inp)

    # Heap entries are (frequency, sequence, node). The unique sequence
    # number breaks ties deterministically (older entries first) and keeps
    # TreeNode objects from ever being compared.
    heap = [
        (count, seq, TreeNode(symbol))
        for seq, (symbol, count) in enumerate(frequencies.items())
    ]
    heapq.heapify(heap)
    next_seq = len(heap)

    while len(heap) > 1:
        count1, _, first = heapq.heappop(heap)
        count2, _, second = heapq.heappop(heap)
        parent = TreeNode(first.value + second.value)
        parent.left = first
        parent.right = second
        heapq.heappush(heap, (count1 + count2, next_seq, parent))
        next_seq += 1

    root = heap[0][2]

    # Iterative pre-order walk (left before right) collecting code words.
    dictionary = {}
    stack = [(root, "")]
    while stack:
        node, prefix = stack.pop()
        if node.left is None and node.right is None:
            # The prefix is empty only when the root itself is a leaf.
            dictionary[node.value] = prefix or "0"
            continue
        if node.right is not None:
            stack.append((node.right, prefix + "1"))
        if node.left is not None:
            stack.append((node.left, prefix + "0"))

    output_string = "".join(dictionary[ch] for ch in inp)
    return (output_string, dictionary)


def encode_file(input_file: str, output_file: str):
    """Compress the UTF-8 text file *input_file* into *output_file*.

    The output layout is: one byte with the number of padding bits, a
    native-order unsigned int (``struct`` format ``"I"``) holding the size
    of the pickled code table, the pickled table, then the packed bits.

    Args:
        input_file: Path of the text file to compress.
        output_file: Path of the binary file to create or overwrite.
    """
    with open(input_file, "r", encoding="utf-8") as file:
        text = file.read()

    code, dictionary = encode(text)

    # Zero bits needed to fill the last byte (0 when already aligned).
    padding = (-len(code)) % 8
    code += "0" * padding

    encoded_bytes = bytes(
        int(code[i:i + 8], 2) for i in range(0, len(code), 8)
    )

    # NOTE: the table is stored with pickle; readers must not unpickle
    # files from untrusted sources without restricting globals.
    table = pickle.dumps(dictionary)

    with open(output_file, "wb") as file:
        file.write(struct.pack("B", padding))
        file.write(struct.pack("I", len(table)))
        file.write(table)
        file.write(encoded_bytes)
import os
import tempfile

from encoding import encode_file
from decoding import decode_file


def make_file(str):
    """Create a UTF-8 text file at path *str* containing 'Hello, World!'.

    The parameter name shadows the builtin ``str``; it is kept unchanged
    so existing callers are not affected.
    """
    with open(str, 'w', encoding='utf-8') as file:
        file.write('Hello, World!')


def test_unit():
    """Check that encoding then decoding a file reproduces its contents."""
    # Work in a throw-away directory so the test neither overwrites tracked
    # files nor leaves artifacts in the current working directory.
    with tempfile.TemporaryDirectory() as tmp_dir:
        origin_path = os.path.join(tmp_dir, 'origin_file.txt')
        encoded_path = os.path.join(tmp_dir, 'encoding_file.txt')
        decoded_path = os.path.join(tmp_dir, 'decoding_file.txt')

        make_file(origin_path)
        encode_file(origin_path, encoded_path)
        decode_file(encoded_path, decoded_path)

        # Read with an explicit encoding and close the handles promptly.
        with open(origin_path, encoding='utf-8') as file:
            origin_file = file.read()
        with open(decoded_path, encoding='utf-8') as file:
            decoding_file = file.read()

    assert origin_file == decoding_file