Skip to content
29 changes: 29 additions & 0 deletions .github/workflows/run_tests.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# CI workflow: lint the code with ruff and run the pytest suite on every push.
name: Python application

on: [push]

# The job only needs to read the repository contents.
permissions:
  contents: read

jobs:
  build:

    runs-on: ubuntu-latest

    steps:
    - uses: actions/checkout@v4
    - name: Set up Python 3.13
      # Use a current major version of the action, in line with checkout@v4.
      uses: actions/setup-python@v5
      with:
        python-version: "3.13"
    - name: Install dependencies
      run: |
        python -m pip install --upgrade pip
        pip install ruff
        pip install pytest
    - name: Lint with ruff
      run: |
        ruff check --output-format=github
    - name: Test with pytest
      run: |
        pytest
39 changes: 39 additions & 0 deletions haffman_code/code/decoding.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
import pickle
import struct

def decode(encoding: str, dictionary: dict[str, str]) -> str:
    """Turn a string of bits back into text using a symbol-to-code table.

    Args:
        encoding: The bit string to decode, one character per bit.
        dictionary: Mapping from each symbol to its bit-string code.

    Returns:
        The decoded symbols joined together. Bits at the end that do not
        complete any known code are ignored.
    """
    # Invert the table so a code looks up its symbol.
    symbol_by_code = {}
    for symbol, code in dictionary.items():
        symbol_by_code[code] = symbol

    symbols = []
    start = 0
    for end in range(1, len(encoding) + 1):
        candidate = encoding[start:end]
        if candidate in symbol_by_code:
            symbols.append(symbol_by_code[candidate])
            # The next code begins right after the one just matched.
            start = end

    return ''.join(symbols)



def decode_file(input_file: str, output_file: str):
    """Decode a file written by the encoder and save the text as UTF-8.

    The input is read as: one unsigned byte holding the number of padding
    bits, a native unsigned int holding the length of the pickled code
    table, the pickled table itself, and then the packed code bits.

    Args:
        input_file: Path of the binary file to decode.
        output_file: Path of the text file to create or overwrite.
    """
    with open(input_file, 'rb') as source:
        (pad_bits,) = struct.unpack('B', source.read(1))
        (table_size,) = struct.unpack('I', source.read(4))
        # NOTE(review): pickle.loads can execute arbitrary code from crafted
        # data, so only decode files that come from a trusted source.
        codes_table = pickle.loads(source.read(table_size))
        payload = source.read()

    bits = ''.join(format(byte, '08b') for byte in payload)

    # Drop the filler bits that were appended to complete the last byte.
    if pad_bits > 0:
        bits = bits[:-pad_bits]

    text = decode(bits, codes_table)

    with open(output_file, 'w', encoding="utf-8") as target:
        target.write(text)

78 changes: 78 additions & 0 deletions haffman_code/code/encoding.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
import pickle
import struct

class TreeNode:
    """Binary tree node holding a string value and two optional children."""

    def __init__(self, value: str):
        """Create a node with the given value and no children."""
        # Children start out unset; a node with neither child is a leaf.
        self.left: TreeNode | None = None
        self.right: TreeNode | None = None
        self.value = value

def sorted_insert(lst: list, value, key):
    """Insert *value* into *lst* in place, keeping it ordered by *key*.

    The value is placed before the first element whose key is strictly
    greater than its own, so it lands after any elements with an equal key.
    If no element has a greater key (including when the list is empty), the
    value is appended at the end.

    Args:
        lst: List to modify; expected to already be in ascending key order.
        value: Item to insert.
        key: One-argument callable returning the comparison key of an item.
    """
    value_key = key(value)
    # Default to the end so a value larger than everything is appended
    # rather than being placed before the last element.
    target_idx = len(lst)
    for i, item in enumerate(lst):
        if key(item) > value_key:
            target_idx = i
            break
    lst.insert(target_idx, value)

def encode(inp: str) -> tuple[str, dict[str, str]]:
    """Build a Huffman code for *inp* and encode the text with it.

    Args:
        inp: Text to encode.

    Returns:
        A ``(bits, dictionary)`` tuple. ``bits`` is the encoded text as a
        string of ``"0"``/``"1"`` characters, and ``dictionary`` maps each
        distinct symbol of *inp* to its code. Empty input gives
        ``("", {})``.
    """
    # Nothing to build a tree from; return an empty encoding and table.
    if not inp:
        return ("", {})

    frequencies = {}
    for symbol in inp:
        frequencies[symbol] = frequencies.get(symbol, 0) + 1

    # Ascending by count; sorted() is stable, so symbols with equal counts
    # keep their order of first appearance.
    nodes = [
        (TreeNode(symbol), count)
        for symbol, count in sorted(frequencies.items(), key=lambda item: item[1])
    ]

    # Repeatedly merge the two entries at the front of the list under a new
    # parent until a single root remains.
    while len(nodes) > 1:
        (left, left_count), (right, right_count) = nodes.pop(0), nodes.pop(0)
        parent = TreeNode(left.value + right.value)
        parent.left = left
        parent.right = right
        sorted_insert(nodes, (parent, left_count + right_count), lambda item: item[1])

    root = nodes[0][0]
    dictionary = {}

    if root.left is None and root.right is None:
        # Only one distinct symbol: the root is the leaf. Give it a one-bit
        # code, because an empty code would produce an empty encoding.
        dictionary[root.value] = "0"
    else:
        # Iterative depth-first walk; "0" marks a left edge, "1" a right one.
        stack = [(root, "")]
        while stack:
            node, prefix = stack.pop()
            if node.left is None and node.right is None:
                dictionary[node.value] = prefix
                continue
            # Push right first so the left subtree is visited first.
            if node.right is not None:
                stack.append((node.right, prefix + "1"))
            if node.left is not None:
                stack.append((node.left, prefix + "0"))

    output_string = "".join(dictionary[symbol] for symbol in inp)
    return (output_string, dictionary)

def encode_file(input_file: str, output_file: str):
    """Encode a UTF-8 text file and write the result as a binary file.

    The output is written as: one unsigned byte holding the number of
    padding bits, a native unsigned int holding the length of the pickled
    code table, the pickled table itself, and then the packed code bits.

    Args:
        input_file: Path of the UTF-8 text file to encode.
        output_file: Path of the binary file to create or overwrite.
    """
    with open(input_file, "r", encoding="utf-8") as file:
        text = file.read()

    code, dictionary = encode(text)

    # Number of zero bits needed to fill the last byte. A single modulo
    # (as suggested in review) already gives 0 when the length is a
    # multiple of 8, so no special case is needed.
    padding = -len(code) % 8
    code += '0' * padding

    # Pack each group of eight bits into one byte.
    encoded_bytes = bytearray(
        int(code[offset:offset + 8], 2) for offset in range(0, len(code), 8)
    )

    with open(output_file, "wb") as file:
        file.write(struct.pack("B", padding))
        table = pickle.dumps(dictionary)
        file.write(struct.pack("I", len(table)))
        file.write(table)
        file.write(encoded_bytes)


1 change: 1 addition & 0 deletions haffman_code/code/origin_file.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Hello, world!
14 changes: 14 additions & 0 deletions haffman_code/code/test_coding.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
from encoding import encode_file
from decoding import decode_file

def make_file(str):
    """Create or overwrite the UTF-8 text file at path *str* with a greeting."""
    with open(str, mode='w', encoding='utf-8') as handle:
        handle.write('Hello, World!')

def test_unit():
    """Check that encoding a file and decoding it again restores the text."""
    make_file('origin_file.txt')
    encode_file("origin_file.txt", "encoding_file.txt")
    decode_file("encoding_file.txt", "decoding_file.txt")

    # Read both files as UTF-8, the encoding they were written with, and
    # close them promptly instead of leaving the handles open.
    with open('origin_file.txt', encoding='utf-8') as file:
        origin_file = file.read()
    with open('decoding_file.txt', encoding='utf-8') as file:
        decoding_file = file.read()

    assert origin_file == decoding_file