-
Notifications
You must be signed in to change notification settings - Fork 0
Домашнее задание 9. Код Хаффмана. Разгуляева А.И. #8
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Open
ada1ra
wants to merge
2
commits into
main
Choose a base branch
from
hw_9_huffman
base: main
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
Open
Changes from all commits
Commits
Show all changes
2 commits
Select commit
Hold shift + click to select a range
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,78 @@ | ||
| # ruff.toml | ||
| target-version = "py311" | ||
| line-length = 100 | ||
|
|
||
| # включить все основные правила | ||
| select = [ | ||
| "E", # pycodestyle errors | ||
| "W", # pycodestyle warnings | ||
| "F", # Pyflakes | ||
| "I", # isort | ||
| "N", # pep8-naming | ||
| "UP", # pyupgrade | ||
| "YTT", # flake8-2020 | ||
| "S", # flake8-bandit | ||
| "A", # flake8-builtins | ||
| "COM", # flake8-commas | ||
| "C4", # flake8-comprehensions | ||
| "DTZ", # flake8-datetimez | ||
| "T10", # flake8-debugger | ||
| "EM", # flake8-errmsg | ||
| "EXE", # flake8-executable | ||
| "ISC", # flake8-implicit-str-concat | ||
| "ICN", # flake8-import-conventions | ||
| "G", # flake8-logging-format | ||
| "INP", # flake8-no-pep420 | ||
| "PIE", # flake8-pie | ||
| "T20", # flake8-print | ||
| "PYI", # flake8-pyi | ||
| "PT", # flake8-pytest-style | ||
| "Q", # flake8-quotes | ||
| "RSE", # flake8-raise | ||
| "RET", # flake8-return | ||
| "SLF", # flake8-self | ||
| "SIM", # flake8-simplify | ||
| "TID", # flake8-tidy-imports | ||
| "TCH", # flake8-type-checking | ||
| "INT", # flake8-gettext | ||
| "ARG", # flake8-unused-arguments | ||
| "FBT", # flake8-boolean-trap | ||
| "B", # flake8-bugbear | ||
| "AIR", # flake8-airflow | ||
| "PERF", # flake8-perflint | ||
| ] | ||
|
|
||
| # игнорировать правила | ||
| ignore = [ | ||
| "E501", # line too long - handled by formatter | ||
| "S101", # assert used - ok in tests | ||
| "T201", # print found - sometimes needed | ||
| "COM812", # trailing comma missing - not always required | ||
| ] | ||
|
|
||
| # настройки для конкретных файлов | ||
| [per-file-ignores] | ||
| "__init__.py" = ["F401"] # Unused imports allowed in __init__.py | ||
| "tests/**" = ["S101", "SLF001"] # Allow assert and private-member access in tests | ||
| "**/migrations/**" = ["ALL"] # Ignore all in migrations | ||
|
|
||
| # настройки форматтера | ||
| [format] | ||
| indent-style = "space" | ||
| quote-style = "double" | ||
| skip-magic-trailing-comma = false | ||
| line-ending = "auto" | ||
|
|
||
| # настройки для конкретных правил | ||
| [flake8-quotes] | ||
| docstring-quotes = "double" | ||
| inline-quotes = "double" | ||
|
|
||
| [flake8-tidy-imports] | ||
| ban-relative-imports = "all" | ||
|
|
||
| [isort] | ||
| known-first-party = ["myapp"] | ||
| lines-after-imports = 2 | ||
| combine-as-imports = true | ||
| split-on-trailing-comma = true |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,180 @@ | ||
def huffman_encode(msg: str):
    """Encode a text with Huffman coding.

    Returns a tuple ``(bits, table)`` where ``bits`` is a string of '0'/'1'
    characters and ``table`` maps every distinct character of ``msg`` to its
    code. An empty ``msg`` yields ``("", {})``.
    """
    if not msg:
        return "", {}

    # Occurrence count per character, in order of first appearance.
    counts = {}
    for symbol in msg:
        counts[symbol] = counts.get(symbol, 0) + 1

    # Leaves are [weight, symbol]; internal nodes are [weight, left, right].
    forest = [[weight, symbol] for symbol, weight in counts.items()]

    # Repeatedly merge the two lightest trees until a single root remains.
    while len(forest) > 1:
        # The sort is stable, so equally weighted trees keep their relative order.
        forest.sort(key=lambda entry: entry[0])
        lightest, second = forest[0], forest[1]
        del forest[:2]
        forest.append([lightest[0] + second[0], lightest, second])

    # Walk the tree depth-first, left branch before right, assigning codes.
    huffman_table = {}
    pending = [(forest[0], "")]
    while pending:
        node, prefix = pending.pop()
        if len(node) == 2:  # leaf
            huffman_table[node[1]] = prefix
            continue
        _, zero_branch, one_branch = node
        # Push the right branch first so the left one is popped (visited) first.
        pending.append((one_branch, prefix + "1"))
        pending.append((zero_branch, prefix + "0"))

    # A lone symbol sits at the root and would get an empty code; give it "0".
    if len(huffman_table) == 1:
        only_symbol = next(iter(huffman_table))
        huffman_table[only_symbol] = "0"

    bits = "".join(huffman_table[symbol] for symbol in msg)
    return bits, huffman_table
|
|
||
|
|
||
def huffman_decode(encoded: str, table: dict):
    """Decode a Huffman bit string using a character -> code table.

    Returns the decoded text, or "" when either argument is empty.
    Trailing bits that do not complete a code are ignored.
    """
    if not (encoded and table):
        return ""

    # Invert the table: code -> character.
    symbol_by_code = {}
    for symbol, bits in table.items():
        symbol_by_code[bits] = symbol

    pieces = []
    start = 0
    # Grow the window [start:stop) one bit at a time until it matches a code.
    for stop in range(1, len(encoded) + 1):
        candidate = encoded[start:stop]
        if candidate in symbol_by_code:
            pieces.append(symbol_by_code[candidate])
            start = stop

    return "".join(pieces)
|
|
||
|
|
||
def encode_to_file(msg: str, filename: str):
    """Huffman-encode ``msg`` and save it to ``filename`` as plain text.

    File layout: one "symbol:code" line per table entry, a "---" separator
    line, then the encoded bit string.
    """
    # Characters that would break the line-based layout, and their escapes.
    escapes = {'\n': '\\n', '\t': '\\t', '\\': '\\\\'}

    bits, codes = huffman_encode(msg)
    table_lines = [
        f"{escapes.get(symbol, symbol)}:{code}\n" for symbol, code in codes.items()
    ]

    with open(filename, 'w', encoding='utf-8') as out:
        out.writelines(table_lines)
        # Separator between the table and the encoded text.
        out.write("---\n")
        out.write(bits)
|
|
||
|
|
||
def decode_from_file(filename: str):
    """Decode text from a file in the plain-text Huffman format.

    Expected layout: "symbol:code" table lines, a "---" separator line, then
    the encoded bit string. The symbols newline, tab and backslash are stored
    escaped as ``\\n``, ``\\t`` and ``\\\\``.

    Returns the decoded text ("" when the table or the bit string is empty,
    or when the separator line is missing).
    """
    # Escape sequences used in the table section, mapped back to the real character.
    unescape = {'\\n': '\n', '\\t': '\t', '\\\\': '\\'}

    # newline='\n' turns off universal-newline translation, so a raw '\r'
    # stored as a symbol is not mistaken for a line break.
    with open(filename, 'r', encoding='utf-8', newline='\n') as f:
        lines = f.readlines()

    table = {}
    payload_start = len(lines) + 1  # no separator found -> empty payload
    for index, raw_line in enumerate(lines):
        if raw_line.strip() == '---':
            payload_start = index + 1
            break
        # Remove only the line terminator: whitespace symbols such as ' '
        # must survive, so a full strip() would corrupt the table.
        line = raw_line.rstrip('\r\n')
        if ':' not in line:
            continue
        # Codes consist of '0'/'1' only, so the LAST ':' is always the
        # delimiter, even when the symbol itself is ':'.
        char_escaped, code = line.rsplit(':', 1)
        table[unescape.get(char_escaped, char_escaped)] = code

    encoded = ''.join(lines[payload_start:]).strip()

    return huffman_decode(encoded, table)
|
|
||
|
|
||
| # Простая бинарная версия | ||
def encode_to_file_bin(msg: str, filename: str):
    """Huffman-encode ``msg`` and save it to ``filename`` in binary form.

    File layout (all integers are 4-byte big-endian):
      1. length in bytes of the serialized table,
      2. the table as UTF-8 text "codepoint:code;" per symbol,
      3. number of meaningful bits in the payload (padding excluded),
      4. the payload bits packed into bytes, zero-padded to a whole byte.
    """
    encoded, table = huffman_encode(msg)

    # Symbols are stored by code point so special characters cannot clash
    # with the ':' and ';' delimiters.
    table_data = "".join(f"{ord(char)}:{code};" for char, code in table.items())
    table_bytes = table_data.encode('utf-8')

    # Record the bit count BEFORE padding: the reader truncates to this
    # length, which is the only way it can tell padding from real data.
    bit_length = len(encoded)
    padded = encoded + '0' * (-bit_length % 8)
    encoded_bytes = bytes(
        int(padded[i:i + 8], 2) for i in range(0, len(padded), 8)
    )

    with open(filename, 'wb') as f:
        f.write(len(table_bytes).to_bytes(4, 'big'))
        f.write(table_bytes)
        f.write(bit_length.to_bytes(4, 'big'))
        f.write(encoded_bytes)
|
|
||
|
|
||
def decode_from_file_bin(filename: str):
    """Decode text from a file in the binary Huffman format.

    Reads a 4-byte big-endian table length, the "codepoint:code;" table,
    a 4-byte big-endian bit count, and the packed payload bytes.
    """
    with open(filename, 'rb') as src:
        header_size = int.from_bytes(src.read(4), 'big')
        header = src.read(header_size).decode('utf-8')
        bit_count = int.from_bytes(src.read(4), 'big')
        payload = src.read()

    # Rebuild the character -> code table from "codepoint:code" entries.
    table = {}
    for entry in header.split(';'):
        codepoint, delimiter, code = entry.partition(':')
        if delimiter and codepoint:
            table[chr(int(codepoint))] = code

    # Expand every byte to 8 bits, then keep only the recorded number of bits.
    all_bits = ''.join(format(octet, '08b') for octet in payload)
    return huffman_decode(all_bits[:bit_count], table)
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
лучше сделать через словарь или switch case