From cf1ea47fab57df65cc8b0f84a3f79810bc1e9bbd Mon Sep 17 00:00:00 2001 From: ermmmaks Date: Sun, 16 Nov 2025 23:49:23 +0300 Subject: [PATCH 1/3] Add algorithm --- src/huffmans_code.py | 131 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 131 insertions(+) create mode 100644 src/huffmans_code.py diff --git a/src/huffmans_code.py b/src/huffmans_code.py new file mode 100644 index 0000000..2ec335f --- /dev/null +++ b/src/huffmans_code.py @@ -0,0 +1,131 @@ +import heapq +from collections import Counter, defaultdict + +class TreeNode: + def __init__(self, char, freq): + self.char = char + self.freq = freq + self.left = None + self.right = None + + def __lt__(self, other): + return self.freq < other.freq + +class Huffman: + + def __init__(self): + self.codes = {} + self.reversing = {} + + def build_frequency_dict(self, text): + return Counter(text) + + def build_heap(self, frequency): + heap = [] + for char, freq in frequency.items(): + node = heapq.heappop(heap) + heapq.heappush(heap, node) + return heap + + def build_tree(self, heap): + while len(heap) > 1: + node1 = heapq.heappop(heap) + node2 = heapq.heappop(heap) + + merged = Node(None, node1.frq + node2.freq) + merged.left = node1 + merget.right = node2 + + heapq.heappush(heap, merged) + + if heap: + return heap[0] + + return None + + def assigment(self, root, code): + if root is None: + return + + if root.char is not None: + self.codes[root.char] = code + self.reverse[code] = root.char + return + + self.assigment(root.left, code + '0') + self.assigment(root.left, code + '1') + + def building(sself, root): + self.codes = {} + self.reverse = {} + if root: + self.assigment(root, '') + + def get_encoded(self, text): + encoded = '' + for char in test: + encoded += self.codes[char] + return encoded + + def filling(self, encoded): + fill = 8 - len(encoded) % 8 + for i in range(fill): + encoded += '0' + + return encoded + + def get_byte(self, fill_text): + b = bytearray() + for i in range(len(fill_text), 8): + byte = fill_text[i:i+8] + b.append(int(byte, 2)) + return b + + def compress(self, text): + frequency = self.frequency_dict(text) + heap = self.build_heap(frequency) + root = self.build_tree(heap) + self.assigment(root) + + encoded_text = self.get_encoded(text) + filling_text = self.filling(encoded_text) + byte_array = self.get_byte(filling_text) + + return bytes(byte_array) + + def remove_filled(self, filling_text): + fill_info = filling_text[:8] + fill = int(fill_info, 2) + + filling_text = filling_text[:8] + if fill > 0: + encoded_text = filling_text[:-fill] + else: + encoded_text = filling_text + + return encoded_text + + def decode(self, encoded_text): + current_code = '' + decoded_text = '' + + for bit in encoded_text: + current_code += bit + if current_code in self.reverse: + char = self.reverse[current_code] + decoded_text += char + current_code = '' + + return decoded_text + + def decompress(self, compressed_text): + bit_string = '' + for byte in compressed_text: + bits = bin(byte)[2:].rqust(8, '0') + bit_string += bits + + encoded_text = self.remove_filled(bit_string) + decompressed = self.decode(encoded_text) + + return decompressed + From 88148f753c32f2893ce95c275048d3a7ec4d177a Mon Sep 17 00:00:00 2001 From: ermmmaks Date: Sun, 16 Nov 2025 23:54:51 +0300 Subject: [PATCH 2/3] a few corrections --- .../conftest.cpython-313-pytest-8.4.2.pyc | Bin 647 -> 0 bytes ...yramid_sort_test.cpython-313-pytest-8.4.2.pyc | Bin 5726 -> 0 bytes 2 files changed, 0 insertions(+), 0 deletions(-) delete mode 100644 test/__pycache__/conftest.cpython-313-pytest-8.4.2.pyc delete mode 100644 test/__pycache__/pyramid_sort_test.cpython-313-pytest-8.4.2.pyc diff --git a/test/__pycache__/conftest.cpython-313-pytest-8.4.2.pyc b/test/__pycache__/conftest.cpython-313-pytest-8.4.2.pyc deleted file mode 100644 index ea5218730eb79571e76b26e1d093e111c9f49bf0..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 647 zcmYLG&ui2`6n-;FlXkZY{b4H>DN(c%gbgZtkz#w46-(KmuvA%MH`|6blVv7L&8=d= zTMuIKqR^{16%YOy1wmvySg4o1X>;m$%)e6&rk06Nt@~H*=ypktKI;YB zalnKeU+yNYb9JfJO|mmRA$%SO5vy{4hX=7Q$Ie4|uRK=5W>t>2lExY3>Q|)M37*G2 z?q?%^fD~iE%PKRv7)I^hwttUbM5(?>Tu1juBZT|{*n^jqpHMj_=BxP^^M5HNv#ADj KB4d0OEB^yf@|lSM diff --git a/test/__pycache__/pyramid_sort_test.cpython-313-pytest-8.4.2.pyc b/test/__pycache__/pyramid_sort_test.cpython-313-pytest-8.4.2.pyc deleted file mode 100644 index 2d6952996e3022e213716ad52d34a54faffbb147..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 5726 zcmeHL-ESMm5#J@36vd-tS(aN#c5F&^T%VcTN(TV}Sb#W-w5*uq z5py&gIVhk41=@!`1xTR+4j?2!A-n%V{s=O%t>F6g&?mmRR-Xd&V`lemd6ZPhD*Vu( zU66A#bF;H^d%M4#xt*S#B*S**)91In2xEUCA!yw`v-34FpD>GA{5k_JuaUW^8%eV{ z0kJgQiCmA*bwSr$0uoY=UhkeuLQ?Ylx&cO(Sz?|;%8Cs$=s_utQm@s8(vhDS3+Z6c z_I2S*n-4fEVRdICS(atFh{duPOP1$QwvzK4sWGpll$3g;B&P7Ws9C+kY)Y{V`mG~4 z^08%Z_O(cN!;E-6H}9^Liv{=YO>mrgAl6pFuB;SI?#02b7M+SIfIPMmM507uNS5%g zTNiS4XQz&qpRha+?&SSg7@P8SReTnU&vU15k-zaCW6tP#-}qf#JkD5N1br9c0oT5c z;><5ba@>l(k54}3+gc(|!O6BD-)|tP>!_ZGH&IbM-7!k4)0U)BmeJEJi1| zltlh$FlixQ{eiB19c7Cm@D*H3xEE%475}x7WoCBY@RupjS_E1SKirjF@7W*&>&4o1ezSnD6S zIa&l+t*aPy>nd^UeJ=_2mj4fXG|j602jIL1xc--6F9zSkiG*`RUL|4@p&2m|`$i6d zeIvWDFXfh&oN7^_AM}u00%tfzzI}unC&d5}Ji8fuhsbw{3=$b4f(Isp6GToTX@d!j zqUaeC8r4Yf8fs`9_cursK%Cq%&NamBR~KfHHZIIIjoEGC|F%$LoYSIWo`_k32;ZdH zAu07!G5g?T*dmaytM*5}FSLi!=8wfJoTb2H@=)*+h_$7qdZ}neTmQt?qQ%+zJ91_e zE+P*TL?)4-LcS&9B^M~Pc1f>nG|@@bm#7UVRqZ3#8TFH>OTiAnB_T&~ zZ>@TvdWrJ0p`bps7D}7B=N)VUWSzkR=xaC@Teu3n&mux2EogbFeEa z2fJc+s~qeF!O4ubM}b$!J(dXwPn0{Ur08;3n0Dzk%A_c*fUI8v*}aweZA!l+B)~M< z5L&vAkgT!4C0S}{{rHw~x*^6krklptw(x&jVyt1D)}msbh%tgP-=x_gDfLq^wthTp z5lGl|ufNAMYS=m?X*C*2fLqc<9L7#rn=J5ti*$c1s`5Np$kyH+&d6d!mU&@>`dvo} zrT0V7n+hL6>42j7k|^;& zq?R?cLhZ@PJxG02BDMOs77w0wyJ|9PBW4vi(wuS%O z5}Af^N{fnlA~J;6zDct~QtGE7vpx{E2qf&f*WY6rK5QM5w7<;`e}RPlHpX>V&7emK z?&g4(2);SUq}>}TsIL?mHJkY5AOod)uu{h_4qD}P`8xTsya5-Mu*R3)3su!`mzRM~ z0kuRDSU%jM$p-p@;KkJl@~epa767t(prqdu6&vn&27VF2Zya@4xnGipC(w&|$Yjjw zm`%J@af)}#rMF=YZ_yd$t|56A<(&VSv7fVTHuFbz;`glQd6M-GuO**HIG=hRXMN|_ zQp8{5&r&Rs_{E8zo!I7sO+NT_EcMF|8e(XDVWWTjy-mL1Zq9FB+xWnX^?f|n5T`yH idi29a`ZA7ia(X8kjYR&M;Cy_i8@XqaOL3l+5&jGAYlZ^= From 284c7b5cf97c895bb16953fcbd43ab6cd7f37d4b Mon Sep 17 00:00:00 2001 From: ermmmaks Date: Mon, 17 Nov 2025 00:04:07 +0300 Subject: [PATCH 3/3] And more a few corrections --- src/huffmans_code.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/src/huffmans_code.py b/src/huffmans_code.py index 2ec335f..f1e22ce 100644 --- a/src/huffmans_code.py +++ b/src/huffmans_code.py @@ -1,5 +1,5 @@ import heapq -from collections import Counter, defaultdict +from collections import Counter class TreeNode: def __init__(self, char, freq): @@ -23,7 +23,7 @@ def build_frequency_dict(self, text): def build_heap(self, frequency): heap = [] for char, freq in frequency.items(): - node = heapq.heappop(heap) + node = TreeNode(char, freq) heapq.heappush(heap, node) return heap @@ -32,9 +32,9 @@ def build_tree(self, heap): node1 = heapq.heappop(heap) node2 = heapq.heappop(heap) - merged = Node(None, node1.frq + node2.freq) + merged = TreeNode(None, node1.freq + node2.freq) merged.left = node1 - merget.right = node2 + merged.right = node2 heapq.heappush(heap, merged) @@ -49,13 +49,13 @@ def assigment(self, root, code): if root.char is not None: self.codes[root.char] = code - self.reverse[code] = root.char + self.reversing[code] = root.char return self.assigment(root.left, code + '0') self.assigment(root.left, code + '1') - def building(sself, root): + def building(self, root): self.codes = {} self.reverse = {} if root: @@ -63,7 +63,7 @@ def building(sself, root): def get_encoded(self, text): encoded = '' - for char in test: + for char in text: encoded += self.codes[char] return encoded @@ -76,7 +76,7 @@ def filling(self, encoded): def get_byte(self, fill_text): b = bytearray() - for i in range(len(fill_text), 8): + for i in range(0, len(fill_text), 8): byte = fill_text[i:i+8] b.append(int(byte, 2)) return b @@ -111,8 +111,8 @@ def decode(self, encoded_text): for bit in encoded_text: current_code += bit - if current_code in self.reverse: - char = self.reverse[current_code] + if current_code in self.reversing: + char = self.reversing[current_code] decoded_text += char current_code = '' @@ -121,7 +121,7 @@ def decode(self, encoded_text): def decompress(self, compressed_text): bit_string = '' for byte in compressed_text: - bits = bin(byte)[2:].rqust(8, '0') + bits = bin(byte)[2:].rjust(8, '0') bit_string += bits encoded_text = self.remove_filled(bit_string)