-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathhuffman.py
More file actions
138 lines (103 loc) · 3.72 KB
/
huffman.py
File metadata and controls
138 lines (103 loc) · 3.72 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
import copy
import heapq
from math import ceil, log
from collections import Counter
class HeapNode:
def __init__(self, key, value):
self.left = None
self.right = None
self.key = key
self.value = value
self.flag = None
def __gt__(self, node):
if node == None or not isinstance(node, HeapNode):
return -1
return self.value > node.value
def __lt__(self, node):
if node == None or not isinstance(node, HeapNode):
return -1
return self.value < node.value
class TreePath:
def __init__(self):
self.temp_dict_var = {}
def paths(self, root):
path = []
self.paths_rec(root, path, 0)
def paths_rec(self, root, path, path_len):
if root is None:
return
if len(path) > path_len:
path[path_len] = root.flag
else:
path.append(root.flag)
path_len += 1
if root.left is None and root.right is None:
self.temp_dict_var[root.key] = copy.deepcopy(
''.join(str(char) for char in path[1:]))
else:
self.paths_rec(root.left, path, path_len)
self.paths_rec(root.right, path, path_len)
class Compressor:
def __init__(self, orignal_data):
self.orignal_data = orignal_data
def freq_counter(self):
return(dict(Counter(self.orignal_data)))
def fixed_huffman_coding(self, freq_dict, length):
orignal_data = self.orignal_data
temp_dict, code_dict = {}, {}
counter = 0
for key in freq_dict:
temp_dict[key] = bin(counter)[2:].zfill(length)
code_dict[bin(counter)[2:].zfill(length)] = key
counter += 1
compressed_data = []
for value in orignal_data:
compressed_data.append(temp_dict[value])
return compressed_data, code_dict
def variable_huffman_coding(self, freq_dict):
freq_dict = {key: value for key, value in sorted(
freq_dict.items(), key=lambda item: item[1])}
heap = []
for key in freq_dict:
node = HeapNode(key, freq_dict[key])
heapq.heappush(heap, node)
counter = 0
while(len(heap) > 1):
node1 = heapq.heappop(heap)
node2 = heapq.heappop(heap)
node1.flag = 0
node2.flag = 1
merged = HeapNode(None, node1.value + node2.value)
merged.flag = (counter % 2)
counter += 1
merged.left, merged.right = node1, node2
heapq.heappush(heap, merged)
treePath = TreePath()
treePath.paths(heap[0])
temp_dict = treePath.temp_dict_var
compressed_data = []
for elem in self.orignal_data:
compressed_data.append(temp_dict[elem])
code_dict = {}
for key in temp_dict:
code_dict[temp_dict[key]] = key
return compressed_data, code_dict
def fixed_length_helper(self):
freq_dict = self.freq_counter()
max_code_length = ceil(log(len(freq_dict), 2))
compressed_data, code_dict = self.fixed_huffman_coding(
freq_dict, max_code_length)
return compressed_data, code_dict
def variable_length_helper(self):
freq_dict = self.freq_counter()
compressed_data, code_dict = self.variable_huffman_coding(freq_dict)
return compressed_data, code_dict
class Decompressor:
def __init__(self, compressed_data, code_dict):
self.compressed_data = compressed_data
self.code_dict = code_dict
def decompressor(self):
orginal_data = []
for value in self.compressed_data:
orginal_data.append(self.code_dict[value])
return orginal_data