Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
176 changes: 176 additions & 0 deletions Assignment 2/2020BTEIT00002-CA-2/HuffmanEncoding.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,176 @@
import heapq
import os



class HuffmanCoding:
    """Compress a text file with Huffman coding and restore it again.

    The code table lives only in memory (``self.codes`` and
    ``self.reverse_mapping``); it is not written into the ``.bin`` output.
    ``decompress`` therefore only works on an instance whose tables were
    already filled by ``compress`` (or by ``make_codes``).

    Output layout: one header byte holding the number of padding bits,
    followed by the encoded bit stream zero-padded to whole bytes.
    """

    def __init__(self, path):
        """Remember the input file *path* and start with empty tables."""
        self.path = path
        # Min-heap of HeapNode objects, ordered by frequency.
        self.heap = []
        # character -> bit string
        self.codes = {}
        # bit string -> character
        self.reverse_mapping = {}

    class HeapNode:
        """Node of the Huffman tree; leaves carry a character, inner nodes None."""

        def __init__(self, char, freq):
            self.char = char
            self.freq = freq
            self.left = None
            self.right = None

        # Comparators: nodes are ordered and compared by frequency only.
        def __lt__(self, other):
            return self.freq < other.freq

        def __eq__(self, other):
            # The bare name ``HeapNode`` is not visible from inside a nested
            # class's methods, so the class is reached through its owner.
            if not isinstance(other, HuffmanCoding.HeapNode):
                return False
            return self.freq == other.freq

    # functions for compression:

    def make_frequency_dict(self, text):
        """Return a dict mapping each character of *text* to its count."""
        frequency = {}
        for character in text:
            frequency[character] = frequency.get(character, 0) + 1
        return frequency

    def make_heap(self, frequency):
        """Push one leaf node per entry of *frequency* onto ``self.heap``."""
        for key in frequency:
            node = self.HeapNode(key, frequency[key])
            heapq.heappush(self.heap, node)

    def merge_nodes(self):
        """Merge the two lowest-frequency nodes until at most one remains."""
        while len(self.heap) > 1:
            node1 = heapq.heappop(self.heap)
            node2 = heapq.heappop(self.heap)

            merged = self.HeapNode(None, node1.freq + node2.freq)
            merged.left = node1
            merged.right = node2

            heapq.heappush(self.heap, merged)

    def make_codes_helper(self, root, current_code):
        """Walk the tree below *root*, recording a code for every leaf.

        ``"0"`` is appended for a left branch and ``"1"`` for a right branch.
        """
        if root is None:
            return

        if root.char is not None:
            self.codes[root.char] = current_code
            self.reverse_mapping[current_code] = root.char
            return

        self.make_codes_helper(root.left, current_code + "0")
        self.make_codes_helper(root.right, current_code + "1")

    def make_codes(self):
        """Pop the tree root from the heap and fill both code tables.

        An empty heap (empty input) leaves the tables untouched. When the
        root is itself a leaf (input with a single distinct character) the
        character is given the code ``"0"``; an empty code would encode the
        whole text to nothing and make it impossible to decode.
        """
        if not self.heap:
            return

        root = heapq.heappop(self.heap)
        start_code = "0" if root.char is not None else ""
        self.make_codes_helper(root, start_code)

    def get_encoded_text(self, text):
        """Return *text* as one bit string using ``self.codes``.

        Raises:
            KeyError: if *text* contains a character without a code.
        """
        return "".join(self.codes[character] for character in text)

    def pad_encoded_text(self, encoded_text):
        """Zero-pad *encoded_text* to whole bytes and prepend the pad count.

        The pad count is stored as an 8-bit binary prefix. It is always in
        the range 1..8: a stream that is already byte-aligned still receives
        a full padding byte. This is kept as-is so the file format does not
        change.
        """
        extra_padding = 8 - len(encoded_text) % 8
        padded_info = "{0:08b}".format(extra_padding)
        return padded_info + encoded_text + "0" * extra_padding

    def get_byte_array(self, padded_encoded_text):
        """Pack a bit string whose length is a multiple of 8 into a bytearray.

        Raises:
            ValueError: if the length is not a multiple of 8.
        """
        if len(padded_encoded_text) % 8 != 0:
            # Raise instead of exit(0): exiting with status 0 would report
            # success to the caller's shell.
            raise ValueError("Encoded text not padded properly")

        return bytearray(
            int(padded_encoded_text[i:i + 8], 2)
            for i in range(0, len(padded_encoded_text), 8)
        )

    def compress(self):
        """Compress ``self.path`` into ``<name>.bin`` and return that path.

        NOTE: trailing whitespace of the input is stripped before encoding,
        so a later ``decompress`` does not reproduce it.
        """
        filename, file_extension = os.path.splitext(self.path)
        output_path = filename + ".bin"

        # Read-only access is enough for the input file.
        with open(self.path, 'r') as file:
            text = file.read()
        text = text.rstrip()

        frequency = self.make_frequency_dict(text)
        self.make_heap(frequency)
        self.merge_nodes()
        self.make_codes()

        encoded_text = self.get_encoded_text(text)
        padded_encoded_text = self.pad_encoded_text(encoded_text)
        b = self.get_byte_array(padded_encoded_text)

        # The output is opened only once the payload is ready, so a failure
        # above does not leave an empty .bin file behind.
        with open(output_path, 'wb') as output:
            output.write(bytes(b))

        print("Compressed")
        return output_path

    # functions for decompression:

    def remove_padding(self, padded_encoded_text):
        """Strip the 8-bit pad-count prefix and the trailing padding bits.

        Raises:
            ValueError: if the input is shorter than the 8-bit prefix.
        """
        if len(padded_encoded_text) < 8:
            raise ValueError("Encoded text is missing its padding header")

        extra_padding = int(padded_encoded_text[:8], 2)
        encoded_text = padded_encoded_text[8:]
        # A pad count of 0 must not be sliced with [:-0], which would
        # return an empty string.
        if extra_padding:
            encoded_text = encoded_text[:-extra_padding]
        return encoded_text

    def decode_text(self, encoded_text):
        """Translate a bit string back to text using ``self.reverse_mapping``."""
        decoded = []
        current_code = ""

        for bit in encoded_text:
            current_code += bit
            if current_code in self.reverse_mapping:
                decoded.append(self.reverse_mapping[current_code])
                current_code = ""

        return "".join(decoded)

    def decompress(self, input_path):
        """Decode *input_path* into ``<name>_decompressed.txt``; return its path.

        ``<name>`` is derived from ``self.path``, not from *input_path*.
        Relies on the code tables already held by this instance.
        """
        filename, file_extension = os.path.splitext(self.path)
        output_path = filename + "_decompressed" + ".txt"

        with open(input_path, 'rb') as file:
            raw = bytearray(file.read())

        bit_string = "".join("{0:08b}".format(byte) for byte in raw)
        encoded_text = self.remove_padding(bit_string)
        decompressed_text = self.decode_text(encoded_text)

        with open(output_path, 'w') as output:
            output.write(decompressed_text)

        print("Decompressed")
        return output_path
Binary file added Assignment 2/2020BTEIT00002-CA-2/ORIGINAL.jpg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
10 changes: 10 additions & 0 deletions Assignment 2/2020BTEIT00002-CA-2/Observation.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
Huffman Encoding Algorithm:

The encoding performance of Huffman coding depends on the number of distinct symbol values present in the image.

The overall time complexity of the algorithm is O(n log n), where n is the number of distinct symbols (leaf nodes) in the Huffman tree.


compression ratio = (uncompressed image size / compressed image size)
= (2.36/ 1.25)
= 1.888
68 changes: 68 additions & 0 deletions Assignment 2/2020BTEIT00002-CA-2/imageCompression.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans


# Load and display the source image. The image added alongside this script
# in the same change is named ORIGINAL.jpg, so that is the name read here.
img = plt.imread("ORIGINAL.jpg")
plt.imshow(img)
plt.axis('off')
plt.show()

print(img.shape)
print(img.size)


# Flatten the image to one row per pixel with d channel values each.
# Unpacking into three names requires a 3-dimensional image array.
w, h, d = img.shape
image_array = img.reshape(w * h, d)
print(image_array.shape)

# Normalize into the range (0, 1).
# NOTE(review): dividing by 255 presumes 8-bit channel values - confirm
# if the input format ever changes.
image_array = image_array / 255


from sklearn.utils import shuffle
# Fit the model on a small shuffled sub-sample (1000 pixels) of the image
# instead of on every pixel.
image_array_sample = shuffle(image_array, random_state=1)[:1000]

kmeans = KMeans(n_clusters=6, random_state=1)
kmeans.fit(image_array_sample)

# Assign every pixel of the full image to one of the 6 clusters.
labels = kmeans.predict(image_array)

print(kmeans.cluster_centers_)
c = kmeans.cluster_centers_


# Recreate the image from the cluster centers and the per-pixel labels.

def recreate_image(c,labels,w,h,d):
    """Build a (w, h, d) image in which every pixel is its cluster's center.

    Args:
        c: array of cluster centers, indexed by label.
        labels: one label per pixel, in row-major order; only the first
            ``w * h`` entries are used.
        w, h, d: shape of the image to build.

    Returns:
        A float64 array of shape (w, h, d).

    Raises:
        IndexError: if fewer than ``w * h`` labels are supplied.
    """
    needed = w * h
    pixel_labels = np.asarray(labels)
    if pixel_labels.shape[0] < needed:
        raise IndexError(
            "need %d labels but only %d were given"
            % (needed, pixel_labels.shape[0])
        )

    # One vectorized lookup replaces the per-pixel Python loop.
    picked = np.asarray(c)[pixel_labels[:needed]]

    # Assigning into a zeros array keeps the float64 result type.
    image = np.zeros((w, h, d))
    image[...] = picked.reshape((w, h) + picked.shape[1:])
    return image

# Show the original image and the colour-reduced reconstruction in two
# separate figures. pyplot calls act on the current figure, so the order of
# the statements below matters.

# Figure 1: the image as loaded.
plt.figure(1)
plt.axis('off')
plt.title("original")
plt.imshow(img)
plt.show()
# Figure 2: every pixel replaced by the center of its k-means cluster.
plt.figure(2)
plt.axis('off')
plt.title("reduced")
plt.imshow(recreate_image(c,labels,w,h,d))
plt.show()