import heapq
import os


class HuffmanCoding:
    """Huffman-code the text content of the file at *path*.

    Tree-building half of the coder: frequency counting, the min-heap of
    nodes, pairwise merging, and code assignment. The encode/decode and
    file-facing methods follow later in the class.
    """

    def __init__(self, path):
        self.path = path           # source text file to compress
        self.heap = []             # min-heap of HeapNode, keyed on frequency
        self.codes = {}            # char -> bit-string code
        self.reverse_mapping = {}  # bit-string code -> char (for decoding)

    class HeapNode:
        """One Huffman-tree node: a character (None for internal nodes) and its frequency."""

        def __init__(self, char, freq):
            self.char = char
            self.freq = freq
            self.left = None
            self.right = None

        def __lt__(self, other):
            # heapq orders nodes by frequency only.
            return self.freq < other.freq

        def __eq__(self, other):
            # BUGFIX: the original wrote ``isinstance(other, HeapNode)``; the
            # bare name ``HeapNode`` is not resolvable from inside a nested
            # class's method (class bodies are not enclosing scopes), so any
            # equality test against another node raised NameError at call
            # time. Compare against our own type instead.
            if other is None or not isinstance(other, type(self)):
                return False
            return self.freq == other.freq

    def make_frequency_dict(self, text):
        """Return a {character: occurrence count} mapping for *text*."""
        frequency = {}
        for character in text:
            frequency[character] = frequency.get(character, 0) + 1
        return frequency

    def make_heap(self, frequency):
        """Push one HeapNode per distinct character onto the min-heap."""
        for char, freq in frequency.items():
            heapq.heappush(self.heap, self.HeapNode(char, freq))

    def merge_nodes(self):
        """Repeatedly merge the two lowest-frequency nodes until one root remains."""
        while len(self.heap) > 1:
            node1 = heapq.heappop(self.heap)
            node2 = heapq.heappop(self.heap)
            merged = self.HeapNode(None, node1.freq + node2.freq)
            merged.left = node1
            merged.right = node2
            heapq.heappush(self.heap, merged)

    def make_codes_helper(self, root, current_code):
        """Walk the tree, assigning '0' for a left edge and '1' for a right edge.

        Fills both ``self.codes`` (encode direction) and
        ``self.reverse_mapping`` (decode direction).
        """
        if root is None:
            return
        if root.char is not None:
            # Leaf. EDGE-CASE FIX: when the text holds a single distinct
            # character the root itself is a leaf and current_code would be
            # "" — an empty code that encodes to nothing. Use "0" instead.
            code = current_code or "0"
            self.codes[root.char] = code
            self.reverse_mapping[code] = root.char
            return
        self.make_codes_helper(root.left, current_code + "0")
        self.make_codes_helper(root.right, current_code + "1")
def make_codes(self): + root = heapq.heappop(self.heap) + current_code = "" + self.make_codes_helper(root, current_code) + + + def get_encoded_text(self, text): + encoded_text = "" + for character in text: + encoded_text += self.codes[character] + return encoded_text + + + def pad_encoded_text(self, encoded_text): + extra_padding = 8 - len(encoded_text) % 8 + for i in range(extra_padding): + encoded_text += "0" + + padded_info = "{0:08b}".format(extra_padding) + encoded_text = padded_info + encoded_text + return encoded_text + + + def get_byte_array(self, padded_encoded_text): + if(len(padded_encoded_text) % 8 != 0): + print("Encoded text not padded properly") + exit(0) + + b = bytearray() + for i in range(0, len(padded_encoded_text), 8): + byte = padded_encoded_text[i:i+8] + b.append(int(byte, 2)) + return b + + + def compress(self): + filename, file_extension = os.path.splitext(self.path) + output_path = filename + ".bin" + + with open(self.path, 'r+') as file, open(output_path, 'wb') as output: + text = file.read() + text = text.rstrip() + + frequency = self.make_frequency_dict(text) + self.make_heap(frequency) + self.merge_nodes() + self.make_codes() + + encoded_text = self.get_encoded_text(text) + padded_encoded_text = self.pad_encoded_text(encoded_text) + + b = self.get_byte_array(padded_encoded_text) + output.write(bytes(b)) + + print("Compressed") + return output_path + + + """ functions for decompression: """ + + + def remove_padding(self, padded_encoded_text): + padded_info = padded_encoded_text[:8] + extra_padding = int(padded_info, 2) + + padded_encoded_text = padded_encoded_text[8:] + encoded_text = padded_encoded_text[:-1*extra_padding] + + return encoded_text + + def decode_text(self, encoded_text): + current_code = "" + decoded_text = "" + + for bit in encoded_text: + current_code += bit + if(current_code in self.reverse_mapping): + character = self.reverse_mapping[current_code] + decoded_text += character + current_code = "" + + return decoded_text 
+ + + def decompress(self, input_path): + filename, file_extension = os.path.splitext(self.path) + output_path = filename + "_decompressed" + ".txt" + + with open(input_path, 'rb') as file, open(output_path, 'w') as output: + bit_string = "" + + byte = file.read(1) + while(len(byte) > 0): + byte = ord(byte) + bits = bin(byte)[2:].rjust(8, '0') + bit_string += bits + byte = file.read(1) + + encoded_text = self.remove_padding(bit_string) + + decompressed_text = self.decode_text(encoded_text) + + output.write(decompressed_text) + + print("Decompressed") + return output_path diff --git a/Assignment 2/2020BTEIT00002-CA-2/ORIGINAL.jpg b/Assignment 2/2020BTEIT00002-CA-2/ORIGINAL.jpg new file mode 100644 index 0000000..0b7f9dd Binary files /dev/null and b/Assignment 2/2020BTEIT00002-CA-2/ORIGINAL.jpg differ diff --git a/Assignment 2/2020BTEIT00002-CA-2/Observation.txt b/Assignment 2/2020BTEIT00002-CA-2/Observation.txt new file mode 100644 index 0000000..2e4e791 --- /dev/null +++ b/Assignment 2/2020BTEIT00002-CA-2/Observation.txt @@ -0,0 +1,10 @@ + Huffman Encoding Algorithm: + + The encoding performance for the Huffman is on the number of various bits present inside the image. + + The general Time Complexity of the algorithm is O(nlogn), where n is number of different nodes present inside the Huffman tree. 
"""Colour-quantize an image with KMeans: cluster pixel colours into 6
centroids and redraw every pixel as its centroid colour."""

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
from sklearn.utils import shuffle


# BUGFIX: the image committed alongside this script is ORIGINAL.jpg; the
# original code opened "ORIGINAL.jpeg", which does not exist and raised
# FileNotFoundError before anything else ran.
img = plt.imread("ORIGINAL.jpg")
plt.imshow(img)
plt.axis('off')
plt.show()

print(img.shape)
print(img.size)

# Flatten the (w, h, d) pixel grid into a (w*h, d) sample matrix.
w, h, d = img.shape
image_array = img.reshape(w * h, d)
print(image_array.shape)

# Normalise channel values into the range [0, 1].
image_array = image_array / 255

# Fit the model on a small shuffled sub-sample of the pixels for speed.
image_array_sample = shuffle(image_array, random_state=1)[:1000]

kmeans = KMeans(n_clusters=6, random_state=1)
kmeans.fit(image_array_sample)

# Assign every pixel of the full image to its nearest centroid.
labels = kmeans.predict(image_array)

print(kmeans.cluster_centers_)
c = kmeans.cluster_centers_


def recreate_image(c, labels, w, h, d):
    """Rebuild a (w, h, d) image where each pixel is its cluster's centroid colour.

    Vectorized fancy indexing replaces the original per-pixel double loop;
    ``c[labels]`` yields one centroid row per pixel, in the same row-major
    order the image was flattened in.
    """
    return c[labels].reshape(w, h, d)


plt.figure(1)
plt.axis('off')
plt.title("original")
plt.imshow(img)
plt.show()

plt.figure(2)
plt.axis('off')
plt.title("reduced")
plt.imshow(recreate_image(c, labels, w, h, d))
plt.show()