1
1
import numpy as np
2
2
import matplotlib .pyplot as plt
3
+ from itertools import groupby
3
4
import time
4
5
from dahuffman import HuffmanCodec
5
6
6
7
7
8
def open_raw_image(file_path, image_width, image_height):
    """Load a headerless 8-bit raw image file as a 2D array.

    Args:
        file_path: Path of the raw file; it is opened in binary mode.
        image_width: Number of pixels per row.
        image_height: Number of rows.

    Returns:
        A ``(image_height, image_width)`` array of ``np.uint8``. The array is
        a view over the bytes read from disk (``np.frombuffer``), so it is
        read-only; copy it before modifying pixels in place.

    Raises:
        ValueError: If the bytes in the file cannot be reshaped to the
            requested dimensions.
    """
    # Read the whole raw file as binary
    with open(file_path, 'rb') as f:
        raw_data = f.read()

    # Interpret every byte as one pixel
    pixels = np.frombuffer(raw_data, dtype=np.uint8)

    # Reshape the flat pixel array to a 2D image; report a size mismatch with
    # the file name and both sizes instead of NumPy's generic reshape message.
    try:
        return pixels.reshape((image_height, image_width))
    except ValueError as err:
        raise ValueError(
            f"{file_path}: cannot view {pixels.size} bytes as a "
            f"{image_height}x{image_width} 8-bit image"
        ) from err
20
22
21
def split_image_into_blocks(image, num_blocks_height, num_blocks_width, block_size):
    """Cut a 2D image into a grid of square, non-overlapping blocks.

    Args:
        image: 2D array of pixels.
        num_blocks_height: Number of blocks along the vertical axis.
        num_blocks_width: Number of blocks along the horizontal axis.
        block_size: Side length of each block, in pixels.

    Returns:
        A new ``np.uint8`` array of shape
        ``(num_blocks_height, num_blocks_width, block_size, block_size)`` where
        element ``[i, j]`` is the block whose top-left pixel is
        ``image[i * block_size, j * block_size]``. Rows/columns beyond the
        covered area are ignored.

    Raises:
        ValueError: If the image is not 2D or is smaller than the requested
            grid of blocks.
    """
    covered_height = num_blocks_height * block_size
    covered_width = num_blocks_width * block_size

    # Keep only the area covered by whole blocks
    region = np.asarray(image)[:covered_height, :covered_width]
    if region.shape != (covered_height, covered_width):
        raise ValueError(
            f"image of shape {np.shape(image)} cannot be split into "
            f"{num_blocks_height}x{num_blocks_width} blocks of size {block_size}"
        )

    # (H*b, W*b) -> (H, b, W, b) -> (H, W, b, b): a single reshape replaces
    # the per-block Python loop.
    tiled = region.reshape(num_blocks_height, block_size, num_blocks_width, block_size)

    # astype always copies, so the blocks never alias the input image
    return tiled.swapaxes(1, 2).astype(np.uint8, order='C')
31
33
32
34
def merge_blocks_into_image (image_blocks ):
35
+
33
36
# Get the image dimensions
34
37
num_blocks_height , num_blocks_width , block_size , _ = image_blocks .shape
35
38
@@ -41,9 +44,8 @@ def merge_blocks_into_image(image_blocks):
41
44
image = np .zeros ((image_height , image_width ), dtype = np .uint8 )
42
45
43
46
# Merge the blocks into the image
44
- for i in range (num_blocks_height ):
45
- for j in range (num_blocks_width ):
46
- image [i * block_size :(i + 1 )* block_size , j * block_size :(j + 1 )* block_size ] = np .clip (image_blocks [i , j ], 0 , 255 )
47
+ for i , j in np .ndindex (num_blocks_height , num_blocks_width ):
48
+ image [i * block_size :(i + 1 )* block_size , j * block_size :(j + 1 )* block_size ] = np .clip (image_blocks [i , j ], 0 , 255 )
47
49
48
50
return image
49
51
@@ -120,64 +122,50 @@ def idct_2D(block):
120
122
return dct_block
121
123
122
124
def transform_coding(image_blocks, decimals=0):
    """Apply ``dct_2D`` to every block and round the resulting coefficients.

    Args:
        image_blocks: 4D array whose first two axes index the blocks and
            whose last two axes hold each block's samples.
        decimals: Number of decimal places kept by the final rounding.

    Returns:
        A float array with the same shape as ``image_blocks`` holding the
        rounded output of ``dct_2D`` for every block.
    """
    # Float buffer for the coefficients (np.zeros defaults to float64)
    dct_coefficients_blocks = np.zeros(image_blocks.shape)

    # Iterate over the block grid (first two axes)
    for i, j in np.ndindex(image_blocks.shape[:2]):
        # Calculate the DCT coefficients of the block
        dct_coefficients_blocks[i, j] = dct_2D(image_blocks[i, j])

    return np.round(dct_coefficients_blocks, decimals=decimals)
137
135
138
136
def inverse_transform_coding(dct_coefficients_blocks, decimals=0):
    """Apply ``idct_2D`` to every block and round the reconstructed samples.

    Args:
        dct_coefficients_blocks: 4D array whose first two axes index the
            blocks and whose last two axes hold each block's coefficients.
        decimals: Number of decimal places kept by the final rounding.

    Returns:
        A float array with the same shape as ``dct_coefficients_blocks``
        holding the rounded output of ``idct_2D`` for every block.
    """
    # Float buffer for the reconstructed image blocks
    image_blocks = np.zeros(dct_coefficients_blocks.shape)

    # Iterate over the block grid (first two axes)
    for i, j in np.ndindex(dct_coefficients_blocks.shape[:2]):
        # Reconstruct the block from its DCT coefficients
        image_blocks[i, j] = idct_2D(dct_coefficients_blocks[i, j])

    return np.round(image_blocks, decimals=decimals)
153
147
154
148
def quantization (dct_coefficients_blocks , quantization_matrix , decimals = 0 ):
155
- # Get the image dimensions
156
- num_blocks_height , num_blocks_width , block_height , block_width = dct_coefficients_blocks .shape
157
149
158
150
# Create an empty array to store the quantized blocks
159
- quantized_blocks = np .zeros (( num_blocks_height , num_blocks_width , block_height , block_width ) )
151
+ quantized_blocks = np .zeros (dct_coefficients_blocks . shape )
160
152
161
153
# Iterate over the blocks
162
- for i in range (num_blocks_height ):
163
- for j in range (num_blocks_width ):
164
- # Calculate the quantized coefficients
165
- quantized_blocks [i , j ] = dct_coefficients_blocks [i , j ] / quantization_matrix
154
+ for i , j in np .ndindex (dct_coefficients_blocks .shape [:2 ]):
155
+ # Calculate the quantized coefficients
156
+ quantized_blocks [i , j ] = dct_coefficients_blocks [i , j ] / quantization_matrix
166
157
167
158
return np .round (quantized_blocks , decimals = decimals )
168
159
169
160
def inverse_quantization(quantized_blocks, quantization_matrix, decimals=0):
    """Multiply every block by the quantization matrix and round the result.

    Args:
        quantized_blocks: 4D array whose first two axes index the blocks and
            whose last two axes hold each block's quantized coefficients.
        quantization_matrix: Array (or scalar) that broadcasts against a
            single block; every block is multiplied element-wise by it.
        decimals: Number of decimal places kept by the final rounding.

    Returns:
        A float64 array with the same shape as ``quantized_blocks``
        containing the rounded, rescaled DCT coefficients.
    """
    # Broadcasting applies the matrix over the two trailing (block) axes, so
    # no explicit loop over the block grid is needed. The float64 conversion
    # keeps the output dtype independent of the input dtype.
    quantized = np.asarray(quantized_blocks, dtype=np.float64)
    dct_coefficients_blocks = quantized * quantization_matrix

    return np.round(dct_coefficients_blocks, decimals=decimals)
183
171
@@ -252,71 +240,52 @@ def zigzag_scan_array_to_block(array, block_size):
252
240
return block
253
241
254
242
255
def zigzag_scan(quanitzed_blocks):
    """Flatten every block in zigzag order into one separator-delimited list.

    Each block is converted with ``zigzag_scan_block_to_array``, its trailing
    zeros are trimmed, and ``np.inf`` is appended after it as an end-of-block
    (EOB) marker.

    Args:
        quanitzed_blocks: 4D array whose first two axes index the blocks.
            (The misspelled parameter name is kept so keyword callers keep
            working.)

    Returns:
        A flat list of symbols: the trimmed coefficients of every block, each
        block followed by ``np.inf``.

    Note:
        ``np.inf`` is only a safe separator while no coefficient is infinite.
    """
    # Flat list of symbols for all blocks
    zigzag_blocks = []

    # Iterate over the block grid (first two axes)
    for i, j in np.ndindex(quanitzed_blocks.shape[:2]):
        # Parse the block on a zigzag pattern and remove the trailing zeros
        block_1D = np.trim_zeros(zigzag_scan_block_to_array(quanitzed_blocks[i, j]), 'b')

        # len() works for both lists and ndarrays; `not block_1D` raises
        # ValueError on an ndarray holding more than one element.
        if len(block_1D) == 0:
            # Keep one symbol so two separators are never adjacent; the
            # decoder splits on runs of separators and would otherwise lose
            # the all-zero block.
            block_1D = [0]
        zigzag_blocks.extend(block_1D)

        # Add infinity as a separator between blocks (EOB)
        zigzag_blocks.append(np.inf)

    return zigzag_blocks
272
261
273
262
def inverse_zigzag_scan(zigzag_blocks, num_blocks_height, num_blocks_width, block_size):
    """Rebuild the 4D block array from a separator-delimited symbol sequence.

    Args:
        zigzag_blocks: Iterable of symbols in which ``np.inf`` marks the end
            of each block.
        num_blocks_height: Number of blocks along the vertical axis.
        num_blocks_width: Number of blocks along the horizontal axis.
        block_size: Side length of each block; forwarded to
            ``zigzag_scan_array_to_block``.

    Returns:
        An array of shape
        ``(num_blocks_height, num_blocks_width, block_size, block_size)``.

    Raises:
        ValueError: If the decoded blocks cannot be arranged into the
            requested grid.
    """
    separator = np.inf

    # Split the symbol stream on the separator: groupby yields alternating
    # runs of payload symbols (key True) and separators (key False); only the
    # payload runs are turned back into 2D blocks.
    blocks = [
        zigzag_scan_array_to_block(list(group), block_size)
        for is_payload, group in groupby(zigzag_blocks, lambda symbol: symbol != separator)
        if is_payload
    ]

    # Reshape the list of 2D blocks to a 4D array
    try:
        return np.array(blocks).reshape(
            (num_blocks_height, num_blocks_width, block_size, block_size)
        )
    except ValueError as err:
        raise ValueError(
            f"decoded {len(blocks)} blocks, which cannot be arranged as a "
            f"{num_blocks_height}x{num_blocks_width} grid of "
            f"{block_size}x{block_size} blocks"
        ) from err
297
269
298
270
def entropy_coding(zigzag_blocks):
    """Huffman-encode the symbol sequence with a codec built from that data.

    Args:
        zigzag_blocks: Iterable of hashable symbols to encode.

    Returns:
        A ``(bitstream, codec)`` tuple: the value returned by
        ``codec.encode`` and the ``HuffmanCodec`` needed to decode it.
    """
    # The symbols are traversed twice (once to build the codec, once to
    # encode), so a one-shot iterable must be materialized first.
    if isinstance(zigzag_blocks, (list, tuple)):
        symbols = zigzag_blocks
    else:
        symbols = list(zigzag_blocks)

    # Create a Huffman codec object from the symbol frequencies
    codec = HuffmanCodec.from_data(symbols)

    # Encode the data
    bitstream = codec.encode(symbols)

    return bitstream, codec
308
279
309
def inverse_entropy_coding(bitstream, codec):
    """Decode a bitstream back into its symbol sequence.

    Args:
        bitstream: Encoded data, as produced by ``codec.encode``.
        codec: Object exposing ``decode(bitstream)``; must be the codec that
            produced ``bitstream``.

    Returns:
        Whatever ``codec.decode`` returns for ``bitstream``.
    """
    # Decode the data
    return codec.decode(bitstream)
315
284
316
- def encode (image_array , quantization_matrix , num_blocks_width , num_blocks_height , block_size = 8 , decimals = 0 ):
285
+ def encode (image_array , quantization_matrix , num_blocks_height , num_blocks_width , block_size = 8 , decimals = 0 ):
317
286
318
287
# Split the image into blocks
319
- image_blocks = split_image_into_blocks (image_array , num_blocks_width , num_blocks_height , block_size )
288
+ image_blocks = split_image_into_blocks (image_array , num_blocks_height , num_blocks_width , block_size )
320
289
321
290
# Transform coding
322
291
dct_coefficients_blocks = transform_coding (image_blocks , decimals = decimals )
@@ -325,21 +294,21 @@ def encode(image_array, quantization_matrix, num_blocks_width, num_blocks_height
325
294
quantized_blocks = quantization (dct_coefficients_blocks , quantization_matrix , decimals = decimals )
326
295
327
296
# Zigzag scan
328
- zigzad_blocks = zigzag_scan (quantized_blocks , num_blocks_width , num_blocks_height )
297
+ zigzad_blocks = zigzag_scan (quantized_blocks )
329
298
330
299
# Entropy coding
331
- encoded_blocks , codec = entropy_coding (zigzad_blocks )
300
+ bitstream , codec = entropy_coding (zigzad_blocks )
332
301
333
302
# Return the quantized blocks
334
- return encoded_blocks , codec
303
+ return bitstream , codec
335
304
336
- def decode (encoded_blocks , codec , quantization_matrix , num_blocks_width , num_blocks_height , block_size = 8 , decimals = 0 ):
305
+ def decode (bitstream , codec , quantization_matrix , num_blocks_height , num_blocks_width , block_size = 8 , decimals = 0 ):
337
306
338
307
# Inverse entropy coding
339
- zigzad_blocks = inverse_entropy_coding (encoded_blocks , codec )
308
+ zigzad_blocks = inverse_entropy_coding (bitstream , codec )
340
309
341
310
# Inverse zigzag scan
342
- quantized_blocks = inverse_zigzag_scan (zigzad_blocks , num_blocks_width , num_blocks_height , block_size )
311
+ quantized_blocks = inverse_zigzag_scan (zigzad_blocks , num_blocks_height , num_blocks_width , block_size )
343
312
344
313
# Inverse quantization
345
314
dct_coefficients_blocks = inverse_quantization (quantized_blocks , quantization_matrix , decimals = decimals )
@@ -382,17 +351,20 @@ def PSNR(original_image, compressed_image):
382
351
return psnr
383
352
384
353
# (PSNR vs quantization scale)
385
- def rate_distortion_curve (gray_image , block_size , quantization_matrix ):
354
+ def rate_distortion_curve (gray_image , quantization_matrix , num_blocks_height , num_blocks_width , block_size , decimals ):
386
355
# Create an empty array to store the PSNR values
387
356
psnr_values = []
388
357
389
358
# Iterate over the quantization matrix
390
359
for i in range (1 , 100 ):
360
+ # Print the current quantization scale
361
+ print (f"Quantization scale: { i } " )
362
+
391
363
# Encode the image
392
- encoded_image , codec , shape = encode (gray_image , block_size , quantization_matrix * i )
364
+ encoded_image , codec = encode (gray_image , block_size , quantization_matrix * i , num_blocks_height , num_blocks_width , block_size )
393
365
394
366
# Decode the image
395
- decoded_image = decode (encoded_image , codec , shape , quantization_matrix * i )
367
+ decoded_image = decode (encoded_image , codec , quantization_matrix * i , num_blocks_height , num_blocks_width , block_size )
396
368
397
369
# Calculate the PSNR
398
370
psnr = PSNR (gray_image , decoded_image )
@@ -407,18 +379,21 @@ def rate_distortion_curve(gray_image, block_size, quantization_matrix):
407
379
plt .show ()
408
380
409
381
# (PSNR vs data size)
410
- def rate_distortion_curve_2 (gray_image , block_size , quantization_matrix ):
382
+ def rate_distortion_curve_2 (gray_image , quantization_matrix , num_blocks_height , num_blocks_width , block_size , decimals ):
411
383
# Create an empty array to store the PSNR values
412
384
psnr_values = []
413
385
data_size = []
414
386
415
387
# Iterate over the quantization matrix
416
388
for i in range (1 , 100 ):
389
+ # Print the current quantization scale
390
+ print (f"Quantization scale: { i } " )
391
+
417
392
# Encode the image
418
- encoded_image , codec , shape = encode (gray_image , quantization_matrix * i , block_size )
393
+ encoded_image , codec = encode (gray_image , quantization_matrix * i , num_blocks_height , num_blocks_width , block_size )
419
394
420
395
# Decode the image
421
- decoded_image = decode (encoded_image , codec , shape , quantization_matrix * i )
396
+ decoded_image = decode (encoded_image , codec , quantization_matrix * i , num_blocks_height , num_blocks_width , block_size )
422
397
423
398
# Calculate the PSNR
424
399
psnr = PSNR (gray_image , decoded_image )
@@ -433,7 +408,7 @@ def rate_distortion_curve_2(gray_image, block_size, quantization_matrix):
433
408
plt .ylabel ('PSNR' )
434
409
plt .show ()
435
410
436
- def display_image (original_image , compressed_image ):
411
+ def display_images (original_image , compressed_image ):
437
412
# Create a figure
438
413
fig = plt .figure (figsize = (10 , 10 ))
439
414
@@ -453,10 +428,9 @@ def display_image(original_image, compressed_image):
453
428
454
429
if __name__ == "__main__" :
455
430
456
- filename = "task_1/lena1.raw"
457
- image_width , image_height = 256 , 256
458
- block_size = 8
459
- # Calculate the number of blocks in the image
431
+ filename = "media/input/lena1.raw"
432
+ image_height , image_width , block_size = 256 , 256 , 8
433
+ # The number of blocks in the height and width axes
460
434
num_blocks_height = image_height // block_size
461
435
num_blocks_width = image_width // block_size
462
436
decimals = 0
@@ -472,14 +446,14 @@ def display_image(original_image, compressed_image):
472
446
quantization_matrix *= 1
473
447
gray_image = open_raw_image (filename , image_width , image_height )
474
448
t = time .time ()
475
- encoded_image , codec = encode (gray_image , quantization_matrix , num_blocks_width , num_blocks_height , block_size , decimals )
449
+ encoded_image , codec = encode (gray_image , quantization_matrix , num_blocks_height , num_blocks_width , block_size , decimals )
450
+ print (f"Encoding time: { round (time .time () - t , 2 )} s" )
476
451
t1 = time .time ()
477
- print ("Encoding time: " , t1 - t )
478
- decoded_image = decode (encoded_image , codec , quantization_matrix , num_blocks_width , num_blocks_height , block_size , decimals )
479
- print ("Decoding time: " , time .time () - t1 )
452
+ decoded_image = decode (encoded_image , codec , quantization_matrix , num_blocks_height , num_blocks_width , block_size , decimals )
453
+ print (f"Decoding time: { round (time .time () - t1 , 2 )} s" )
480
454
481
455
compression_quality (gray_image , encoded_image )
482
- display_image (gray_image , decoded_image )
456
+ display_images (gray_image , decoded_image )
483
457
484
- # rate_distortion_curve(gray_image, block_size, quantization_matrix )
485
- # rate_distortion_curve_2(gray_image, block_size, quantization_matrix )
458
+ # rate_distortion_curve(gray_image, quantization_matrix, num_blocks_height, num_blocks_width, block_size, decimals )
459
+ # rate_distortion_curve_2(gray_image, quantization_matrix, num_blocks_height, num_blocks_width, block_size, decimals )
0 commit comments