Skip to content

Commit d2438d4

Browse files
committed
- Fix errors
- Start Task 5
1 parent 8d703bf commit d2438d4

File tree

7 files changed

+255
-165
lines changed

7 files changed

+255
-165
lines changed

.gitignore

+5
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
resources/
2+
*ipynb
3+
.vscode/
4+
__pycache__/
5+
media/output/

Task_1.py

+79-105
Original file line numberDiff line numberDiff line change
@@ -1,35 +1,38 @@
11
import numpy as np
22
import matplotlib.pyplot as plt
3+
from itertools import groupby
34
import time
45
from dahuffman import HuffmanCodec
56

67

78
def open_raw_image(file_path, image_width, image_height):
    """Load a headerless 8-bit grayscale image from a raw binary file.

    Args:
        file_path: Path of the raw file; every byte is read as one pixel.
        image_width: Number of pixel columns in the image.
        image_height: Number of pixel rows in the image.

    Returns:
        A writable ``np.uint8`` array of shape ``(image_height, image_width)``.

    Raises:
        ValueError: If the number of bytes in the file cannot be arranged
            into ``image_height`` x ``image_width`` pixels.
    """
    # Read the whole file as binary data
    with open(file_path, 'rb') as raw_file:
        raw_data = raw_file.read()

    # Interpret every byte as one unsigned 8-bit pixel
    pixels = np.frombuffer(raw_data, dtype=np.uint8)

    # Reshape the flat pixel array to a 2D image, reporting a readable error
    # when the requested dimensions do not match the file size
    try:
        image_array = pixels.reshape((image_height, image_width))
    except ValueError as err:
        raise ValueError(
            f"{file_path}: cannot arrange {len(raw_data)} bytes into a "
            f"{image_height}x{image_width} 8-bit image"
        ) from err

    # np.frombuffer over an immutable bytes object yields a read-only array;
    # return a copy so callers can modify the image in place
    return image_array.copy()
2022

21-
def split_image_into_blocks(image, num_blocks_height, num_blocks_width, block_size):
    """Split a 2D image into a grid of square blocks.

    Args:
        image: 2D array-like of pixels with at least
            ``num_blocks_height * block_size`` rows and
            ``num_blocks_width * block_size`` columns. Extra rows/columns
            beyond that region are ignored.
        num_blocks_height: Number of blocks along the vertical axis.
        num_blocks_width: Number of blocks along the horizontal axis.
        block_size: Side length of each square block, in pixels.

    Returns:
        A new ``np.uint8`` array of shape
        ``(num_blocks_height, num_blocks_width, block_size, block_size)``
        where ``blocks[i, j]`` is the block at block-row ``i``, block-column ``j``.

    Raises:
        ValueError: If ``image`` is not 2D or is too small for the grid.
    """
    image = np.asarray(image)
    covered_height = num_blocks_height * block_size
    covered_width = num_blocks_width * block_size

    # Reject inputs that cannot supply every block; a partial trailing slice
    # could otherwise be silently broadcast into a full block
    if image.ndim != 2 or image.shape[0] < covered_height or image.shape[1] < covered_width:
        raise ValueError(
            f"image of shape {image.shape} cannot be split into "
            f"{num_blocks_height}x{num_blocks_width} blocks of size {block_size}"
        )

    # Split each axis into (block index, offset inside block), then bring the
    # two block indices to the front: (bh, size, bw, size) -> (bh, bw, size, size)
    grid = image[:covered_height, :covered_width].reshape(
        num_blocks_height, block_size, num_blocks_width, block_size
    ).swapaxes(1, 2)

    # np.array copies, so the result never aliases the input image
    return np.array(grid, dtype=np.uint8, order="C")
3133

3234
def merge_blocks_into_image(image_blocks):
35+
3336
# Get the image dimensions
3437
num_blocks_height, num_blocks_width, block_size, _ = image_blocks.shape
3538

@@ -41,9 +44,8 @@ def merge_blocks_into_image(image_blocks):
4144
image = np.zeros((image_height, image_width), dtype=np.uint8)
4245

4346
# Merge the blocks into the image
44-
for i in range(num_blocks_height):
45-
for j in range(num_blocks_width):
46-
image[i*block_size:(i+1)*block_size, j*block_size:(j+1)*block_size] = np.clip(image_blocks[i, j], 0, 255)
47+
for i, j in np.ndindex(num_blocks_height, num_blocks_width):
48+
image[i*block_size:(i+1)*block_size, j*block_size:(j+1)*block_size] = np.clip(image_blocks[i, j], 0, 255)
4749

4850
return image
4951

@@ -120,64 +122,50 @@ def idct_2D(block):
120122
return dct_block
121123

122124
def transform_coding(image_blocks, decimals=0):
    """Apply the 2D DCT to every block of a block grid.

    Args:
        image_blocks: Array whose first two axes index the blocks; each
            ``image_blocks[i, j]`` is passed to ``dct_2D``.
        decimals: Number of decimal places the coefficients are rounded to.

    Returns:
        A float array with the same shape as ``image_blocks`` holding the
        rounded DCT coefficients of each block.
    """
    # Output buffer with one coefficient block per image block
    coefficients = np.zeros(image_blocks.shape)

    # Visit every (row, column) position of the block grid
    for block_index in np.ndindex(image_blocks.shape[:2]):
        coefficients[block_index] = dct_2D(image_blocks[block_index])

    return np.round(coefficients, decimals=decimals)
137135

138136
def inverse_transform_coding(dct_coefficients_blocks, decimals=0):
    """Apply the inverse 2D DCT to every block of a coefficient grid.

    Args:
        dct_coefficients_blocks: Array whose first two axes index the blocks;
            each ``dct_coefficients_blocks[i, j]`` is passed to ``idct_2D``.
        decimals: Number of decimal places the reconstructed values are
            rounded to.

    Returns:
        A float array with the same shape as ``dct_coefficients_blocks``
        holding the rounded reconstructed blocks.
    """
    # Output buffer with one reconstructed block per coefficient block
    reconstructed = np.zeros(dct_coefficients_blocks.shape)

    # Visit every (row, column) position of the block grid
    for block_index in np.ndindex(dct_coefficients_blocks.shape[:2]):
        reconstructed[block_index] = idct_2D(dct_coefficients_blocks[block_index])

    return np.round(reconstructed, decimals=decimals)
153147

154148
def quantization(dct_coefficients_blocks, quantization_matrix, decimals=0):
    """Quantize DCT coefficient blocks by element-wise division.

    Args:
        dct_coefficients_blocks: Array whose last two axes form one block
            (typically ``(blocks_h, blocks_w, size, size)``); any number of
            leading axes is accepted.
        quantization_matrix: Array of shape ``(size, size)`` (or anything
            broadcastable against a block) holding the quantization steps.
        decimals: Number of decimal places the quantized values are rounded to.

    Returns:
        A float64 array with the same shape as ``dct_coefficients_blocks``
        containing ``round(coefficients / quantization_matrix)``.
    """
    # Broadcasting aligns the matrix with the trailing block axes, so every
    # block is divided in one vectorized operation instead of a Python loop
    scaled = np.asarray(dct_coefficients_blocks, dtype=np.float64) / quantization_matrix

    return np.round(scaled, decimals=decimals)
168159

169160
def inverse_quantization(quantized_blocks, quantization_matrix, decimals=0):
    """Rescale quantized blocks back to DCT coefficients.

    Args:
        quantized_blocks: Array whose last two axes form one block
            (typically ``(blocks_h, blocks_w, size, size)``); any number of
            leading axes is accepted.
        quantization_matrix: Array of shape ``(size, size)`` (or anything
            broadcastable against a block) holding the quantization steps.
        decimals: Number of decimal places the coefficients are rounded to.

    Returns:
        A float64 array with the same shape as ``quantized_blocks``
        containing ``round(quantized * quantization_matrix)``.
    """
    # Broadcasting aligns the matrix with the trailing block axes, so every
    # block is rescaled in one vectorized operation instead of a Python loop
    rescaled = np.asarray(quantized_blocks, dtype=np.float64) * quantization_matrix

    return np.round(rescaled, decimals=decimals)
183171

@@ -252,71 +240,52 @@ def zigzag_scan_array_to_block(array, block_size):
252240
return block
253241

254242

255-
def zigzag_scan(quanitzed_blocks):
    """Flatten every quantized block in zigzag order into one symbol stream.

    Each block is converted with ``zigzag_scan_block_to_array``, its trailing
    zeros are dropped, and ``np.inf`` is appended as the end-of-block (EOB)
    separator. A block that is entirely zero is emitted as a single ``0`` so
    that every block contributes at least one symbol before its separator.

    Args:
        quanitzed_blocks: Array whose first two axes index the blocks.

    Returns:
        A flat list of coefficients with ``np.inf`` after every block.
    """
    # Flat stream of coefficients and separators
    zigzag_blocks = []

    # Iterate over the blocks
    for block_index in np.ndindex(quanitzed_blocks.shape[:2]):
        # Parse the block on a zigzag pattern and remove the trailing zeros
        block_1D = np.trim_zeros(zigzag_scan_block_to_array(quanitzed_blocks[block_index]), 'b')

        # Use an explicit length check: the truth value of a NumPy array with
        # more than one element is ambiguous and raises ValueError
        if len(block_1D) == 0:
            # Empty block after trimming the zeros
            block_1D = [0]
        zigzag_blocks.extend(block_1D)

        # Add infinity as a separator between blocks (EOB)
        zigzag_blocks.append(np.inf)

    return zigzag_blocks
272261

273262
def inverse_zigzag_scan(zigzag_blocks, num_blocks_height, num_blocks_width, block_size):
    """Rebuild the 4D grid of blocks from a zigzag symbol stream.

    Args:
        zigzag_blocks: Flat iterable of coefficients in which ``np.inf``
            marks the end of each block.
        num_blocks_height: Number of blocks along the vertical axis.
        num_blocks_width: Number of blocks along the horizontal axis.
        block_size: Side length of each square block.

    Returns:
        An array of shape
        ``(num_blocks_height, num_blocks_width, block_size, block_size)``.
    """
    end_of_block = np.inf

    def is_coefficient(symbol):
        return symbol != end_of_block

    # Consecutive coefficients form one block; separator runs are skipped
    restored_blocks = []
    for holds_coefficients, run in groupby(zigzag_blocks, is_coefficient):
        if not holds_coefficients:
            continue
        restored_blocks.append(zigzag_scan_array_to_block(list(run), block_size))

    # Arrange the list of 2D blocks as a 4D grid
    grid_shape = (num_blocks_height, num_blocks_width, block_size, block_size)
    return np.array(restored_blocks).reshape(grid_shape)
297269

298270
def entropy_coding(zigzag_blocks):
    """Huffman-encode the zigzag symbol stream.

    Args:
        zigzag_blocks: Sequence of symbols to compress; it is read once to
            build the codec and once more to encode.

    Returns:
        A ``(bitstream, codec)`` tuple: the encoded data and the
        ``HuffmanCodec`` built from ``zigzag_blocks`` that is needed to
        decode it.
    """
    # Build the code table from the symbols of this very stream
    huffman_codec = HuffmanCodec.from_data(zigzag_blocks)

    # Encode the stream with that table
    return huffman_codec.encode(zigzag_blocks), huffman_codec
308279

309-
def inverse_entropy_coding(bitstream, codec):
    """Decode a bitstream with the given codec.

    Args:
        bitstream: Encoded data, as returned by ``codec.encode``.
        codec: Object exposing ``decode``; the codec used for encoding.

    Returns:
        Whatever ``codec.decode(bitstream)`` returns (the decoded symbols).
    """
    decoded_symbols = codec.decode(bitstream)
    return decoded_symbols
315284

316-
def encode(image_array, quantization_matrix, num_blocks_width, num_blocks_height, block_size=8, decimals=0):
285+
def encode(image_array, quantization_matrix, num_blocks_height, num_blocks_width, block_size=8, decimals=0):
317286

318287
# Split the image into blocks
319-
image_blocks = split_image_into_blocks(image_array, num_blocks_width, num_blocks_height, block_size)
288+
image_blocks = split_image_into_blocks(image_array, num_blocks_height, num_blocks_width, block_size)
320289

321290
# Transform coding
322291
dct_coefficients_blocks = transform_coding(image_blocks, decimals=decimals)
@@ -325,21 +294,21 @@ def encode(image_array, quantization_matrix, num_blocks_width, num_blocks_height
325294
quantized_blocks = quantization(dct_coefficients_blocks, quantization_matrix, decimals=decimals)
326295

327296
# Zigzag scan
328-
zigzad_blocks = zigzag_scan(quantized_blocks, num_blocks_width, num_blocks_height)
297+
zigzad_blocks = zigzag_scan(quantized_blocks)
329298

330299
# Entropy coding
331-
encoded_blocks, codec = entropy_coding(zigzad_blocks)
300+
bitstream, codec = entropy_coding(zigzad_blocks)
332301

333302
# Return the quantized blocks
334-
return encoded_blocks, codec
303+
return bitstream, codec
335304

336-
def decode(encoded_blocks, codec, quantization_matrix, num_blocks_width, num_blocks_height, block_size=8, decimals=0):
305+
def decode(bitstream, codec, quantization_matrix, num_blocks_height, num_blocks_width, block_size=8, decimals=0):
337306

338307
# Inverse entropy coding
339-
zigzad_blocks = inverse_entropy_coding(encoded_blocks, codec)
308+
zigzad_blocks = inverse_entropy_coding(bitstream, codec)
340309

341310
# Inverse zigzag scan
342-
quantized_blocks = inverse_zigzag_scan(zigzad_blocks, num_blocks_width, num_blocks_height, block_size)
311+
quantized_blocks = inverse_zigzag_scan(zigzad_blocks, num_blocks_height, num_blocks_width, block_size)
343312

344313
# Inverse quantization
345314
dct_coefficients_blocks = inverse_quantization(quantized_blocks, quantization_matrix, decimals=decimals)
@@ -382,17 +351,20 @@ def PSNR(original_image, compressed_image):
382351
return psnr
383352

384353
# (PSNR vs quantization scale)
385-
def rate_distortion_curve(gray_image, block_size, quantization_matrix):
354+
def rate_distortion_curve(gray_image, quantization_matrix, num_blocks_height, num_blocks_width, block_size, decimals):
386355
# Create an empty array to store the PSNR values
387356
psnr_values = []
388357

389358
# Iterate over the quantization matrix
390359
for i in range(1, 100):
360+
# Print the current quantization scale
361+
print(f"Quantization scale: {i}")
362+
391363
# Encode the image
392-
encoded_image, codec, shape = encode(gray_image, block_size, quantization_matrix * i)
364+
encoded_image, codec = encode(gray_image, block_size, quantization_matrix * i, num_blocks_height, num_blocks_width, block_size)
393365

394366
# Decode the image
395-
decoded_image = decode(encoded_image, codec, shape, quantization_matrix * i)
367+
decoded_image = decode(encoded_image, codec, quantization_matrix * i, num_blocks_height, num_blocks_width, block_size)
396368

397369
# Calculate the PSNR
398370
psnr = PSNR(gray_image, decoded_image)
@@ -407,18 +379,21 @@ def rate_distortion_curve(gray_image, block_size, quantization_matrix):
407379
plt.show()
408380

409381
# (PSNR vs data size)
410-
def rate_distortion_curve_2(gray_image, block_size, quantization_matrix):
382+
def rate_distortion_curve_2(gray_image, quantization_matrix, num_blocks_height, num_blocks_width, block_size, decimals):
411383
# Create an empty array to store the PSNR values
412384
psnr_values = []
413385
data_size = []
414386

415387
# Iterate over the quantization matrix
416388
for i in range(1, 100):
389+
# Print the current quantization scale
390+
print(f"Quantization scale: {i}")
391+
417392
# Encode the image
418-
encoded_image, codec, shape = encode(gray_image, quantization_matrix * i, block_size)
393+
encoded_image, codec = encode(gray_image, quantization_matrix * i, num_blocks_height, num_blocks_width, block_size)
419394

420395
# Decode the image
421-
decoded_image = decode(encoded_image, codec, shape, quantization_matrix * i)
396+
decoded_image = decode(encoded_image, codec, quantization_matrix * i, num_blocks_height, num_blocks_width, block_size)
422397

423398
# Calculate the PSNR
424399
psnr = PSNR(gray_image, decoded_image)
@@ -433,7 +408,7 @@ def rate_distortion_curve_2(gray_image, block_size, quantization_matrix):
433408
plt.ylabel('PSNR')
434409
plt.show()
435410

436-
def display_image(original_image, compressed_image):
411+
def display_images(original_image, compressed_image):
437412
# Create a figure
438413
fig = plt.figure(figsize=(10, 10))
439414

@@ -453,10 +428,9 @@ def display_image(original_image, compressed_image):
453428

454429
if __name__ == "__main__":
455430

456-
filename = "task_1/lena1.raw"
457-
image_width, image_height = 256, 256
458-
block_size = 8
459-
# Calculate the number of blocks in the image
431+
filename = "media/input/lena1.raw"
432+
image_height, image_width, block_size = 256, 256, 8
433+
# The number of blocks in the height and width axes
460434
num_blocks_height = image_height // block_size
461435
num_blocks_width = image_width // block_size
462436
decimals = 0
@@ -472,14 +446,14 @@ def display_image(original_image, compressed_image):
472446
quantization_matrix *= 1
473447
gray_image = open_raw_image(filename, image_width, image_height)
474448
t = time.time()
475-
encoded_image, codec = encode(gray_image, quantization_matrix, num_blocks_width, num_blocks_height, block_size, decimals)
449+
encoded_image, codec = encode(gray_image, quantization_matrix, num_blocks_height, num_blocks_width, block_size, decimals)
450+
print(f"Encoding time: {round(time.time() - t, 2)}s")
476451
t1 = time.time()
477-
print("Encoding time: ", t1 - t)
478-
decoded_image = decode(encoded_image, codec, quantization_matrix, num_blocks_width, num_blocks_height, block_size, decimals)
479-
print("Decoding time: ", time.time() - t1)
452+
decoded_image = decode(encoded_image, codec, quantization_matrix, num_blocks_height, num_blocks_width, block_size, decimals)
453+
print(f"Decoding time: {round(time.time() - t1, 2)}s")
480454

481455
compression_quality(gray_image, encoded_image)
482-
display_image(gray_image, decoded_image)
456+
display_images(gray_image, decoded_image)
483457

484-
# rate_distortion_curve(gray_image, block_size, quantization_matrix)
485-
# rate_distortion_curve_2(gray_image, block_size, quantization_matrix)
458+
# rate_distortion_curve(gray_image, quantization_matrix, num_blocks_height, num_blocks_width, block_size, decimals)
459+
# rate_distortion_curve_2(gray_image, quantization_matrix, num_blocks_height, num_blocks_width, block_size, decimals)

0 commit comments

Comments
 (0)