Skip to content

Commit 50ce653

Browse files
committed
* Fix errors
* Task 5 completed
1 parent d2438d4 commit 50ce653

File tree

4 files changed

+199
-94
lines changed

4 files changed

+199
-94
lines changed

Task_1.py

+39-57
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import numpy as np
22
import matplotlib.pyplot as plt
3+
import plotly.graph_objects as go
34
from itertools import groupby
45
import time
56
from dahuffman import HuffmanCodec
@@ -348,65 +349,47 @@ def PSNR(original_image, compressed_image):
348349
return 100
349350
max_pixel = 255.0
350351
psnr = 10 * np.log10(max_pixel ** 2 / mse)
351-
return psnr
352+
return round(psnr, 2)
352353

353-
# (PSNR vs quantization scale)
354-
def rate_distortion_curve(gray_image, quantization_matrix, num_blocks_height, num_blocks_width, block_size, decimals):
354+
# PSNR rate-distortion curve
355+
def rate_distortion_curve(gray_image, type, quantization_matrix, num_blocks_height, num_blocks_width, block_size, decimals):
    """Plot PSNR against a rate measure for a sweep of quantization scales.

    The image is encoded and decoded once per scale factor in
    np.arange(0.1, 1.1, 0.1); each factor multiplies quantization_matrix.
    The resulting PSNR values are shown in an interactive Plotly figure.

    Args:
        gray_image: Image to compress; its first two shape entries are used
            as the pixel count for the 'bpp' axis.
        type: X-axis selector. 'bpp' plots len(bitstream) * 8 / pixel count,
            'scale' plots the quantization scale factor, 'size' plots
            len(bitstream). (The name shadows the builtin but is part of the
            existing signature, so it is kept.)
        quantization_matrix: Base quantization matrix, scaled per step.
        num_blocks_height, num_blocks_width, block_size, decimals:
            Forwarded unchanged to encode() and decode().

    Raises:
        ValueError: If type is not 'bpp', 'scale' or 'size'.
    """
    axis_labels = {
        'bpp': 'Bit per pixel (BPP)',
        'scale': 'Quantization Scale',
        'size': 'File Size (bytes)',
    }

    # Reject a bad selector before running the expensive encode/decode sweep.
    if type not in axis_labels:
        raise ValueError('Invalid type')
    label = axis_labels[type]

    psnr_values = []
    bitstream_sizes = []
    quantization_levels = np.arange(0.1, 1.1, 0.1)

    # Control of the compression rate
    for level in quantization_levels:

        print(f"Quantization level: {level}")

        # Scale once and share the same matrix between encoder and decoder
        scaled_matrix = quantization_matrix * level

        # Encode the image
        bitstream, codec = encode(gray_image, scaled_matrix, num_blocks_height, num_blocks_width, block_size, decimals)

        # Decode the image
        decoded_image = decode(bitstream, codec, scaled_matrix, num_blocks_height, num_blocks_width, block_size, decimals)

        # Record distortion and compressed size for this level
        psnr_values.append(PSNR(gray_image, decoded_image))
        bitstream_sizes.append(len(bitstream))

    if type == 'bpp':
        # presumably len(bitstream) is a byte count, hence the factor 8 — confirm against encode()
        x_values = np.array(bitstream_sizes) * 8 / (gray_image.shape[0] * gray_image.shape[1])
    elif type == 'scale':
        x_values = quantization_levels
    else:
        x_values = bitstream_sizes

    fig = go.Figure(data=go.Scatter(x=x_values, y=psnr_values, mode='lines+markers', name='lines+markers', text=np.round(quantization_levels, 1)))
    # The title names the axis that was actually selected
    fig.update_layout(title=f'Rate-Distortion Curve (PSNR vs. {label})', xaxis_title=label, yaxis_title='PSNR (dB)')
    fig.show()
410393

411394
def display_images(original_image, compressed_image):
412395
# Create a figure
@@ -445,15 +428,14 @@ def display_images(original_image, compressed_image):
445428

446429
quantization_matrix *= 1
447430
gray_image = open_raw_image(filename, image_width, image_height)
448-
t = time.time()
449-
encoded_image, codec = encode(gray_image, quantization_matrix, num_blocks_height, num_blocks_width, block_size, decimals)
450-
print(f"Encoding time: {round(time.time() - t, 2)}s")
451-
t1 = time.time()
452-
decoded_image = decode(encoded_image, codec, quantization_matrix, num_blocks_height, num_blocks_width, block_size, decimals)
453-
print(f"Decoding time: {round(time.time() - t1, 2)}s")
454-
455-
compression_quality(gray_image, encoded_image)
456-
display_images(gray_image, decoded_image)
457-
458-
# rate_distortion_curve(gray_image, quantization_matrix, num_blocks_height, num_blocks_width, block_size, decimals)
459-
# rate_distortion_curve_2(gray_image, quantization_matrix, num_blocks_height, num_blocks_width, block_size, decimals)
431+
# t = time.time()
432+
# encoded_image, codec = encode(gray_image, quantization_matrix, num_blocks_height, num_blocks_width, block_size, decimals)
433+
# print(f"Encoding time: {round(time.time() - t, 2)}s")
434+
# t1 = time.time()
435+
# decoded_image = decode(encoded_image, codec, quantization_matrix, num_blocks_height, num_blocks_width, block_size, decimals)
436+
# print(f"Decoding time: {round(time.time() - t1, 2)}s")
437+
438+
# compression_quality(gray_image, encoded_image)
439+
# display_images(gray_image, decoded_image)
440+
441+
rate_distortion_curve(gray_image, "size", quantization_matrix, num_blocks_height, num_blocks_width, block_size, decimals)

Task_2.py

+57-15
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,45 @@ def create_y4m_video(video_path, frames, metadata):
5555
# Write the frame data
5656
file.write(frame_bytes)
5757

58+
def rate_distortion_curve_task2(frames, quantization_matrix, fps, num_blocks_height, num_blocks_width, block_size, decimals):
    """Compute average PSNR and bitrate of a frame sequence per quantization level.

    Every frame is encoded and decoded independently with the quantization
    matrix multiplied by each of the levels 1, 2, 4, 8, 16 and 32.

    Args:
        frames: Sequence of frames to compress.
        quantization_matrix: Base quantization matrix, scaled per level.
        fps: Frame rate used to turn the frame count into a duration.
        num_blocks_height, num_blocks_width, block_size, decimals:
            Forwarded unchanged to encode() and decode().

    Returns:
        Tuple (psnr_values, bps_values, quantization_levels): the mean PSNR
        per level rounded to 2 decimals, the bitrate per level in kbps
        rounded to 2 decimals, and the list of levels.

    Raises:
        ValueError: If frames is empty or fps is not positive.
    """
    # len() rather than truthiness so that a numpy array of frames also works
    frame_count = len(frames)
    if frame_count == 0:
        raise ValueError("frames must not be empty")
    if fps <= 0:
        raise ValueError("fps must be positive")

    duration_seconds = frame_count / fps

    psnr_values = []
    bps_values = []
    quantization_levels = [1] + [2 ** i for i in range(1, 6)]
    for quantization_level in quantization_levels:

        print(f"Quantization level: {quantization_level}")
        psnr_sum, size_sum = 0, 0

        # Loop-invariant: the same scaled matrix serves every frame at this level
        scaled_matrix = quantization_matrix * quantization_level

        for frame in frames:

            # Encode frame
            bitstream, codec = encode(frame, scaled_matrix, num_blocks_height, num_blocks_width, block_size, decimals)

            # Calculate frame size in bits
            size_sum += len(bitstream) * 8

            # Decode frame
            decoded_frame = decode(bitstream, codec, scaled_matrix, num_blocks_height, num_blocks_width, block_size, decimals)

            # Calculate PSNR
            psnr_sum += PSNR(frame, decoded_frame)

        # Calculate average PSNR and BPS
        psnr = psnr_sum / frame_count
        bps = size_sum / duration_seconds

        # Store PSNR and BPS values (bitrate converted to kbps)
        psnr_values.append(round(psnr, 2))
        bps_values.append(round(bps / 1000, 2))

    return psnr_values, bps_values, quantization_levels
91+
92+
def plot_rate_distortion_curve(psnr_values, bps_values, levels):
93+
fig = go.Figure(data=go.Scatter(x=bps_values, y=psnr_values, mode='lines+markers', text=levels))
94+
fig.update_layout(title='Rate-Distortion Curve', xaxis_title='BitsPerSecond (kbps)', yaxis_title='PSNR (dB)')
95+
fig.show()
96+
5897
if __name__ == "__main__":
5998

6099
filename = "media/input/foreman_qcif_mono.y4m"
@@ -72,19 +111,22 @@ def create_y4m_video(video_path, frames, metadata):
72111
[49, 64, 78, 87, 103, 121, 120, 101],
73112
[72, 92, 95, 98, 112, 100, 103, 99]])
74113

75-
encoded_frames = []
76-
t = time.time()
77-
for i, frame in enumerate(frames):
78-
print(f"Encoding frame {i}")
79-
bitstream, codec = encode(frame, quantization_matrix, num_blocks_height, num_blocks_width, block_size, decimals)
80-
encoded_frames.append((bitstream, codec))
81-
82-
compressed_frames = []
83-
for i, encoded_frame in enumerate(encoded_frames):
84-
print(f"Decoding frame {i}")
85-
bitstream, codec = encoded_frame
86-
decoded_frame = decode(bitstream, codec, quantization_matrix, num_blocks_height, num_blocks_width, block_size, decimals)
87-
compressed_frames.append(decoded_frame)
114+
# encoded_frames = []
115+
# t = time.time()
116+
# for i, frame in enumerate(frames):
117+
# print(f"Encoding frame {i}")
118+
# bitstream, codec = encode(frame, quantization_matrix, num_blocks_height, num_blocks_width, block_size, decimals)
119+
# encoded_frames.append((bitstream, codec))
120+
121+
# compressed_frames = []
122+
# for i, encoded_frame in enumerate(encoded_frames):
123+
# print(f"Decoding frame {i}")
124+
# bitstream, codec = encoded_frame
125+
# decoded_frame = decode(bitstream, codec, quantization_matrix, num_blocks_height, num_blocks_width, block_size, decimals)
126+
# compressed_frames.append(decoded_frame)
88127

89-
print(f"Time: {round(time.time() - t, 2)}")
90-
create_y4m_video("media/output/foreman_qcif_mono_task2.y4m", compressed_frames, metadata)
128+
# print(f"Time: {round(time.time() - t, 2)}")
129+
# create_y4m_video("media/output/foreman_qcif_mono_task2.y4m", compressed_frames, metadata)
130+
131+
psnr_values, bps_values, quantization_levels = rate_distortion_curve_task2(frames[:30], quantization_matrix, 30, num_blocks_height, num_blocks_width, block_size, decimals)
132+
plot_rate_distortion_curve(psnr_values, bps_values, quantization_levels)

Task_5.py renamed to Task_5_image.py

+19-22
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
from Task_1 import *
22
from PIL import Image
3-
import os
4-
import matplotlib.pyplot as plt
3+
from os.path import getsize
54

65
def open_raw_image_PIL(filename, image_width, image_height):
76
with open(filename, 'rb') as file:
@@ -11,10 +10,6 @@ def open_raw_image_PIL(filename, image_width, image_height):
1110
image = Image.frombytes('L', (image_width, image_height), raw_data)
1211
return image
1312

14-
def get_file_size(file_path):
15-
return os.path.getsize(file_path)
16-
17-
1813
if __name__ == "__main__":
1914

2015
filename = "media/input/lena1.raw"
@@ -39,9 +34,11 @@ def get_file_size(file_path):
3934

4035
psnr_values_task1 = []
4136
file_sizes_task1 = []
37+
# scale factors from 0.1 to 1.0 (finest quantization) followed by 5 to 25 (coarsest), multiplied into the quantization matrix
38+
quantization_levels = np.concatenate((np.arange(0.1, 1.1, 0.1), np.arange(5, 30, 5)))
4239

4340
# Iterate over the quantization matrix
44-
for i in range(1, 100, 10):
41+
for i in quantization_levels:
4542

4643
# Encode the image
4744
encoded_image, codec = encode(gray_image, quantization_matrix * i, num_blocks_height, num_blocks_width, block_size)
@@ -52,18 +49,20 @@ def get_file_size(file_path):
5249
# Calculate the PSNR
5350
psnr = PSNR(gray_image, decoded_image)
5451

55-
# Append the PSNR value
52+
# Append the values
5653
psnr_values_task1.append(psnr)
5754
file_sizes_task1.append(len(encoded_image))
5855

5956
psnr_values_jpeg = []
6057
file_sizes_jpeg = []
58+
# JPEG quality from 0 (worst) to 90 in steps of 10 (range(0, 96, 10) stops before 95)
59+
quality_range = range(0, 96, 10)
6160

62-
for quality in range(0, 96, 10):
61+
for quality in quality_range:
6362
# Compress using JPEG
6463
jpeg_compressed_image = 'media/output/jpeg_compression.jpg'
6564
gray_image_PIL.save(jpeg_compressed_image, 'JPEG', quality=quality)
66-
jpeg_file_size = get_file_size(jpeg_compressed_image)
65+
jpeg_file_size = getsize(jpeg_compressed_image)
6766
psnr = PSNR(gray_image, np.array(Image.open(jpeg_compressed_image)).astype(np.uint8))
6867

6968
psnr_values_jpeg.append(psnr)
@@ -72,28 +71,26 @@ def get_file_size(file_path):
7271

7372
psnr_values_jpeg2000 = []
7473
file_sizes_jpeg2000 = []
74+
# scale from 10 to 90 representing an approximate size compression
75+
quality_layers_range = range(10, 100, 10)
7576

76-
for layers in range(10, 91, 10):
77+
for layers in quality_layers_range:
7778
# Compress using JPEG2000
7879
jpeg2000_compressed_image = 'media/output/jpeg2000_compression.jp2'
7980
gray_image_PIL.save(jpeg2000_compressed_image, 'JPEG2000', quality_mode='rates', quality_layers=[layers], codeblock_size=(8, 8))
80-
jpeg2000_file_size = get_file_size(jpeg2000_compressed_image)
81+
jpeg2000_file_size = getsize(jpeg2000_compressed_image)
8182
psnr = PSNR(gray_image, np.array(Image.open(jpeg2000_compressed_image)).astype(np.uint8))
8283

8384
psnr_values_jpeg2000.append(psnr)
8485
file_sizes_jpeg2000.append(jpeg2000_file_size)
8586

8687
# Plot rate-distortion curve
87-
plt.figure()
88-
plt.plot(file_sizes_task1, psnr_values_task1, 'o-', label='Task 1')
89-
plt.plot(file_sizes_jpeg, psnr_values_jpeg, 'o-', label='JPEG')
90-
plt.plot(file_sizes_jpeg2000, psnr_values_jpeg2000, 'o-', label='JPEG2000')
91-
plt.xlabel('File Size (bytes)')
92-
plt.ylabel('PSNR (dB)')
93-
plt.title('Rate-Distortion Curve (PSNR vs. File Size)')
94-
plt.legend()
95-
plt.grid(True)
96-
plt.show()
88+
fig = go.Figure()
89+
fig.add_trace(go.Scatter(x=file_sizes_task1, y=psnr_values_task1, mode='lines+markers', name='Task 1', text=np.round(quantization_levels, 1)))
90+
fig.add_trace(go.Scatter(x=file_sizes_jpeg, y=psnr_values_jpeg, mode='lines+markers', name='JPEG', text=list(quality_range)))
91+
fig.add_trace(go.Scatter(x=file_sizes_jpeg2000, y=psnr_values_jpeg2000, mode='lines+markers', name='JPEG2000', text=list(quality_layers_range)))
92+
fig.update_layout(title='Rate-Distortion Curve (PSNR vs. File Size)', xaxis_title='File Size (bytes)', yaxis_title='PSNR (dB)')
93+
fig.show()
9794

9895
# display_images(original_image, Image.open(jpeg_compressed_image))
9996
# display_images(original_image, Image.open(jpeg2000_compressed_image))

Task_5_video.py

+84
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
from moviepy.editor import VideoFileClip
2+
import os
3+
from Task_1 import *
4+
from Task_2 import *
5+
6+
def rate_distortion_curve_task5(input_path, output_path, codec):
    """Measure PSNR and bitrate of a clip re-encoded at several target bitrates.

    The input file's average bitrate (file size over clip duration) is
    divided by the ratios 2, 4, 8, 16 and 32. For each target the clip is
    written to output_path with the given codec, read back, and its first
    frames are compared with the corresponding source frames.

    Args:
        input_path: Path of the source video.
        output_path: Path the re-encoded video is written to; the same path
            is reused for every target bitrate.
        codec: Codec name passed to write_videofile.

    Returns:
        Tuple (psnr_values, bps_values, ratios): the mean PSNR per target
        rounded to 2 decimals, the measured output bitrate in kbps rounded
        to 2 decimals, and the numpy array of ratios.

    Raises:
        ValueError: If no frames can be read from the input clip or from a
            re-encoded clip.
    """
    from itertools import islice  # local import keeps the block self-contained

    # Number of leading frames compared, matching the frames[:30] used for Task 2
    max_frames = 30

    psnr_values = []
    bps_values = []

    with VideoFileClip(input_path, audio=False, fps_source='fps') as video_clip:

        # Take the FIRST max_frames frames (channel 0 only). The previous
        # `[...][30]` indexed the single frame at position 30, so PSNR was
        # computed between image rows instead of between frames.
        frames = [frame[:, :, 0] for frame in islice(video_clip.iter_frames(), max_frames)]
        if not frames:
            raise ValueError(f"no frames could be read from {input_path}")

        size = os.path.getsize(input_path)
        duration = video_clip.duration
        bitrate = (size * 8 / 1000) / duration

        # numpy array of 2 powers
        ratios = np.array([2 ** i for i in range(1, 6)])
        # Calculate the target bitrates based on the compression ratios
        target_bitrates = bitrate / ratios

        for target_bitrate in target_bitrates:

            print(f"Target bitrate: {target_bitrate} kbps")

            # Set the target bitrate (in kbps)
            video_clip.write_videofile(output_path, codec=codec, bitrate=f'{target_bitrate}k', logger=None)

            with VideoFileClip(output_path, audio=False, fps_source='fps') as video_clip_compressed:

                compressed_frames = [frame[:, :, 0] for frame in islice(video_clip_compressed.iter_frames(), max_frames)]

                # Pair frames one-to-one; zip stops at the shorter sequence
                frame_psnrs = [PSNR(frame, compressed_frame) for frame, compressed_frame in zip(frames, compressed_frames)]
                if not frame_psnrs:
                    raise ValueError(f"no frames could be read from {output_path}")

                # Calculate average PSNR (over the frames actually compared) and BPS
                psnr = sum(frame_psnrs) / len(frame_psnrs)
                bps = os.path.getsize(output_path) * 8 / duration

                # Store PSNR and BPS values (bitrate converted to kbps)
                psnr_values.append(round(psnr, 2))
                bps_values.append(round(bps / 1000, 2))

    return psnr_values, bps_values, ratios
46+
47+
48+
def plot_curves():
    """Plot the Task 2, H.264 and H.265 rate-distortion curves in one figure.

    Reads the module-level names set up by the __main__ block (frames,
    quantization_matrix, the block/size parameters and the video paths).
    """
    # Each result is a (psnr_values, bps_values, point_labels) triple
    task2_result = rate_distortion_curve_task2(frames[:30], quantization_matrix, 30, num_blocks_height, num_blocks_width, block_size, decimals)
    h264_result = rate_distortion_curve_task5(input_video_path, output_video_path_h264, 'libx264')
    h265_result = rate_distortion_curve_task5(input_video_path, output_video_path_h265, 'libx265')

    curves = (
        ('Task 2', task2_result),
        ('H.264', h264_result),
        ('H.265', h265_result),
    )

    fig = go.Figure()
    for trace_name, (psnr_series, bps_series, point_labels) in curves:
        fig.add_trace(go.Scatter(x=bps_series, y=psnr_series, mode='lines+markers', name=trace_name, text=point_labels))
    fig.update_layout(
        title='Rate-Distortion Curve',
        xaxis_title='BitsPerSecond (kbps)',
        yaxis_title='PSNR (dB)',
    )
    fig.show()
60+
61+
62+
if __name__ == "__main__":

    # Source clip and one output file per codec under comparison
    input_video_path = 'media/input/foreman_qcif_mono.y4m'
    output_video_path_h264 = 'media/output/foreman_qcif_mono_h264.mp4'
    output_video_path_h265 = 'media/output/foreman_qcif_mono_h265.mp4'

    frames, metadata = read_y4m_video(input_video_path)

    # Frame geometry comes from the Y4M metadata; blocks are 8x8
    frame_height = int(metadata["H"])
    frame_width = int(metadata["W"])
    block_size = 8
    num_blocks_height = frame_height // block_size
    num_blocks_width = frame_width // block_size
    decimals = 0

    # 8x8 base quantization table (appears to be the standard JPEG luminance
    # table — confirm); plot_curves() reads it and the names above as globals
    quantization_matrix = np.array([
        [16, 11, 10, 16, 24, 40, 51, 61],
        [12, 12, 14, 19, 26, 58, 60, 55],
        [14, 13, 16, 24, 40, 57, 69, 56],
        [14, 17, 22, 29, 51, 87, 80, 62],
        [18, 22, 37, 56, 68, 109, 103, 77],
        [24, 35, 55, 64, 81, 104, 113, 92],
        [49, 64, 78, 87, 103, 121, 120, 101],
        [72, 92, 95, 98, 112, 100, 103, 99],
    ])

    plot_curves()
83+
84+

0 commit comments

Comments
 (0)