* Fix errors

ybakkali · ybakkali · commit 50ce6532bfa2 · 2023-05-28T12:57:23.000+02:00
* Task 5 completed
diff --git a/Task_1.py b/Task_1.py
@@ -1,5 +1,6 @@
 import numpy as np
 import matplotlib.pyplot as plt
+import plotly.graph_objects as go
 from itertools import groupby
 import time
 from dahuffman import HuffmanCodec
@@ -348,65 +349,47 @@ def PSNR(original_image, compressed_image):
         return 100
     max_pixel = 255.0
     psnr = 10 * np.log10(max_pixel ** 2 / mse)
-    return psnr
+    return round(psnr, 2)
 
-# (PSNR vs quantization scale)
-def rate_distortion_curve(gray_image, quantization_matrix, num_blocks_height, num_blocks_width, block_size, decimals):
+# PSNR rate-distortion curve
+def rate_distortion_curve(gray_image, type, quantization_matrix, num_blocks_height, num_blocks_width, block_size, decimals):
     # Create an empty array to store the PSNR values
     psnr_values = []
+    x_values = []
+    quantization_levels = np.arange(0.1, 1.1, 0.1)
 
-    # Iterate over the quantization matrix
-    for i in range(1, 100):
-        # Print the current quantization scale
-        print(f"Quantization scale: {i}")
-
-        # Encode the image
-        encoded_image, codec = encode(gray_image, block_size, quantization_matrix * i, num_blocks_height, num_blocks_width, block_size)
-
-        # Decode the image
-        decoded_image = decode(encoded_image, codec, quantization_matrix * i, num_blocks_height, num_blocks_width, block_size)
-
-        # Calculate the PSNR
-        psnr = PSNR(gray_image, decoded_image)
-
-        # Append the PSNR value
-        psnr_values.append(psnr)
-
-    # Plot the rate-distortion curve
-    plt.plot(range(1, 100), psnr_values)
-    plt.xlabel('Compression Rate')
-    plt.ylabel('PSNR')
-    plt.show()
-
-# (PSNR vs data size)
-def rate_distortion_curve_2(gray_image, quantization_matrix, num_blocks_height, num_blocks_width, block_size, decimals):
-    # Create an empty array to store the PSNR values
-    psnr_values = []
-    data_size = []
-
-    # Iterate over the quantization matrix
-    for i in range(1, 100):
-        # Print the current quantization scale
-        print(f"Quantization scale: {i}")
+    # Control of the compression rate
+    for i in quantization_levels:
+        
+        print(f"Quantization level: {i}")
 
         # Encode the image
-        encoded_image, codec = encode(gray_image, quantization_matrix * i, num_blocks_height, num_blocks_width, block_size)
+        bitstream, codec = encode(gray_image, quantization_matrix * i, num_blocks_height, num_blocks_width, block_size, decimals)
 
         # Decode the image
-        decoded_image = decode(encoded_image, codec, quantization_matrix * i, num_blocks_height, num_blocks_width, block_size)
+        decoded_image = decode(bitstream, codec, quantization_matrix * i, num_blocks_height, num_blocks_width, block_size, decimals)
 
         # Calculate the PSNR
         psnr = PSNR(gray_image, decoded_image)
 
-        # Append the PSNR value
+        # Append the values
+        x_values.append(len(bitstream))
         psnr_values.append(psnr)
-        data_size.append(len(encoded_image) * 8)
 
-    # Plot the rate-distortion curve
-    plt.plot(data_size, psnr_values)
-    plt.xlabel('Data Size')
-    plt.ylabel('PSNR')
-    plt.show()
+    if type == 'bpp':
+        x_values = np.array(x_values) * 8 / (gray_image.shape[0] * gray_image.shape[1])
+        label = 'Bit per pixel (BPP)'
+    elif type == 'scale':
+        x_values = quantization_levels
+        label = 'Quantization Scale'
+    elif type == 'size':
+        label = 'File Size (bytes)'
+    else:
+        raise Exception('Invalid type')
+    
+    fig = go.Figure(data=go.Scatter(x=x_values, y=psnr_values, mode='lines+markers', name='lines+markers', text=np.round(quantization_levels, 1)))
+    fig.update_layout(title='Rate-Distortion Curve (PSNR vs. Quantization Scale)', xaxis_title=label, yaxis_title='PSNR (dB)')
+    fig.show()
 
 def display_images(original_image, compressed_image):
     # Create a figure
@@ -445,15 +428,14 @@ def display_images(original_image, compressed_image):
 
     quantization_matrix *= 1
     gray_image = open_raw_image(filename, image_width, image_height)
-    t = time.time()
-    encoded_image, codec = encode(gray_image, quantization_matrix, num_blocks_height, num_blocks_width, block_size, decimals)
-    print(f"Encoding time: {round(time.time() - t, 2)}s")
-    t1 = time.time()
-    decoded_image = decode(encoded_image, codec, quantization_matrix, num_blocks_height, num_blocks_width, block_size, decimals)
-    print(f"Decoding time: {round(time.time() - t1, 2)}s")
-
-    compression_quality(gray_image, encoded_image)
-    display_images(gray_image, decoded_image)
-
-    # rate_distortion_curve(gray_image, quantization_matrix, num_blocks_height, num_blocks_width, block_size, decimals)
-    # rate_distortion_curve_2(gray_image, quantization_matrix, num_blocks_height, num_blocks_width, block_size, decimals)
+    # t = time.time()
+    # encoded_image, codec = encode(gray_image, quantization_matrix, num_blocks_height, num_blocks_width, block_size, decimals)
+    # print(f"Encoding time: {round(time.time() - t, 2)}s")
+    # t1 = time.time()
+    # decoded_image = decode(encoded_image, codec, quantization_matrix, num_blocks_height, num_blocks_width, block_size, decimals)
+    # print(f"Decoding time: {round(time.time() - t1, 2)}s")
+
+    # compression_quality(gray_image, encoded_image)
+    # display_images(gray_image, decoded_image)
+
+    rate_distortion_curve(gray_image, "size", quantization_matrix, num_blocks_height, num_blocks_width, block_size, decimals)
diff --git a/Task_2.py b/Task_2.py
@@ -55,6 +55,45 @@ def create_y4m_video(video_path, frames, metadata):
             # Write the frame data
             file.write(frame_bytes)
 
+def rate_distortion_curve_task2(frames, quantization_matrix, fps, num_blocks_height, num_blocks_width, block_size, decimals):
+    # For each quantization scale, encode/decode every frame; returns (mean PSNR list, bitrate list in kbps, scales used).
+    psnr_values = []
+    bps_values = []
+    quantization_levels = [1] + [2 ** i for i in range(1, 6)]
+    for quantization_level in quantization_levels:
+        # Scales are 1, 2, 4, 8, 16 and 32; each one multiplies the whole quantization matrix.
+        print(f"Quantization level: {quantization_level}")
+        psnr_sum, size_sum = 0, 0
+
+        for frame in frames:
+
+            # Encode frame
+            bitstream, codec = encode(frame, quantization_matrix * quantization_level, num_blocks_height, num_blocks_width, block_size, decimals)
+
+            # Calculate frame size in bits
+            size_sum += len(bitstream) * 8
+
+            # Decode frame
+            decoded_frame = decode(bitstream, codec, quantization_matrix * quantization_level, num_blocks_height, num_blocks_width, block_size, decimals)
+
+            # Calculate PSNR
+            psnr_sum += PSNR(frame, decoded_frame)
+
+        # Mean PSNR over the frames; bits per second = total bits / clip duration (len(frames) / fps seconds)
+        psnr = psnr_sum / len(frames)
+        bps = size_sum / (len(frames) / fps)
+
+        # Store PSNR and BPS values
+        psnr_values.append(round(psnr, 2))
+        bps_values.append(round(bps/ 1000, 2))
+
+    return psnr_values, bps_values, quantization_levels
+
+def plot_rate_distortion_curve(psnr_values, bps_values, levels):
+    fig = go.Figure(data=go.Scatter(x=bps_values, y=psnr_values, mode='lines+markers', text=levels))
+    fig.update_layout(title='Rate-Distortion Curve', xaxis_title='BitsPerSecond (kbps)', yaxis_title='PSNR (dB)')
+    fig.show()
+
 if __name__ == "__main__":
 
     filename = "media/input/foreman_qcif_mono.y4m"
@@ -72,19 +111,22 @@ def create_y4m_video(video_path, frames, metadata):
                                     [49, 64, 78, 87, 103, 121, 120, 101],
                                     [72, 92, 95, 98, 112, 100, 103, 99]])
 
-    encoded_frames = []
-    t = time.time()
-    for i, frame in enumerate(frames):
-        print(f"Encoding frame {i}")
-        bitstream, codec = encode(frame, quantization_matrix, num_blocks_height, num_blocks_width, block_size, decimals)
-        encoded_frames.append((bitstream, codec))
-
-    compressed_frames = []
-    for i, encoded_frame in enumerate(encoded_frames):
-        print(f"Decoding frame {i}")
-        bitstream, codec = encoded_frame
-        decoded_frame = decode(bitstream, codec, quantization_matrix, num_blocks_height, num_blocks_width, block_size, decimals)
-        compressed_frames.append(decoded_frame)
+    # encoded_frames = []
+    # t = time.time()
+    # for i, frame in enumerate(frames):
+    #     print(f"Encoding frame {i}")
+    #     bitstream, codec = encode(frame, quantization_matrix, num_blocks_height, num_blocks_width, block_size, decimals)
+    #     encoded_frames.append((bitstream, codec))
+
+    # compressed_frames = []
+    # for i, encoded_frame in enumerate(encoded_frames):
+    #     print(f"Decoding frame {i}")
+    #     bitstream, codec = encoded_frame
+    #     decoded_frame = decode(bitstream, codec, quantization_matrix, num_blocks_height, num_blocks_width, block_size, decimals)
+    #     compressed_frames.append(decoded_frame)
     
-    print(f"Time: {round(time.time() - t, 2)}")
-    create_y4m_video("media/output/foreman_qcif_mono_task2.y4m", compressed_frames, metadata)
+    # print(f"Time: {round(time.time() - t, 2)}")
+    # create_y4m_video("media/output/foreman_qcif_mono_task2.y4m", compressed_frames, metadata)
+
+    psnr_values, bps_values, quantization_levels = rate_distortion_curve_task2(frames[:30], quantization_matrix, 30, num_blocks_height, num_blocks_width, block_size, decimals)
+    plot_rate_distortion_curve(psnr_values, bps_values, quantization_levels)
diff --git a/Task_5_image.py b/Task_5_image.py
@@ -1,7 +1,6 @@
 from Task_1 import *
 from PIL import Image
-import os
-import matplotlib.pyplot as plt
+from os.path import getsize
 
 def open_raw_image_PIL(filename, image_width, image_height):
     with open(filename, 'rb') as file:
@@ -11,10 +10,6 @@ def open_raw_image_PIL(filename, image_width, image_height):
     image = Image.frombytes('L', (image_width, image_height), raw_data)
     return image
 
-def get_file_size(file_path):
-    return os.path.getsize(file_path)
-
-
 if __name__ == "__main__":
 
     filename = "media/input/lena1.raw"
@@ -39,9 +34,11 @@ def get_file_size(file_path):
 
     psnr_values_task1 = []
     file_sizes_task1 = []
+    # scale factors from 0.1 to 1.0 (best) and from 5 to 25 (worst) applied to the quantization matrix
+    quantization_levels = np.concatenate((np.arange(0.1, 1.1, 0.1), np.arange(5, 30, 5)))
 
     # Iterate over the quantization matrix
-    for i in range(1, 100, 10):
+    for i in quantization_levels:
   
         # Encode the image
         encoded_image, codec = encode(gray_image, quantization_matrix * i, num_blocks_height, num_blocks_width, block_size)
@@ -52,18 +49,20 @@ def get_file_size(file_path):
         # Calculate the PSNR
         psnr = PSNR(gray_image, decoded_image)
 
-        # Append the PSNR value
+        # Append the values
         psnr_values_task1.append(psnr)
         file_sizes_task1.append(len(encoded_image))
 
     psnr_values_jpeg = []
     file_sizes_jpeg = []
+    # scale from 0 (worst) to 95 (best)
+    quality_range = range(0, 96, 10)
 
-    for quality in range(0, 96, 10):
+    for quality in quality_range:
         # Compress using JPEG
         jpeg_compressed_image = 'media/output/jpeg_compression.jpg'
         gray_image_PIL.save(jpeg_compressed_image, 'JPEG', quality=quality)
-        jpeg_file_size = get_file_size(jpeg_compressed_image)
+        jpeg_file_size = getsize(jpeg_compressed_image)
         psnr = PSNR(gray_image, np.array(Image.open(jpeg_compressed_image)).astype(np.uint8))
         
         psnr_values_jpeg.append(psnr)
@@ -72,28 +71,26 @@ def get_file_size(file_path):
 
     psnr_values_jpeg2000 = []
     file_sizes_jpeg2000 = []
+    # scale from 10 to 90 representing an approximate size compression
+    quality_layers_range = range(10, 100, 10)
 
-    for layers in range(10, 91, 10):
+    for layers in quality_layers_range:
         # Compress using JPEG2000
         jpeg2000_compressed_image = 'media/output/jpeg2000_compression.jp2'
         gray_image_PIL.save(jpeg2000_compressed_image, 'JPEG2000', quality_mode='rates', quality_layers=[layers], codeblock_size=(8, 8))
-        jpeg2000_file_size = get_file_size(jpeg2000_compressed_image)
+        jpeg2000_file_size = getsize(jpeg2000_compressed_image)
         psnr = PSNR(gray_image, np.array(Image.open(jpeg2000_compressed_image)).astype(np.uint8))
         
         psnr_values_jpeg2000.append(psnr)
         file_sizes_jpeg2000.append(jpeg2000_file_size)
 
     # Plot rate-distortion curve
-    plt.figure()
-    plt.plot(file_sizes_task1, psnr_values_task1, 'o-', label='Task 1')
-    plt.plot(file_sizes_jpeg, psnr_values_jpeg, 'o-', label='JPEG')
-    plt.plot(file_sizes_jpeg2000, psnr_values_jpeg2000, 'o-', label='JPEG2000')
-    plt.xlabel('File Size (bytes)')
-    plt.ylabel('PSNR (dB)')
-    plt.title('Rate-Distortion Curve (PSNR vs. File Size)')
-    plt.legend()
-    plt.grid(True)
-    plt.show()
+    fig = go.Figure()
+    fig.add_trace(go.Scatter(x=file_sizes_task1, y=psnr_values_task1, mode='lines+markers', name='Task 1', text=np.round(quantization_levels, 1)))
+    fig.add_trace(go.Scatter(x=file_sizes_jpeg, y=psnr_values_jpeg, mode='lines+markers', name='JPEG', text=list(quality_range)))
+    fig.add_trace(go.Scatter(x=file_sizes_jpeg2000, y=psnr_values_jpeg2000, mode='lines+markers', name='JPEG2000', text=list(quality_layers_range)))
+    fig.update_layout(title='Rate-Distortion Curve (PSNR vs. File Size)', xaxis_title='File Size (bytes)', yaxis_title='PSNR (dB)')
+    fig.show()
 
     # display_images(original_image, Image.open(jpeg_compressed_image))
     # display_images(original_image, Image.open(jpeg2000_compressed_image))
diff --git a/Task_5_video.py b/Task_5_video.py
@@ -0,0 +1,84 @@
+from moviepy.editor import VideoFileClip
+import os
+from Task_1 import *
+from Task_2 import *
+
+def rate_distortion_curve_task5(input_path, output_path, codec):
+    """Re-encode input_path with `codec` at several target bitrates; return (psnr_values, bps_values in kbps, ratios)."""
+    psnr_values = []
+    bps_values = []
+
+    with VideoFileClip(input_path, audio=False, fps_source='fps') as video_clip:
+        # Keep the first 30 frames (first channel only), the same subset plot_curves passes to the Task 2 curve
+        frames = [frame[:, :, 0] for frame in video_clip.iter_frames()][:30]
+
+        size =  os.path.getsize(input_path)
+        duration = video_clip.duration
+        bitrate = (size * 8 / 1000) / duration
+
+        # Compression ratios 2, 4, 8, 16 and 32 relative to the input file's bitrate
+        ratios = np.array([2 ** i for i in range(1, 6)])
+        # Calculate the target bitrates based on the compression ratios
+        target_bitrates = bitrate / ratios
+
+        for target_bitrate in target_bitrates:
+
+            print(f"Target bitrate: {target_bitrate} kbps")
+
+            # Set the target bitrate (in kbps)
+            video_clip.write_videofile(output_path, codec=codec, bitrate=f'{target_bitrate}k', logger=None)
+
+            with VideoFileClip(output_path, audio=False, fps_source='fps') as video_clip_compressed:
+                # Slice (not index) so frames are compared pairwise instead of row by row of a single frame
+                compressed_frames = [frame[:, :, 0] for frame in video_clip_compressed.iter_frames()][:30]
+
+                psnr_sum = sum([PSNR(frame, compressed_frame) for frame, compressed_frame in zip(frames, compressed_frames)])
+
+                # Average PSNR over the compared frames; bitrate from the size of the whole re-encoded file
+                psnr = psnr_sum / len(frames)
+                bps = os.path.getsize(output_path) * 8 / duration
+
+            # Store PSNR and BPS values
+            psnr_values.append(round(psnr, 2))
+            bps_values.append(round(bps/ 1000, 2))
+
+    return psnr_values, bps_values, ratios
+
+
+def plot_curves():
+    # Plots Task 2 vs. libx264 vs. libx265 PSNR/bitrate curves; reads frames, paths and block settings assigned in the __main__ section.
+    psnr_values, bps_values, quantization_levels = rate_distortion_curve_task2(frames[:30], quantization_matrix, 30, num_blocks_height, num_blocks_width, block_size, decimals)
+    psnr_values_h264, bps_values_h264, ratios_h264 = rate_distortion_curve_task5(input_video_path, output_video_path_h264, 'libx264')
+    psnr_values_h265, bps_values_h265, ratios_h265 = rate_distortion_curve_task5(input_video_path, output_video_path_h265, 'libx265')
+
+    fig = go.Figure()
+    fig.add_trace(go.Scatter(x=bps_values, y=psnr_values, mode='lines+markers', name='Task 2', text=quantization_levels))
+    fig.add_trace(go.Scatter(x=bps_values_h264, y=psnr_values_h264, mode='lines+markers', name='H.264', text=ratios_h264))
+    fig.add_trace(go.Scatter(x=bps_values_h265, y=psnr_values_h265, mode='lines+markers', name='H.265', text=ratios_h265))
+    fig.update_layout(title='Rate-Distortion Curve', xaxis_title='BitsPerSecond (kbps)', yaxis_title='PSNR (dB)')
+    fig.show()
+
+
+if __name__ == "__main__":
+        
+    input_video_path = 'media/input/foreman_qcif_mono.y4m'
+    output_video_path_h264 = 'media/output/foreman_qcif_mono_h264.mp4'
+    output_video_path_h265 = 'media/output/foreman_qcif_mono_h265.mp4'
+
+    frames, metadata = read_y4m_video(input_video_path)
+    frame_height, frame_width, block_size = int(metadata["H"]), int(metadata["W"]), 8
+    num_blocks_height = frame_height // block_size
+    num_blocks_width = frame_width // block_size
+    decimals = 0
+    quantization_matrix = np.array([[16, 11, 10, 16, 24, 40, 51, 61],
+                                    [12, 12, 14, 19, 26, 58, 60, 55],
+                                    [14, 13, 16, 24, 40, 57, 69, 56],
+                                    [14, 17, 22, 29, 51, 87, 80, 62],
+                                    [18, 22, 37, 56, 68, 109, 103, 77],
+                                    [24, 35, 55, 64, 81, 104, 113, 92],
+                                    [49, 64, 78, 87, 103, 121, 120, 101],
+                                    [72, 92, 95, 98, 112, 100, 103, 99]])
+
+    plot_curves()
+
+