77 commits
e14f926
E_ITP metric
MalihaAshraf Dec 31, 2022
c996e26
E_ITP metric
MalihaAshraf Dec 31, 2022
f0cfcf9
ITP metric updated
MalihaAshraf Jan 8, 2023
cbab380
E_ITP metric merged
MalihaAshraf Jan 8, 2023
8db4833
E_ITP metric
MalihaAshraf Dec 31, 2022
e8eb047
Fixed a few issues with training
mantiuk Jan 5, 2023
ce5f4b0
Reduced mem requirements
mantiuk Jan 5, 2023
2042f1c
Updated default params
mantiuk Jan 6, 2023
0651919
Fixed optimization of betas
mantiuk Jan 7, 2023
977ec1e
Code cleanup
mantiuk Jan 7, 2023
388a1aa
Fixed bugs with the temporal filters
mantiuk Jan 7, 2023
e099c59
Some bug fixes for retaining gradients; removed update device
Jan 8, 2023
0acd9e6
Created a super-class for all metrics
mantiuk Jan 8, 2023
a935924
increased memory usage estimate when training
mantiuk Jan 8, 2023
e7e4923
Fixed
mantiuk Jan 8, 2023
aaa824e
ITP metric updated
MalihaAshraf Jan 8, 2023
1734712
Updated mem requirements
mantiuk Jan 8, 2023
5a573d0
Fixed issue with stack that failed when params were not tensors
mantiuk Jan 8, 2023
323920c
E_ITP tested with example video
MalihaAshraf Jan 9, 2023
4e4f8a1
Merge conflict resolved
MalihaAshraf Jan 9, 2023
dee58b8
SCIELab implementation added
MalihaAshraf Jan 9, 2023
87bc480
Delta E2000 code added
MalihaAshraf Jan 9, 2023
b66304a
DE2000 without spatial filtering added
MalihaAshraf Jan 9, 2023
1453570
Spatial DE2000 implemented and tested
MalihaAshraf Jan 9, 2023
e86d43c
Fixed typo and removed redundant function
MalihaAshraf Jan 9, 2023
fc3f2dc
Added HDR-FLIP and PU21-LPIPS
Jan 10, 2023
0d62457
Added names to flip and lpips
Jan 10, 2023
b129ca1
Fixed LPIPS
Jan 10, 2023
f3d7a3a
Added Dolby ICTCP; TODO check implementation
Jan 10, 2023
f9f1eda
Updated units
MalihaAshraf Jan 10, 2023
7254527
White point for de2000
MalihaAshraf Jan 10, 2023
e9d068d
Fixed some metrics to run on GPU
Jan 11, 2023
ee8fd75
separableFilters_torch function
MalihaAshraf Jan 11, 2023
f12799b
Fixed dolbyitp metric. We can use this as the E_ITP metric
MalihaAshraf Jan 13, 2023
7f31f07
Separable convolution converted to pytorch
MalihaAshraf Jan 13, 2023
8dc76a2
Fixed gradient propagation for mask_q
paramhanji Jan 13, 2023
f2abe0c
Fixed pytorch separable filter; added vmaf
paramhanji Jan 14, 2023
d53cb07
Updated luminance clipping for some metrics
Feb 1, 2023
089f6b4
Added cvvdp with RNN pooling
paramhanji Feb 14, 2023
79baab6
Added MLP masking model
paramhanji Feb 17, 2023
fee6cfd
Minor fixes to run other metrics
Feb 17, 2023
c1d312a
Minor changes to run metrics
Mar 4, 2023
9ac4d43
PSNR MSE function added
MalihaAshraf Mar 7, 2023
b573e28
Function definition for psnr mse
Mar 8, 2023
a76bdcf
Updated spatial E-ITP
MalihaAshraf Mar 21, 2023
b5ed8a6
S-DE-2000 pytorch implementation
MalihaAshraf Apr 5, 2023
4832d6c
File renamed to work with latest metric_calibration
Apr 17, 2023
44a78b0
Ported ffmpeg fix for reading long videos
paramhanji Apr 17, 2023
3f37ae6
Introduced batching and fixed device handling for flip
paramhanji Apr 17, 2023
4215c53
Fixed vmaf
paramhanji Apr 22, 2023
1723284
fix conflicts with main branch
MalihaAshraf Apr 25, 2023
e37667c
Merge remote-tracking branch 'origin/main' into comparison-metrics
MalihaAshraf Apr 28, 2023
6cab571
Merged origin/main
MalihaAshraf May 1, 2023
7a143fd
Merge remote-tracking branch 'origin/main' into comparison-metrics
MalihaAshraf May 4, 2023
9af03e3
Update pupsnr.py
MalihaAshraf May 15, 2023
416d315
HyAB metric added
MalihaAshraf May 18, 2023
d14c86d
Updated vmaf
paramhanji May 18, 2023
576994e
Video encoding class added for STRRED
MalihaAshraf May 19, 2023
ceb439e
Added script to time metrics
paramhanji May 21, 2023
2401e46
Added vmaf to timing script
paramhanji May 23, 2023
b1c4fe4
Updated timing script
paramhanji May 23, 2023
364ed90
Merge branch 'main' into comparison-metrics
paramhanji Aug 1, 2023
a2092ad
Fixed pupsnr to handle "config_paths"
paramhanji Aug 2, 2023
12848d8
Added support for different fps for reference and test videos
paramhanji Aug 14, 2023
80d5d28
Merge branch 'main' into comparison-metrics
paramhanji Aug 14, 2023
f42319f
Merge branch 'main' into comparison-metrics
Aug 25, 2023
c2d1b86
Merge branch 'main' into comparison-metrics
Aug 28, 2023
74665c1
Added cvvdp-image
paramhanji Aug 29, 2023
2518198
Fixed LPIPS colorspace
Sep 25, 2023
d5618ed
Updated files for v0.3
mantiuk Sep 5, 2023
a9781be
Improved README.md
mantiuk Sep 5, 2023
afe6860
Fixed typo
mantiuk Sep 19, 2023
5b0f623
Updated packages to fix vulnerability issues
mantiuk Sep 19, 2023
0911dd5
Recoded white-balance example with hevc
paramhanji Oct 2, 2023
a98b34c
Minor change to cvvdp_nn
paramhanji Oct 13, 2023
558d51e
Different fps for test and reference for YUV files
paramhanji Oct 15, 2023
497f6ac
Use reference video to check number of frames
Nov 11, 2023
5 changes: 5 additions & 0 deletions ChangeLog.md
@@ -1,3 +1,8 @@
# v0.3 (05/09/2023)
* Added value range checks when the metric is running on HDR data (to avoid passing relative values)
* Added SSIM as an alternative metric
* Better handling of paths to configuration files

# v0.2
* Updated ColourVideoVDP model with cross-channel masking and more advanced pooling, different calibration and better prediction accuracy.
* Added distograms
81 changes: 39 additions & 42 deletions README.md

Large diffs are not rendered by default.

Binary file modified example_media/structure/ferris-test-wb.mp4
Binary file not shown.
97 changes: 97 additions & 0 deletions generate_timings.py
@@ -0,0 +1,97 @@
import numpy as np
import pandas as pd
import pycvvdp
from time import time
import torch
from tqdm import tqdm, trange
import logging

class DummyVS:
# Synthetic video source: one random frame is reused for every frame index,
# so the timings measure metric computation rather than video decoding
def __init__(self, num_frames, height, width, fps=30, device='cuda'):
self.n = num_frames
self.h = height
self.w = width
self.fps = fps
self.frame = torch.randn(1,3,1,height,width, device=device)

def __len__(self):
return self.n

def get_video_size(self):
return self.h, self.w, self.n

def get_frames_per_second(self):
return self.fps

def get_reference_frame(self, i, device=None, colorspace=None):
return self.frame[:,:1] if colorspace == 'Y' else self.frame

def get_test_frame(self, i, device=None, colorspace=None):
return self.get_reference_frame(i, device, colorspace)

def to(self, device):
self.frame = self.frame.to(device)

debug = True
metrics = ['PU21-VMAF', 'cvvdp', 'fvvdp']
device = torch.device('cuda')
dims = ((720, 1280), (1080, 1920), (1440, 2560), (2160, 3840))
# h, w = 1080, 1920
# n = (30, 60, 120, 240, 480)
num_frames = 50
num_samples = 5

# VMAF paths set for Param's PC
ffmpeg_path = '../vmaf/ffmpeg-6.0-amd64-static/ffmpeg'
vmaf_cache = '/local/scratch/pmh64/tmp'

logging.basicConfig(format='[%(levelname)s] %(message)s', level=logging.DEBUG if debug else logging.INFO)

timings = []
# for num_frames in tqdm(n):
for h, w in tqdm(dims):
vs = DummyVS(num_frames, h, w, device=device)
pbar = tqdm(metrics, leave=False)
for metric_type in pbar:
pbar.set_description(f'N={num_frames}, h={h}, w={w}, metric={metric_type}')

if metric_type == 'cvvdp':
metric = pycvvdp.cvvdp(quiet=True, device=device, temp_padding='replicate', heatmap=None)
if debug:
metric.debug = True
elif metric_type == 'fvvdp':
from pyfvvdp import fvvdp
# Add argument "colorspace='Y'" while retrieving frames to run on images
# Lines 233 and 244 in commit 5bf67f92341604d238ebe72fdeeb4ad825db5485
metric = fvvdp(quiet=True, device=device, temp_padding='replicate', heatmap=None)
elif metric_type == 'PSNR-RGB':
metric = pycvvdp.psnr_rgb(device=device)
elif metric_type == 'FLIP':
metric = pycvvdp.flip(device=device)
elif metric_type == 'PU21-VMAF':
metric = pycvvdp.pu_vmaf(ffmpeg_path, vmaf_cache, device=device)
else:
raise RuntimeError( f'Unknown metric {metric_type}' )

with torch.no_grad():
metric.predict_video_source(vs) # dummy run

times = []
for _ in trange(num_samples, leave=False):
if metric_type == 'PU21-VMAF':
times.append(metric.predict_video_source(vs, record_time=True))
else:
start = time()
metric.predict_video_source(vs)
times.append(time() - start)

timings.append({'name': metric_type,
'num_samples': num_samples,
'num_frames': num_frames,
'height': h,
'width': w,
'time_mean': np.mean(times),
'time_std': np.std(times)})

df = pd.DataFrame(timings)
df.to_csv('timings.csv', index=False)
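
A minimal sketch (not part of this diff) of how the resulting timings.csv could be summarized; it assumes only the columns written by the script above:

import pandas as pd

df = pd.read_csv('timings.csv')
# Mean runtime in seconds per metric and resolution
print(df.pivot_table(index=['height', 'width'], columns='name', values='time_mean'))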
11 changes: 10 additions & 1 deletion pycvvdp/__init__.py
@@ -1,6 +1,15 @@
from pycvvdp.cvvdp_metric import cvvdp
from pycvvdp.cvvdp_metric import cvvdp, cvvdp_image
from pycvvdp.cvvdp_nn_metric import cvvdp_nn
from pycvvdp.pupsnr import pu_psnr_y, pu_psnr_rgb2020, psnr_rgb
from pycvvdp.e_itp import e_itp
from pycvvdp.e_sitp import e_sitp
from pycvvdp.de2000 import de2000
from pycvvdp.de2000_spatial import s_de2000
from pycvvdp.flip import flip
from pycvvdp.pu_lpips import pu_lpips
from pycvvdp.dolby_ictcp import ictcp
from pycvvdp.pu_vmaf import pu_vmaf
from pycvvdp.hyab import hyab
from pycvvdp.video_source_file import video_source_file, video_source_video_file, load_image_as_array
from pycvvdp.display_model import vvdp_display_photometry, vvdp_display_photo_eotf, vvdp_display_geometry
from pycvvdp.video_source_yuv import video_source_yuv_file
36 changes: 36 additions & 0 deletions pycvvdp/cvvdp_metric.py
@@ -838,4 +838,40 @@ def export_distogram(self, stats, fname, jod_max=None, base_size=6):
# plt.waitforbuttonpress()


class cvvdp_image(vq_metric):
def __init__(self, display_name="standard_4k", display_photometry=None, display_geometry=None, config_paths=[], heatmap=None, quiet=False, device=None, temp_padding="replicate", use_checkpoints=False, calibrated_ckpt=None):
# Use GPU if available
if device is None:
if torch.cuda.is_available() and torch.cuda.device_count()>0:
self.device = torch.device('cuda:0')
else:
self.device = torch.device('cpu')
else:
self.device = device

# Create a dummy display photometry object that does not change input frame
self.cvvdp_metric = cvvdp(display_name, None, None, config_paths, heatmap, quiet, self.device, temp_padding, use_checkpoints, calibrated_ckpt)

def set_display_model(self, display_name="standard_4k", display_photometry=None, display_geometry=None, config_paths=[]):
self.linear_dm = vvdp_display_photo_eotf(display_photometry.Y_peak, contrast=display_photometry.contrast, source_colorspace='BT.2020-linear', E_ambient=display_photometry.E_ambient, k_refl=display_photometry.k_refl)
self.cvvdp_metric.set_display_model(display_photometry=self.linear_dm, display_geometry=display_geometry)

def predict_video_source(self, vid_source):

_, _, N_frames = vid_source.get_video_size()

avg = 0
for ff in range(N_frames):
T = vid_source.get_test_frame(ff, device=self.device, colorspace='RGB2020')
R = vid_source.get_reference_frame(ff, device=self.device, colorspace='RGB2020')
test_vs = video_source_array( T, R, fps=0, display_photometry=self.linear_dm )
Q, _ = self.cvvdp_metric.predict_video_source(test_vs)
avg += Q / N_frames

return avg, None

def short_name(self):
return "cvvdp-image"

def quality_unit(self):
return "JOD"
2 changes: 1 addition & 1 deletion pycvvdp/cvvdp_nn_metric.py
@@ -105,7 +105,7 @@ def do_pooling_and_jods(self, Q_per_ch, base_rho_band, fps):
# Q_per_ch[channel,frame,sp_band]
feat_in = Q_per_ch.permute(1, 0, 2).flatten(start_dim=1)
feat_intermediate, _ = self.pooling_net[0](feat_in)
feat_intermediate = torch.cat((feat_intermediate[-1], base_rho_band.unsqueeze(0)))
feat_intermediate = torch.cat((feat_intermediate[-1], torch.as_tensor(base_rho_band, device=self.device, dtype=torch.float32).unsqueeze(0)))
Q = self.pooling_net[1](feat_intermediate).squeeze() * 10
return Q

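The torch.as_tensor wrapper guards against base_rho_band arriving as a plain Python float, since torch.cat accepts only tensors. A minimal reproduction of the failure mode (a sketch, not part of this diff):

import torch

feat = torch.randn(8)
rho = 2.5  # base_rho_band as a plain float
# torch.cat((feat, rho)) raises TypeError; converting it first works:
rho_t = torch.as_tensor(rho, dtype=feat.dtype).unsqueeze(0)
print(torch.cat((feat, rho_t)).shape)  # torch.Size([9])
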
92 changes: 92 additions & 0 deletions pycvvdp/de2000.py
@@ -0,0 +1,92 @@
import torch
import numpy as np

from pycvvdp.utils import deltaE00
from pycvvdp.vq_metric import vq_metric

"""
DE2000 metric. Usage is the same as for the FovVideoVDP metric (see pytorch_examples).
"""
class de2000(vq_metric):
def __init__(self, device=None, display_name=None, display_geometry=None, display_photometry=None):
# Use GPU if available
if device is None:
if torch.cuda.is_available() and torch.cuda.device_count()>0:
self.device = torch.device('cuda:0')
else:
self.device = torch.device('cpu')
else:
self.device = device

# D65 White point
self.w = (0.9505, 1.0000, 1.0888)
self.colorspace = 'XYZ'

'''
The same as `predict`, but takes an fvvdp_video_source_* object as input instead of Numpy/Pytorch arrays.
'''
def predict_video_source(self, vid_source, frame_padding="replicate"):

# T_vid and R_vid are the tensors of the size (1,3,N,H,W)
# where:
# N - the number of frames
# H - height in pixels
# W - width in pixels
# Both images must contain linear absolute luminance values in cd/m^2
#
# We assume the pytorch default NCDHW layout

_, _, N_frames = vid_source.get_video_size()

e00 = 0.0
for ff in range(N_frames):
T = vid_source.get_test_frame(ff, device=self.device, colorspace=self.colorspace)
R = vid_source.get_reference_frame(ff, device=self.device, colorspace=self.colorspace)

# XYZ to Lab
w = self.max_L*self.w
T_lab = self.xyz_to_lab(T, w)
R_lab = self.xyz_to_lab(R, w)

# Mean of per-pixel DE2000
e00 = e00 + self.e00_fn(T_lab, R_lab) / N_frames
return e00, None

def xyz_to_lab(self, img, W):
Lab = torch.empty_like(img)
Lab[...,0,:,:,:] = 116*self.lab_fn(img[...,1,:,:,:]/W[1])-16
Lab[...,1,:,:,:] = 500*(self.lab_fn(img[...,0,:,:,:]/W[0]) - self.lab_fn(img[...,1,:,:,:]/W[1]))
Lab[...,2,:,:,:] = 200*(self.lab_fn(img[...,1,:,:,:]/W[1]) - self.lab_fn(img[...,2,:,:,:]/W[2]))
return Lab

def lab_fn(self, x):
sigma = (6/29)
y_1 = x**(1/3)
y_2 = (x/(3*(sigma**2)))+(4/29)
condition = torch.less(x, sigma**3)
y = torch.where(condition, y_2, y_1)
return y

def e00_fn(self, img1, img2):
sz = torch.numel(img1[...,0,:,:,:])
img1_row = torch.cat((torch.reshape(img1[...,0,:,:,:], (1,sz)), torch.reshape(img1[...,1,:,:,:], (1,sz)), torch.reshape(img1[...,2,:,:,:], (1,sz))), 0)
img2_row = torch.cat((torch.reshape(img2[...,0,:,:,:], (1,sz)), torch.reshape(img2[...,1,:,:,:], (1,sz)), torch.reshape(img2[...,2,:,:,:], (1,sz))), 0)
e00 = deltaE00(img1_row, img2_row)
# e00_mean = torch.empty_like(torch.reshape(img1[...,0,:,:,:], (1,sz)))
# e00_mean = torch.mean(torch.from_numpy(e00).to(e00_mean))
e00_mean = torch.mean(e00)
return e00_mean

def short_name(self):
return "DE-2000"

def quality_unit(self):
return "Delta E2000"

def get_info_string(self):
return None

def set_display_model(self, display_photometry, display_geometry):
# Limit the adapting white point to a peak luminance of 300 cd/m^2
self.max_L = display_photometry.get_peak_luminance()
self.max_L = np.where( self.max_L < 300, self.max_L, 300)
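
A quick sanity check for the conversion above (a sketch, not part of this diff): feeding the D65 white point itself through xyz_to_lab should give L* = 100 and a* = b* = 0:

import torch
from pycvvdp.de2000 import de2000

m = de2000(device=torch.device('cpu'))
white = torch.tensor(m.w).view(1, 3, 1, 1, 1)  # D65 white as a single-frame video
lab = m.xyz_to_lab(white, m.w)
print(lab.flatten())  # expected: tensor([100., 0., 0.])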
125 changes: 125 additions & 0 deletions pycvvdp/de2000_spatial.py
@@ -0,0 +1,125 @@
import torch
import numpy as np

from pycvvdp.utils import SCIELAB_filter, deltaE00
from pycvvdp.vq_metric import vq_metric

"""
Spatial DE2000 metric. Usage is the same as for the FovVideoVDP metric (see pytorch_examples).
"""
class s_de2000(vq_metric):
def __init__(self, device=None, display_name=None, display_geometry=None, display_photometry=None):
# Use GPU if available
if device is None:
if torch.cuda.is_available() and torch.cuda.device_count()>0:
self.device = torch.device('cuda:0')
else:
self.device = torch.device('cpu')
else:
self.device = device

self.slab = SCIELAB_filter(device=device)
# D65 White point
self.w = (0.9505, 1.0000, 1.0888)
self.colorspace = 'XYZ'

'''
The same as `predict`, but takes an fvvdp_video_source_* object as input instead of Numpy/Pytorch arrays.
'''
def predict_video_source(self, vid_source, frame_padding="replicate"):

# T_vid and R_vid are the tensors of the size (1,3,N,H,W)
# where:
# N - the number of frames
# H - height in pixels
# W - width in pixels
# Both images must contain linear absolute luminance values in cd/m^2
#
# We assume the pytorch default NCDHW layout

_, _, N_frames = vid_source.get_video_size()

e_s00 = 0.0
for ff in range(N_frames):
T = vid_source.get_test_frame(ff, device=self.device, colorspace=self.colorspace)
R = vid_source.get_reference_frame(ff, device=self.device, colorspace=self.colorspace)

# XYZ to Opp
T_opp = self.slab.xyz_to_opp(T)
R_opp = self.slab.xyz_to_opp(R)

# Spatially filtered opponent colour images
T_s_opp = self.opp_to_sopp(T_opp, self.ppd)
R_s_opp = self.opp_to_sopp(R_opp, self.ppd)

# S-OPP to S-XYZ
T_s_xyz = self.slab.opp_to_xyz(T_s_opp)
R_s_xyz = self.slab.opp_to_xyz(R_s_opp)

# S-XYZ to S-Lab
w = self.max_L*self.w
T_s_lab = self.xyz_to_lab(T_s_xyz, w)
R_s_lab = self.xyz_to_lab(R_s_xyz, w)

# Mean of per-pixel DE2000
e_s00 = e_s00 + self.e00_fn(T_s_lab, R_s_lab) / N_frames
return e_s00, None

def opp_to_sopp(self, img, ppd):
S_OPP = torch.empty_like(img)
## Filters are low-dimensional; construct using np
#[k1, k2, k3] = [torch.as_tensor(filter).to(img) for filter in self.slab.separableFilters_torch(ppd)]
#S_OPP[...,0:,:,:] = self.slab.separableConv_torch(torch.squeeze(img[...,0,:,:,:]), k1, torch.abs(k1))
#S_OPP[...,1:,:,:] = self.slab.separableConv_torch(torch.squeeze(img[...,1,:,:,:]), k2, torch.abs(k2))
#S_OPP[...,2:,:,:] = self.slab.separableConv_torch(torch.squeeze(img[...,2,:,:,:]), k3, torch.abs(k3))

# Simpler SCIELAB filters implementation
[k1, k2, k3] = self.slab.generateSCIELABfiltersParams(ppd)
# Limit the filter width to 1 degree of visual angle and an odd number of sampling points
# (so that the gaussians generated from Rick's gauss routine are symmetric).
width = int(np.ceil(ppd / 2) * 2 - 1)
S_OPP[...,0,:,:,:] = self.slab.applyGaussFilter(img[...,0,:,:,:], width, k1)
S_OPP[...,1,:,:,:] = self.slab.applyGaussFilter(img[...,1,:,:,:], width, k2)
S_OPP[...,2,:,:,:] = self.slab.applyGaussFilter(img[...,2,:,:,:], width, k3)

return S_OPP

def xyz_to_lab(self, img, W):
Lab = torch.empty_like(img)
Lab[...,0,:,:,:] = 116*self.lab_fn(img[...,1,:,:,:]/W[1])-16
Lab[...,1,:,:,:] = 500*(self.lab_fn(img[...,0,:,:,:]/W[0]) - self.lab_fn(img[...,1,:,:,:]/W[1]))
Lab[...,2,:,:,:] = 200*(self.lab_fn(img[...,1,:,:,:]/W[1]) - self.lab_fn(img[...,2,:,:,:]/W[2]))
return Lab

def lab_fn(self, x):
sigma = (6/29)
y_1 = x**(1/3)
y_2 = (x/(3*(sigma**2)))+(4/29)
condition = torch.less(x, sigma**3)
y = torch.where(condition, y_2, y_1)
return y

def e00_fn(self, img1, img2):
sz = torch.numel(img1[...,0,:,:,:])
img1_row = torch.cat((torch.reshape(img1[...,0,:,:,:], (1,sz)), torch.reshape(img1[...,1,:,:,:], (1,sz)), torch.reshape(img1[...,2,:,:,:], (1,sz))), 0)
img2_row = torch.cat((torch.reshape(img2[...,0,:,:,:], (1,sz)), torch.reshape(img2[...,1,:,:,:], (1,sz)), torch.reshape(img2[...,2,:,:,:], (1,sz))), 0)
e00 = deltaE00(img1_row, img2_row)
# e00_mean = torch.empty_like(torch.reshape(img1[...,0,:,:,:], (1,sz)))
# e00_mean = torch.mean(torch.from_numpy(e00).to(e00_mean))
e00_mean = torch.mean(e00)
return e00_mean

def short_name(self):
return "S-DE-2000"

def quality_unit(self):
return "Delta E2000"

def get_info_string(self):
return None

def set_display_model(self, display_photometry, display_geometry):
self.ppd = display_geometry.get_ppd()
# Limit the adapting white point to a peak luminance of 300 cd/m^2
self.max_L = display_photometry.get_peak_luminance()
self.max_L = np.where( self.max_L < 300, self.max_L, 300)
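
As a side note on the width formula in opp_to_sopp above, int(np.ceil(ppd / 2) * 2 - 1) always yields an odd kernel size of roughly ppd samples, i.e. about one degree of visual angle. A quick check (a sketch, not part of this diff):

import numpy as np

for ppd in (30, 45, 52.3, 60):
    width = int(np.ceil(ppd / 2) * 2 - 1)
    print(ppd, width)  # 30 -> 29, 45 -> 45, 52.3 -> 53, 60 -> 59 (all odd)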