77 commits
e14f926
E_ITP metric
MalihaAshraf Dec 31, 2022
c996e26
E_ITP metric
MalihaAshraf Dec 31, 2022
f0cfcf9
ITP metric updated
MalihaAshraf Jan 8, 2023
cbab380
E_ITP metric merged
MalihaAshraf Jan 8, 2023
8db4833
E_ITP metric
MalihaAshraf Dec 31, 2022
e8eb047
Fixed a few issues with training
mantiuk Jan 5, 2023
ce5f4b0
Reduced mem requirements
mantiuk Jan 5, 2023
2042f1c
Updated default params
mantiuk Jan 6, 2023
0651919
Fixed optimization of betas
mantiuk Jan 7, 2023
977ec1e
Code cleanup
mantiuk Jan 7, 2023
388a1aa
Fixed bugs with the temporal filters
mantiuk Jan 7, 2023
e099c59
Some bug fixes for retaining gradients; removed update device
Jan 8, 2023
0acd9e6
Created a super-class for all metrics
mantiuk Jan 8, 2023
a935924
increased memory usage estimate when training
mantiuk Jan 8, 2023
e7e4923
Fixed
mantiuk Jan 8, 2023
aaa824e
ITP metric updated
MalihaAshraf Jan 8, 2023
1734712
Updated mem requirements
mantiuk Jan 8, 2023
5a573d0
Fixed issue with stack that failed when params were not tensors
mantiuk Jan 8, 2023
323920c
E_ITP tested with example video
MalihaAshraf Jan 9, 2023
4e4f8a1
Merge conflict resolved
MalihaAshraf Jan 9, 2023
dee58b8
SCIELab implementation added
MalihaAshraf Jan 9, 2023
87bc480
Delta E2000 code added
MalihaAshraf Jan 9, 2023
b66304a
DE2000 without spatial filtering added
MalihaAshraf Jan 9, 2023
1453570
Spatial DE2000 implemented and tested
MalihaAshraf Jan 9, 2023
e86d43c
Fixed typo and removed redundant function
MalihaAshraf Jan 9, 2023
fc3f2dc
Added HDR-FLIP and PU21-LPIPS
Jan 10, 2023
0d62457
Added names to flip and lpips
Jan 10, 2023
b129ca1
Fixed LPIPS
Jan 10, 2023
f3d7a3a
Added Dolby ICTCP; TODO check implementation
Jan 10, 2023
f9f1eda
Updated units
MalihaAshraf Jan 10, 2023
7254527
White point for de2000
MalihaAshraf Jan 10, 2023
e9d068d
Fixed some metrics to run on GPU
Jan 11, 2023
ee8fd75
separableFilters_torch function
MalihaAshraf Jan 11, 2023
f12799b
Fixed dolbyitp metric. We can use this as the E_ITP metric
MalihaAshraf Jan 13, 2023
7f31f07
Separable convolution converted to pytorch
MalihaAshraf Jan 13, 2023
8dc76a2
Fixed gradient propagation for mask_q
paramhanji Jan 13, 2023
f2abe0c
Fixed pytorch separable filter; added vmaf
paramhanji Jan 14, 2023
d53cb07
Updated luminance clipping for some metrics
Feb 1, 2023
089f6b4
Added cvvdp with RNN pooling
paramhanji Feb 14, 2023
79baab6
Added MLP masking model
paramhanji Feb 17, 2023
fee6cfd
Minor fixes to run other metrics
Feb 17, 2023
c1d312a
Minor changes to run metrics
Mar 4, 2023
9ac4d43
PSNR MSE function added
MalihaAshraf Mar 7, 2023
b573e28
Function definition for psnr mse
Mar 8, 2023
a76bdcf
Updated spatial E-ITP
MalihaAshraf Mar 21, 2023
b5ed8a6
S-DE-2000 pytorch implementation
MalihaAshraf Apr 5, 2023
4832d6c
File renamed to work with latest metric_calibration
Apr 17, 2023
44a78b0
Ported ffmpeg fix for reading long videos
paramhanji Apr 17, 2023
3f37ae6
Introduced batching and fixed device handling for flip
paramhanji Apr 17, 2023
4215c53
Fixed vmaf
paramhanji Apr 22, 2023
1723284
fix conflicts with main branch
MalihaAshraf Apr 25, 2023
e37667c
Merge remote-tracking branch 'origin/main' into comparison-metrics
MalihaAshraf Apr 28, 2023
6cab571
Merged origin/main
MalihaAshraf May 1, 2023
7a143fd
Merge remote-tracking branch 'origin/main' into comparison-metrics
MalihaAshraf May 4, 2023
9af03e3
Update pupsnr.py
MalihaAshraf May 15, 2023
416d315
HyAB metric added
MalihaAshraf May 18, 2023
d14c86d
Updated vmaf
paramhanji May 18, 2023
576994e
Video encoding class added for STRRED
MalihaAshraf May 19, 2023
ceb439e
Added script to time metrics
paramhanji May 21, 2023
2401e46
Added vmaf to timing script
paramhanji May 23, 2023
b1c4fe4
Updated timing script
paramhanji May 23, 2023
364ed90
Merge branch 'main' into comparison-metrics
paramhanji Aug 1, 2023
a2092ad
Fixed pupsnr to handle "config_paths"
paramhanji Aug 2, 2023
12848d8
Added support for different fps for reference and test videos
paramhanji Aug 14, 2023
80d5d28
Merge branch 'main' into comparison-metrics
paramhanji Aug 14, 2023
f42319f
Merge branch 'main' into comparison-metrics
Aug 25, 2023
c2d1b86
Merge branch 'main' into comparison-metrics
Aug 28, 2023
74665c1
Added cvvdp-image
paramhanji Aug 29, 2023
2518198
Fixed LPIPS colorspace
Sep 25, 2023
d5618ed
Updated files for v0.3
mantiuk Sep 5, 2023
a9781be
Improved README.md
mantiuk Sep 5, 2023
afe6860
Fixed typo
mantiuk Sep 19, 2023
5b0f623
Updated packages to fix vulnerability issues
mantiuk Sep 19, 2023
0911dd5
Recoded white-balance example with hevc
paramhanji Oct 2, 2023
a98b34c
Minor change to cvvdp_nn
paramhanji Oct 13, 2023
558d51e
Different fps for test and reference for YUV files
paramhanji Oct 15, 2023
497f6ac
Use reference video to check number of frames
Nov 11, 2023
5 changes: 5 additions & 0 deletions ChangeLog.md
@@ -1,3 +1,8 @@
# v0.3 (05/09/2023)
* Added value range checks when the metric is running on HDR data (to avoid passing relative values)
* Added SSIM as an alternative metric
* Better handling of paths to configuration files

# v0.2
* Updated ColourVideoVDP model with cross-channel masking and more advanced pooling, different calibration and better prediction accuracy.
* Added distograms
81 changes: 39 additions & 42 deletions README.md

Large diffs are not rendered by default.

Binary file modified example_media/structure/ferris-test-wb.mp4
Binary file not shown.
97 changes: 97 additions & 0 deletions generate_timings.py
@@ -0,0 +1,97 @@
import numpy as np
import pandas as pd
import pycvvdp
from time import time
import torch
from tqdm import tqdm, trange
import logging

class DummyVS:
# Synthetic video source: one random frame is reused for every frame index,
# so the timings measure metric computation rather than video decoding
def __init__(self, num_frames, height, width, fps=30, device='cuda'):
self.n = num_frames
self.h = height
self.w = width
self.fps = fps
self.frame = torch.randn(1,3,1,height,width, device=device)

def __len__(self):
return self.n

def get_video_size(self):
return self.h, self.w, self.n

def get_frames_per_second(self):
return self.fps

def get_reference_frame(self, i, device=None, colorspace=None):
return self.frame[:,:1] if colorspace == 'Y' else self.frame

def get_test_frame(self, i, device=None, colorspace=None):
return self.get_reference_frame(i, device, colorspace)

def to(self, device):
self.frame = self.frame.to(device)

debug = True
metrics = ['PU21-VMAF', 'cvvdp', 'fvvdp']
device = torch.device('cuda')
dims = ((720, 1280), (1080, 1920), (1440, 2560), (2160, 3840))
# h, w = 1080, 1920
# n = (30, 60, 120, 240, 480)
num_frames = 50
num_samples = 5

# VMAF paths set for Param's PC
ffmpeg_path = '../vmaf/ffmpeg-6.0-amd64-static/ffmpeg'
vmaf_cache = '/local/scratch/pmh64/tmp'

logging.basicConfig(format='[%(levelname)s] %(message)s', level=logging.DEBUG if debug else logging.INFO)

timings = []
# for num_frames in tqdm(n):
for h, w in tqdm(dims):
vs = DummyVS(num_frames, h, w, device=device)
pbar = tqdm(metrics, leave=False)
for metric_type in pbar:
pbar.set_description(f'N={num_frames}, h={h}, w={w}, metric={metric_type}')

if metric_type == 'cvvdp':
metric = pycvvdp.cvvdp(quiet=True, device=device, temp_padding='replicate', heatmap=None)
if debug:
metric.debug = True
elif metric_type == 'fvvdp':
from pyfvvdp import fvvdp
# Add argument "colorspace='Y'" while retrieving frames to run on images
# Lines 233 and 244 in commit 5bf67f92341604d238ebe72fdeeb4ad825db5485
metric = fvvdp(quiet=True, device=device, temp_padding='replicate', heatmap=None)
elif metric_type == 'PSNR-RGB':
metric = pycvvdp.psnr_rgb(device=device)
elif metric_type == 'FLIP':
metric = pycvvdp.flip(device=device)
elif metric_type == 'PU21-VMAF':
metric = pycvvdp.pu_vmaf(ffmpeg_path, vmaf_cache, device=device)
else:
raise RuntimeError( f'Unknown metric {metric_type}' )

with torch.no_grad():
metric.predict_video_source(vs) # dummy run

times = []
for _ in trange(num_samples, leave=False):
if metric_type == 'PU21-VMAF':
times.append(metric.predict_video_source(vs, record_time=True))
else:
start = time()
metric.predict_video_source(vs)
times.append(time() - start)

timings.append({'name': metric_type,
'num_samples': num_samples,
'num_frames': num_frames,
'height': h,
'width': w,
'time_mean': np.mean(times),
'time_std': np.std(times)})

df = pd.DataFrame(timings)
df.to_csv('timings.csv', index=False)
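
A minimal sketch (not part of this diff) of how the resulting timings.csv could be summarized; it assumes only the columns written by the script above:

import pandas as pd

df = pd.read_csv('timings.csv')
# Mean runtime in seconds per metric and resolution
print(df.pivot_table(index=['height', 'width'], columns='name', values='time_mean'))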
11 changes: 10 additions & 1 deletion pycvvdp/__init__.py
@@ -1,6 +1,15 @@
from pycvvdp.cvvdp_metric import cvvdp
from pycvvdp.cvvdp_metric import cvvdp, cvvdp_image
from pycvvdp.cvvdp_nn_metric import cvvdp_nn
from pycvvdp.pupsnr import pu_psnr_y, pu_psnr_rgb2020, psnr_rgb
from pycvvdp.e_itp import e_itp
from pycvvdp.e_sitp import e_sitp
from pycvvdp.de2000 import de2000
from pycvvdp.de2000_spatial import s_de2000
from pycvvdp.flip import flip
from pycvvdp.pu_lpips import pu_lpips
from pycvvdp.dolby_ictcp import ictcp
from pycvvdp.pu_vmaf import pu_vmaf
from pycvvdp.hyab import hyab
from pycvvdp.video_source_file import video_source_file, video_source_video_file, load_image_as_array
from pycvvdp.display_model import vvdp_display_photometry, vvdp_display_photo_eotf, vvdp_display_geometry
from pycvvdp.video_source_yuv import video_source_yuv_file
36 changes: 36 additions & 0 deletions pycvvdp/cvvdp_metric.py
@@ -838,4 +838,40 @@ def export_distogram(self, stats, fname, jod_max=None, base_size=6):
# plt.waitforbuttonpress()


class cvvdp_image(vq_metric):
def __init__(self, display_name="standard_4k", display_photometry=None, display_geometry=None, config_paths=[], heatmap=None, quiet=False, device=None, temp_padding="replicate", use_checkpoints=False, calibrated_ckpt=None):
# Use GPU if available
if device is None:
if torch.cuda.is_available() and torch.cuda.device_count()>0:
self.device = torch.device('cuda:0')
else:
self.device = torch.device('cpu')
else:
self.device = device

# Create a dummy display photometry object that does not change input frame
self.cvvdp_metric = cvvdp(display_name, None, None, config_paths, heatmap, quiet, self.device, temp_padding, use_checkpoints, calibrated_ckpt)

def set_display_model(self, display_name="standard_4k", display_photometry=None, display_geometry=None, config_paths=[]):
self.linear_dm = vvdp_display_photo_eotf(display_photometry.Y_peak, contrast=display_photometry.contrast, source_colorspace='BT.2020-linear', E_ambient=display_photometry.E_ambient, k_refl=display_photometry.k_refl)
self.cvvdp_metric.set_display_model(display_photometry=self.linear_dm, display_geometry=display_geometry)

def predict_video_source(self, vid_source):

_, _, N_frames = vid_source.get_video_size()

avg = 0
for ff in range(N_frames):
T = vid_source.get_test_frame(ff, device=self.device, colorspace='RGB2020')
R = vid_source.get_reference_frame(ff, device=self.device, colorspace='RGB2020')
test_vs = video_source_array( T, R, fps=0, display_photometry=self.linear_dm )
Q, _ = self.cvvdp_metric.predict_video_source(test_vs)
avg += Q / N_frames

return avg, None

def short_name(self):
return "cvvdp-image"

def quality_unit(self):
return "JOD"
2 changes: 1 addition & 1 deletion pycvvdp/cvvdp_nn_metric.py
@@ -105,7 +105,7 @@ def do_pooling_and_jods(self, Q_per_ch, base_rho_band, fps):
# Q_per_ch[channel,frame,sp_band]
feat_in = Q_per_ch.permute(1, 0, 2).flatten(start_dim=1)
feat_intermediate, _ = self.pooling_net[0](feat_in)
feat_intermediate = torch.cat((feat_intermediate[-1], base_rho_band.unsqueeze(0)))
feat_intermediate = torch.cat((feat_intermediate[-1], torch.as_tensor(base_rho_band, device=self.device, dtype=torch.float32).unsqueeze(0)))
Q = self.pooling_net[1](feat_intermediate).squeeze() * 10
return Q

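The torch.as_tensor wrapper guards against base_rho_band arriving as a plain Python float, since torch.cat accepts only tensors. A minimal reproduction of the failure mode (a sketch, not part of this diff):

import torch

feat = torch.randn(8)
rho = 2.5  # base_rho_band as a plain float
# torch.cat((feat, rho)) raises TypeError; converting it first works:
rho_t = torch.as_tensor(rho, dtype=feat.dtype).unsqueeze(0)
print(torch.cat((feat, rho_t)).shape)  # torch.Size([9])
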
92 changes: 92 additions & 0 deletions pycvvdp/de2000.py
@@ -0,0 +1,92 @@
import torch
import numpy as np

from pycvvdp.utils import deltaE00
from pycvvdp.vq_metric import vq_metric

"""
DE2000 metric. Usage is the same as for the FovVideoVDP metric (see pytorch_examples).
"""
class de2000(vq_metric):
def __init__(self, device=None, display_name=None, display_geometry=None, display_photometry=None):
# Use GPU if available
if device is None:
if torch.cuda.is_available() and torch.cuda.device_count()>0:
self.device = torch.device('cuda:0')
else:
self.device = torch.device('cpu')
else:
self.device = device

# D65 White point
self.w = (0.9505, 1.0000, 1.0888)
self.colorspace = 'XYZ'

'''
The same as `predict`, but takes an fvvdp_video_source_* object as input instead of Numpy/Pytorch arrays.
'''
def predict_video_source(self, vid_source, frame_padding="replicate"):

# T_vid and R_vid are the tensors of the size (1,3,N,H,W)
# where:
# N - the number of frames
# H - height in pixels
# W - width in pixels
# Both images must contain linear absolute luminance values in cd/m^2
#
# We assume the pytorch default NCDHW layout

_, _, N_frames = vid_source.get_video_size()

e00 = 0.0
for ff in range(N_frames):
T = vid_source.get_test_frame(ff, device=self.device, colorspace=self.colorspace)
R = vid_source.get_reference_frame(ff, device=self.device, colorspace=self.colorspace)

# XYZ to Lab
w = self.max_L*self.w
T_lab = self.xyz_to_lab(T, w)
R_lab = self.xyz_to_lab(R, w)

# Mean of per-pixel DE2000
e00 = e00 + self.e00_fn(T_lab, R_lab) / N_frames
return e00, None

def xyz_to_lab(self, img, W):
Lab = torch.empty_like(img)
Lab[...,0,:,:,:] = 116*self.lab_fn(img[...,1,:,:,:]/W[1])-16
Lab[...,1,:,:,:] = 500*(self.lab_fn(img[...,0,:,:,:]/W[0]) - self.lab_fn(img[...,1,:,:,:]/W[1]))
Lab[...,2,:,:,:] = 200*(self.lab_fn(img[...,1,:,:,:]/W[1]) - self.lab_fn(img[...,2,:,:,:]/W[2]))
return Lab

def lab_fn(self, x):
sigma = (6/29)
y_1 = x**(1/3)
y_2 = (x/(3*(sigma**2)))+(4/29)
condition = torch.less(x, sigma**3)
y = torch.where(condition, y_2, y_1)
return y

def e00_fn(self, img1, img2):
sz = torch.numel(img1[...,0,:,:,:])
img1_row = torch.cat((torch.reshape(img1[...,0,:,:,:], (1,sz)), torch.reshape(img1[...,1,:,:,:], (1,sz)), torch.reshape(img1[...,2,:,:,:], (1,sz))), 0)
img2_row = torch.cat((torch.reshape(img2[...,0,:,:,:], (1,sz)), torch.reshape(img2[...,1,:,:,:], (1,sz)), torch.reshape(img2[...,2,:,:,:], (1,sz))), 0)
e00 = deltaE00(img1_row, img2_row)
# e00_mean = torch.empty_like(torch.reshape(img1[...,0,:,:,:], (1,sz)))
# e00_mean = torch.mean(torch.from_numpy(e00).to(e00_mean))
e00_mean = torch.mean(e00)
return e00_mean

def short_name(self):
return "DE-2000"

def quality_unit(self):
return "Delta E2000"

def get_info_string(self):
return None

def set_display_model(self, display_photometry, display_geometry):
# Limit the adapting white point to a peak luminance of 300 cd/m^2
self.max_L = display_photometry.get_peak_luminance()
self.max_L = np.where( self.max_L < 300, self.max_L, 300)
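
A quick sanity check for the conversion above (a sketch, not part of this diff): feeding the D65 white point itself through xyz_to_lab should give L* = 100 and a* = b* = 0:

import torch
from pycvvdp.de2000 import de2000

m = de2000(device=torch.device('cpu'))
white = torch.tensor(m.w).view(1, 3, 1, 1, 1)  # D65 white as a single-frame video
lab = m.xyz_to_lab(white, m.w)
print(lab.flatten())  # expected: tensor([100., 0., 0.])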
125 changes: 125 additions & 0 deletions pycvvdp/de2000_spatial.py
@@ -0,0 +1,125 @@
import torch
import numpy as np

from pycvvdp.utils import SCIELAB_filter, deltaE00
from pycvvdp.vq_metric import vq_metric

"""
Spatial DE2000 metric. Usage is the same as for the FovVideoVDP metric (see pytorch_examples).
"""
class s_de2000(vq_metric):
def __init__(self, device=None, display_name=None, display_geometry=None, display_photometry=None):
# Use GPU if available
if device is None:
if torch.cuda.is_available() and torch.cuda.device_count()>0:
self.device = torch.device('cuda:0')
else:
self.device = torch.device('cpu')
else:
self.device = device

self.slab = SCIELAB_filter(device=device)
# D65 White point
self.w = (0.9505, 1.0000, 1.0888)
self.colorspace = 'XYZ'

'''
The same as `predict`, but takes an fvvdp_video_source_* object as input instead of Numpy/Pytorch arrays.
'''
def predict_video_source(self, vid_source, frame_padding="replicate"):

# T_vid and R_vid are the tensors of the size (1,3,N,H,W)
# where:
# N - the number of frames
# H - height in pixels
# W - width in pixels
# Both images must contain linear absolute luminance values in cd/m^2
#
# We assume the pytorch default NCDHW layout

_, _, N_frames = vid_source.get_video_size()

e_s00 = 0.0
for ff in range(N_frames):
T = vid_source.get_test_frame(ff, device=self.device, colorspace=self.colorspace)
R = vid_source.get_reference_frame(ff, device=self.device, colorspace=self.colorspace)

# XYZ to Opp
T_opp = self.slab.xyz_to_opp(T)
R_opp = self.slab.xyz_to_opp(R)

# Spatially filtered opponent colour images
T_s_opp = self.opp_to_sopp(T_opp, self.ppd)
R_s_opp = self.opp_to_sopp(R_opp, self.ppd)

# S-OPP to S-XYZ
T_s_xyz = self.slab.opp_to_xyz(T_s_opp)
R_s_xyz = self.slab.opp_to_xyz(R_s_opp)

# S-XYZ to S-Lab
w = self.max_L*self.w
T_s_lab = self.xyz_to_lab(T_s_xyz, w)
R_s_lab = self.xyz_to_lab(R_s_xyz, w)

# Mean of per-pixel DE2000
e_s00 = e_s00 + self.e00_fn(T_s_lab, R_s_lab) / N_frames
return e_s00, None

def opp_to_sopp(self, img, ppd):
S_OPP = torch.empty_like(img)
## Filters are low-dimensional; construct using np
#[k1, k2, k3] = [torch.as_tensor(filter).to(img) for filter in self.slab.separableFilters_torch(ppd)]
#S_OPP[...,0:,:,:] = self.slab.separableConv_torch(torch.squeeze(img[...,0,:,:,:]), k1, torch.abs(k1))
#S_OPP[...,1:,:,:] = self.slab.separableConv_torch(torch.squeeze(img[...,1,:,:,:]), k2, torch.abs(k2))
#S_OPP[...,2:,:,:] = self.slab.separableConv_torch(torch.squeeze(img[...,2,:,:,:]), k3, torch.abs(k3))

# Simpler SCIELAB filters implementation
[k1, k2, k3] = self.slab.generateSCIELABfiltersParams(ppd)
# Limit the filter width to 1 degree of visual angle and an odd number of sampling points
# (so that the gaussians generated from Rick's gauss routine are symmetric).
width = int(np.ceil(ppd / 2) * 2 - 1)
S_OPP[...,0,:,:,:] = self.slab.applyGaussFilter(img[...,0,:,:,:], width, k1)
S_OPP[...,1,:,:,:] = self.slab.applyGaussFilter(img[...,1,:,:,:], width, k2)
S_OPP[...,2,:,:,:] = self.slab.applyGaussFilter(img[...,2,:,:,:], width, k3)

return S_OPP

def xyz_to_lab(self, img, W):
Lab = torch.empty_like(img)
Lab[...,0,:,:,:] = 116*self.lab_fn(img[...,1,:,:,:]/W[1])-16
Lab[...,1,:,:,:] = 500*(self.lab_fn(img[...,0,:,:,:]/W[0]) - self.lab_fn(img[...,1,:,:,:]/W[1]))
Lab[...,2,:,:,:] = 200*(self.lab_fn(img[...,1,:,:,:]/W[1]) - self.lab_fn(img[...,2,:,:,:]/W[2]))
return Lab

def lab_fn(self, x):
sigma = (6/29)
y_1 = x**(1/3)
y_2 = (x/(3*(sigma**2)))+(4/29)
condition = torch.less(x, sigma**3)
y = torch.where(condition, y_2, y_1)
return y

def e00_fn(self, img1, img2):
sz = torch.numel(img1[...,0,:,:,:])
img1_row = torch.cat((torch.reshape(img1[...,0,:,:,:], (1,sz)), torch.reshape(img1[...,1,:,:,:], (1,sz)), torch.reshape(img1[...,2,:,:,:], (1,sz))), 0)
img2_row = torch.cat((torch.reshape(img2[...,0,:,:,:], (1,sz)), torch.reshape(img2[...,1,:,:,:], (1,sz)), torch.reshape(img2[...,2,:,:,:], (1,sz))), 0)
e00 = deltaE00(img1_row, img2_row)
# e00_mean = torch.empty_like(torch.reshape(img1[...,0,:,:,:], (1,sz)))
# e00_mean = torch.mean(torch.from_numpy(e00).to(e00_mean))
e00_mean = torch.mean(e00)
return e00_mean

def short_name(self):
return "S-DE-2000"

def quality_unit(self):
return "Delta E2000"

def get_info_string(self):
return None

def set_display_model(self, display_photometry, display_geometry):
self.ppd = display_geometry.get_ppd()
# Limit the adapting white point to a peak luminance of 300 cd/m^2
self.max_L = display_photometry.get_peak_luminance()
self.max_L = np.where( self.max_L < 300, self.max_L, 300)
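
As a side note on the width formula in opp_to_sopp above, int(np.ceil(ppd / 2) * 2 - 1) always yields an odd kernel size of roughly ppd samples, i.e. about one degree of visual angle. A quick check (a sketch, not part of this diff):

import numpy as np

for ppd in (30, 45, 52.3, 60):
    width = int(np.ceil(ppd / 2) * 2 - 1)
    print(ppd, width)  # 30 -> 29, 45 -> 45, 52.3 -> 53, 60 -> 59 (all odd)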