data release note

VQAssessment · Nov 21, 2023 · 29366ef · 29366ef
1 parent 45727ac
commit 29366ef
Show file tree

Hide file tree

Showing 8 changed files with 5,298 additions and 5 deletions.
diff --git a/README.md b/README.md
@@ -1,10 +1,11 @@
 # DOVER
 
 Official Code for [ICCV2023] Paper *"Exploring Video Quality Assessment on User Generated Contents from Aesthetic and Technical Perspectives"*. 
-Official Code, Demo, Weights for the [Disentangled Objective Video Quality Evaluator (DOVER)](arxiv.org/abs/2211.04894v3).
+Official Code, Demo, Weights for the [Disentangled Objective Video Quality Evaluator (DOVER)](https://arxiv.org/abs/2211.04894).
 
+- 21 Nov, 2023: The release note for the [DIVIDE database](get_divide_dataset/) has been updated.
 - 1 Aug, 2023: ONNX conversion script for DOVER has been released. Short tip: after installation, run [this](https://github.com/VQAssessment/DOVER/blob/master/convert_to_onnx.py) and then [this](https://github.com/VQAssessment/DOVER/blob/master/onnx_inference.py).
-- 17 Jul, 2023: DOVER has been accepted by ICCV2023. We will release the DIVIDE-3k dataset to train DOVER++ via fully-supervised LVBS soon.
+- 17 Jul, 2023: DOVER has been accepted by ICCV2023.
 - 9 Feb, 2023: **DOVER-Mobile** is available! Evaluate on CPU with Very High Speed!
 - 16 Jan, 2023: Full Training Code Available (including LVBS). See below.
 - 10 Dec, 2022: Now the evaluation tool can directly predict a fused score for any video. See [here](https://github.com/QualityAssessment/DOVER#new-get-the-fused-quality-score-for-use).

diff --git a/divide.yml b/divide.yml
@@ -0,0 +1,91 @@
+name: DOVER  # top-level run settings start here
+num_epochs: 20
+l_num_epochs: 10  # NOTE(review): second epoch count; what stage it governs is not shown here — confirm
+warmup_epochs: 2.5  # float, not an integer epoch count
+ema: true
+save_model: true
+batch_size: 16
+num_workers: 8
+split_seed: 42  # NOTE(review): presumably seeds a train/val split — confirm against the consumer
+
+wandb:
+    project_name: DOVER  # same string as the top-level `name` value
+
+data:  # two dataset entries sharing one data_prefix, with separate annotation files
+    val-dividemaxwell:
+        type: ViewDecompositionDataset
+        args:
+            weight: 0.598
+            phase: test  # the validation entry runs with phase "test"
+            anno_file: ./examplar_data_labels/DIVIDE_MaxWell/val_labels.txt
+            data_prefix: ../datasets/DIVIDE_MaxWell/videos/
+            sample_types:
+                technical:
+                    fragments_h: 7
+                    fragments_w: 7
+                    fsize_h: 32
+                    fsize_w: 32
+                    aligned: 32
+                    clip_len: 32
+                    frame_interval: 2
+                    num_clips: 3  # validation uses 3 technical clips; the training entry uses 1
+                aesthetic:
+                    size_h: 224
+                    size_w: 224
+                    clip_len: 32
+                    frame_interval: 2
+                    t_frag: 32
+                    num_clips: 1
+    train-dividemaxwell:
+        type: ViewDecompositionDataset
+        args:
+            weight: 0.598
+            phase: train
+            fully_supervised: true  # only the training entry sets this flag
+
+            anno_file: ./examplar_data_labels/DIVIDE_MaxWell/train_labels.txt
+            data_prefix: ../datasets/DIVIDE_MaxWell/videos/
+            sample_types:
+                technical:
+                    fragments_h: 7
+                    fragments_w: 7
+                    fsize_h: 32
+                    fsize_w: 32
+                    aligned: 32
+                    clip_len: 32
+                    frame_interval: 2
+                    num_clips: 1
+                aesthetic:
+                    size_h: 224
+                    size_w: 224
+                    clip_len: 32
+                    frame_interval: 2
+                    t_frag: 32
+                    num_clips: 1
+
+
+model:
+    type: DOVER
+    args:
+        backbone:  # one sub-entry per branch: technical and aesthetic
+            technical:
+                type: swin_tiny_grpb
+                checkpoint: true
+                pretrained: null  # explicit null; a bare `pretrained:` loads the same but reads as forgotten
+            aesthetic:
+                type: conv_tiny
+        backbone_preserve_keys: technical,aesthetic  # one comma-separated string, not a YAML list
+        divide_head: true
+        vqa_head:
+            in_channels: 768
+            hidden_channels: 64
+
+optimizer:
+    lr: !!float 1e-3  # !!float forces a number; YAML 1.1 loaders (e.g. PyYAML) read bare 1e-3 as a string
+    backbone_lr_mult: !!float 1e-1  # same tag; NOTE(review): presumably a multiplier on lr for backbones — confirm
+    wd: 0.05
+
+test_load_path: ./pretrained_weights/DOVER.pth  # relative path; NOTE(review): base directory not shown — confirm
+
+
+
diff --git a/dover/models/conv_backbone.py b/dover/models/conv_backbone.py
@@ -4,8 +4,6 @@
 from timm.models.layers import trunc_normal_, DropPath
 from timm.models.registry import register_model
 
-from open_clip import CLIP3D
-import open_clip
 
 class GRN(nn.Module):
     """ GRN (Global Response Normalization) layer

diff --git a/dover/version.py b/dover/version.py
@@ -1,4 +1,4 @@
-__version__ = "0.5.0"
+__version__ = "1.0.0"
 
 
 def parse_version_info(version_str):