Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 31 additions & 3 deletions benchmarks/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,14 @@ unzip BBC/videos.zip -d BBC
rm -rf BBC/videos.zip
```

### Evaluation
### AutoShot
Download `AutoShot_test.tar.gz` from [Google drive](https://drive.google.com/file/d/17diRkLlNUUjHDooXdqFUTXYje2-x4Yt6/view?usp=sharing).
```
tar -zxvf AutoShot_test.tar.gz
rm AutoShot_test.tar.gz
```

## Evaluation
To evaluate PySceneDetect on a dataset, run the following command:
```
python benchmark.py --dataset <dataset_name> --detector <detector_name>
Expand All @@ -28,7 +35,8 @@ python evaluate.py -d BBC --detector detect-content

### Result
The performance is computed as recall, precision, f1, and elapsed time.
The following results indicate that ContentDetector achieves the highest performance on the BBC dataset.

#### BBC

| Detector | Recall | Precision | F1 | Elapsed time (second) |
|:-----------------:|:------:|:---------:|:-----:|:---------------------:|
Expand All @@ -38,6 +46,16 @@ The following results indicate that ContentDetector achieves the highest perform
| HistogramDetector | 90.55 | 72.76 | 80.68 | 16.13 |
| ThresholdDetector | 0.00 | 0.00 | 0.00 | 18.95 |

#### AutoShot

| Detector | Recall | Precision | F1 | Elapsed time (second) |
|:-----------------:|:------:|:---------:|:-----:|:---------------------:|
| AdaptiveDetector | 70.77 | 77.65 | 74.05 | 1.23 |
| ContentDetector | 63.67 | 76.40 | 69.46 | 1.21 |
| HashDetector | 56.66 | 76.35 | 65.05 | 1.16 |
| HistogramDetector | 63.36 | 53.34 | 57.92 | 1.23 |
| ThresholdDetector | 0.75 | 38.64 | 1.47 | 1.24 |

## Citation
### BBC
```
Expand All @@ -47,4 +65,14 @@ The following results indicate that ContentDetector achieves the highest perform
booktitle = {Proceedings of the 23rd ACM International Conference on Multimedia},
year = {2015},
}
```
```

### AutoShot
```
@InProceedings{autoshot_dataset,
author = {Wentao Zhu and Yufang Huang and Xiufeng Xie and Wenxian Liu and Jincan Deng and Debing Zhang and Zhangyang Wang and Ji Liu},
title = {AutoShot: A Short Video Dataset and State-of-the-Art Shot Boundary Detection},
booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops},
year = {2023},
}
```
30 changes: 30 additions & 0 deletions benchmarks/autoshot_dataset.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
import glob
import os

class AutoShotDataset:
    """The AutoShot dataset (test split).

    Proposed by Zhu et al. in "AutoShot: A Short Video Dataset and
    State-of-the-Art Shot Boundary Detection".
    Link: https://openaccess.thecvf.com/content/CVPR2023W/NAS/html/Zhu_AutoShot_A_Short_Video_Dataset_and_State-of-the-Art_Shot_Boundary_Detection_CVPRW_2023_paper.html

    The original test set consists of 200 videos, but 36 videos are missing
    (AutoShot/videos/<video_id>.mp4). The annotated scenes are provided in
    corresponding files (AutoShot/annotations/<video_id>.txt). Because videos
    may be absent, annotations are looked up by video id rather than by
    zipping two independently sorted directory listings, which would
    misalign pairs as soon as one video is missing.
    """

    def __init__(self, dataset_dir: str):
        """Index all (video, annotation) pairs found under *dataset_dir*.

        Args:
            dataset_dir: Root directory containing ``videos/`` and
                ``annotations/`` subdirectories.

        Raises:
            AssertionError: If a video has no matching annotation file.
        """
        self._video_files = sorted(
            glob.glob(os.path.join(dataset_dir, "videos", "*.mp4"))
        )
        # Resolve each annotation from the video's id instead of pairing
        # sorted listings positionally: with 36 videos missing, positional
        # zip would pair videos with the wrong annotation files.
        self._scene_files = []
        for video_file in self._video_files:
            video_id = os.path.splitext(os.path.basename(video_file))[0]
            scene_file = os.path.join(dataset_dir, "annotations", video_id + ".txt")
            assert os.path.isfile(scene_file), f"missing annotation: {scene_file}"
            self._scene_files.append(scene_file)

    def __getitem__(self, index):
        """Return the ``(video_file, scene_file)`` pair at *index*."""
        return self._video_files[index], self._scene_files[index]

    def __len__(self):
        """Return the number of (video, annotation) pairs."""
        return len(self._video_files)
2 changes: 1 addition & 1 deletion benchmarks/bbc_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ def __init__(self, dataset_dir: str):
assert len(self._video_files) == len(self._scene_files)
for video_file, scene_file in zip(self._video_files, self._scene_files):
video_id = os.path.basename(video_file).replace("bbc_", "").split(".")[0]
scene_id = os.path.basename(scene_file).split("_")[0]
scene_id = os.path.basename(scene_file).split("-")[0]
assert video_id == scene_id

def __getitem__(self, index):
Expand Down
26 changes: 23 additions & 3 deletions benchmarks/benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
import time

from bbc_dataset import BBCDataset
from autoshot_dataset import AutoShotDataset

from evaluator import Evaluator
from tqdm import tqdm

Expand All @@ -15,7 +17,7 @@
)


def make_detector(detector_name: str):
def _make_detector(detector_name: str):
detector_map = {
"detect-adaptive": AdaptiveDetector(),
"detect-content": ContentDetector(),
Expand All @@ -26,11 +28,19 @@ def make_detector(detector_name: str):
return detector_map[detector_name]


def _make_dataset(dataset_name: str):
    """Instantiate the benchmark dataset named *dataset_name*.

    Args:
        dataset_name: Either ``"BBC"`` or ``"AutoShot"``.

    Returns:
        The constructed dataset object.

    Raises:
        KeyError: If *dataset_name* is not a supported dataset.
    """
    # Map to factories instead of instances so only the requested dataset
    # is constructed -- constructing both would scan (and require on disk)
    # the other dataset's directory even when it is not benchmarked.
    dataset_factories = {
        "BBC": lambda: BBCDataset("BBC"),
        "AutoShot": lambda: AutoShotDataset("AutoShot"),
    }
    return dataset_factories[dataset_name]()


def _detect_scenes(detector_type: str, dataset):
pred_scenes = {}
for video_file, scene_file in tqdm(dataset):
start = time.time()
detector = make_detector(detector_type)
detector = _make_detector(detector_type)
pred_scene_list = detect(video_file, detector)
elapsed = time.time() - start
scenes = {
Expand All @@ -53,7 +63,7 @@ def _detect_scenes(detector_type: str, dataset):


def main(args):
pred_scenes = _detect_scenes(detector_type=args.detector, dataset=BBCDataset("BBC"))
pred_scenes = _detect_scenes(detector_type=args.detector, dataset=_make_dataset(args.dataset))
result = Evaluator().evaluate_performance(pred_scenes)
print("Overall Results:")
print(
Expand All @@ -65,6 +75,16 @@ def main(args):

if __name__ == "__main__":
parser = argparse.ArgumentParser(description="Benchmarking PySceneDetect performance.")
parser.add_argument(
"--dataset",
type=str,
choices=[
"BBC",
"AutoShot",
],
default="BBC",
help="Dataset name. Supported datasets are BBC and AutoShot.",
)
parser.add_argument(
"--detector",
type=str,
Expand Down
3 changes: 1 addition & 2 deletions benchmarks/evaluator.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,8 @@ def evaluate_performance(self, pred_scenes):
total_pred += len(pred_list)
total_gt += len(gt_scene_list)

assert total_pred, pred_scenes
recall = total_correct / total_gt
precision = total_correct / total_pred
precision = total_correct / total_pred if total_pred != 0 else 0
f1 = 2 * recall * precision / (recall + precision) if (recall + precision) != 0 else 0
avg_elapsed = mean([x["elapsed"] for x in pred_scenes.values()])
result = {
Expand Down