From 15e88a8d177de4836b70f26510c239913008ae59 Mon Sep 17 00:00:00 2001
From: awkrail
Date: Wed, 19 Feb 2025 14:03:06 +0900
Subject: [PATCH] benchmark detectors on the AutoShot dataset

---
 benchmarks/README.md           | 34 +++++++++++++++++++++++++++++++---
 benchmarks/autoshot_dataset.py | 30 ++++++++++++++++++++++++++++++
 benchmarks/bbc_dataset.py      |  2 +-
 benchmarks/benchmark.py        | 26 +++++++++++++++++++++++---
 benchmarks/evaluator.py        |  3 +--
 5 files changed, 86 insertions(+), 9 deletions(-)
 create mode 100644 benchmarks/autoshot_dataset.py

diff --git a/benchmarks/README.md b/benchmarks/README.md
index 19a16190..2ad81c08 100644
--- a/benchmarks/README.md
+++ b/benchmarks/README.md
@@ -16,7 +16,14 @@ unzip BBC/videos.zip -d BBC
 rm -rf BBC/videos.zip
 ```
 
-### Evaluation
+### AutoShot
+Download `AutoShot_test.tar.gz` from [Google Drive](https://drive.google.com/file/d/17diRkLlNUUjHDooXdqFUTXYje2-x4Yt6/view?usp=sharing).
+```
+tar -zxvf AutoShot_test.tar.gz
+rm AutoShot_test.tar.gz
+```
+
+## Evaluation
 To evaluate PySceneDetect on a dataset, run the following command:
 ```
 python benchmark.py --dataset <dataset_name> --detector <detector_name>
@@ -28,7 +35,8 @@ python evaluate.py -d BBC --detector detect-content
 
 ### Result
 The performance is computed as recall, precision, f1, and elapsed time.
-The following results indicate that ContentDetector achieves the highest performance on the BBC dataset.
+
+#### BBC
 
 | Detector          | Recall | Precision | F1    | Elapsed time (second) |
 |:-----------------:|:------:|:---------:|:-----:|:---------------------:|
@@ -38,6 +46,16 @@ The following results indicate that ContentDetector achieves the highest perform
 | HistogramDetector | 90.55  | 72.76     | 80.68 | 16.13                 |
 | ThresholdDetector | 0.00   | 0.00      | 0.00  | 18.95                 |
 
+#### AutoShot
+
+| Detector          | Recall | Precision | F1    | Elapsed time (second) |
+|:-----------------:|:------:|:---------:|:-----:|:---------------------:|
+| AdaptiveDetector  | 70.77  | 77.65     | 74.05 | 1.23                  |
+| ContentDetector   | 63.67  | 76.40     | 69.46 | 1.21                  |
+| HashDetector      | 56.66  | 76.35     | 65.05 | 1.16                  |
+| HistogramDetector | 63.36  | 53.34     | 57.92 | 1.23                  |
+| ThresholdDetector | 0.75   | 38.64     | 1.47  | 1.24                  |
+
 ## Citation
 ### BBC
 ```
@@ -47,4 +65,14 @@
   booktitle = {Proceedings of the 23rd ACM International Conference on Multimedia},
   year = {2015},
 }
-```
\ No newline at end of file
+```
+
+### AutoShot
+```
+@InProceedings{autoshot_dataset,
+  author = {Wentao Zhu and Yufang Huang and Xiufeng Xie and Wenxian Liu and Jincan Deng and Debing Zhang and Zhangyang Wang and Ji Liu},
+  title = {AutoShot: A Short Video Dataset and State-of-the-Art Shot Boundary Detection},
+  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops},
+  year = {2023},
+}
+```
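Note: the `benchmark.py` invocation documented above drives PySceneDetect through its Python API; per video, the benchmark boils down to a single `detect` call. A minimal standalone sketch of that call (not part of the patch), assuming a local file named `video.mp4`:

```
from scenedetect import ContentDetector, detect

# The same per-video call benchmark.py makes for --detector detect-content.
scene_list = detect("video.mp4", ContentDetector())
for start, end in scene_list:
    print(start.get_frames(), end.get_frames())  # scene boundaries as frame indices
```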
diff --git a/benchmarks/autoshot_dataset.py b/benchmarks/autoshot_dataset.py
new file mode 100644
index 00000000..41f86a17
--- /dev/null
+++ b/benchmarks/autoshot_dataset.py
@@ -0,0 +1,30 @@
+import glob
+import os
+
+class AutoShotDataset:
+    """
+    The AutoShot dataset (test split) proposed by Zhu et al. in "AutoShot: A Short Video Dataset and State-of-the-Art Shot Boundary Detection".
+    Link: https://openaccess.thecvf.com/content/CVPR2023W/NAS/html/Zhu_AutoShot_A_Short_Video_Dataset_and_State-of-the-Art_Shot_Boundary_Detection_CVPRW_2023_paper.html
+    The original test set consists of 200 videos, but 36 videos are missing (AutoShot/videos/<video_id>.mp4).
+    The annotated scenes are provided in corresponding files (AutoShot/annotations/<video_id>.txt).
+    """
+
+    def __init__(self, dataset_dir: str):
+        self._video_files = [
+            file for file in sorted(glob.glob(os.path.join(dataset_dir, "videos", "*.mp4")))
+        ]
+        self._scene_files = [
+            file for file in sorted(glob.glob(os.path.join(dataset_dir, "annotations", "*.txt")))
+        ]
+        for video_file, scene_file in zip(self._video_files, self._scene_files):
+            video_id = os.path.basename(video_file).split(".")[0]
+            scene_id = os.path.basename(scene_file).split(".")[0]
+            assert video_id == scene_id
+
+    def __getitem__(self, index):
+        video_file = self._video_files[index]
+        scene_file = self._scene_files[index]
+        return video_file, scene_file
+
+    def __len__(self):
+        return len(self._video_files)
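Note: `AutoShotDataset` mirrors `BBCDataset`, pairing each video with its annotation file by sorted order and asserting that the basenames agree. A minimal usage sketch (not part of the patch), assuming `AutoShot/` has been extracted as described in the README:

```
from autoshot_dataset import AutoShotDataset

dataset = AutoShotDataset("AutoShot")          # expects AutoShot/videos and AutoShot/annotations
print(len(dataset), "video/annotation pairs")  # 164 pairs if 36 of the 200 test videos are absent
video_file, scene_file = dataset[0]            # one <video_id>.mp4 / <video_id>.txt pair
```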
diff --git a/benchmarks/bbc_dataset.py b/benchmarks/bbc_dataset.py
index 66a5a5b7..1bb7693e 100644
--- a/benchmarks/bbc_dataset.py
+++ b/benchmarks/bbc_dataset.py
@@ -20,7 +20,7 @@ def __init__(self, dataset_dir: str):
         assert len(self._video_files) == len(self._scene_files)
         for video_file, scene_file in zip(self._video_files, self._scene_files):
             video_id = os.path.basename(video_file).replace("bbc_", "").split(".")[0]
-            scene_id = os.path.basename(scene_file).split("_")[0]
+            scene_id = os.path.basename(scene_file).split("-")[0]
             assert video_id == scene_id
 
     def __getitem__(self, index):
diff --git a/benchmarks/benchmark.py b/benchmarks/benchmark.py
index bd0bc09e..c5c51376 100644
--- a/benchmarks/benchmark.py
+++ b/benchmarks/benchmark.py
@@ -2,6 +2,8 @@
 import time
 
 from bbc_dataset import BBCDataset
+from autoshot_dataset import AutoShotDataset
+
 from evaluator import Evaluator
 from tqdm import tqdm
 
@@ -15,7 +17,7 @@
 )
 
 
-def make_detector(detector_name: str):
+def _make_detector(detector_name: str):
     detector_map = {
         "detect-adaptive": AdaptiveDetector(),
         "detect-content": ContentDetector(),
@@ -26,11 +28,19 @@
     return detector_map[detector_name]
 
 
+def _make_dataset(dataset_name: str):
+    dataset_map = {
+        "BBC": BBCDataset("BBC"),
+        "AutoShot": AutoShotDataset("AutoShot"),
+    }
+    return dataset_map[dataset_name]
+
+
 def _detect_scenes(detector_type: str, dataset):
     pred_scenes = {}
     for video_file, scene_file in tqdm(dataset):
         start = time.time()
-        detector = make_detector(detector_type)
+        detector = _make_detector(detector_type)
         pred_scene_list = detect(video_file, detector)
         elapsed = time.time() - start
         scenes = {
@@ -53,7 +63,7 @@
 
 
 def main(args):
-    pred_scenes = _detect_scenes(detector_type=args.detector, dataset=BBCDataset("BBC"))
+    pred_scenes = _detect_scenes(detector_type=args.detector, dataset=_make_dataset(args.dataset))
     result = Evaluator().evaluate_performance(pred_scenes)
     print("Overall Results:")
     print(
@@ -65,6 +75,16 @@
 
 if __name__ == "__main__":
     parser = argparse.ArgumentParser(description="Benchmarking PySceneDetect performance.")
+    parser.add_argument(
+        "--dataset",
+        type=str,
+        choices=[
+            "BBC",
+            "AutoShot",
+        ],
+        default="BBC",
+        help="Dataset name. Supported datasets are BBC and AutoShot.",
+    )
     parser.add_argument(
         "--detector",
         type=str,
diff --git a/benchmarks/evaluator.py b/benchmarks/evaluator.py
index d38c8cef..ee8801ac 100644
--- a/benchmarks/evaluator.py
+++ b/benchmarks/evaluator.py
@@ -24,9 +24,8 @@
             total_pred += len(pred_list)
             total_gt += len(gt_scene_list)
 
-        assert total_pred, pred_scenes
         recall = total_correct / total_gt
-        precision = total_correct / total_pred
+        precision = total_correct / total_pred if total_pred != 0 else 0
         f1 = 2 * recall * precision / (recall + precision) if (recall + precision) != 0 else 0
         avg_elapsed = mean([x["elapsed"] for x in pred_scenes.values()])
         result = {
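Note: with the assertion removed, `evaluate_performance` now degrades gracefully when a detector predicts no scenes at all (ThresholdDetector, for instance, can find zero cuts). The guarded computation reduces to the helper below; a standalone sketch with hypothetical counts, not part of the patch:

```
def metrics(total_correct: int, total_pred: int, total_gt: int):
    # Mirrors evaluator.py: zero predictions yield precision 0 (and hence F1 0)
    # instead of raising ZeroDivisionError.
    recall = total_correct / total_gt
    precision = total_correct / total_pred if total_pred != 0 else 0
    f1 = 2 * recall * precision / (recall + precision) if (recall + precision) != 0 else 0
    return recall, precision, f1

print(metrics(90, 120, 100))  # hypothetical counts -> (0.9, 0.75, 0.8181...)
```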