diff --git a/benchmark/README.md b/benchmark/README.md
index 47777f29..bd7b1680 100644
--- a/benchmark/README.md
+++ b/benchmark/README.md
@@ -26,12 +26,17 @@ rm AutoShot.tar.gz
 ## Evaluation
 To evaluate PySceneDetect on a dataset, run the following command from the root of the repo:
 ```
-python -m benchmark -d <dataset> --detector <detector>
+python -m benchmark --dataset <dataset> --detector <detector>
 ```
 For example, to evaluate ContentDetector on the BBC dataset:
 ```
-python -m benchmark -d BBC --detector detect-content
+python -m benchmark --dataset BBC --detector detect-content
 ```
+To run all detectors on all datasets:
+```
+python -m benchmark --all
+```
+The `--all` flag can also be combined with `--dataset` or `--detector`.

 ### Result
 The performance is computed as recall, precision, f1, and elapsed time.
diff --git a/benchmark/__main__.py b/benchmark/__main__.py
index b1a90d67..431dc1b3 100644
--- a/benchmark/__main__.py
+++ b/benchmark/__main__.py
@@ -1,6 +1,7 @@
 import argparse
 import time
 import os
+import typing as ty

 from tqdm import tqdm

@@ -16,19 +17,13 @@
     detect,
 )

-
-def _make_detector(detector_name: str):
-    if detector_name == "detect-adaptive":
-        return AdaptiveDetector()
-    if detector_name == "detect-content":
-        return ContentDetector()
-    if detector_name == "detect-hash":
-        return HashDetector()
-    if detector_name == "detect-hist":
-        return HistogramDetector()
-    if detector_name == "detect-threshold":
-        return ThresholdDetector()
-    raise RuntimeError(f"Unknown detector: {detector_name}")
+_DETECTORS = {
+    "detect-adaptive": AdaptiveDetector,
+    "detect-content": ContentDetector,
+    "detect-hash": HashDetector,
+    "detect-hist": HistogramDetector,
+    "detect-threshold": ThresholdDetector,
+}


 _DATASETS = {
@@ -36,17 +31,19 @@ def _make_detector(detector_name: str):
     "AutoShot": AutoShotDataset("benchmark/AutoShot"),
 }

+_DEFAULT_DETECTOR = "detect-content"
+_DEFAULT_DATASET = "BBC"
+
 _RESULT_PRINT_FORMAT = (
     "Recall: {recall:.2f}, Precision: {precision:.2f}, F1: {f1:.2f} Elapsed time: {elapsed:.2f}\n"
 )


-def _detect_scenes(detector_type: str, dataset):
+def _detect_scenes(detector: str, dataset: str, detailed: bool):
     pred_scenes = {}
-    for video_file, scene_file in tqdm(dataset):
+    for video_file, scene_file in tqdm(_DATASETS[dataset]):
         start = time.time()
-        detector = _make_detector(detector_type)
-        pred_scene_list = detect(video_file, detector)
+        pred_scene_list = detect(video_file, _DETECTORS[detector]())
         elapsed = time.time() - start
         filename = os.path.basename(video_file)
         scenes = {
@@ -57,22 +54,28 @@ def _detect_scenes(detector_type: str, dataset):
             }
         }
         result = Evaluator().evaluate_performance(scenes)
-        print(f"\n{filename} results:")
-        print(_RESULT_PRINT_FORMAT.format(**result) + "\n")
+        if detailed:
+            print(f"\n{filename} results:")
+            print(_RESULT_PRINT_FORMAT.format(**result) + "\n")
         pred_scenes.update(scenes)
     return pred_scenes


-def main(args):
-    print(f"Evaluating {args.detector} on dataset {args.dataset}...\n")
-    pred_scenes = _detect_scenes(detector_type=args.detector, dataset=_DATASETS[args.dataset])
+def run_benchmark(detector: str, dataset: str, detailed: bool):
+    print(f"Evaluating {detector} on dataset {dataset}...\n")
+    pred_scenes = _detect_scenes(detector=detector, dataset=dataset, detailed=detailed)
     result = Evaluator().evaluate_performance(pred_scenes)
-    print(f"\nOverall Results for {args.detector} on dataset {args.dataset}:")
+    # Print extra separators in detailed output to identify overall results vs individual videos.
+    if detailed:
+        print("------------------------------------------------------------")
+    print(f"\nOverall Results for {detector} on dataset {dataset}:")
     print(_RESULT_PRINT_FORMAT.format(**result))
+    if detailed:
+        print("------------------------------------------------------------")


-if __name__ == "__main__":
+def create_parser():
     parser = argparse.ArgumentParser(description="Benchmarking PySceneDetect performance.")
     parser.add_argument(
         "--dataset",
@@ -81,7 +84,6 @@ def main(args):
             "BBC",
             "AutoShot",
         ],
-        default="BBC",
         help="Dataset name. Supported datasets are BBC and AutoShot.",
     )
     parser.add_argument(
@@ -94,8 +96,52 @@ def main(args):
             "detect-hist",
             "detect-threshold",
         ],
-        default="detect-content",
-        help="Detector name. Implemented detectors are listed: https://www.scenedetect.com/docs/latest/cli.html",
+        help="Detector name. Implemented detectors are listed: "
+        "https://www.scenedetect.com/docs/latest/cli.html",
+    )
+    parser.add_argument(
+        "--detailed",
+        action="store_const",
+        const=True,
+        help="Print results for each video, in addition to overall summary.",
     )
+    parser.add_argument(
+        "--all",
+        action="store_const",
+        const=True,
+        help="Benchmark all detectors on all datasets. If --detector or --dataset are specified, "
+        "will only run with those.",
+    )
+    return parser
+
+
+def run_all_benchmarks(detector: ty.Optional[str], dataset: ty.Optional[str], detailed: bool):
+    detectors = {detector: _DETECTORS[detector]} if detector else _DETECTORS
+    datasets = {dataset: _DATASETS[dataset]} if dataset else _DATASETS
+    print(
+        "Running benchmarks for:\n"
+        f" - Detectors: {', '.join(detectors.keys())}\n"
+        f" - Datasets: {', '.join(datasets.keys())}\n"
+    )
+    for detector in detectors:
+        for dataset in datasets:
+            run_benchmark(detector=detector, dataset=dataset, detailed=detailed)
+
+
+def main():
+    parser = create_parser()
     args = parser.parse_args()
-    main(args)
+    if args.all:
+        run_all_benchmarks(
+            detector=args.detector, dataset=args.dataset, detailed=bool(args.detailed)
+        )
+    else:
+        run_benchmark(
+            detector=args.detector if args.detector else _DEFAULT_DETECTOR,
+            dataset=args.dataset if args.dataset else _DEFAULT_DATASET,
+            detailed=bool(args.detailed),
+        )
+
+
+if __name__ == "__main__":
+    main()
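A note on the flag handling above: `--detailed` and `--all` are declared with `action="store_const"` and `const=True` but no default, so the parsed attributes stay `None` until the flag is passed; that is why `main()` checks them for truthiness, passes `bool(args.detailed)` down, and falls back to `_DEFAULT_DETECTOR`/`_DEFAULT_DATASET` when `--detector`/`--dataset` are omitted. The following is a minimal, standalone sketch of that behavior; it only mirrors a trimmed-down version of the options from `create_parser()` and does not import the benchmark package itself.
```
import argparse

# Trimmed-down mirror of the options defined in create_parser() above (illustration only).
parser = argparse.ArgumentParser(description="Benchmarking PySceneDetect performance.")
parser.add_argument("--dataset", choices=["BBC", "AutoShot"])
parser.add_argument("--detector", choices=["detect-content", "detect-adaptive"])
parser.add_argument("--detailed", action="store_const", const=True)
parser.add_argument("--all", action="store_const", const=True)

# With no flags, every attribute is None; bool(None) is False, so main() would use the
# _DEFAULT_DETECTOR / _DEFAULT_DATASET fallbacks and skip the detailed per-video output.
print(parser.parse_args([]))
# Namespace(dataset=None, detector=None, detailed=None, all=None)

# --all can be combined with --dataset (and/or --detector) to restrict the sweep,
# matching the README note and the filtering done in run_all_benchmarks().
print(parser.parse_args(["--all", "--dataset", "BBC", "--detailed"]))
# Namespace(dataset='BBC', detector=None, detailed=True, all=True)
```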