A benchmark for studying neural codec–based deepfake speech detection.
CodecFake+ is a large-scale benchmark designed to evaluate the detection and generalization capabilities of models across diverse neural audio codec–generated deepfakes.
It enables controlled, taxonomy-aware analysis for codec-based deepfake research.
🚧 The CodecFake+ project page is currently under construction.
Thank you for your patience and continued interest.
🔗 Official webpage: https://responsiblegenai.github.io/CodecFake-Plus-Dataset
If you find our dataset useful, please consider citing our accompanying papers.
@misc{chen2025codecfake+,
  author        = {Chen, Xuanjun and Du, Jiawei and Wu, Haibin and Zhang, Lin and Lin, I-Ming and Chiu, I-Hsiang and Ren, Wenze and Tseng, Yuan and Tsao, Yu and Jang, Jyh-Shing Roger and Lee, Hung-yi},
  title         = {{CodecFake+}: A Large-Scale Neural Audio Codec-Based Deepfake Speech Dataset},
  year          = {2025},
  eprint        = {2501.08238},
  archiveprefix = {arXiv},
  url           = {https://arxiv.org/abs/2501.08238},
}
@inproceedings{wu24p_interspeech,
  author    = {Wu, Haibin and Tseng, Yuan and Lee, Hung-yi},
  title     = {{CodecFake}: Enhancing Anti-Spoofing Models Against Deepfake Audios from Codec-Based Speech Synthesis Systems},
  booktitle = {Interspeech 2024},
  year      = {2024},
  pages     = {1770--1774},
  doi       = {10.21437/Interspeech.2024-2093},
  issn      = {2958-1796},
}