% HiDF deepfake-dataset paper, KDD 2025 proceedings (ACM).
% Author names are stored in "Last, First" form; case-sensitive acronyms are
% brace-protected so sentence-casing bibliography styles cannot lowercase them.
@inproceedings{c934af806ffd4adea2555d9da8e56382,
  author    = {Kang, Chaewon and Jeong, Seoyoon and Lee, Jonghyun and Choi, Daejin and Woo, Simon Sungil and Han, Jinyoung},
  title     = {{HiDF}: A Human-Indistinguishable Deepfake Dataset},
  booktitle = {{KDD} 2025 - Proceedings of the 31st {ACM} {SIGKDD} Conference on Knowledge Discovery and Data Mining},
  series    = {Proceedings of the {ACM} {SIGKDD} International Conference on Knowledge Discovery and Data Mining},
  publisher = {Association for Computing Machinery},
  year      = {2025},
  month     = aug,
  day       = {3},
  pages     = {5527--5538},
  doi       = {10.1145/3711896.3737399},
  language  = {English},
  keywords  = {ai, deep-learning, deepfake, human-indistinguishable, multimodal},
  abstract  = {The rapid development and prevalence of generative AI have made it easy for people to create high-quality deepfake images and videos, but their abuses have also increased exponentially. To mitigate potential social disruption, it is crucial to quickly detect the authenticity of each deepfake content hidden in a sea of information. While researchers have worked on developing deep learning-based methods, the deepfake datasets utilized in these studies are far from the real world in terms of their qualities; most popular deepfake datasets are human-distinguishable. To address this problem, we present a novel deepfake dataset, HiDF, a high-quality and human-indistinguishable deepfake dataset consisting of 62K images and 8K videos. HiDF is a meticulously curated dataset that includes diverse subjects that have undergone rigorous quality checks. A comparison of the quality between HiDF and existing deepfake datasets demonstrates that HiDF is human-indistinguishable. Hence, it can be a valuable benchmark dataset for deepfake detection tasks. Data and code (https://github.com/DSAIL-SKKU/HiDF) are publicly available for future deepfake detection research.},
  note      = {Publisher Copyright: {\textcopyright} 2025 Copyright held by the owner/author(s).; 31st ACM SIGKDD Conference on Knowledge Discovery and Data Mining, KDD 2025 ; Conference date: 03-08-2025 Through 07-08-2025},
}