% Conference paper on the X3A multimodal deepfake detector (ACM SAC 2025).
% The citation key is kept as-is so existing citations keep resolving.
% abstract, keywords, day and language are not used by the standard BibTeX
% styles; they are retained here as metadata only.
@inproceedings{23a40dc15225419b9f36e125037635ab,
  author    = {Park, Chan and Moon, Bohyun and Jeon, Minsun and Jung, Jee Weon and Woo, Simon S.},
  title     = {{X3A}: Efficient Multimodal Deepfake Detection with Score-Level Fusion},
  booktitle = {40th Annual {ACM} Symposium on Applied Computing, {SAC} 2025},
  series    = {Proceedings of the {ACM} Symposium on Applied Computing},
  publisher = {Association for Computing Machinery},
  pages     = {767--774},
  year      = {2025},
  month     = may,
  day       = {14},
  doi       = {10.1145/3672608.3707934},
  language  = {English},
  keywords  = {deepfake detection, multimodal deepfake, score-level fusion},
  abstract  = {Advances in deepfake generation have highlighted the necessity for sophisticated detection methods and realistic datasets to ensure models are effectively generalized. While traditional datasets focused on unimodal manipulations, the emergence of multimodal datasets, which include audio-visual forgeries, increased the complexity of deepfake detection. The recent release of the LAV-DF and AV-Deepfake1M datasets featured partial manipulations in multimodal contents and underscored the need for effective video-level detection methods to identify these forgeries. In this work, we propose X3A, an efficient multimodal video deepfake detection model exploiting two powerful unimodal models with probabilistic score-level fusion. X3A leverages the advantage of using raw visual and audio inputs without relying on hand-crafted features. We conducted the extensive experiments on multiple different multimodal deepfake benchmark datasets and achieved superior performance on multimodal deepfake detection, successively detecting entirely and partially manipulated scenarios. Our X3A model demonstrates an accuracy of 0.9960 AUC of 0.9999 on the most challenging AV-Deepfake1M benchmark, surpassing all existing models.},
  note      = {Publisher Copyright: Copyright {\textcopyright} 2025 held by the owner/author(s).; 40th Annual ACM Symposium on Applied Computing, SAC 2025 ; Conference date: 31-03-2025 Through 04-04-2025},
}