% Conference paper (ICASSPW 2023 workshop proceedings), cleaned up from a
% portal auto-export:
%   - author names use the unambiguous "Last, First" form throughout;
%   - the title is in Title Case with the system name brace-protected so
%     sentence-casing styles keep its capitals;
%   - the former "series" field only repeated "booktitle" and was dropped;
%   - the publisher copyright line lives in the non-printing "copyright" field,
%     and "note" keeps only the conference dates (4 to 10 June 2023).
@inproceedings{7cfe4099d5464327b1569e33780435a8,
  author    = {Waibel, Alexander and Behr, Moritz and Yaman, Dogucan and Eyiokur, Fevziye Irem and Nguyen, Tuan Nam and Mullov, Carlos and Demirtas, Mehmet Arif and Kantarci, Alperen and Constantin, Stefan and Ekenel, Hazim Kemal},
  title     = {{Face-Dubbing++}: Lip-Synchronous, Voice Preserving Translation of Videos},
  booktitle = {{ICASSPW} 2023 - 2023 {IEEE} International Conference on Acoustics, Speech and Signal Processing Workshops, Proceedings},
  publisher = {Institute of Electrical and Electronics Engineers Inc.},
  year      = {2023},
  doi       = {10.1109/ICASSPW59220.2023.10193719},
  keywords  = {end-to-end video translation, lip generation, speech translation, text-to-speech, voice conversion},
  abstract  = {In this paper, we propose a neural end-to-end system for voice preserving and lip-synchronous video translation. The system is designed to combine multiple component models and produces a video of the original speaker speaking in the target language that is lip-synchronous with the target speech, yet maintains emphases in speech, voice characteristics, and face video of the original speaker. The result is a video of a speaker speaking in another language without actually knowing it. For the evaluation, we present a user study of the complete system and separate evaluations of the single components. Since there is no available dataset to evaluate our whole system, we collect a test set to evaluate our system. The results indicate that our system is able to generate convincing videos of the original speaker speaking the target language while preserving the original speaker's characteristics.},
  language  = {English (US)},
  note      = {Conference date: 4--10 June 2023},
  copyright = {Publisher Copyright: {\textcopyright} 2023 IEEE.},
}