% Workshop paper presenting the ArzEn-ST code-switched Egyptian Arabic - English
% speech translation corpus (WANLP 2022 proceedings).
% `funding`, `copyright` and `abstract` are non-standard fields: standard styles
% ignore them, so they hold metadata that should not print in the bibliography.
@inproceedings{97983800344e4448915cec2344c38f24,
  author    = {Hamed, Injy and Habash, Nizar and Abdennadher, Slim and Vu, {Ngoc Thang}},
  title     = {{ArzEn-ST}: A Three-way Speech Translation Corpus for Code-Switched {Egyptian} {Arabic} - {English}},
  booktitle = {{WANLP} 2022 - 7th {Arabic} Natural Language Processing - Proceedings of the Workshop},
  publisher = {Association for Computational Linguistics (ACL)},
  year      = {2022},
  pages     = {119--130},
  language  = {English (US)},
  note      = {7th Arabic Natural Language Processing Workshop, WANLP 2022 held with EMNLP 2022; Conference date: 08-12-2022},
  funding   = {This project has benefited from financial support by DAAD (German Academic Exchange Service). We also thank the reviewers for their insightful comments and constructive feedback.},
  copyright = {{\textcopyright} 2022 Association for Computational Linguistics.},
  abstract  = {We present our work on collecting ArzEn-ST, a code-switched Egyptian Arabic - English Speech Translation Corpus. This corpus is an extension of the ArzEn speech corpus, which was collected through informal interviews with bilingual speakers. In this work, we collect translations in both directions, monolingual Egyptian Arabic and monolingual English, forming a three-way speech translation corpus. We make the translation guidelines and corpus publicly available. We also report results for baseline systems for machine translation and speech translation tasks. We believe this is a valuable resource that can motivate and facilitate further research studying the code-switching phenomenon from a linguistic perspective and can be used to train and evaluate NLP systems.},
}