% Conference paper from the CICLing 2015 proceedings (see booktitle/series below).
% The citation key looks like an exporter-generated identifier; it is kept unchanged
% so that existing citations of this entry continue to resolve.
% Names are stored in "Last, First" form; proper nouns and acronyms in the title and
% booktitle are brace-protected so that sentence-casing styles do not lowercase them.
% The abstract and note texts are reproduced verbatim from the exported record.
@inproceedings{fb4a70d94fa548418055c6beb767164c,
  author    = {Masmoudi, Abir and Habash, Nizar and Ellouze, Mariem and Est{\`e}ve, Yannick and {Hadrich Belguith}, Lamia},
  title     = {{Arabic} Transliteration of {Romanized} {Tunisian} Dialect Text: A Preliminary Investigation},
  booktitle = {Computational Linguistics and Intelligent Text Processing - 16th International Conference, {CICLing} 2015, Proceedings},
  editor    = {Gelbukh, Alexander},
  series    = {Lecture Notes in Computer Science},
  publisher = {Springer Verlag},
  year      = {2015},
  pages     = {608--619},
  doi       = {10.1007/978-3-319-18111-0_46},
  isbn      = {9783319181103},
  language  = {English (US)},
  keywords  = {CODA, Corpus, Normalization, Transliteration, Tunisian Dialect},
  abstract  = {In this paper, we describe the process of converting Tunisian Dialect text that is written in Latin script (also called Arabizi) into Arabic script following the CODA orthography convention for Dialectal Arabic. Our input consists of messages and comments taken from SMS, social networks and broadcast videos. The language used in social media and SMS messaging is characterized by the use of informal and non-standard vocabulary such as repeated letters for emphasis, typos, non-standard abbreviations, and nonlinguistic content, such as emoticons. There is a high degree of variation is spelling in Arabic dialects due to the lack of orthographic widely supported standards in both Arabic and Latin scripts. In the context of natural language processing, transliterating from Arabizi to Arabic script is a necessary step since most recently available tools for processing Arabic Dialects expect Arabic script input.},
  note      = {Publisher Copyright: {\textcopyright} Springer International Publishing Switzerland 2015.; 16th Annual Conference on Intelligent Text Processing and Computational Linguistics, CICLing 2015 ; Conference date: 14-04-2015 Through 20-04-2015},
}