% Conference paper from the COLING 2018 proceedings (citation key kept as exported).
% Maintenance notes:
%   - Names use the unambiguous "Last, First" form.
%   - {Arabic} and {COLING} are braced so styles that recase titles keep their capitals.
%   - Percent signs in the abstract are escaped so the field is safe if a style prints it.
%   - The exporter's funding/copyright text is stored in annote instead of note, because
%     standard styles print note verbatim in the reference list but do not print annote.
%   - month is taken from the conference dates recorded in that text (20--26 Aug 2018).
%   - The former series field was dropped: it was an exact duplicate of booktitle.
@inproceedings{d753b331dc734efc9c7f44f97102d1d7,
  author    = {Salameh, Mohammad and Bouamor, Houda and Habash, Nizar},
  title     = {Fine-Grained {Arabic} Dialect Identification},
  booktitle = {{COLING} 2018 - 27th International Conference on Computational Linguistics, Proceedings},
  editor    = {Bender, Emily M. and Derczynski, Leon and Isabelle, Pierre},
  pages     = {1332--1344},
  publisher = {Association for Computational Linguistics (ACL)},
  year      = {2018},
  month     = aug,
  language  = {English (US)},
  abstract  = {Previous work on the problem of Arabic Dialect Identification typically targeted coarse-grained five dialect classes plus Standard Arabic (6-way classification). This paper presents the first results on a fine-grained dialect classification task covering 25 specific cities from across the Arab World, in addition to Standard Arabic -- a very challenging task. We build several classification systems and explore a large space of features. Our results show that we can identify the exact city of a speaker at an accuracy of 67.9\% for sentences with an average length of 7 words (a 9\% relative error reduction over the state-of-the-art technique for Arabic dialect identification) and reach more than 90\% when we consider 16 words. We also report on additional insights from a data analysis of similarity and difference across Arabic dialects.},
  annote    = {Funding Information: This publication was made possible by grant NPRP 7-290-1-047 from the Qatar National Research Fund (a member of the Qatar Foundation). The statements made herein are solely the responsibility of the authors. Publisher Copyright: {\textcopyright} 2018 COLING 2018 - 27th International Conference on Computational Linguistics, Proceedings. All rights reserved.; 27th International Conference on Computational Linguistics, COLING 2018 ; Conference date: 20-08-2018 Through 26-08-2018},
}