% Conference paper in the LREC 2016 proceedings (Habash, Shahrour, Al-Khalil).
% The citation key is an exported identifier; keep it stable for existing \cite commands.
@inproceedings{fd6d253a6f314c8f85e1806fa17f91ed,
  author    = {Habash, Nizar and Shahrour, Anas and Al-Khalil, Muhamed},
  title     = {Exploiting {Arabic} diacritization for high quality automatic annotation},
  booktitle = {Proceedings of the 10th International Conference on Language Resources and Evaluation, {LREC} 2016},
  editor    = {Calzolari, Nicoletta and Choukri, Khalid and Mazo, Helene and Moreno, Asuncion and Declerck, Thierry and Goggi, Sara and Grobelnik, Marko and Odijk, Jan and Piperidis, Stelios and Maegaard, Bente and Mariani, Joseph},
  year      = {2016},
  pages     = {4298--4304},
  publisher = {European Language Resources Association (ELRA)},
  note      = {Conference date: 23--28 May 2016},
  language  = {English (US)},
  keywords  = {Annotation, Arabic, Diacritization, Morphology},
  abstract  = {We present a novel technique for Arabic morphological annotation. The technique utilizes diacritization to produce morphological annotations of quality comparable to human annotators. Although Arabic text is generally written without diacritics, diacritization is already available for large corpora of Arabic text in several genres. Furthermore, diacritization can be generated at a low cost for new text as it does not require specialized training beyond what educated Arabic typists know. The basic approach is to enrich the input to a state-of-the-art Arabic morphological analyzer with word diacritics (full or partial) to enhance its performance. When applied to fully diacritized text, our approach produces annotations with an accuracy of over 97\% on lemma, part-of-speech, and tokenization combined.},
}