@comment{
  Conference-paper record for "Processing Spontaneous Orthography".
  The fields funding, copyright and internal-note are non-standard:
  standard styles ignore them, so the funding statement, the copyright
  line and the reviewer remark are kept in the database without being
  printed in the reference list.
}
@inproceedings{884e3383d3cb47199e6a166d5112b504,
  author        = {Eskander, Ramy and Habash, Nizar and Rambow, Owen and Tomeh, Nadi},
  title         = {Processing Spontaneous Orthography},
  booktitle     = {Proceedings of the 2nd Workshop on Computational Linguistics for Literature, {CLfL} 2013 at the 2013 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, {NAACL-HLT} 2013},
  editor        = {Elson, David and Kazantseva, Anna and Szpakowicz, Stan},
  publisher     = {Association for Computational Linguistics ({ACL})},
  year          = {2013},
  month         = jun,
  eventdate     = {2013-06-14},
  pages         = {585--595},
  language      = {english},
  abstract      = {In cases in which there is no standard orthography for a language or language variant, written texts will display a variety of orthographic choices. This is problematic for natural language processing (NLP) because it creates spurious data sparseness. We study the transformation of spontaneously spelled Egyptian Arabic into a conventionalized orthography which we have previously proposed for NLP purposes. We show that a two-stage process can reduce divergences from this standard by 69\%, making subsequent processing of Egyptian Arabic easier.},
  funding       = {This paper is based upon work supported by the Defense Advanced Research Projects Agency (DARPA) under Contract No. HR0011-12-C-0014. Any opinions, findings and conclusions or recommendations expressed in this paper are those of the authors and do not necessarily reflect the views of DARPA. We thank three anonymous reviewers for helpful comments, and Ryan Roth for help with running MADA.},
  copyright     = {{\textcopyright} 2013 Association for Computational Linguistics},
  internal-note = {NOTE(review): the page range 585--595 is unusually high for a workshop volume; this paper may belong to the main conference proceedings rather than the CLfL workshop -- confirm booktitle and editor against the publisher's record before relying on them.},
}