% Conference paper (LREC 2018, pp. 2317--2321) by Saddiki, Habash, Alfalasi and Al-Khalil.
% Maintenance notes:
%   * The exporter's funding / copyright / conference-date text is kept verbatim in
%     the non-standard field "export-note", which bibliography styles ignore; as a
%     "note" field it would be printed in the formatted reference.
%   * No "series" field: the exported value only repeated "booktitle", which makes
%     styles that print the series show the proceedings name twice.
%   * Braces in "title" protect the proper noun from sentence-casing styles.
@inproceedings{084b113a495d44dab7f66c326dc10cf2,
  author      = {Saddiki, Hind and Habash, Nizar and Alfalasi, Latifa and Al-Khalil, Muhamed},
  title       = {A Leveled Reading Corpus of {Modern Standard Arabic}},
  booktitle   = {LREC 2018 - 11th International Conference on Language Resources and Evaluation},
  editor      = {Isahara, Hitoshi and Maegaard, Bente and Piperidis, Stelios and Cieri, Christopher and Declerck, Thierry and Hasida, Koiti and Mazo, Helene and Choukri, Khalid and Goggi, Sara and Mariani, Joseph and Moreno, Asuncion and Calzolari, Nicoletta and Odijk, Jan and Tokunaga, Takenobu},
  publisher   = {European Language Resources Association (ELRA)},
  pages       = {2317--2321},
  year        = {2018},
  month       = may,
  day         = {7},
  language    = {English (US)},
  keywords    = {Arabic, Corpus, Curriculum, Fiction, Leveled Reading},
  abstract    = {We present a reading corpus in Modern Standard Arabic to enrich the sparse collection of resources that can be leveraged for educational applications. The corpus consists of textbook material from the curriculum of the United Arab Emirates, spanning all 12 grades (1.4 million tokens) and a collection of 129 unabridged works of fiction (5.6 million tokens) all annotated with reading levels from Grade 1 to Post-secondary. We examine reading progression in terms of lexical coverage, and compare the two sub-corpora (curricular, fiction) to others from clearly established genres (news, legal/diplomatic) to measure representation of their respective genres.},
  export-note = {Funding Information: The work on this project is funded by a New York University Abu Dhabi Research Enhancement Fund grant. We would like to express our thanks to the UAE Ministry of Education for providing us with the curriculum materials, which are essential for this research project. We also thank Bassel Musfi for annotating the fiction sub-corpus with copyright and publication year information. Publisher Copyright: {\textcopyright} LREC 2018 - 11th International Conference on Language Resources and Evaluation. All rights reserved.; 11th International Conference on Language Resources and Evaluation, LREC 2018 ; Conference date: 07-05-2018 Through 12-05-2018},
}