% Conference paper presenting the Camel Treebank (CAMELTB), published in the
% LREC 2022 proceedings (pages 2672--2681).
% The citation key is the original opaque identifier and is kept unchanged so
% that existing \cite commands continue to resolve.
% eventtitle/eventdate carry the conference details that were previously
% embedded in the note; biblatex reads them, classic BibTeX styles ignore them.
@inproceedings{b80463ee91c349138fc7cf806c7895f0,
  author     = {Habash, Nizar and AbuOdeh, Muhammed and Taji, Dima and Faraj, Reem and {El Gizuli}, Jamila and Kallas, Omar},
  title      = {{Camel Treebank}: An Open Multi-genre {Arabic} Dependency Treebank},
  booktitle  = {2022 Language Resources and Evaluation Conference, {LREC} 2022},
  editor     = {Calzolari, Nicoletta and Bechet, Frederic and Blache, Philippe and Choukri, Khalid and Cieri, Christopher and Declerck, Thierry and Goggi, Sara and Isahara, Hitoshi and Maegaard, Bente and Mariani, Joseph and Mazo, Helene and Odijk, Jan and Piperidis, Stelios},
  publisher  = {European Language Resources Association (ELRA)},
  year       = {2022},
  month      = jun,
  pages      = {2672--2681},
  eventtitle = {13th International Conference on Language Resources and Evaluation, {LREC} 2022},
  eventdate  = {2022-06-20/2022-06-25},
  language   = {English (US)},
  keywords   = {Arabic, Multiple Genres, Open Source, Syntactic Dependency Treebank},
  note       = {Publisher Copyright: {\textcopyright} European Language Resources Association (ELRA), licensed under CC-BY-NC-4.0},
  abstract   = {We present the Camel Treebank (CAMELTB), a 188K word open-source dependency treebank of Modern Standard and Classical Arabic. CAMELTB 1.0 includes 13 sub-corpora comprising selections of texts from pre-Islamic poetry to social media online commentaries, and covering a range of genres from religious and philosophical texts to news, novels, and student essays. The texts are all publicly available (out of copyright, creative commons, or under open licenses). The texts were morphologically tokenized and syntactically parsed automatically, and then manually corrected by a team of trained annotators. The annotations follow the guidelines of the Columbia Arabic Treebank (CATiB) dependency representation. We discuss our annotation process and guideline extensions, and we present some initial observations on lexical and syntactic differences among the annotated sub-corpora. This corpus will be publicly available to support and encourage research on Arabic NLP in general and on new, previously unexplored genres that are of interest to a wider spectrum of researchers, from historical linguistics and digital humanities to computer-assisted language pedagogy.},
}