% Workshop paper in the OSACT 2022 proceedings (published with LREC 2022).
% Personal names use the unambiguous "Last, First" form. Braces inside the
% title protect words whose capitalisation must survive sentence-casing
% styles. The proceedings name is stored once, in booktitle; no series
% field is given because the proceedings are not part of a numbered series
% according to the data available in this record.
@inproceedings{c3ba569d3dc34d918b459d29378ceca0,
  author    = {El-Haj, Mahmoud and Rayson, Paul and {de Souza}, Elvis and Khallaf, Nouran and Habash, Nizar},
  title     = {{AraSAS}: The Open Source {Arabic} Semantic Tagger},
  booktitle = {5th Workshop Open-Source Arabic Corpora and Processing Tools with Shared Tasks on Qur'an QA and Fine-Grained Hate Speech Detection, OSACT 2022 - Proceedings at Language Resources and Evaluation Conference, LREC 2022},
  editor    = {Al-Khalifa, Hend and Elsayed, Tamer and Mubarak, Hamdy and Al-Thubaity, Abdulmohsen and Magdy, Walid and Darwish, Kareem},
  pages     = {23--31},
  publisher = {European Language Resources Association (ELRA)},
  year      = {2022},
  language  = {English (US)},
  keywords  = {Arabic, Corpus Linguistics, English, Semantics, Taggers},
  abstract  = {This paper presents (AraSAS) the first open-source Arabic semantic analysis tagging system. AraSAS is a software framework that provides full semantic tagging of text written in Arabic. AraSAS is based on the UCREL Semantic Analysis System (USAS) which was first developed to semantically tag English text. Similarly to USAS, AraSAS uses a hierarchical semantic tag set that contains 21 major discourse fields and 232 fine-grained semantic field tags. The paper describes the creation, validation and evaluation of AraSAS. In addition, we demonstrate a first case study to illustrate the affordances of applying USAS and AraSAS semantic taggers on the Zayed University Arabic-English Bilingual Undergraduate Corpus (ZAEBUC) (Palfreyman and Habash, 2022), where we show and compare the coverage of the two semantic taggers through running them on Arabic and English essays on different topics. The analysis expands to compare the taggers when run on texts in Arabic and English written by the same writer and texts written by male and by female students. Variables for comparison include frequency of use of particular semantic sub-domains, as well as the diversity of semantic elements within a text.},
  note      = {Publisher Copyright: {\textcopyright} European Language Resources Association (ELRA).; 5th Workshop Open-Source Arabic Corpora and Processing Tools with Shared Tasks on Qur'an QA and Fine-Grained Hate Speech Detection, OSACT 2022 ; Conference date: 20-06-2022 Through 25-06-2022},
}