% Conference paper describing the TARIC corpus and a rule-based phonetic
% dictionary for Tunisian Arabic ASR (LREC 2014 proceedings, pp. 306--310).
% Citation key is left unchanged so existing citations still resolve.
% The former `series` field duplicated `booktitle` verbatim and was dropped;
% the conference dates from the original note are kept in `note`.
@inproceedings{d7e585583a42493cb34c436806a8b327,
  author    = {Masmoudi, Abir and Khemakhem, Mariem Ellouze and Est{\`e}ve, Yannick and Belguith, Lamia Hadrich and Habash, Nizar},
  title     = {A Corpus and Phonetic Dictionary for {Tunisian} {Arabic} Speech Recognition},
  booktitle = {Proceedings of the 9th International Conference on Language Resources and Evaluation, {LREC} 2014},
  editor    = {Calzolari, Nicoletta and Choukri, Khalid and Goggi, Sara and Declerck, Thierry and Mariani, Joseph and Maegaard, Bente and Moreno, Asuncion and Odijk, Jan and Mazo, Helene and Piperidis, Stelios and Loftsson, Hrafn},
  publisher = {European Language Resources Association (ELRA)},
  pages     = {306--310},
  year      = {2014},
  month     = may,
  language  = {English (US)},
  keywords  = {Grapheme-to-phoneme, Phonetic dictionary, Speech recognition, Tunisian Arabic},
  abstract  = {In this paper we describe an effort to create a corpus and phonetic dictionary for Tunisian Arabic Automatic Speech Recognition (ASR). The corpus, named TARIC (Tunisian Arabic Railway Interaction Corpus) has a collection of audio recordings and transcriptions from dialogues in the Tunisian Railway Transport Network. The phonetic (or pronunciation) dictionary is an important ASR component that serves as an intermediary between acoustic models and language models in ASR systems. The method proposed in this paper, to automatically generate a phonetic dictionary, is rule based. For that reason, we define a set of pronunciation rules and a lexicon of exceptions. To determine the performance of our phonetic rules, we chose to evaluate our pronunciation dictionary on two types of corpora. The word error rate of word grapheme-to-phoneme mapping is around 9\%.},
  note      = {Conference held 26--31 May 2014},
}