% Conference paper (EACL 2024 proceedings): the M4 machine-generated text detection benchmark.
% Names use the "Last, First" form; braced given names are kept as single units.
% No series field: the exported value only repeated booktitle verbatim.
@inproceedings{c5ed5e7eab12491dbad005ba198c4563,
  author    = {Wang, Yuxia and Mansurov, Jonibek and Ivanov, Petar and Su, Jinyan and Shelmanov, Artem and Tsvigun, Akim and Whitehouse, Chenxi and Afzal, {Osama Mohammed} and Mahmoud, Tarek and Sasaki, Toru and Arnold, Thomas and Aji, {Alham Fikri} and Habash, Nizar and Gurevych, Iryna and Nakov, Preslav},
  title     = {{M4}: Multi-Generator, Multi-Domain, and Multi-Lingual Black-Box Machine-Generated Text Detection},
  booktitle = {EACL 2024 - 18th Conference of the European Chapter of the Association for Computational Linguistics, Proceedings of the Conference},
  editor    = {Graham, Yvette and Purver, Matthew},
  publisher = {Association for Computational Linguistics (ACL)},
  pages     = {1369--1407},
  year      = {2024},
  language  = {English (US)},
  note      = {Publisher Copyright: {\textcopyright} 2024 Association for Computational Linguistics.; 18th Conference of the European Chapter of the Association for Computational Linguistics, EACL 2024 ; Conference date: 17-03-2024 Through 22-03-2024},
  abstract  = {Large language models (LLMs) have demonstrated remarkable capability to generate fluent responses to a wide variety of user queries. However, this has also raised concerns about the potential misuse of such texts in journalism, education, and academia. In this study, we strive to create automated systems that can detect machine-generated texts and pinpoint potential misuse. We first introduce a large-scale benchmark M4, which is a multi-generator, multi-domain, and multi-lingual corpus for machine-generated text detection. Through an extensive empirical study of this dataset, we show that it is challenging for detectors to generalize well on instances from unseen domains or LLMs. In such cases, detectors tend to misclassify machine-generated text as human-written. These results show that the problem is far from solved and that there is a lot of room for improvement. We believe that our dataset will enable future research towards more robust approaches to this pressing societal problem. The dataset is available at https://github.com/mbzuai-nlp/M4.},
}