% Conference paper: CrunchQA, in the proceedings of the 2022 IEEE International
% Conference on Big Data. The citation key is the exporter-generated identifier
% and is kept as-is so existing \cite commands keep resolving.
% `eventdate`, `langid`, and `copyright` are ignored by classic BibTeX styles;
% `eventdate` and `langid` are standard biblatex fields.
@inproceedings{28629ddbd34048c3ab5384c70476a62a,
  author    = {Yu, Lifan and Madjid, Nadya Abdel and Difallah, Djellel},
  title     = {{CrunchQA}: A Synthetic Dataset for Question Answering over {Crunchbase} Knowledge Graph},
  booktitle = {Proceedings - 2022 {IEEE} International Conference on Big Data, Big Data 2022},
  editor    = {Tsumoto, Shusaku and Ohsawa, Yukio and Chen, Lei and {Van den Poel}, Dirk and Hu, Xiaohua and Motomura, Yoichi and Takagi, Takuya and Wu, Lingfei and Xie, Ying and Abe, Akihiro and Raghavan, Vijay},
  publisher = {Institute of Electrical and Electronics Engineers Inc.},
  pages     = {4635--4641},
  year      = {2022},
  eventdate = {2022-12-17/2022-12-20},
  doi       = {10.1109/BigData55660.2022.10021012},
  langid    = {american},
  keywords  = {Enterprise KB, Graph Embedding, Knowledge Graph, Question Answering},
  copyright = {Publisher Copyright: {\textcopyright} 2022 IEEE.},
  abstract  = {The digital transformation in the finance and enterprise sector has been driven by the advances made in big data and artificial intelligence technologies. For instance, data integration enables businesses to make better decisions by consolidating and mining heterogeneous data repositories. In particular, knowledge graphs (KGs) are used to facilitate the integration of disparate data sources and can be utilized to answer complex queries. This work proposes a new dataset for question-answering on knowledge graphs (KGQA) to reflect the challenges we identified in real-world applications which are not covered by existing benchmarks, namely, multi-hop constraints, numeric and literal embeddings, ranking, reification, and hyper-relations. To build the dataset, we create a new Knowledge Graph from the Crunchbase database using a lightweight schema to support high-quality entity embeddings in large graphs. Next, we create a Question Answering dataset based on natural language question generation using predefined multiple-hop templates and paraphrasing. Finally, we conduct extensive experiments with state-of-the-art KGQA models and compare their performance on CrunchQA. The results show that the existing models do not perform well, for example, on multi-hop constrained queries. Hence, CrunchQA can be used as a challenging benchmark dataset for future KGQA reasoning models. The dataset and scripts are available on the project repository.},
}