% Conference paper (IEEE Conference on Games, CoG 2024). The citation key is an
% auto-generated export identifier and is kept unchanged so existing \cite commands
% still resolve. Conference-date details live in the free-form note field.
@inproceedings{577e89d480924907a9cf7fc7afe32fa8,
  author    = {Todd, Graham and Merino, Tim and Earle, Sam and Togelius, Julian},
  title     = {Missed Connections: Lateral Thinking Puzzles for Large Language Models},
  booktitle = {Proceedings of the 2024 {IEEE} Conference on Games, {CoG} 2024},
  series    = {{IEEE} Conference on Computational Intelligence and Games, {CIG}},
  publisher = {IEEE Computer Society},
  year      = {2024},
  doi       = {10.1109/CoG60054.2024.10645557},
  keywords  = {AI, evaluation, Language models, reasoning},
  abstract  = {The Connections puzzle published each day by the New York Times tasks players with dividing a bank of sixteen words into four groups of four words that each relate to a common theme. Solving the puzzle requires both common linguistic knowledge (i.e. definitions and typical usage) as well as, in many cases, lateral or abstract thinking. This is because the four categories ascend in complexity, with the most challenging category often requiring thinking about words in uncommon ways or as parts of larger phrases. We investigate the capacity for automated AI systems to play Connections and explore the game's potential as an automated benchmark for abstract reasoning and a way to measure the semantic information encoded by data-driven linguistic systems. In particular, we study both a sentence-embedding baseline and modern large language models (LLMs). We report their accuracy on the task, measure the impacts of chain-of-thought prompting, and discuss their failure modes. Overall, we find that the Connections task is challenging yet feasible, and a strong test-bed for future work.},
  note      = {Publisher Copyright: {\textcopyright} 2024 IEEE.; 6th Annual IEEE Conference on Games, CoG 2024 ; Conference date: 05-08-2024 Through 08-08-2024},
  language  = {English (US)},
}