% Abel et al., extended abstract in the IJCAI 2022 proceedings.
% Names are stored in "Last, First" form; case-sensitive words in titles are brace-protected.
@inproceedings{a6ec03a66a8043d8b5509cf7430dc26e,
  author    = {Abel, David and Dabney, Will and Harutyunyan, Anna and Ho, Mark K. and Littman, Michael L. and Precup, Doina and Singh, Satinder},
  title     = {On the Expressivity of {Markov} Reward (Extended Abstract)},
  booktitle = {Proceedings of the 31st International Joint Conference on Artificial Intelligence, {IJCAI} 2022},
  editor    = {{De Raedt}, Luc},
  series    = {IJCAI International Joint Conference on Artificial Intelligence},
  publisher = {International Joint Conferences on Artificial Intelligence},
  year      = {2022},
  pages     = {5254--5258},
  language  = {English (US)},
  note      = {Publisher Copyright: {\textcopyright} 2022 International Joint Conferences on Artificial Intelligence. All rights reserved.; 31st International Joint Conference on Artificial Intelligence, IJCAI 2022 ; Conference date: 23-07-2022 Through 29-07-2022},
  abstract  = {Reward is the driving force for reinforcement-learning agents. We here set out to understand the expressivity of Markov reward as a way to capture tasks that we would want an agent to perform. We frame this study around three new abstract notions of ``task'': (1) a set of acceptable behaviors, (2) a partial ordering over behaviors, or (3) a partial ordering over trajectories. Our main results prove that while reward can express many of these tasks, there exist instances of each task type that no Markov reward function can capture. We then provide a set of polynomial-time algorithms that construct a Markov reward function that allows an agent to perform each task type, and correctly determine when no such reward function exists.},
}