% Xu et al., Show, Attend and Tell -- conference paper presented at ICML 2015.
% The citation key is an opaque export identifier; it is kept unchanged so
% that existing citations of this record keep resolving.
% Acknowledgement and copyright text is stored in the funding and copyright
% fields, which standard styles ignore, instead of the printed note field.
% The abstract names the dataset Flickr8k; some exports misspell it Flickr9k.
@inproceedings{da67e5600dce464c847ee22571319780,
  author    = {Xu, Kelvin and Ba, Jimmy Lei and Kiros, Ryan and Cho, Kyunghyun and Courville, Aaron and Salakhutdinov, Ruslan and Zemel, Richard S. and Bengio, Yoshua},
  title     = {Show, Attend and Tell: Neural Image Caption Generation with Visual Attention},
  booktitle = {32nd International Conference on Machine Learning, {ICML} 2015},
  editor    = {Bach, Francis and Blei, David},
  pages     = {2048--2057},
  publisher = {International Machine Learning Society (IMLS)},
  year      = {2015},
  month     = jul,
  eventdate = {2015-07-06/2015-07-11},
  langid    = {american},
  abstract  = {Inspired by recent work in machine translation and object detection, we introduce an attention based model that automatically learns to describe the content of images. We describe how we can train this model in a deterministic manner using standard backpropagation techniques and stochastically by maximizing a variational lower bound. We also show through visualization how the model is able to automatically learn to fix its gaze on salient objects while generating the corresponding words in the output sequence. We validate the use of attention with state-of-the-art performance on three benchmark datasets: Flickr8k, Flickr30k and MS COCO.},
  funding   = {The authors would like to thank the developers of Theano (Bergstra et al., 2010; Bastien et al., 2012). We acknowledge the support of the following organizations for research funding and computing support: NSERC, Samsung, NVIDIA, Calcul Qu{\'e}bec, Compute Canada, the Canada Research Chairs and CIFAR. The authors would also like to thank Nitish Srivastava for assistance with his ConvNet package as well as preparing the Oxford convolutional network and Relu Patrascu for helping with numerous infrastructure-related problems.},
  copyright = {{\textcopyright} Copyright 2015 by International Machine Learning Society (IMLS). All rights reserved.},
}