% Conference paper (3DV 2022 proceedings). The funding, copyright and
% event-date text lives in the non-standard fields funding-note,
% copyright-note and event-note so that it stays with the record; standard
% bibliography styles ignore unknown fields, so it is not printed.
@inproceedings{91f26f0da12c4d3ba6ad235cd6f44885,
  author         = {Rockwell, Chris and Johnson, Justin and Fouhey, David F.},
  title          = {The 8-Point Algorithm as an Inductive Bias for Relative Pose Prediction by {ViTs}},
  booktitle      = {Proceedings - 2022 International Conference on {3D} Vision, {3DV} 2022},
  publisher      = {Institute of Electrical and Electronics Engineers Inc.},
  year           = {2022},
  pages          = {155--165},
  doi            = {10.1109/3DV57658.2022.00028},
  language       = {english},
  keywords       = {Camera Pose, Eight Point Algorithm, Vision Transformer},
  abstract       = {We present a simple baseline for directly estimating the relative pose (rotation and translation, including scale) between two images. Deep methods have recently shown strong progress but often require complex or multi-stage architectures. We show that a handful of modifications can be applied to a Vision Transformer (ViT) to bring its computations close to the Eight-Point Algorithm. This inductive bias enables a simple method to be competitive in multiple settings, often substantially improving over the state of the art with strong performance gains in limited data regimes.},
  funding-note   = {Funding Information: Toyota Research Institute (``TRI'') provided funds to assist the authors with their research but this article solely reflects the opinions and conclusions of its authors and not TRI or any other Toyota entity.},
  copyright-note = {Publisher Copyright: {\textcopyright} 2022 IEEE.},
  event-note     = {10th International Conference on 3D Vision, 3DV 2022; Conference date: 12-09-2022 Through 15-09-2022},
}