@inproceedings{1a3710a8dcb04147a499d3a14557a1e4,
title = "Meta-Det3D: Learn to Learn Few-Shot 3D Object Detection",
abstract = "This paper addresses the problem of few-shot indoor 3D object detection by proposing a meta-learning-based framework that only relies on a few labeled samples from novel classes for training. Our model has two major components: a 3D meta-detector and a 3D object detector. Given a query 3D point cloud and a few support samples, the 3D meta-detector is trained over different 3D detection tasks to learn task distributions for different object classes and dynamically adapt the 3D object detector to complete a specific detection task. The 3D object detector takes task-specific information as input and produces 3D object detection results for the query point cloud. Specifically, the 3D object detector first extracts object candidates and their features from the query point cloud using a point feature learning network. Then, a class-specific re-weighting module generates class-specific re-weighting vectors from the support samples to characterize the task information, one for each distinct object class. Each re-weighting vector performs channel-wise attention to the candidate features to re-calibrate the query object features, adapting them to detect objects of the same classes. Finally, the adapted features are fed into a detection head to predict classification scores and bounding boxes for novel objects in the query point cloud. Several experiments on two 3D object detection benchmark datasets demonstrate that our proposed method acquired the ability to detect 3D objects in the few-shot setting.",
keywords = "3D object detection, Channel-wise attention, Few-shot learning, Indoor scene, Meta-learning",
author = "Shuaihang Yuan and Xiang Li and Hao Huang and Yi Fang",
note = "Funding Information: The authors appreciate the generous support provided by Inception Institute of Artificial Intelligence (IIAI) in the form of NYUAD Global Ph.D. Student Fellowship. This work was also partially supported by the NYUAD Center for Artificial Intelligence and Robotics (CAIR), funded by Tamkeen under the NYUAD Research Institute Award CG010. Funding Information: Acknowledgements. The authors appreciate the generous support provided by Inception Institute of Artificial Intelligence (IIAI) in the form of NYUAD Global Ph.D. Student Fellowship. This work was also partially supported by the NYUAD Center for Artificial Intelligence and Robotics (CAIR), funded by Tamkeen under the NYUAD Research Institute Award CG010. Publisher Copyright: {\textcopyright} 2023, The Author(s), under exclusive license to Springer Nature Switzerland AG.; 16th Asian Conference on Computer Vision, ACCV 2022 ; Conference date: 04-12-2022 Through 08-12-2022",
year = "2023",
doi = "10.1007/978-3-031-26319-4_15",
language = "English (US)",
isbn = "9783031263187",
series = "Lecture Notes in Computer Science (including subseries Lecture Notes in Artificial Intelligence and Lecture Notes in Bioinformatics)",
publisher = "Springer Science and Business Media Deutschland GmbH",
pages = "245--261",
editor = "Lei Wang and Juergen Gall and Tat-Jun Chin and Imari Sato and Rama Chellappa",
booktitle = "Computer Vision – ACCV 2022 - 16th Asian Conference on Computer Vision, 2022, Proceedings",
address = "Germany",
}
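
The abstract above describes class-specific re-weighting vectors that modulate candidate features via channel-wise attention before a shared detection head. The following is a minimal illustrative sketch of that idea, not the authors' released code; the module name, tensor shapes, and the 6-parameter box head are assumptions made purely for illustration.

    # Sketch (assumption, not the paper's implementation) of class-specific
    # channel-wise re-weighting followed by a detection head, in PyTorch.
    import torch
    import torch.nn as nn

    class ClassSpecificReweighting(nn.Module):  # hypothetical module name
        def __init__(self, feat_dim: int):
            super().__init__()
            # Shared detection head: per-candidate classification score and a
            # placeholder 6-parameter box regression (center + size).
            self.cls_head = nn.Linear(feat_dim, 1)
            self.box_head = nn.Linear(feat_dim, 6)

        def forward(self, candidate_feats, reweight_vectors):
            # candidate_feats:  (B, K, C) features of K object candidates
            # reweight_vectors: (N, C)    one vector per novel class, derived
            #                             from the support samples
            B, K, C = candidate_feats.shape
            N = reweight_vectors.shape[0]
            # Channel-wise attention: scale each candidate feature with each
            # class-specific vector, giving adapted features of shape (B, N, K, C).
            adapted = candidate_feats.unsqueeze(1) * reweight_vectors.view(1, N, 1, C)
            scores = self.cls_head(adapted).squeeze(-1)  # (B, N, K)
            boxes = self.box_head(adapted)               # (B, N, K, 6)
            return scores, boxes

    # Usage with random tensors standing in for network outputs:
    # feats = torch.randn(2, 128, 256); vecs = torch.randn(5, 256)
    # scores, boxes = ClassSpecificReweighting(256)(feats, vecs)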