% Conference paper from SIGGRAPH Asia 2022 Technical Communications.
% Names use "Last, First" form; case-sensitive tokens in titles are brace-protected.
@inproceedings{ec06ac737f6d4306a8543ee59ed9e49f,
  author    = {Schlachter, Kristofer and Ahlbrand, Benjamin and Wang, Zhu and Perlin, Ken and Ortenzi, Valerio},
  title     = {Zero-Shot Multi-Modal Artist-Controlled Retrieval and Exploration of {3D} Object Sets},
  booktitle = {Proceedings - {SIGGRAPH} Asia 2022},
  series    = {Proceedings - {SIGGRAPH} Asia 2022: Technical Communications},
  editor    = {Spencer, Stephen N.},
  publisher = {Association for Computing Machinery, Inc},
  year      = {2022},
  month     = dec,
  day       = {6},
  doi       = {10.1145/3550340.3564216},
  language  = {English (US)},
  keywords  = {computer graphics, neural networks},
  abstract  = {When creating 3D content, highly specialized skills are generally needed to design and generate models of objects and other assets by hand. We address this problem through high-quality 3D asset retrieval from multi-modal inputs, including 2D sketches, images and text. We use CLIP as it provides a bridge to higher-level latent features. We use these features to perform a multi-modality fusion to address the lack of artistic control that affects common data-driven approaches. Our approach allows for multi-modal conditional feature-driven retrieval through a 3D asset database, by utilizing a combination of input latent embeddings. We explore the effects of different combinations of feature embeddings across different input types and weighting methods.},
  note      = {Publisher Copyright: {\textcopyright} 2022 ACM.; SIGGRAPH Asia 2022 Technical Communications - Computer Graphics and Interactive Techniques Conference - Asia, SA 2022 ; Conference date: 06-12-2022 Through 09-12-2022},
}