% Conference paper in the ISMIR 2020 proceedings.
% Conference dates live in eventdate; the copyright notice is kept in a
% separate copyright field instead of a free-text note.
@inproceedings{489d36c2b4254f37b1a3cc8c9912af80,
  author    = {Wang, Yu and Salamon, Justin and Cartwright, Mark and Bryan, Nicholas J. and Bello, Juan Pablo},
  title     = {Few-Shot Drum Transcription in Polyphonic Music},
  booktitle = {Proceedings of the 21st International Society for Music Information Retrieval Conference, {ISMIR} 2020},
  editor    = {Cumming, Julie and Lee, Jin Ha and McFee, Brian and Schedl, Markus and Devaney, Johanna and McKay, Cory and Zangerle, Eva and de Reuse, Timothy},
  publisher = {International Society for Music Information Retrieval},
  year      = {2020},
  month     = oct,
  pages     = {255--262},
  eventdate = {2020-10-11/2020-10-16},
  language  = {english},
  langid    = {american},
  copyright = {{\textcopyright} Y. Wang, J. Salamon, M. Cartwright, N. J. Bryan, and J. P. Bello.},
  abstract  = {Data-driven approaches to automatic drum transcription (ADT) are often limited to a predefined, small vocabulary of percussion instrument classes. Such models cannot recognize out-of-vocabulary classes nor are they able to adapt to finer-grained vocabularies. In this work, we address open vocabulary ADT by introducing few-shot learning to the task. We train a Prototypical Network on a synthetic dataset and evaluate the model on multiple real-world ADT datasets with polyphonic accompaniment. We show that, given just a handful of selected examples at inference time, we can match and in some cases outperform a state-of-the-art supervised ADT approach under a fixed vocabulary setting. At the same time, we show that our model can successfully generalize to finer-grained or extended vocabularies unseen during training, a scenario where supervised approaches cannot operate at all. We provide a detailed analysis of our experimental results, including a breakdown of performance by sound class and by polyphony.},
}