% Conference paper in the LVA/ICA 2015 proceedings (Springer LNCS series).
% Accented letters in personal names are written as braced LaTeX escapes so
% that classic 8-bit BibTeX sorts and typesets them correctly.
@inproceedings{e4ede498a8c844ff929fc4882adbe164,
title = "Audio source separation with discriminative scattering networks",
abstract = "Many monaural signal decomposition techniques proposed in the literature operate on a feature space consisting of a time-frequency representation of the input data. A challenge faced by these approaches is to effectively exploit the temporal dependencies of the signals at scales larger than the duration of a time-frame. In this work we propose to tackle this problem by modeling the signals using a time-frequency representation with multiple temporal resolutions. For this reason we use a signal representation that consists of a pyramid of wavelet scattering operators, which generalizes Constant Q Transforms (CQT) with extra layers of convolution and complex modulus. We first show that learning standard models with this multi-resolution setting improves source separation results over fixed-resolution methods. As study case, we use Non-Negative Matrix Factorizations (NMF) that has been widely considered in many audio application. Then, we investigate the inclusion of the proposed multi-resolution setting into a discriminative training regime. We discuss several alternatives using different deep neural network architectures, and our preliminary experiments suggest that in this task, finite impulse, multi-resolution Convolutional Networks are a competitive baseline compared to recurrent alternatives.",
keywords = "Deep learning, Non-negative matrix factorization, Scattering, Source separation",
author = "Sprechmann, Pablo and Bruna, Joan and LeCun, Yann",
note = "Publisher Copyright: {\textcopyright} Springer International Publishing Switzerland 2015.; 12th International Conference on Latent Variable Analysis and Signal Separation, LVA/ICA 2015 ; Conference date: 25-08-2015 Through 28-08-2015",
year = "2015",
doi = "10.1007/978-3-319-22482-4_30",
language = "English (US)",
isbn = "9783319224817",
series = "Lecture Notes in Computer Science (including subseries Lecture Notes in Artificial Intelligence and Lecture Notes in Bioinformatics)",
publisher = "Springer Verlag",
pages = "259--267",
editor = "Koldovsk{\'y}, Zbyn{\v{e}}k and Vincent, Emmanuel and Yeredor, Arie and Tichavsk{\'y}, Petr",
booktitle = "Latent Variable Analysis and Signal Separation - 12th International Conference, {LVA/ICA} 2015, Proceedings",
}