% Conference paper from ISLPED 2024 (DOI 10.1145/3665314.3670813).
% The citation key is the exporter's opaque identifier; it is kept unchanged so
% that existing citations of this key keep resolving.
% Event details and the copyright statement are stored in eventtitle/eventdate
% and copyright instead of note, so they are not printed as a free-form note.
% Classic BibTeX styles ignore these fields; biblatex reads eventtitle/eventdate
% as event metadata.
% NOTE(review): the given name "Gu Yeon" may be hyphenated in the publisher
% record -- confirm against the DOI landing page before relying on initials.
@inproceedings{91728c0d32ba4f5f9e71c1b308e3ba6b,
  author     = {Zhang, Sai Qian and Tambe, Thierry and Wei, Gu Yeon and Brooks, David},
  title      = {{JointNF}: Enhancing {DNN} Performance through Adaptive {N:M} Pruning across both Weight and Activation},
  booktitle  = {Proceedings of the 29th International Symposium on Low Power Electronics and Design, {ISLPED} 2024},
  series     = {{ISLPED} '24},
  publisher  = {Association for Computing Machinery, Inc},
  year       = {2024},
  month      = aug,
  day        = {5},
  doi        = {10.1145/3665314.3670813},
  eventtitle = {29th {ACM/IEEE} International Symposium on Low Power Electronics and Design, {ISLPED} 2024},
  eventdate  = {2024-08-05/2024-08-07},
  language   = {English (US)},
  keywords   = {hardware accelerator, pruning, transformer},
  abstract   = {Balancing accuracy and hardware efficiency remains a challenge with traditional pruning methods. N:M sparsity is a recent approach offering a compromise, allowing up to N non-zero weights in a group of M consecutive weights. However, N:M pruning enforces a uniform sparsity level of N/M across all layers, which does not align well with the sparse nature of deep neural networks (DNNs). To achieve a more flexible sparsity pattern and a higher overall sparsity level, we present JointNF, a novel joint N:M and structured pruning algorithm to enable fine-grained structured pruning with adaptive sparsity levels across the DNN layers. Moreover, we show for the first time that N:M pruning can also be applied over the input activation for further performance enhancement.},
  copyright  = {{\textcopyright} 2024 Copyright is held by the owner/author(s). Publication rights licensed to ACM.},
}