% Conference paper (ICS 2019): full-stack CNN acceleration with powers-of-two
% weights, validated on an FPGA. The citation key is the exporter-generated
% identifier and is kept unchanged so existing citations still resolve.
% Author names use plain "Last, First" form; acronyms in the title are
% brace-protected so sentence-casing styles do not lowercase them.
@inproceedings{33de5f12e10b4337838bce8f659059ab,
  author    = {McDanel, Bradley and Zhang, Sai Qian and Kung, H. T. and Dong, Xin},
  title     = {Full-stack optimization for accelerating {CNNs} using powers-of-two weights with {FPGA} validation},
  booktitle = {ICS 2019 - International Conference on Supercomputing},
  series    = {Proceedings of the International Conference on Supercomputing},
  publisher = {Association for Computing Machinery},
  pages     = {449--460},
  year      = {2019},
  month     = jun,
  day       = {26},
  doi       = {10.1145/3330345.3330385},
  language  = {English (US)},
  keywords  = {Co-design, Joint optimization, Neural networks, Powers-of-two weights, Sparsity, Systolic arrays},
  abstract  = {We present a full-stack optimization framework for accelerating inference of CNNs (Convolutional Neural Networks) and validate the approach with a field-programmable gate array (FPGA) implementation. By jointly optimizing CNN models, computing architectures, and hardware implementations, our full-stack approach achieves unprecedented performance in the trade-off space characterized by inference latency, energy efficiency, hardware utilization, and inference accuracy. An FPGA implementation is used as the validation vehicle for our design, achieving a 2.28ms inference latency for the ImageNet benchmark. Our implementation shines in that it has 9x higher energy efficiency compared to other implementations while achieving comparable latency. A highlight of our approach which contributes to the achieved high energy efficiency is an efficient Selector-Accumulator (SAC) architecture for implementing CNNs with powers-of-two weights. Compared to an FPGA implementation for a traditional 8-bit MAC, SAC substantially reduces required hardware resources (4.85x fewer lookup tables) and power consumption (2.48x).},
  note      = {Publisher Copyright: {\textcopyright} 2019 ACM.; 33rd ACM International Conference on Supercomputing, ICS 2019, held in conjunction with the Federated Computing Research Conference, FCRC 2019 ; Conference date: 26-06-2019},
}