% Conference paper from the IOLTS 2020 proceedings (Hanif and Shafique).
% The citation key is kept exactly as exported so existing citations still resolve.
% The former `series` field duplicated `booktitle` verbatim and was dropped to avoid
% the proceedings title being printed twice.
% `copyright` is not a field used by the standard BibTeX styles; it is retained as metadata only.
@inproceedings{1720d9b0e3e842e98dd1e3c72491958c,
  author    = {Hanif, Muhammad Abdullah and Shafique, Muhammad},
  title     = {Dependable Deep Learning: Towards Cost-Efficient Resilience of Deep Neural Network Accelerators against Soft Errors and Permanent Faults},
  booktitle = {Proceedings - 2020 26th {IEEE} International Symposium on On-Line Testing and Robust System Design, {IOLTS} 2020},
  publisher = {Institute of Electrical and Electronics Engineers Inc.},
  year      = {2020},
  month     = jul,
  doi       = {10.1109/IOLTS50870.2020.9159734},
  note      = {Conference held 13--16 July 2020},
  keywords  = {Accelerator, Aging, Architecture, Cost, Deep Learning, Deep Neural Networks, Dependability, DL, DNNs, Efficiency, Faults, Manufacturing Defects, Permanent Faults, Reliability, Resilience, Robustness, Soft Errors, Systems, Yield},
  abstract  = {Deep Learning has enabled machines to learn computational models (i.e., Deep Neural Networks-DNNs) that can perform certain complex tasks with claims to be close to human-level precision. This state-of-the-art performance offered by DNNs in many Artificial Intelligence (AI) applications has paved their way to being used in several safety-critical applications where even a single failure can lead to catastrophic results. Therefore, improving the robustness of these models to hardware-induced faults (such as soft errors, aging, and manufacturing defects) is of significant importance to avoid any disastrous event. Traditional redundancy-based fault mitigation techniques cannot be employed in a wide range of applications due to their high overheads, which, when coupled with the compute-intensive nature of DNNs, lead to undesirable resource consumption. In this article, we present an overview of different low-cost fault-mitigation techniques that exploit the intrinsic characteristics of DNNs to limit their overheads. We discuss how each technique can contribute to the overall resilience of a DNN-based system, and how they can be integrated together to offer resilience against multiple diverse hardware-induced reliability threats. Towards the end, we highlight several key future directions that are envisioned to help in achieving highly dependable DL-based systems.},
  language  = {English (US)},
  copyright = {{\textcopyright} 2020 IEEE},
}