@inproceedings{da39b13d372e40d18ef32bf531c91c79,
title = "SBanTEM: A Novel Methodology for Sparse Band Tensors as Soft-Error Mitigation in Sparse Convolutional Neural Networks",
abstract = "Over the last two decades, Convolutional Neural Networks (CNNs) have become common in a wide variety of tasks, including safety-critical ones such as autonomous driving, leading to optimizations such as Sparse Convolutional Neural Networks (SparseCNNs). Scaling technological nodes has led to an exponential increase in transient faults affecting the systems, generating critical soft errors. We introduce SBanTEM, a novel methodology for employing sparse band tensors as soft-error mitigation in SparseCNNs. SBanTEM includes a novel mitigation technique, employing band tensors, as they do not require using indices for storing data. We employ progressive reduction of the bandwidth of the selected tensors, allowing the network to train in-between successive prunings, and compensate accuracy loss. Additionally, we implement a Genetic Algorithm (GA) to optimally select the tensors bandwidths in the network. We analyze the resilience of many state-of-the-art CNNs on multiple datasets, showing that resilience is much lower for SparseCNNs, and using SBanTEM makes them as resilient as standard CNNs. SBanTEM's code and results are available at github.com/Alexei95/SBanTEM to boost reproducibility and reusability of the implementation.",
keywords = "band matrix, deep neural networks, fault injection, fault tolerance, pruning, resilience, sparse",
author = "Alessio Colucci and Andreas Steininger and Muhammad Shafique",
note = "Publisher Copyright: {\textcopyright} 2024 IEEE.; 30th IEEE International Symposium on On-line Testing and Robust System Design, IOLTS 2024 ; Conference date: 03-07-2024 Through 05-07-2024",
year = "2024",
doi = "10.1109/IOLTS60994.2024.10616070",
language = "English (US)",
series = "Proceedings - 2024 IEEE 30th International Symposium on On-line Testing and Robust System Design, IOLTS 2024",
publisher = "Institute of Electrical and Electronics Engineers Inc.",
booktitle = "Proceedings - 2024 IEEE 30th International Symposium on On-line Testing and Robust System Design, IOLTS 2024",
}