% Workshop paper published in the IPDPSW 2019 proceedings (IEEE).
% The citation key is the exporter-generated identifier and is kept unchanged
% so that existing \cite commands continue to resolve.
% Conference metadata lives in eventtitle/eventdate and the publisher's
% copyright line in copyright: classic BibTeX styles ignore these fields,
% while biblatex reads eventtitle/eventdate natively.
% The abstract is reproduced verbatim from the exported record.
@inproceedings{5ee59234b4d2444ca8fcdde053b41e6a,
  author     = {Chikin, Artem and Amaral, Jose Nelson and Ali, Karim and Tiotto, Ettore},
  title      = {Toward an Analytical Performance Model to Select between {GPU} and {CPU} Execution},
  booktitle  = {Proceedings - 2019 {IEEE} 33rd International Parallel and Distributed Processing Symposium Workshops, {IPDPSW} 2019},
  publisher  = {Institute of Electrical and Electronics Engineers Inc.},
  pages      = {353--362},
  year       = {2019},
  month      = may,
  doi        = {10.1109/IPDPSW.2019.00068},
  eventtitle = {33rd {IEEE} International Parallel and Distributed Processing Symposium Workshops, {IPDPSW} 2019},
  eventdate  = {2019-05-20/2019-05-24},
  language   = {english},
  keywords   = {GPGPU, Heterogeneous Computing, Hybrid Analysis, MIC, OpenMP, Performance Model, Static Analysis},
  copyright  = {{\textcopyright} 2019 IEEE},
  abstract   = {Automating the device selection in heterogeneous computing platforms requires the modelling of performance both on CPUs and on accelerators. This work argues for the use of a hybrid analytical performance modelling approach is a practical way to build fast and efficient methods to select an appropriate target for a given computation kernel. The target selection problem has been addressed in the literature, however there has been a strong emphasis on building empirical models with machine learning techniques. We argue that the applicability of such solutions is often limited in production systems. This paper focus on the issue of building a selector to decide if an OpenMP loop nest should be executed in a CPU or in a GPU. To this end, it offers a comprehensive comparison evaluation of the difference in GPU kernel performance on devices of multiple generations of architectures. The idea is to underscore the need for accurate analytical performance models and to provide insights in the evolution of GPU accelerators. This work also highlights a drawback of existing approaches to modelling GPU performance - accurate modelling of memory coalescing characteristics. To that end, we examine a novel application of an inter-thread difference analysis that can further improve analytical models. Finally, this work presents an initial study of an OpenMP runtime framework for target-offloading target selection.},
}