% Poster paper presented at ICSE 2018 (conference details are kept in `note`).
% The `day` and `language` fields are not used by the standard BibTeX styles;
% they are retained as extra metadata.
% NOTE(review): the DOI prefix 10.1145 is normally associated with ACM, while
% `publisher` says IEEE Computer Society -- confirm which is correct.
@inproceedings{5a8aaec24be34b5bac6769c8e105692e,
  author    = {Dauber, Edwin and Caliskan, Aylin and Harang, Richard and Greenstadt, Rachel},
  title     = {Poster: {Git} blame who?: Stylistic authorship attribution of small, incomplete source code fragments},
  booktitle = {Proceedings - International Conference on Software Engineering},
  publisher = {IEEE Computer Society},
  pages     = {356--357},
  year      = {2018},
  month     = may,
  day       = {27},
  doi       = {10.1145/3183440.3195007},
  language  = {English (US)},
  keywords  = {Machine learning, Source code authorship attribution, Stylometry},
  abstract  = {Program authorship attribution has implications for the privacy of programmers who wish to contribute code anonymously. While previous work has shown that complete files that are individually authored can be attributed, these efforts have focused on ideal data sets such as the Google Code Jam data. We explore the problem of attribution ``in the wild,'' examining source code obtained from open source version control systems, and investigate if and how such contributions can be attributed to their authors, either individually or on a per-account basis. In this work we show that accounts belonging to open source contributors containing short, incomplete, and typically uncompilable fragments can be effectively attributed.},
  note      = {Publisher Copyright: {\textcopyright} 2018 Authors.; 40th ACM/IEEE International Conference on Software Engineering, ICSE 2018 ; Conference date: 27-05-2018 Through 03-06-2018},
}