% Pimentel, Freire, Murta, Braganholo (2019): survey of provenance for scripts,
% ACM Computing Surveys 52(3). The article number (47) is taken from the ACM
% copyright marker "06-ART47" embedded in the note field.
% Author names use the unambiguous "Last, First" form; accents are top-level
% brace groups so BibTeX treats them as single letters when sorting/abbreviating.
% LaTeX specials in the note (the ampersand in AT\&T, the dollar sign in the
% price) are escaped so styles that print the note do not break compilation.
% NOTE(review): the note is export boilerplate (funding, addresses, permissions)
% and will be printed by most styles -- consider moving it to an unprinted field.
@article{a43e64090fa8496ea0fb5e51abb3a77e,
  author    = "Pimentel, Jo{\~a}o Felipe and Freire, Juliana and Murta, Leonardo and Braganholo, Vanessa",
  title     = "A survey on collecting, managing, and analyzing provenance from scripts",
  journal   = "ACM Computing Surveys",
  volume    = "52",
  number    = "3",
  articleno = "47",
  year      = "2019",
  month     = jun,
  issn      = "0360-0300",
  doi       = "10.1145/3311955",
  publisher = "Association for Computing Machinery (ACM)",
  language  = "English (US)",
  keywords  = "Analyzing, Collecting, Managing, Provenance, Scripts, Survey",
  abstract  = "Scripts are widely used to design and run scientific experiments. Scripting languages are easy to learn and use, and they allow complex tasks to be specified and executed in fewer steps than with traditional programming languages. However, they also have important limitations for reproducibility and data management. As experiments are iteratively refined, it is challenging to reason about each experiment run (or trial), to keep track of the association between trials and experiment instances as well as the differences across trials, and to connect results to specific input data and parameters. Approaches have been proposed that address these limitations by collecting, managing, and analyzing the provenance of scripts. In this article, we survey the state of the art in provenance for scripts. We have identified the approaches by following an exhaustive protocol of forward and backward literature snowballing. Based on a detailed study, we propose a taxonomy and classify the approaches using this taxonomy.",
  note      = "Funding Information: This work is supported by CAPES, CNPq, FAPERJ, Moore-Sloan Data Science Environment at NYU, AT\&T, DARPA, and NSF. J. F. Pimentel is a CAPES scholarship holder (PDSE/Process No. 88881.131563/2016-01). J. Freire is partially funded by the Moore-Sloan Data Science Environment at NYU, AT\&T, DARPA, NSF Awards No. CNS-1229185, No. CNS-1405927, and No. CCF-1533564. Authors{\textquoteright} addresses: J. F. Pimentel, Universidade Federal Fluminense, Av. Gal. Milton Tavares de Souza, s/n. Niter{\'o}i, RJ, Brazil; email: jpimentel@ic.uff.br; J. Freire, New York University, 2 MetroTech Center, Brooklyn, New York, NY, United States of America; email: juliana.freire@nyu.edu; L. Murta, Universidade Federal Fluminense; email: leomurta@ic.uff.br; V. Braganholo, Universidade Federal Fluminense; email: vanessa@ic.uff.br. Permission to make digital or hard copies of all or part of this work for personal or classroom use is granted without fee provided that copies are not made or distributed for profit or commercial advantage and that copies bear this notice and the full citation on the first page. Copyrights for components of this work owned by others than ACM must be honored. Abstracting with credit is permitted. To copy otherwise, or republish, to post on servers or to redistribute to lists, requires prior specific permission and/or a fee. Request permissions from permissions@acm.org. {\textcopyright} 2019 Association for Computing Machinery. 0360-0300/2019/06-ART47 \$15.00 https://doi.org/10.1145/3311955 Publisher Copyright: {\textcopyright} 2019 Association for Computing Machinery. All rights reserved.",
}