@inproceedings{8a7754e5aa204732b0c8c2e38bf182a2,
title = "Finding frequent items in data streams",
abstract = "We present a 1-pass algorithm for estimating the most frequent items in a data stream using very limited storage space. Our method relies on a novel data structure called a count sketch, which allows us to estimate the frequencies of all the items in the stream. Our algorithm achieves better space bounds than the previous best known algorithms for this problem for many natural distributions on the item frequencies. In addition, our algorithm leads directly to a 2-pass algorithm for the problem of estimating the items with the largest (absolute) change in frequency between two data streams. To our knowledge, this problem has not been previously studied in the literature.",
author = "Moses Charikar and Kevin Chen and Martin Farach-Colton",
year = "2002",
doi = "10.1007/3-540-45465-9_59",
language = "English (US)",
isbn = "3540438645",
series = "Lecture Notes in Computer Science (including subseries Lecture Notes in Artificial Intelligence and Lecture Notes in Bioinformatics)",
publisher = "Springer Verlag",
pages = "693--703",
editor = "Peter Widmayer and Stephan Eidenbenz and Francisco Triguero and Rafael Morales and Ricardo Conejo and Matthew Hennessy",
booktitle = "Automata, Languages and Programming - 29th International Colloquium, ICALP 2002, Proceedings",
note = "29th International Colloquium on Automata, Languages, and Programming, ICALP 2002 ; Conference date: 08-07-2002 Through 13-07-2002",
}