@comment{
  Conference paper entry. The proceedings title is stored in booktitle only:
  no series field is given, because a series value identical to booktitle is
  printed a second time by the standard styles when volume and number are
  absent. Names use the "Last, First" form; acronyms in booktitle are
  brace-protected so case-changing styles cannot alter them.
}
@inproceedings{d1e84ed8335d4af5a5a6c0a4bad89e27,
  author    = {Jiang, Wei and Rodriguez, Juan and Suel, Torsten},
  title     = {Improved methods for static index pruning},
  booktitle = {Proceedings - 2016 {IEEE} International Conference on {Big Data}, {Big Data} 2016},
  editor    = {Ak, Ronay and Karypis, George and Xia, Yinglong and Hu, {Xiaohua Tony} and Yu, {Philip S.} and Joshi, James and Ungar, Lyle and Liu, Ling and Sato, Aki-Hiro and Suzumura, Toyotaro and Rachuri, Sudarsan and Govindaraju, Rama and Xu, Weijia},
  pages     = {686--695},
  publisher = {Institute of Electrical and Electronics Engineers Inc.},
  year      = {2016},
  doi       = {10.1109/BigData.2016.7840661},
  keywords  = {index, search, static pruning},
  language  = {English (US)},
  note      = {Publisher Copyright: {\textcopyright} 2016 IEEE.; 4th IEEE International Conference on Big Data, Big Data 2016 ; Conference date: 05-12-2016 Through 08-12-2016},
  abstract  = {Static Index Pruning is a performance optimization technique for search engines that attempts to identify and remove index postings that are unlikely to lead to top results for typical user queries. The goal is to obtain a much smaller inverted index that can quickly return results that are (almost) as good as those for the unpruned index. We make two contributions: First, we improve on previous results for pruned index size through a careful analysis of both document and query distribution characteristics. We derive an initial model based on unigram probabilities that obtains gains over previous work in some cases, and a bigram-based approach that achieves some additional improvements. We also devise a simple method for generating query logs in the absence of real-life queries, useful in modeling top results. Our second contribution is to explore, and compare to previously proposed approaches that perform pruning based on how often documents or postings appeared in top positions in the past.},
}