@inproceedings{90c0f4722fc34894a50229956824d3f3,
title = "Optimal distance estimation between compressed data series",
abstract = "Most real-world data contain repeated or periodic pat- Terns. This suggests that they can be effectively represented and compressed using only a few coefficients of an appro- priate complete orthogonal basis (e.g., Fourier, Wavelets, Karhunen-L{\`o}eve expansion or Principal Components). In the face of ever increasing data repositories and given that most mining operations are distance-based, it is vital to perform accurate distance estimation directly on the compressed data. However, distance estimation when the data are represented using different sets of coefficients is still a largely unexplored area. This work studies the optimiza- Tion problems related to obtaining the tightest lower/upper bound on the distance based on the available information. In particular, we consider the problem where a distinct set of coefficients is maintained for each sequence, and the L2- norm of the compression error is recorded. We establish the properties of optimal solutions, and leverage the theoretical analysis to develop a fast algorithm to obtain an exact so- lution to the problem. The suggested solution provides the tightest provable estimation of the L2-norm or the correla- Tion, and executes at least two order of magnitudes faster than a numerical solution based on convex optimization. The contributions of this work extend beyond the purview of periodic data, as our methods are applicable to any sequen- Tial or high-dimensional data as well as to any orthogonal data transformation used for the underlying data compres- sion scheme.",
keywords = "Compression, Distance estimation, Fourier, KKT conditions, Orthogonal bases, Tine series, Water-filling algorithm, Wavelets",
author = "Freris, {Nikolaos M.} and Michail Vlachos and Kozat, {Suleyman S.}",
year = "2012",
doi = "10.1137/1.9781611972825.30",
language = "English (US)",
isbn = "9781611972320",
series = "Proceedings of the 12th SIAM International Conference on Data Mining, SDM 2012",
publisher = "Society for Industrial and Applied Mathematics Publications",
pages = "343--354",
booktitle = "Proceedings of the 12th SIAM International Conference on Data Mining, SDM 2012",
note = "12th SIAM International Conference on Data Mining, SDM 2012 ; Conference date: 26-04-2012 Through 28-04-2012",
}