% Workshop paper from DaMoN 2019 (held with ACM SIGMOD/PODS 2019).
% The citation key is the exporter-generated identifier; it is kept unchanged
% so that existing \cite commands continue to resolve.
% Acronyms in title/booktitle are brace-protected against style recasing, and
% the search distance in the abstract is written as LaTeX math (epsilon).
@inproceedings{15e5fa5b2cfb478e949f3b3ba8f891ec,
  title     = {{GPU}-accelerated similarity self-join for multi-dimensional data},
  abstract  = {The similarity self-join finds all objects in a dataset that are within a search distance, $\epsilon$, of each other. As such, the self-join is a building block of many algorithms. In high dimensions, indexing structures become increasingly ineffective at pruning the search, making the self-join challenging to compute efficiently. We advance a GPU-accelerated self-join algorithm targeted towards high dimensional data. The massive parallelism afforded by the GPU and high aggregate memory bandwidth makes the architecture well-suited for data-intensive workloads. We leverage a grid-based GPU-tailored index to perform range queries, and propose the following optimizations: (i) a trade-off between candidate set filtering and index search overhead by exploiting properties of the index; (ii) reordering the data based on variance in each dimension to improve the filtering power of the index; and (iii) a pruning method for reducing the number of expensive distance calculations. Our algorithm generally outperforms a parallel CPU state-of-the-art approach.},
  keywords  = {GPGPU, High-dimensional data, In-memory database, Index structure, Query optimization, Self-join},
  author    = {Gowanlock, Michael and Karsin, Ben},
  note      = {Publisher Copyright: {\textcopyright} 2019 ACM.; 15th International Workshop on Data Management on New Hardware, DaMoN 2019, Held with ACM SIGMOD/PODS 2019 ; Conference date: 01-07-2019},
  year      = {2019},
  month     = jul,
  day       = {1},
  doi       = {10.1145/3329785.3329920},
  language  = {English (US)},
  series    = {Proceedings of the ACM SIGACT-SIGMOD-SIGART Symposium on Principles of Database Systems},
  publisher = {Association for Computing Machinery},
  booktitle = {15th International Workshop on Data Management on New Hardware, {DaMoN} 2019},
}