% IPDPS 2016 conference paper on variant-based parallelism for clustering space weather data.
% Conference details and the publisher copyright line are kept in dedicated fields
% (eventtitle / eventdate / copyright) instead of the printed `note` field.
@inproceedings{0ccb30120fd74bd3a67f00461eb8aa45,
  author     = {Gowanlock, Michael and Blair, David M. and Pankratius, Victor},
  title      = {Exploiting Variant-Based Parallelism for Data Mining of Space Weather Phenomena},
  booktitle  = {Proceedings - 2016 {IEEE} 30th International Parallel and Distributed Processing Symposium, {IPDPS} 2016},
  eventtitle = {30th {IEEE} International Parallel and Distributed Processing Symposium, {IPDPS} 2016},
  eventdate  = {2016-05-23/2016-05-27},
  pages      = {760--769},
  publisher  = {Institute of Electrical and Electronics Engineers Inc.},
  year       = {2016},
  month      = jul,
  day        = {18},
  doi        = {10.1109/IPDPS.2016.10},
  language   = {English (US)},
  keywords   = {Computer-Aided Discovery, DB-SCAN, Data Mining, Parallel Clustering},
  copyright  = {{\textcopyright} 2016 IEEE},
  abstract   = {This paper studies a form of parallelism termed variant-based parallelism, which exploits commonalities and reuse among variant computations in order to improve multithreading scalability. The problem is motivated by space weather studies that aim to identify changes in the Earth's ionosphere caused by auroral activity, tsunamis, and earthquakes. Today it is common to execute cluster algorithm variants with different parameters in order to determine which ones best explain phenomena in empirical data. We propose a novel approach and a set of optimizations to maximize throughput in such clustering algorithms. This is achieved by executing multiple clustering algorithm variants in parallel and developing efficient approaches to concurrently cluster data and maximize the reuse of results from completed variants. We present evaluations on real-world space weather datasets with up to 5 million ionospheric total electron content data points as well as synthetic datasets with up to a million data points. Results show a 1101\% performance improvement due to indexing tailored for variant-based clustering, and a 2209\% performance improvement when applying all of our proposed optimizations. Our optimizations enable new approaches in computer-aided discovery and could enable the short run times required for early warning systems for natural hazards.},
}