% Conference paper (ICDE 2020) describing G-thinker, a distributed framework
% for subgraph mining. Cleaned-up repository export:
%   - author names are all in "Last, First" form;
%   - event, funding, and copyright details that the exporter had packed into
%     the note field now live in their own fields (eventtitle/eventdate are
%     biblatex fields; funding and copyright are unknown to standard styles,
%     which ignore them, and are kept for reference only).
% NOTE(review): the surname split "Rahman Chowdhury" is carried over from the
% export -- confirm against the author's preferred form.
@inproceedings{1187fde135014b8893a64a16aec8fcb9,
  author     = {Yan, Da and Guo, Guimu and Rahman Chowdhury, Md Mashiur and {\"O}zsu, M. Tamer and Ku, Wei-Shinn and Lui, John C. S.},
  title      = {{G-thinker}: A distributed framework for mining subgraphs in a big graph},
  booktitle  = {Proceedings - 2020 {IEEE} 36th International Conference on Data Engineering, {ICDE} 2020},
  series     = {Proceedings - International Conference on Data Engineering},
  publisher  = {IEEE Computer Society},
  address    = {United States},
  year       = {2020},
  month      = apr,
  pages      = {1369--1380},
  doi        = {10.1109/ICDE48307.2020.00122},
  language   = {american},
  eventtitle = {36th {IEEE} International Conference on Data Engineering, {ICDE} 2020},
  eventdate  = {2020-04-20/2020-04-24},
  funding    = {This work is partially supported by NSF OAC-1755464 (CRII), South Big Data Hub Azure Research Award, NSF IIS-1618669 (III) and ACI-1642133 (CICI), NSERC of Canada, and Hong Kong GRF 14201819.},
  copyright  = {{\textcopyright} 2020 IEEE},
  abstract   = {Mining from a big graph those subgraphs that satisfy certain conditions is useful in many applications such as community detection and subgraph matching. These problems have a high time complexity, but existing systems to scale them are all IO-bound in execution. We propose the first truly CPU-bound distributed framework called G-thinker that adopts a user-friendly subgraph-centric vertex-pulling API for writing distributed subgraph mining algorithms. To utilize all CPU cores of a cluster, G-thinker features (1) a highly-concurrent vertex cache for parallel task access and (2) a lightweight task scheduling approach that ensures high task throughput. These designs well overlap communication with computation to minimize the CPU idle time. Extensive experiments demonstrate that G-thinker achieves orders of magnitude speedup compared even with the fastest existing subgraph-centric system, and it scales well to much larger and denser real network data. G-thinker is open-sourced at http://bit.ly/gthinker with detailed documentation.},
}