BibTeX

% Workshop paper in the ISC High Performance 2017 workshop proceedings (Springer LNCS 10524).
% Notes for maintainers:
%  - doi holds the bare identifier (no resolver prefix); styles add the resolver themselves.
%    NOTE(review): the value looks like the DOI of the whole volume, not of the chapter -- confirm.
%  - Acronyms and proper nouns are brace-protected individually so styles may recase the rest.
%  - The caret in the workshop name P^3MA is written as \textasciicircum{} because a bare caret
%    is only valid in math mode.
%  - conference, location and abstract are non-standard here (location is the event venue);
%    standard bibliography styles ignore or reinterpret them.
@inproceedings{RIOHAWSEGA17,
	author	 = {Betke, Eugen and Kunkel, Julian},
	title	 = {Real-Time {I/O}-Monitoring of {HPC} Applications with {SIOX}, {Elasticsearch}, {Grafana} and {FUSE}},
	year	 = {2017},
	booktitle	 = {High Performance Computing: {ISC} High Performance 2017 International Workshops, {DRBSD}, {ExaComm}, {HCPM}, {HPC-IODC}, {IWOPH}, {IXPUG}, {P\textasciicircum{}3MA}, {VHPC}, Visualization at Scale, {WOPSSS}},
	editor	 = {Kunkel, Julian and Yokota, Rio and Taufer, Michaela and Shalf, John},
	publisher	 = {Springer},
	series	 = {Lecture Notes in Computer Science},
	volume	 = {10524},
	pages	 = {158--170},
	conference	 = {ISC High Performance},
	location	 = {Frankfurt, Germany},
	isbn	 = {978-3-319-67629-6},
	doi	 = {10.1007/978-3-319-67630-2},
	abstract	 = {The starting point for our work was a demand for an overview of application’s I/O behavior, that provides information about the usage of our HPC “Mistral”. We suspect that some applications are running using inefficient I/O patterns, and probably, are wasting a significant amount of machine hours. To tackle the problem, we focus on detection of poor I/O performance, identification of these applications, and description of I/O behavior. Instead of gathering I/O statistics from global system variables, like many other monitoring tools do, in our approach statistics come directly from I/O interfaces POSIX, MPI, HDF5 and NetCDF. For interception of I/O calls we use an instrumentation library that is dynamically linked with LD_PRELOAD at program startup. The HPC on-line monitoring framework is built on top of open source software: Grafana, SIOX, Elasticsearch and FUSE. This framework collects I/O statistics from applications and mount points. The latter is used for non-intrusive monitoring of virtual memory allocated with mmap(), i.e., no code adaption is necessary. The framework is evaluated showing its effectiveness and critically discussed.},
}