BibTeX

% Workshop paper in the ISC High Performance 2020 workshop proceedings (LNCS volume 12151).
@inproceedings{TIOTBWCJIP20,
	author	 = {Betke, Eugen and Kunkel, Julian},
	title	 = {The Importance of Temporal Behavior when Classifying Job {IO} Patterns Using Machine Learning Techniques},
	year	 = {2020},
	month	 = jun,
	booktitle	 = {High Performance Computing: {ISC} High Performance 2020 International Workshops, Revised Selected Papers},
	editor	 = {Jagode, Heike and Anzt, Hartwig and Juckeland, Guido and Ltaief, Hatem},
	publisher	 = {Springer},
	series	 = {Lecture Notes in Computer Science},
	volume	 = {12151},
	pages	 = {191--205},
	conference	 = {ISC HPC},
	location	 = {Frankfurt, Germany},
	isbn	 = {978-3-030-59851-8},
	issn	 = {1611-3349},
	doi	 = {10.1007/978-3-030-59851-8_12},
	abstract	 = {Every day, supercomputers execute 1000s of jobs with different characteristics. Data centers monitor the behavior of jobs to support the users and improve the infrastructure, for instance, by optimizing jobs or by determining guidelines for the next procurement. The classification of jobs into groups that express similar run-time behavior aids this analysis as it reduces the number of representative jobs to look into. It is state of the practice to investigate job similarity by looking into job profiles that summarize the dynamics of job execution into one dimension of statistics and neglect the temporal behavior. In this work, we utilize machine learning techniques to cluster and classify parallel jobs based on the similarity in their temporal IO behavior to highlight the importance of temporal behavior when comparing jobs. Our contribution is the qualitative and quantitative evaluation of different IO characterizations and similarity measurements that work toward the development of a suitable clustering algorithm. We explore IO characteristics from monitoring data of one million parallel jobs and cluster them into groups of similar jobs. Therefore, the time series of various IO statistics is converted into features using different similarity metrics that customize the classification. We discuss conventional ML techniques that are applied to job profiles and contrast this with the analysis of time series data where we apply the Levenshtein distance as a distance metrics. While the employed Levenshtein algorithms aren’t yet optimal, the results suggest that temporal behavior is key to identify related pattern.},
}