author	 = {Raul Torres and Julian Kunkel and Manuel Dolz and Thomas Ludwig},
	title	 = {{A Novel String Representation and Kernel Function for the Comparison of I/O Access Patterns}},
	year	 = {2017},
	booktitle	 = {{International Conference on Parallel Computing Technologies}},
	editor	 = {Victor Malyshkin},
	publisher	 = {Springer},
	series	 = {Lecture Notes in Computer Science},
	number	 = {10421},
	pages	 = {500--512},
	conference	 = {PaCT},
	location	 = {Nizhni Novgorod, Russia},
	isbn	 = {978-3-319-62932-2},
	doi	 = {},
	abstract	 = {Parallel I/O access patterns act as fingerprints of a parallel program. In order to extract meaningful information from these patterns, they have to be represented appropriately. Due to the fact that string objects can be easily compared using Kernel Methods, a conversion to a weighted string representation is proposed in this paper, together with a novel string kernel function called Kast Spectrum Kernel. The similarity matrices, obtained after applying the mentioned kernel over a set of examples from a real application, were analyzed using Kernel Principal Component Analysis (Kernel PCA) and Hierarchical Clustering. The evaluation showed that 2 out of 4 I/O access pattern groups were completely identified, while the other 2 conformed a single cluster due to the intrinsic similarity of their members. The proposed strategy can be promisingly applied to other similarity problems involving tree-like structured data.},