BibTeX
% Workshop paper from PDSW 2021 on using the IO500 benchmark to identify system faults.
% The conference and venue fields describe the event; classic BibTeX styles skip field names they do not know.
@inproceedings{USFIUIBLPL21,
  author     = {Liem, Radita and Povaliaiev, Dmytro and Lofstead, Jay and Kunkel, Julian and Terboven, Christian},
  title      = {User-Centric System Fault Identification Using {IO500} Benchmark},
  booktitle  = {2021 {IEEE/ACM} Sixth International Parallel Data Systems Workshop ({PDSW})},
  year       = {2021},
  month      = dec,
  publisher  = {IEEE},
  pages      = {35--40},
  conference = {International Parallel Data Systems Workshop (PDSW)},
  venue      = {St. Louis},
  doi        = {10.1109/PDSW54622.2021.00011},
  url        = {http://www.pdsw.org/pdsw21/index.shtml},
  abstract   = {I/O performance in a multi-user environment is difficult to predict. Users do not know what I/O performance to expect when running and tuning applications. We propose to use the IO500 benchmark as a way to guide user expectations on their application's performance and to aid identifying root causes of their I/O problems that might come from the system. Our experiments describe how we manage user expectation with IO500 and provide a mechanism for system fault identification. This work also provides us with information of the tail latency problem that needs to be addressed and granular information about the impact of I/O technique choices (POSIX and MPI-IO).},
}