author	 = {Julian Kunkel and Nabeeh Jumah and Anastasiia Novikova and Thomas Ludwig and Hisashi Yashiro and Naoya Maruyama and Mohamed Wahib and John Thuburn},
	title	 = {{AIMES: Advanced Computation and I/O Methods for Earth-System Simulations}},
	year	 = {2020},
	month	 = {07},
	editor	 = {Hans-Joachim Bungartz and Severin Reiz and Benjamin Uekermann and Philipp Neumann and Wolfgang E. Nagel},
	publisher	 = {Springer International Publishing},
	journal	 = {Lecture Notes in Computer Science},
	series	 = {Volume 7, Number 2},
	pages	 = {61-102},
	isbn	 = {978-3-030-47956-5},
	issn	 = {2197-7100},
	doi	 = {},
	abstract	 = {Dealing with extreme scale Earth-system models is challenging from the computer science perspective, as the required computing power and storage capacity are steadily increasing. Scientists perform runs with growing resolution or aggregate results from many similar smaller-scale runs with slightly different initial conditions (the so-called ensemble runs). In the fifth Coupled Model Intercomparison Project (CMIP5), the produced datasets require more than three Petabytes of storage and the compute and storage requirements are increasing significantly for CMIP6. Climate scientists across the globe are developing next-generation models based on improved numerical formulation leading to grids that are discretized in alternative forms such as an icosahedral (geodesic) grid. The developers of these models face similar problems in scaling, maintaining and optimizing code. Performance portability and the maintainability of code are key concerns of scientists as, compared to industry projects, model code is continuously revised and extended to incorporate further levels of detail. This leads to a rapidly growing code base that is rarely refactored. However, code modernization is important to maintain productivity of the scientist working with the code and for utilizing performance provided by modern and future architectures. The need for performance optimization is motivated by the evolution of the parallel architecture landscape from homogeneous flat machines to heterogeneous combinations of processors with deep memory hierarchy. Notably, the rise of many-core, throughput-oriented accelerators, such as GPUs, requires non-trivial code changes at minimum and, even worse, may necessitate a substantial rewrite of the existing codebase. At the same time, the code complexity increases the difficulty for computer scientists and vendors to understand and optimize the code for a given system. Storing the products of climate predictions requires a large storage and archival system which is expensive. Often, scientists restrict the number of scientific variables and write interval to keep the costs balanced. Compression algorithms can reduce the costs significantly but can also increase the scientific yield of simulation runs. In the AIMES project, we addressed the key issues of programmability, computational efficiency and I/O limitations that are common in next-generation icosahedral earth-system models. The project focused on the separation of concerns between domain scientist, computational scientists, and computer scientists.},