BibTeX

% Workshop paper published in the proceedings "Extreme Data Workshop 2018"
% (Schriften des Forschungszentrums Jülich IAS Series, no. 40), pp. 31--36.
% Names are stored as "Last, First"; month uses the standard three-letter macro.
% "conference" is not a standard field and is ignored by standard styles.
@inproceedings{BDBIWACSLK19,
	author	 = {Lawrence, Bryan N. and Kunkel, Julian and Churchill, Jonathan and Massey, Neil and Kershaw, Philip and Pritchard, Matt},
	title	 = {Beating data bottlenecks in weather and climate science},
	year	 = {2019},
	month	 = jan,
	booktitle	 = {{Extreme Data Workshop 2018}},
	editor	 = {Schultz, Martin and Pleiter, Dirk and Bauer, Peter},
	publisher	 = {Forschungszentrum Jülich},
	series	 = {Schriften des Forschungszentrums Jülich IAS Series},
	number	 = {40},
	pages	 = {31--36},
	conference	 = {Extreme Data Workshop},
	location	 = {Jülich, Germany},
	isbn	 = {978-3-95806-392-1},
	issn	 = {1868-8489},
	abstract	 = {The data volumes produced by simulation and observation are large, and growing rapidly. In the case of simulation, plans for future modelling programmes require complicated orchestration of data, and anticipate large user communities. “Download and work at home” is no longer practical for many use-cases. In the case of simulation, these issues are exacerbated by users who want simulation data at grid point resolution instead of at the resolution resolved by the mathematics, and/or who design numerical experiments without knowledge of the storage costs. There is no simple solution to these problems: user education, smarter compression, and better use of tiered storage and smarter workflows are all necessary – but far from sufficient. In this paper, we introduce two approaches to addressing (some) of these data bottlenecks: dedicated data analysis platforms, and smarter storage software. We provide a brief introduction to the JASMIN data storage and analysis facility, and some of the storage tools and approaches being developed by the ESiWACE project. In doing so, we describe some of our observations of real world data handling problems at scale, from the generic performance of file systems to the difficulty of optimising both volume stored and performance of workflows. We use these examples to motivate the two-pronged approach of smarter hardware and smarter software – but recognise that data bottlenecks may yet limit the aspirations of our science.},
	url	 = {https://pdfs.semanticscholar.org/9881/ed9d9e16cb70fba9456fb0905bf28c450ce0.pdf#page=38},
}