author	 = {Tien Duc Dinh},
	title	 = {Evaluierung von {Hadoop}},
	advisors	 = {Julian Kunkel and Olga Mordvinova},
	year	 = {2009},
	month	 = dec,
	institution	 = {{Ruprecht-Karls-Universität Heidelberg}},
	howpublished	 = {Online},
	abstract	 = {Hadoop is an open-source, Java-based programming framework that supports the processing of large data sets in a distributed computing environment. It was inspired by Google MapReduce and Google File System (GFS) papers. Hadoop is now a top level Apache project, being built and used by a community of contributors from all over the world, since it is easy to install, configure and can be run on many platforms supporting Java. The Hadoop framework is currently used by major players including Google, Yahoo and IBM, largely for applications involving search engines and advertising. The major contributions of this work are a Hadoop performance evaluation on writing/reading and full understanding about the MapReduce concept as well as the distribution of processes. Section 3 describes the HDFS and MapReduce concept with a small example. Section 4 describes the installation and configuration of a HDFS cluster. Section 5 describes the test preparation for our cluster environment. Section 6 has performance measurements based on many test cases and a performance comparison between the HDFS and local file system on the testing cluster.},
	url	 = {},