BibTeX

% Entry CNBNTSVMTP18: master's thesis, Universitaet Hamburg, December 2018.
% - month uses the predefined dec macro so each style formats/localises it.
% - Only proper nouns in the title are brace-protected; casing is left to the style.
% - advisors is not a standard BibTeX field; standard styles ignore it.
% NOTE(review): howpublished wraps wiki-style media-link markup in a url command,
%   so it is not a resolvable URL. It is kept verbatim because the real address is
%   not visible here -- confirm it and move it to a url field.
@mastersthesis{CNBNTSVMTP18,
	author	 = {Lübbering, Max},
	title	 = {Comparing Naïve {Bayesian} Networks to Support Vector Machines to Predict Stock Prices Based on Press Release Sentiment},
	advisors	 = {Julian Kunkel and Patricio Farrell},
	year	 = {2018},
	month	 = dec,
	school	 = {Universität Hamburg},
	howpublished	 = {{Online \url{{{:research:theses:max_l__bbering_comparing_na_ve_bayesian_networks_to_support_vector_machines_to_predict_stock_prices_based_on_press_release_sentiment.pdf|Thesis}}}}},
	type	 = {Master's Thesis},
	abstract	 = {In this master thesis, we trained and evaluated two models, namely a naïve Bayesian network and a support vector machine, to predict stock price trends based on count vectorized press releases published in the after hours. Additionally, we introduced a trivial classifier to put the results of the other two into perspective. The stock price trend prediction was solved in two steps: In the first step, we built for each of the three algorithms a classifier in order to predict the impact of a press release. For training and evaluation, every press release was assigned the label impact, if the stock price had changed at least 8\% from the exchange closing time to exchange opening time plus an offset of 5 hours and no impact, otherwise. In the second step, all press releases with no impact were discarded. The remaining ones were reassigned to the classes sentiment and no sentiment based on the direction of their impact. Afterwards, we built a model for each of the three algorithms to predict the sentiment of press releases. After applying grid search on an extensive grid, the impact models of the naïve Bayesian network, the support vector machine and trivial classifier had an accuracy of 76\%, 78\% and 77\%, respectively. The balanced dataset contained 919 training samples and 231 samples in the holdout set. These high accuracies show that impact prediction is per se possible, even though count vectorization destroys all the semantics. The sentiment models of the naïve Bayesian network, the support vector machine and trivial classifier had an accuracy of 47\%, 53\% and 53\%, respectively. The balanced training set contained 426 samples and the holdout set 108. The weak results reveal that sentiment prediction is far more complex than impact prediction and cannot be captured by the word frequency in a document.
As part of our press release exploration, we demonstrated that over night press releases cause inefficiencies in the market for the entire next trading day. As a consequence, we provided an example that clearly contradicts with the theoretical efficient market hypothesis. If our models become more reliable, these inefficiencies can be exploited.},
}