Use of org.apache.gora.examples.mapreduce.WordCount in the Apache Gora project.
Shown below: the method testWordCount of the class MapReduceTestUtils.
/**
 * Fills {@code inStore} with the generated web pages, runs the {@code WordCount} job
 * from {@code inStore} into {@code outStore}, and then checks the count stored for
 * every token found in {@code WebPageDataCreator.CONTENTS}.
 *
 * @param conf configuration set on both stores and handed to the job
 * @param inStore input store; cast to {@code DataStoreBase}, so it must be one
 * @param outStore output store; cast to {@code DataStoreBase}, so it must be one
 * @throws Exception if store setup, data creation, the job, or an assertion fails
 */
public static void testWordCount(Configuration conf, DataStore<String, WebPage> inStore, DataStore<String, TokenDatum> outStore) throws Exception {
  // Both stores need the configuration (the datastore has to be Hadoop based).
  DataStoreBase<String, WebPage> configurableIn = (DataStoreBase<String, WebPage>) inStore;
  configurableIn.setConf(conf);
  DataStoreBase<String, TokenDatum> configurableOut = (DataStoreBase<String, TokenDatum>) outStore;
  configurableOut.setConf(conf);

  // Create the input data.
  WebPageDataCreator.createWebPageData(inStore);

  // Run the job.
  new WordCount(conf).wordCount(inStore, outStore);

  // Tally the expected count of each space-separated token in the fixture contents.
  Map<String, Integer> expectedCounts = new HashMap<String, Integer>();
  for (String text : WebPageDataCreator.CONTENTS) {
    if (text == null) {
      continue;
    }
    for (String word : text.split(" ")) {
      Integer seenSoFar = expectedCounts.get(word);
      expectedCounts.put(word, seenSoFar == null ? 1 : seenSoFar + 1);
    }
  }

  // Compare every expected tally against what the job wrote to the output store.
  for (Map.Entry<String, Integer> expected : expectedCounts.entrySet()) {
    assertTokenCount(outStore, expected.getKey(), expected.getValue());
  }
}
Aggregations