Example usage of org.apache.gora.store.impl.DataStoreBase in the Apache Gora project: the testSparkWordCount method of the MapReduceTestUtils class.
/**
 * Runs the Spark word-count job over generated web-page data and verifies that
 * the token counts written to {@code outStore} match counts computed directly
 * from {@link WebPageDataCreator#CONTENTS}.
 *
 * @param conf     Hadoop configuration injected into both datastores
 * @param inStore  datastore holding the input {@code WebPage} records
 * @param outStore datastore receiving the resulting {@code TokenDatum} counts
 * @throws Exception if data creation, the Spark job, or an assertion fails
 */
public static void testSparkWordCount(Configuration conf, DataStore<String, WebPage> inStore, DataStore<String, TokenDatum> outStore) throws Exception {
  // Datastore now has to be a Hadoop based datastore
  ((DataStoreBase<String, WebPage>) inStore).setConf(conf);
  ((DataStoreBase<String, TokenDatum>) outStore).setConf(conf);
  // create input
  WebPageDataCreator.createWebPageData(inStore);
  // run Spark
  SparkWordCount wordCount = new SparkWordCount();
  wordCount.wordCount(inStore, outStore);
  // Compute the expected counts locally. Map.merge replaces the verbose
  // null-check-then-increment pattern and avoids redundant Integer boxing.
  HashMap<String, Integer> actualCounts = new HashMap<>();
  for (String content : WebPageDataCreator.CONTENTS) {
    if (content != null) {
      for (String token : content.split(" ")) {
        actualCounts.merge(token, 1, Integer::sum);
      }
    }
  }
  // Every locally computed count must match what the job persisted.
  for (Map.Entry<String, Integer> entry : actualCounts.entrySet()) {
    assertTokenCount(outStore, entry.getKey(), entry.getValue());
  }
}
Example usage of org.apache.gora.store.impl.DataStoreBase in the Apache Gora project: the testMapReduceSerialization method of the MapReduceTestUtils class.
/**
 * Round-trips a single {@code WebPage} through the MapReduceSerialization job
 * and compares every record found in the output store against an expected page.
 *
 * @param conf     Hadoop configuration injected into both datastores
 * @param inStore  datastore the input page is written to
 * @param outStore datastore the job writes its output to
 * @throws Exception if the job or any store operation fails
 */
public static void testMapReduceSerialization(Configuration conf, DataStore<String, WebPage> inStore, DataStore<String, WebPage> outStore) throws Exception {
  // Datastore now has to be a Hadoop based datastore
  ((DataStoreBase<String, WebPage>) inStore).setConf(conf);
  ((DataStoreBase<String, WebPage>) outStore).setConf(conf);
  // create input
  WebPage page = WebPage.newBuilder().build();
  page.setUrl("TestURL");
  List<CharSequence> content = new ArrayList<>();
  content.add("parsed1");
  content.add("parsed2");
  page.setParsedContent(content);
  page.setContent(ByteBuffer.wrap("content".getBytes(Charset.defaultCharset())));
  inStore.put("key1", page);
  inStore.flush();
  // Expected output page. NOTE(review): the expected url is "hola" while the
  // input url is "TestURL" — presumably the job rewrites the url; confirm
  // against MapReduceSerialization's mapper before relying on this.
  WebPage expectedPage = WebPage.newBuilder().build();
  expectedPage.setUrl("hola");
  List<CharSequence> expectedContent = new ArrayList<>();
  expectedContent.add("parsed1");
  expectedContent.add("parsed2");
  expectedPage.setParsedContent(expectedContent);
  expectedPage.setContent(ByteBuffer.wrap("content".getBytes(Charset.defaultCharset())));
  // run the job
  MapReduceSerialization mapReduceSerialization = new MapReduceSerialization(conf);
  mapReduceSerialization.mapReduceSerialization(inStore, outStore);
  // Scan everything the job wrote and compare each record to the expectation.
  // NOTE(review): if the output store is empty this loop never executes and
  // the test passes vacuously — consider asserting at least one result.
  Query<String, WebPage> outputQuery = outStore.newQuery();
  Result<String, WebPage> serializationResult = outStore.execute(outputQuery);
  while (serializationResult.next()) {
    assertEquals(expectedPage, serializationResult.get());
  }
}
Example usage of org.apache.gora.store.impl.DataStoreBase in the Apache Gora project: the testFlinkWordCount method of the MapReduceTestUtils class.
/**
 * Runs the Flink word-count job over generated web-page data and verifies that
 * the token counts written to {@code outStore} match counts computed directly
 * from {@link WebPageDataCreator#CONTENTS}.
 *
 * @param conf     Hadoop configuration injected into both datastores
 * @param inStore  datastore holding the input {@code WebPage} records
 * @param outStore datastore receiving the resulting {@code TokenDatum} counts
 * @throws Exception if data creation, the Flink job, or an assertion fails
 */
public static void testFlinkWordCount(Configuration conf, DataStore<String, WebPage> inStore, DataStore<String, TokenDatum> outStore) throws Exception {
  // Datastore now has to be a Hadoop based datastore
  ((DataStoreBase<String, WebPage>) inStore).setConf(conf);
  ((DataStoreBase<String, TokenDatum>) outStore).setConf(conf);
  // create input
  WebPageDataCreator.createWebPageData(inStore);
  // run Flink Job
  FlinkWordCount flinkWordCount = new FlinkWordCount();
  flinkWordCount.wordCount(inStore, outStore, conf);
  // Compute the expected counts locally. Map.merge replaces the verbose
  // null-check-then-increment pattern and avoids redundant Integer boxing.
  HashMap<String, Integer> actualCounts = new HashMap<>();
  for (String content : WebPageDataCreator.CONTENTS) {
    if (content != null) {
      for (String token : content.split(" ")) {
        actualCounts.merge(token, 1, Integer::sum);
      }
    }
  }
  // Every locally computed count must match what the job persisted.
  for (Map.Entry<String, Integer> entry : actualCounts.entrySet()) {
    assertTokenCount(outStore, entry.getKey(), entry.getValue());
  }
}
Example usage of org.apache.gora.store.impl.DataStoreBase in the Apache Gora project: the testWordCount method of the MapReduceTestUtils class.
/**
 * Runs the MapReduce word-count job over generated web-page data and verifies
 * that the token counts written to {@code outStore} match counts computed
 * directly from {@link WebPageDataCreator#CONTENTS}.
 *
 * @param conf     Hadoop configuration injected into both datastores
 * @param inStore  datastore holding the input {@code WebPage} records
 * @param outStore datastore receiving the resulting {@code TokenDatum} counts
 * @throws Exception if data creation, the job, or an assertion fails
 */
public static void testWordCount(Configuration conf, DataStore<String, WebPage> inStore, DataStore<String, TokenDatum> outStore) throws Exception {
  // Datastore now has to be a Hadoop based datastore
  ((DataStoreBase<String, WebPage>) inStore).setConf(conf);
  ((DataStoreBase<String, TokenDatum>) outStore).setConf(conf);
  // create input
  WebPageDataCreator.createWebPageData(inStore);
  // run the job
  WordCount wordCount = new WordCount(conf);
  wordCount.wordCount(inStore, outStore);
  // Compute the expected counts locally. Map.merge replaces the verbose
  // null-check-then-increment pattern and avoids redundant Integer boxing.
  HashMap<String, Integer> actualCounts = new HashMap<>();
  for (String content : WebPageDataCreator.CONTENTS) {
    if (content != null) {
      for (String token : content.split(" ")) {
        actualCounts.merge(token, 1, Integer::sum);
      }
    }
  }
  // Every locally computed count must match what the job persisted.
  for (Map.Entry<String, Integer> entry : actualCounts.entrySet()) {
    assertTokenCount(outStore, entry.getKey(), entry.getValue());
  }
}
Aggregations