Search in sources :

Example 1 with DataStoreBase

use of org.apache.gora.store.impl.DataStoreBase in project gora by apache.

the class MapReduceTestUtils method testSparkWordCount.

/**
 * Runs the Spark word-count example over the web-page fixture data and checks
 * the token counts found in {@code outStore}.
 *
 * @param conf     configuration set on both stores before anything else happens
 * @param inStore  store that receives the fixture pages and is handed to the job as input
 * @param outStore store handed to the job as output and then checked token by token
 * @throws Exception propagated from the stores, the job, or the assertions
 */
public static void testSparkWordCount(Configuration conf, DataStore<String, WebPage> inStore, DataStore<String, TokenDatum> outStore) throws Exception {
    // The stores are cast to DataStoreBase because setConf is called through that type;
    // a store that is not a DataStoreBase fails here with a ClassCastException.
    DataStoreBase<String, WebPage> configurableIn = (DataStoreBase<String, WebPage>) inStore;
    DataStoreBase<String, TokenDatum> configurableOut = (DataStoreBase<String, TokenDatum>) outStore;
    configurableIn.setConf(conf);
    configurableOut.setConf(conf);

    // Load the fixture pages into the input store.
    WebPageDataCreator.createWebPageData(inStore);

    // Execute the Spark job.
    new SparkWordCount().wordCount(inStore, outStore);

    // Count tokens locally from the same fixture contents; these are the expected values.
    Map<String, Integer> expectedCounts = new HashMap<>();
    for (String text : WebPageDataCreator.CONTENTS) {
        if (text == null) {
            continue;
        }
        for (String word : text.split(" ")) {
            int seenSoFar = expectedCounts.containsKey(word) ? expectedCounts.get(word) : 0;
            expectedCounts.put(word, seenSoFar + 1);
        }
    }

    // Every locally counted token must match what the job stored.
    for (Map.Entry<String, Integer> expected : expectedCounts.entrySet()) {
        assertTokenCount(outStore, expected.getKey(), expected.getValue());
    }
}
Also used : SparkWordCount(org.apache.gora.examples.spark.SparkWordCount) HashMap(java.util.HashMap) DataStoreBase(org.apache.gora.store.impl.DataStoreBase) Map(java.util.Map)

Example 2 with DataStoreBase

use of org.apache.gora.store.impl.DataStoreBase in project gora by apache.

the class MapReduceTestUtils method testMapReduceSerialization.

/**
 * Stores one {@code WebPage} in {@code inStore}, runs the
 * {@code MapReduceSerialization} job from {@code inStore} to {@code outStore},
 * and checks that every record read back from {@code outStore} equals the
 * expected page.
 *
 * <p>Unlike a bare result loop, this method fails when the output store yields
 * no records at all, so an empty output cannot pass silently.
 *
 * @param conf     configuration set on both stores and passed to the job
 * @param inStore  store that receives the single input page under key {@code "key1"}
 * @param outStore store the job is given as output and that is queried afterwards
 * @throws Exception propagated from the stores, the job, or the query
 */
public static void testMapReduceSerialization(Configuration conf, DataStore<String, WebPage> inStore, DataStore<String, WebPage> outStore) throws Exception {
    // Datastore now has to be a Hadoop based datastore
    ((DataStoreBase<String, WebPage>) inStore).setConf(conf);
    ((DataStoreBase<String, WebPage>) outStore).setConf(conf);
    // create input
    WebPage page = WebPage.newBuilder().build();
    page.setUrl("TestURL");
    List<CharSequence> content = new ArrayList<>();
    content.add("parsed1");
    content.add("parsed2");
    page.setParsedContent(content);
    page.setContent(ByteBuffer.wrap("content".getBytes(Charset.defaultCharset())));
    inStore.put("key1", page);
    // flush so the job sees the page that was just put
    inStore.flush();
    // expected
    // NOTE(review): the expected URL ("hola") differs from the input URL ("TestURL");
    // presumably the job rewrites it - confirm against MapReduceSerialization.
    WebPage expectedPage = WebPage.newBuilder().build();
    expectedPage.setUrl("hola");
    List<CharSequence> expectedContent = new ArrayList<>();
    expectedContent.add("parsed1");
    expectedContent.add("parsed2");
    expectedPage.setParsedContent(expectedContent);
    expectedPage.setContent(ByteBuffer.wrap("content".getBytes(Charset.defaultCharset())));
    // run the job
    MapReduceSerialization mapReduceSerialization = new MapReduceSerialization(conf);
    mapReduceSerialization.mapReduceSerialization(inStore, outStore);
    Query<String, WebPage> outputQuery = outStore.newQuery();
    Result<String, WebPage> serializationResult = outStore.execute(outputQuery);
    // Track whether anything was verified: the loop body never runs on an empty result.
    boolean sawResult = false;
    while (serializationResult.next()) {
        sawResult = true;
        assertEquals(expectedPage, serializationResult.get());
    }
    if (!sawResult) {
        throw new AssertionError("MapReduceSerialization job produced no output records to verify");
    }
}
Also used : WebPage(org.apache.gora.examples.generated.WebPage) MapReduceSerialization(org.apache.gora.examples.mapreduce.MapReduceSerialization) ArrayList(java.util.ArrayList) DataStoreBase(org.apache.gora.store.impl.DataStoreBase)

Example 3 with DataStoreBase

use of org.apache.gora.store.impl.DataStoreBase in project gora by apache.

the class MapReduceTestUtils method testFlinkWordCount.

/**
 * Runs the Flink word-count example over the web-page fixture data and checks
 * the token counts found in {@code outStore}.
 *
 * @param conf     configuration set on both stores and also passed to the Flink job
 * @param inStore  store that receives the fixture pages and is handed to the job as input
 * @param outStore store handed to the job as output and then checked token by token
 * @throws Exception propagated from the stores, the job, or the assertions
 */
public static void testFlinkWordCount(Configuration conf, DataStore<String, WebPage> inStore, DataStore<String, TokenDatum> outStore) throws Exception {
    // The stores are cast to DataStoreBase because setConf is called through that type;
    // a store that is not a DataStoreBase fails here with a ClassCastException.
    DataStoreBase<String, WebPage> configurableIn = (DataStoreBase<String, WebPage>) inStore;
    DataStoreBase<String, TokenDatum> configurableOut = (DataStoreBase<String, TokenDatum>) outStore;
    configurableIn.setConf(conf);
    configurableOut.setConf(conf);

    // Load the fixture pages into the input store.
    WebPageDataCreator.createWebPageData(inStore);

    // Execute the Flink job.
    new FlinkWordCount().wordCount(inStore, outStore, conf);

    // Count tokens locally from the same fixture contents; these are the expected values.
    Map<String, Integer> expectedCounts = new HashMap<>();
    for (String text : WebPageDataCreator.CONTENTS) {
        if (text == null) {
            continue;
        }
        for (String word : text.split(" ")) {
            int seenSoFar = expectedCounts.containsKey(word) ? expectedCounts.get(word) : 0;
            expectedCounts.put(word, seenSoFar + 1);
        }
    }

    // Every locally counted token must match what the job stored.
    for (Map.Entry<String, Integer> expected : expectedCounts.entrySet()) {
        assertTokenCount(outStore, expected.getKey(), expected.getValue());
    }
}
Also used : HashMap(java.util.HashMap) FlinkWordCount(org.apache.gora.examples.flink.FlinkWordCount) DataStoreBase(org.apache.gora.store.impl.DataStoreBase) Map(java.util.Map)

Example 4 with DataStoreBase

use of org.apache.gora.store.impl.DataStoreBase in project gora by apache.

the class MapReduceTestUtils method testWordCount.

/**
 * Runs the {@code WordCount} example job over the web-page fixture data and
 * checks the token counts found in {@code outStore}.
 *
 * @param conf     configuration set on both stores and used to construct the job
 * @param inStore  store that receives the fixture pages and is handed to the job as input
 * @param outStore store handed to the job as output and then checked token by token
 * @throws Exception propagated from the stores, the job, or the assertions
 */
public static void testWordCount(Configuration conf, DataStore<String, WebPage> inStore, DataStore<String, TokenDatum> outStore) throws Exception {
    // The stores are cast to DataStoreBase because setConf is called through that type;
    // a store that is not a DataStoreBase fails here with a ClassCastException.
    DataStoreBase<String, WebPage> configurableIn = (DataStoreBase<String, WebPage>) inStore;
    DataStoreBase<String, TokenDatum> configurableOut = (DataStoreBase<String, TokenDatum>) outStore;
    configurableIn.setConf(conf);
    configurableOut.setConf(conf);

    // Load the fixture pages into the input store.
    WebPageDataCreator.createWebPageData(inStore);

    // Execute the job.
    new WordCount(conf).wordCount(inStore, outStore);

    // Count tokens locally from the same fixture contents; these are the expected values.
    Map<String, Integer> expectedCounts = new HashMap<>();
    for (String text : WebPageDataCreator.CONTENTS) {
        if (text == null) {
            continue;
        }
        for (String word : text.split(" ")) {
            int seenSoFar = expectedCounts.containsKey(word) ? expectedCounts.get(word) : 0;
            expectedCounts.put(word, seenSoFar + 1);
        }
    }

    // Every locally counted token must match what the job stored.
    for (Map.Entry<String, Integer> expected : expectedCounts.entrySet()) {
        assertTokenCount(outStore, expected.getKey(), expected.getValue());
    }
}
Also used : HashMap(java.util.HashMap) SparkWordCount(org.apache.gora.examples.spark.SparkWordCount) WordCount(org.apache.gora.examples.mapreduce.WordCount) FlinkWordCount(org.apache.gora.examples.flink.FlinkWordCount) DataStoreBase(org.apache.gora.store.impl.DataStoreBase) Map(java.util.Map)

Aggregations

DataStoreBase (org.apache.gora.store.impl.DataStoreBase)4 HashMap (java.util.HashMap)3 Map (java.util.Map)3 FlinkWordCount (org.apache.gora.examples.flink.FlinkWordCount)2 SparkWordCount (org.apache.gora.examples.spark.SparkWordCount)2 ArrayList (java.util.ArrayList)1 WebPage (org.apache.gora.examples.generated.WebPage)1 MapReduceSerialization (org.apache.gora.examples.mapreduce.MapReduceSerialization)1 WordCount (org.apache.gora.examples.mapreduce.WordCount)1