Search in sources :

Example 1 with ResultPageView

Use of org.apache.gora.jet.generated.ResultPageView in the Apache Gora project.

From the class JetTest, method testNewJetSource.

/**
 * Runs a Jet batch job that reads {@link Pageview} records from an input Gora data store
 * (query start key 0, end key 55), keeps the ones whose IP equals {@code 88.240.129.183},
 * converts each into a {@link ResultPageView} keyed by the page view's timestamp and drains
 * them into the output data store.
 *
 * <p>After the job has finished, every record returned by a query on the output store must
 * carry that IP, and exactly two records are expected.
 *
 * <p>{@code utility}, {@code dataStoreOut} and {@code query} are not declared in this method;
 * they are presumably fields of the enclosing test class.
 *
 * @throws Exception if a data store cannot be created, or the job or the result query fails
 */
@Test
public void testNewJetSource() throws Exception {
    DataStore<Long, Pageview> dataStoreIn =
            DataStoreFactory.getDataStore(Long.class, Pageview.class, utility.getConfiguration());
    dataStoreOut = DataStoreFactory.getDataStore(Long.class, ResultPageView.class, utility.getConfiguration());

    // Restrict the input to the key range [0, 55] as configured on the query.
    query = dataStoreIn.newQuery();
    query.setStartKey(0L);
    query.setEndKey(55L);

    JetEngine<Long, Pageview, Long, ResultPageView> jetEngine = new JetEngine<>();
    BatchSource<JetInputOutputFormat<Long, Pageview>> fileSource = jetEngine.createDataSource(dataStoreIn, query);

    Pipeline p = Pipeline.create();
    p.drawFrom(fileSource)
            .filter(item -> item.getValue().getIp().toString().equals("88.240.129.183"))
            .map(e -> {
                ResultPageView resultPageView = new ResultPageView();
                resultPageView.setIp(e.getValue().getIp());
                resultPageView.setTimestamp(e.getValue().getTimestamp());
                resultPageView.setUrl(e.getValue().getUrl());
                // The timestamp of the source page view is used as the output key.
                return new JetInputOutputFormat<Long, ResultPageView>(e.getValue().getTimestamp(), resultPageView);
            })
            .drainTo(jetEngine.createDataSink(dataStoreOut));

    // A single Jet instance is enough to run the job; the previous second, discarded
    // Jet.newJetInstance() call only started an extra unused member.
    JetInstance jet = Jet.newJetInstance();
    try {
        jet.newJob(p).join();
    } finally {
        Jet.shutdownAll();
    }

    // Named differently from the input `query` assigned above to avoid shadowing it.
    Query<Long, ResultPageView> outputQuery = dataStoreOut.newQuery();
    Result<Long, ResultPageView> result = outputQuery.execute();
    int noOfOutputRecords = 0;
    while (result.next()) {
        noOfOutputRecords++;
        assertEquals("88.240.129.183", result.get().getIp().toString());
    }
    assertEquals(2, noOfOutputRecords);
}
Also used : AggregateOperations.counting(com.hazelcast.jet.aggregate.AggregateOperations.counting) JetInstance(com.hazelcast.jet.JetInstance) BatchSource(com.hazelcast.jet.pipeline.BatchSource) BeforeClass(org.junit.BeforeClass) Pipeline(com.hazelcast.jet.pipeline.Pipeline) DataStore(org.apache.gora.store.DataStore) Functions.wholeItem(com.hazelcast.jet.function.Functions.wholeItem) Sinks(com.hazelcast.jet.pipeline.Sinks) Test(org.junit.Test) Pageview(org.apache.gora.jet.generated.Pageview) ResultPageView(org.apache.gora.jet.generated.ResultPageView) Result(org.apache.gora.query.Result) DataStoreFactory(org.apache.gora.store.DataStoreFactory) IMap(com.hazelcast.core.IMap) HBaseTestingUtility(org.apache.hadoop.hbase.HBaseTestingUtility) Query(org.apache.gora.query.Query) Traversers.traverseArray(com.hazelcast.jet.Traversers.traverseArray) Jet(com.hazelcast.jet.Jet) Pattern(java.util.regex.Pattern) GoraException(org.apache.gora.util.GoraException) Assert.assertEquals(org.junit.Assert.assertEquals) JetInstance(com.hazelcast.jet.JetInstance) Pipeline(com.hazelcast.jet.pipeline.Pipeline) Pageview(org.apache.gora.jet.generated.Pageview) ResultPageView(org.apache.gora.jet.generated.ResultPageView) Test(org.junit.Test)

Example 2 with ResultPageView

Use of org.apache.gora.jet.generated.ResultPageView in the Apache Gora project.

From the class JetTest, method jetWordCount.

/**
 * Word-count test over the URLs of the {@link ResultPageView} records in the output data store.
 *
 * <p>The pipeline reads every record returned by a fresh query on the store, splits each URL on
 * the regular expression {@code \W+}, drops empty tokens, counts the occurrences of each token
 * and drains the counts into the Jet map named {@code COUNTS}. The test then asserts that the
 * token {@code the} was counted three times.
 *
 * <p>{@code utility} and {@code dataStoreOut} are not declared in this method; they are
 * presumably fields of the enclosing test class.
 *
 * @throws GoraException if the data store cannot be created
 */
@Test
public void jetWordCount() throws GoraException {
    dataStoreOut = DataStoreFactory.getDataStore(Long.class, ResultPageView.class, utility.getConfiguration());
    Query<Long, ResultPageView> sourceQuery = dataStoreOut.newQuery();
    JetEngine<Long, ResultPageView, Long, ResultPageView> jetEngine = new JetEngine<>();
    Pattern delimiter = Pattern.compile("\\W+");

    Pipeline p = Pipeline.create();
    p.drawFrom(jetEngine.createDataSource(dataStoreOut, sourceQuery))
            .flatMap(e -> traverseArray(delimiter.split(e.getValue().getUrl().toString())))
            .filter(word -> !word.isEmpty())
            .groupingKey(wholeItem())
            .aggregate(counting())
            .drainTo(Sinks.map("COUNTS"));

    JetInstance jet = Jet.newJetInstance();
    try {
        jet.newJob(p).join();
        // Read the result while the instance is still running; it is shut down below.
        IMap<String, Long> counts = jet.getMap("COUNTS");
        assertEquals(3L, (long) counts.get("the"));
    } finally {
        // Always release the Jet instance, mirroring testNewJetSource, so that a failed
        // job or assertion does not leave a running member behind.
        Jet.shutdownAll();
    }
}
Also used : AggregateOperations.counting(com.hazelcast.jet.aggregate.AggregateOperations.counting) JetInstance(com.hazelcast.jet.JetInstance) BatchSource(com.hazelcast.jet.pipeline.BatchSource) BeforeClass(org.junit.BeforeClass) Pipeline(com.hazelcast.jet.pipeline.Pipeline) DataStore(org.apache.gora.store.DataStore) Functions.wholeItem(com.hazelcast.jet.function.Functions.wholeItem) Sinks(com.hazelcast.jet.pipeline.Sinks) Test(org.junit.Test) Pageview(org.apache.gora.jet.generated.Pageview) ResultPageView(org.apache.gora.jet.generated.ResultPageView) Result(org.apache.gora.query.Result) DataStoreFactory(org.apache.gora.store.DataStoreFactory) IMap(com.hazelcast.core.IMap) HBaseTestingUtility(org.apache.hadoop.hbase.HBaseTestingUtility) Query(org.apache.gora.query.Query) Traversers.traverseArray(com.hazelcast.jet.Traversers.traverseArray) Jet(com.hazelcast.jet.Jet) Pattern(java.util.regex.Pattern) GoraException(org.apache.gora.util.GoraException) Assert.assertEquals(org.junit.Assert.assertEquals) Pattern(java.util.regex.Pattern) ResultPageView(org.apache.gora.jet.generated.ResultPageView) JetInstance(com.hazelcast.jet.JetInstance) Pipeline(com.hazelcast.jet.pipeline.Pipeline) Test(org.junit.Test)

Example 3 with ResultPageView

Use of org.apache.gora.jet.generated.ResultPageView in the Apache Gora project.

From the class JetTest, method insertData.

/**
 * One-time fixture: starts an HBase mini cluster, opens the {@link ResultPageView} data store
 * against its configuration and stores three records under the keys 1, 2 and 3, then flushes.
 *
 * @throws Exception if the mini cluster cannot be started or the data store cannot be
 *         created or written
 */
@BeforeClass
public static void insertData() throws Exception {
    utility = new HBaseTestingUtility();
    utility.startMiniCluster();
    dataStoreOut = DataStoreFactory.getDataStore(Long.class, ResultPageView.class, utility.getConfiguration());
    // Record stored under key 1.
    ResultPageView resultPageView = new ResultPageView();
    resultPageView.setIp("88.240.129.183");
    resultPageView.setTimestamp(123L);
    resultPageView.setUrl("I am the the one");
    // Record stored under key 2; the values set here are overwritten a few lines below.
    ResultPageView resultPageView1 = new ResultPageView();
    resultPageView1.setIp("87.240.129.170");
    resultPageView1.setTimestamp(124L);
    resultPageView1.setUrl("How are you");
    // NOTE(review): resultPageView2 is created here, but the three setters that follow target
    // resultPageView1 again. As written, resultPageView1 ends up with IP 88.240.129.183 and
    // URL "This is the jet engine", and resultPageView2 is stored under key 3 with no fields
    // set. This looks like a copy-paste slip, but the tests on this store assert values that
    // are consistent with the data as written — confirm before retargeting the setters.
    ResultPageView resultPageView2 = new ResultPageView();
    resultPageView1.setIp("88.240.129.183");
    resultPageView1.setTimestamp(124L);
    resultPageView1.setUrl("This is the jet engine");
    dataStoreOut.put(1L, resultPageView);
    dataStoreOut.put(2L, resultPageView1);
    dataStoreOut.put(3L, resultPageView2);
    // Flush the puts to the store before the tests run.
    dataStoreOut.flush();
}
Also used : ResultPageView(org.apache.gora.jet.generated.ResultPageView) HBaseTestingUtility(org.apache.hadoop.hbase.HBaseTestingUtility) BeforeClass(org.junit.BeforeClass)

Aggregations

ResultPageView (org.apache.gora.jet.generated.ResultPageView)3 HBaseTestingUtility (org.apache.hadoop.hbase.HBaseTestingUtility)3 BeforeClass (org.junit.BeforeClass)3 IMap (com.hazelcast.core.IMap)2 Jet (com.hazelcast.jet.Jet)2 JetInstance (com.hazelcast.jet.JetInstance)2 Traversers.traverseArray (com.hazelcast.jet.Traversers.traverseArray)2 AggregateOperations.counting (com.hazelcast.jet.aggregate.AggregateOperations.counting)2 Functions.wholeItem (com.hazelcast.jet.function.Functions.wholeItem)2 BatchSource (com.hazelcast.jet.pipeline.BatchSource)2 Pipeline (com.hazelcast.jet.pipeline.Pipeline)2 Sinks (com.hazelcast.jet.pipeline.Sinks)2 Pattern (java.util.regex.Pattern)2 Pageview (org.apache.gora.jet.generated.Pageview)2 Query (org.apache.gora.query.Query)2 Result (org.apache.gora.query.Result)2 DataStore (org.apache.gora.store.DataStore)2 DataStoreFactory (org.apache.gora.store.DataStoreFactory)2 GoraException (org.apache.gora.util.GoraException)2 Assert.assertEquals (org.junit.Assert.assertEquals)2