use of org.apache.gora.jet.generated.ResultPageView in project gora by apache.
the class JetTest method testNewJetSource.
/**
 * Runs a Jet pipeline that reads {@code Pageview} records from one Gora data store, keeps
 * those whose IP string equals {@code 88.240.129.183}, copies them into
 * {@code ResultPageView} objects and drains them into the output data store. Afterwards the
 * output store is queried without restrictions and every returned record is checked.
 *
 * @throws Exception if a data store cannot be obtained or the output query fails
 */
@Test
public void testNewJetSource() throws Exception {
    DataStore<Long, Pageview> dataStoreIn =
            DataStoreFactory.getDataStore(Long.class, Pageview.class, utility.getConfiguration());
    dataStoreOut = DataStoreFactory.getDataStore(Long.class, ResultPageView.class, utility.getConfiguration());

    // Input query: start key 0, end key 55.
    query = dataStoreIn.newQuery();
    query.setStartKey(0L);
    query.setEndKey(55L);

    JetEngine<Long, Pageview, Long, ResultPageView> jetEngine = new JetEngine<>();
    BatchSource<JetInputOutputFormat<Long, Pageview>> fileSource = jetEngine.createDataSource(dataStoreIn, query);

    Pipeline p = Pipeline.create();
    p.drawFrom(fileSource)
            .filter(item -> item.getValue().getIp().toString().equals("88.240.129.183"))
            .map(e -> {
                ResultPageView resultPageView = new ResultPageView();
                resultPageView.setIp(e.getValue().getIp());
                resultPageView.setTimestamp(e.getValue().getTimestamp());
                resultPageView.setUrl(e.getValue().getUrl());
                // The page view's timestamp is passed as the first constructor argument
                // (presumably the output key - confirm against JetInputOutputFormat).
                return new JetInputOutputFormat<Long, ResultPageView>(e.getValue().getTimestamp(), resultPageView);
            })
            .drainTo(jetEngine.createDataSink(dataStoreOut));

    // Instances are started inside the try so that shutdownAll() also runs when starting
    // the second member or submitting the job fails.
    try {
        JetInstance jet = Jet.newJetInstance();
        // NOTE(review): a second instance is started and its handle discarded; presumably this
        // forms a two-member cluster for the job - confirm it is intentional.
        Jet.newJetInstance();
        jet.newJob(p).join();
    } finally {
        Jet.shutdownAll();
    }

    // Named differently from the input query above so the two are not confused.
    Query<Long, ResultPageView> outputQuery = dataStoreOut.newQuery();
    Result<Long, ResultPageView> result = outputQuery.execute();
    int noOfOutputRecords = 0;
    while (result.next()) {
        noOfOutputRecords++;
        String ip = result.get().getIp().toString();
        assertEquals("88.240.129.183", ip);
    }
    assertEquals(2, noOfOutputRecords);
}
use of org.apache.gora.jet.generated.ResultPageView in project gora by apache.
the class JetTest method jetWordCount.
/**
 * Word-count over the {@code url} field of the {@code ResultPageView} records in the output
 * data store: each URL string is split on the regex {@code \W+}, empty tokens are dropped,
 * and the per-word counts are drained into the Jet map named {@code COUNTS}. The test then
 * expects the word {@code the} to have been counted three times.
 *
 * @throws GoraException if the data store cannot be obtained
 */
@Test
public void jetWordCount() throws GoraException {
    dataStoreOut = DataStoreFactory.getDataStore(Long.class, ResultPageView.class, utility.getConfiguration());
    Query<Long, ResultPageView> query = dataStoreOut.newQuery();
    JetEngine<Long, ResultPageView, Long, ResultPageView> jetEngine = new JetEngine<>();
    Pattern delimiter = Pattern.compile("\\W+");

    Pipeline p = Pipeline.create();
    p.drawFrom(jetEngine.createDataSource(dataStoreOut, query))
            .flatMap(e -> traverseArray(delimiter.split(e.getValue().getUrl().toString())))
            .filter(word -> !word.isEmpty())
            .groupingKey(wholeItem())
            .aggregate(counting())
            .drainTo(Sinks.map("COUNTS"));

    // Shut the Jet instance down even when the job or the assertion fails, matching the
    // try/finally used by testNewJetSource.
    try {
        JetInstance jet = Jet.newJetInstance();
        jet.newJob(p).join();
        IMap<String, Long> counts = jet.getMap("COUNTS");
        // Compare boxed values so a missing key reports an assertion failure rather than
        // a NullPointerException from unboxing.
        assertEquals(Long.valueOf(3L), counts.get("the"));
    } finally {
        Jet.shutdownAll();
    }
}
use of org.apache.gora.jet.generated.ResultPageView in project gora by apache.
the class JetTest method insertData.
/**
 * One-time fixture: starts an HBase mini cluster, obtains the {@code ResultPageView} data
 * store and writes three records under the keys 1, 2 and 3 before flushing.
 *
 * @throws Exception if the mini cluster cannot be started or the store cannot be written
 */
@BeforeClass
public static void insertData() throws Exception {
    utility = new HBaseTestingUtility();
    utility.startMiniCluster();
    dataStoreOut = DataStoreFactory.getDataStore(Long.class, ResultPageView.class, utility.getConfiguration());

    // Record stored under key 1.
    ResultPageView firstView = new ResultPageView();
    firstView.setIp("88.240.129.183");
    firstView.setTimestamp(123L);
    firstView.setUrl("I am the the one");

    // Record stored under key 2 (initial values; overwritten further down).
    ResultPageView secondView = new ResultPageView();
    secondView.setIp("87.240.129.170");
    secondView.setTimestamp(124L);
    secondView.setUrl("How are you");

    // Record stored under key 3.
    ResultPageView thirdView = new ResultPageView();
    // NOTE(review): the setters below target the second record again, so thirdView is stored
    // with no field set. This looks like a copy-paste slip, but the assertions in the tests
    // of this class may rely on the resulting data - confirm before changing it.
    secondView.setIp("88.240.129.183");
    secondView.setTimestamp(124L);
    secondView.setUrl("This is the jet engine");

    dataStoreOut.put(1L, firstView);
    dataStoreOut.put(2L, secondView);
    dataStoreOut.put(3L, thirdView);
    dataStoreOut.flush();
}
Aggregations