Use of org.apache.gora.jet.JetEngine in the Apache Gora project.
The main method of the class LogAnalyticsJet.
/**
 * Runs the daily pageview aggregation as a Jet job.
 *
 * <p>Pageviews are fetched through the Jet source connector, grouped by URL,
 * and within each URL grouped by day with a counting aggregator to obtain the
 * daily pageview counts. The result is written through the Jet sink connector
 * to a Gora compatible data store.
 *
 * @param args command line arguments; not read by this method
 * @throws Exception if setting up the data stores or running the job fails
 */
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    inStore = DataStoreFactory.getDataStore(Long.class, Pageview.class, conf);
    outStore = DataStoreFactory.getDataStore(String.class, MetricDatum.class, conf);

    Query<Long, Pageview> query = inStore.newQuery();
    JetEngine<Long, Pageview, String, MetricDatum> jetEngine = new JetEngine<>();

    Pipeline p = Pipeline.create();
    p.drawFrom(jetEngine.createDataSource(inStore, query))
        // Outer grouping: one aggregate per URL.
        .groupingKey(e -> e.getValue().getUrl().toString())
        // Inner grouping: day -> number of pageviews on that day.
        .aggregate(groupingBy(e -> getDay(e.getValue().getTimestamp()), counting()))
        .map(e -> {
            MetricDatum metricDatum = new MetricDatum();
            String url = e.getKey();
            // NOTE(review): every iteration overwrites the timestamp and metric
            // on the same MetricDatum, so only the last iterated day is kept
            // per URL. Confirm whether one record per (url, day) was intended.
            for (Map.Entry<Long, Long> item : e.getValue().entrySet()) {
                long timeStamp = item.getKey();
                // The entry value is the count; the key is the day timestamp.
                long sum = item.getValue();
                metricDatum.setTimestamp(timeStamp);
                metricDatum.setMetric(sum);
            }
            metricDatum.setMetricDimension(url);
            return new JetInputOutputFormat<String, MetricDatum>(url + "_" + "ip", metricDatum);
        })
        .peek()
        .drainTo(jetEngine.createDataSink(outStore));

    JetInstance jet = Jet.newJetInstance();
    try {
        // Block until the job completes.
        jet.newJob(p).join();
    } finally {
        // Always shut Jet down, even if the job failed.
        Jet.shutdownAll();
    }
}
Aggregations