Search in sources :

Example 1 with MetricDatum

use of org.apache.gora.tutorial.log.generated.MetricDatum in project gora by apache.

Method run of the class LogAnalytics.

/**
 * Runs the log analytics job: reads {@code Pageview} records from an input
 * store, writes {@code MetricDatum} records to an output store, and waits for
 * the job to finish.
 *
 * <p>Store selection from the arguments:
 * <ul>
 *   <li>no arguments: both stores come from the factory's default overloads;</li>
 *   <li>{@code args[0]}: data store class used for the input store, and also
 *       for the output store unless {@code args[1]} is given;</li>
 *   <li>{@code args[1]}: data store class used for the output store.</li>
 * </ul>
 *
 * <p>Any store that was opened is closed before this method returns, including
 * when job creation or execution throws.
 *
 * @param args optional data store class names as described above
 * @return {@code 0} if the job reported success, {@code 1} otherwise
 * @throws Exception if store creation, job execution, or store closing fails
 */
@Override
public int run(String[] args) throws Exception {
    DataStore<Long, Pageview> inStore = null;
    DataStore<String, MetricDatum> outStore = null;
    Configuration conf = new Configuration();
    boolean success;
    try {
        if (args.length > 0) {
            String dataStoreClass = args[0];
            inStore = DataStoreFactory.getDataStore(dataStoreClass, Long.class, Pageview.class, conf);
            if (args.length > 1) {
                // A second argument overrides the store class for the output side only.
                dataStoreClass = args[1];
            }
            outStore = DataStoreFactory.getDataStore(dataStoreClass, String.class, MetricDatum.class, conf);
        } else {
            inStore = DataStoreFactory.getDataStore(Long.class, Pageview.class, conf);
            outStore = DataStoreFactory.getDataStore(String.class, MetricDatum.class, conf);
        }
        // NOTE(review): the meaning of the literal 3 is defined by createJob, which is
        // not visible here (presumably a reducer count) - confirm against createJob.
        Job job = createJob(inStore, outStore, 3);
        success = job.waitForCompletion(true);
    } finally {
        // Close whatever was opened, even on failure; the nested finally ensures the
        // output store is still closed if closing the input store throws.
        try {
            if (inStore != null) {
                inStore.close();
            }
        } finally {
            if (outStore != null) {
                outStore.close();
            }
        }
    }
    log.info("Log completed with {}", (success ? "success" : "failure"));
    return success ? 0 : 1;
}
Also used : Pageview(org.apache.gora.tutorial.log.generated.Pageview) Configuration(org.apache.hadoop.conf.Configuration) MetricDatum(org.apache.gora.tutorial.log.generated.MetricDatum) Job(org.apache.hadoop.mapreduce.Job)

Example 2 with MetricDatum

use of org.apache.gora.tutorial.log.generated.MetricDatum in project gora by apache.

Method run of the class LogAnalyticsSpark.

/**
 * Runs the Spark variant of the log analytics: loads {@code Pageview} records
 * from an input store into an RDD, maps and reduces them into
 * {@code MetricDatum} records, and saves the result through the output store's
 * Hadoop configuration.
 *
 * <p>Store selection from the arguments:
 * <ul>
 *   <li>no arguments: both stores come from the factory's default overloads;</li>
 *   <li>{@code args[0]}: data store class used for the input store, and also
 *       for the output store unless {@code args[1]} is given;</li>
 *   <li>{@code args[1]}: data store class used for the output store.</li>
 * </ul>
 *
 * <p>The stores and the Spark context are released before this method returns,
 * including when processing throws.
 *
 * @param args optional data store class names as described above
 * @return {@code 0} on success (failures surface as exceptions); this matches
 *         the exit-code convention used by {@code LogAnalytics.run}
 * @throws Exception if store creation, Spark processing, or cleanup fails
 */
public int run(String[] args) throws Exception {
    DataStore<Long, Pageview> inStore = null;
    DataStore<String, MetricDatum> outStore = null;
    JavaSparkContext sc = null;
    Configuration hadoopConf = new Configuration();
    try {
        if (args.length > 0) {
            String dataStoreClass = args[0];
            inStore = DataStoreFactory.getDataStore(dataStoreClass, Long.class, Pageview.class, hadoopConf);
            if (args.length > 1) {
                // A second argument overrides the store class for the output side only.
                dataStoreClass = args[1];
            }
            outStore = DataStoreFactory.getDataStore(dataStoreClass, String.class, MetricDatum.class, hadoopConf);
        } else {
            inStore = DataStoreFactory.getDataStore(Long.class, Pageview.class, hadoopConf);
            outStore = DataStoreFactory.getDataStore(String.class, MetricDatum.class, hadoopConf);
        }
        //Spark engine initialization
        GoraSparkEngine<Long, Pageview> goraSparkEngine = new GoraSparkEngine<>(Long.class, Pageview.class);
        SparkConf sparkConf = new SparkConf().setAppName("Gora Spark Integration Application").setMaster("local");
        // Register the input store's persistent class with Kryo.
        Class<?>[] kryoClasses = { inStore.getPersistentClass() };
        sparkConf.registerKryoClasses(kryoClasses);
        //
        sc = new JavaSparkContext(sparkConf);
        JavaPairRDD<Long, Pageview> goraRDD = goraSparkEngine.initialize(sc, inStore);
        long count = goraRDD.count();
        log.info("Total Log Count: {}", count);
        JavaRDD<Tuple2<Tuple2<String, Long>, Long>> mappedGoraRdd = goraRDD.values().map(mapFunc);
        JavaPairRDD<String, MetricDatum> reducedGoraRdd = JavaPairRDD.fromJavaRDD(mappedGoraRdd).reduceByKey(redFunc).mapToPair(metricFunc);
        log.info("MetricDatum count: {}", reducedGoraRdd.count());
        //Print output for debug purpose
        /*
        Map<String, MetricDatum> metricDatumMap = reducedGoraRdd.collectAsMap();
        for (String key : metricDatumMap.keySet()) {
          System.out.println(key);
        }
        */
        //
        //write output to datastore
        Configuration sparkHadoopConf = goraSparkEngine.generateOutputConf(outStore);
        reducedGoraRdd.saveAsNewAPIHadoopDataset(sparkHadoopConf);
        //
    } finally {
        // Release everything that was acquired, even on failure. The nested finally
        // blocks ensure each later resource is still released if an earlier close throws.
        try {
            if (inStore != null) {
                inStore.close();
            }
        } finally {
            try {
                if (outStore != null) {
                    outStore.close();
                }
            } finally {
                if (sc != null) {
                    sc.stop();
                }
            }
        }
    }
    log.info("Log completed with success");
    // 0 signals success, consistent with the MapReduce LogAnalytics.run.
    return 0;
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) GoraSparkEngine(org.apache.gora.spark.GoraSparkEngine) MetricDatum(org.apache.gora.tutorial.log.generated.MetricDatum) Pageview(org.apache.gora.tutorial.log.generated.Pageview) Tuple2(scala.Tuple2) JavaSparkContext(org.apache.spark.api.java.JavaSparkContext) SparkConf(org.apache.spark.SparkConf)

Aggregations

MetricDatum (org.apache.gora.tutorial.log.generated.MetricDatum)2 Pageview (org.apache.gora.tutorial.log.generated.Pageview)2 Configuration (org.apache.hadoop.conf.Configuration)2 GoraSparkEngine (org.apache.gora.spark.GoraSparkEngine)1 Job (org.apache.hadoop.mapreduce.Job)1 SparkConf (org.apache.spark.SparkConf)1 JavaSparkContext (org.apache.spark.api.java.JavaSparkContext)1 Tuple2 (scala.Tuple2)1