
Example 11 with Function2

Use of org.apache.spark.api.java.function.Function2 in project incubator-sdap-mudrod by apache.

In class SessionStatistic, the method processSessionInParallel:

public void processSessionInParallel() throws InterruptedException, IOException {
    List<String> sessions = this.getSessions();
    JavaRDD<String> sessionRDD = spark.sc.parallelize(sessions, partition);
    int sessionCount = sessionRDD.mapPartitions(new FlatMapFunction<Iterator<String>, Integer>() {

        @Override
        public Iterator<Integer> call(Iterator<String> arg0) throws Exception {
            // One Elasticsearch driver per partition: ESDriver is not serializable,
            // so it is created on the executor rather than captured from the driver.
            ESDriver tmpES = new ESDriver(props);
            tmpES.createBulkProcessor();
            List<Integer> sessionNums = new ArrayList<>();
            // Seed with 0 so an empty partition still contributes a value to the reduce.
            sessionNums.add(0);
            while (arg0.hasNext()) {
                String s = arg0.next();
                Integer sessionNum = processSession(tmpES, s);
                sessionNums.add(sessionNum);
            }
            tmpES.destroyBulkProcessor();
            tmpES.close();
            return sessionNums.iterator();
        }
    }).reduce(new Function2<Integer, Integer, Integer>() {

        // Associative, commutative combiner: sums the per-partition counts.
        @Override
        public Integer call(Integer a, Integer b) {
            return a + b;
        }
    });
    LOG.info("Final Session count: {}", sessionCount);
}
Also used: ESDriver (org.apache.sdap.mudrod.driver.ESDriver), Function2 (org.apache.spark.api.java.function.Function2), IOException (java.io.IOException), ExecutionException (java.util.concurrent.ExecutionException)
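
Since Java 8, a Function2 can also be written as a lambda, because the interface has a single abstract method. Below is a minimal, self-contained sketch of the same sum-by-reduce pattern; the class name and the RDD values are illustrative stand-ins for the per-partition session counts above, not code from mudrod:

import java.util.Arrays;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;

public class ReduceSumSketch {
    public static void main(String[] args) {
        SparkConf conf = new SparkConf().setMaster("local").setAppName("reduce-sum-sketch");
        // JavaSparkContext implements Closeable, so try-with-resources stops it for us.
        try (JavaSparkContext sc = new JavaSparkContext(conf)) {
            // Stand-in for the per-partition session counts computed above.
            JavaRDD<Integer> counts = sc.parallelize(Arrays.asList(0, 3, 5, 2));
            // The Function2<Integer, Integer, Integer> reducer, expressed as a lambda.
            int total = counts.reduce((a, b) -> a + b);
            System.out.println("Final session count: " + total);
        }
    }
}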

Example 12 with Function2

Use of org.apache.spark.api.java.function.Function2 in project learning-spark by databricks.

In class BasicAvg, the method main:

public static void main(String[] args) throws Exception {
    // Use the master URL from the command line, falling back to local mode.
    String master = args.length > 0 ? args[0] : "local";
    JavaSparkContext sc = new JavaSparkContext(master, "basicavg", System.getenv("SPARK_HOME"), System.getenv("JARS"));
    JavaRDD<Integer> rdd = sc.parallelize(Arrays.asList(1, 2, 3, 4));
    // seqOp: fold one element into the accumulator for its partition.
    Function2<AvgCount, Integer, AvgCount> addAndCount = new Function2<AvgCount, Integer, AvgCount>() {

        @Override
        public AvgCount call(AvgCount a, Integer x) {
            // Mutating and returning the accumulator is safe here: aggregate
            // gives each partition its own copy of the zero value.
            a.total_ += x;
            a.num_ += 1;
            return a;
        }
    };
    // combOp: merge the accumulators produced by different partitions.
    Function2<AvgCount, AvgCount, AvgCount> combine = new Function2<AvgCount, AvgCount, AvgCount>() {

        @Override
        public AvgCount call(AvgCount a, AvgCount b) {
            a.total_ += b.total_;
            a.num_ += b.num_;
            return a;
        }
    };
    AvgCount initial = new AvgCount(0, 0);
    AvgCount result = rdd.aggregate(initial, addAndCount, combine);
    System.out.println(result.avg());
    sc.stop();
}
Also used: JavaSparkContext (org.apache.spark.api.java.JavaSparkContext), Function2 (org.apache.spark.api.java.function.Function2)
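
The examples here rely on an AvgCount class that is not shown. A minimal sketch consistent with the fields and methods used above (total_, num_, avg()) follows; the exact definition in learning-spark may differ. The class must be Serializable so Spark can ship instances between the driver and executors:

import java.io.Serializable;

// Mutable accumulator holding a running sum and element count.
public class AvgCount implements Serializable {
    public int total_;
    public int num_;

    public AvgCount(int total, int num) {
        total_ = total;
        num_ = num;
    }

    public double avg() {
        // Guard against division by zero on an empty RDD.
        return num_ == 0 ? 0.0 : total_ / (double) num_;
    }
}

With this definition, the aggregate call above prints 2.5 for the input 1, 2, 3, 4.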

Example 13 with Function2

Use of org.apache.spark.api.java.function.Function2 in project learning-spark by databricks.

In class BasicAvgWithKryo, the method main:

public static void main(String[] args) throws Exception {
    // Use the master URL from the command line, falling back to local mode.
    String master = args.length > 0 ? args[0] : "local";
    SparkConf conf = new SparkConf().setMaster(master).setAppName("basicavgwithkryo");
    // Switch from Java serialization to Kryo and register AvgCount via the registrator.
    conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer");
    conf.set("spark.kryo.registrator", AvgRegistrator.class.getName());
    JavaSparkContext sc = new JavaSparkContext(conf);
    JavaRDD<Integer> rdd = sc.parallelize(Arrays.asList(1, 2, 3, 4));
    // seqOp: fold one element into the accumulator for its partition.
    Function2<AvgCount, Integer, AvgCount> addAndCount = new Function2<AvgCount, Integer, AvgCount>() {

        @Override
        public AvgCount call(AvgCount a, Integer x) {
            a.total_ += x;
            a.num_ += 1;
            return a;
        }
    };
    // combOp: merge the accumulators produced by different partitions.
    Function2<AvgCount, AvgCount, AvgCount> combine = new Function2<AvgCount, AvgCount, AvgCount>() {

        @Override
        public AvgCount call(AvgCount a, AvgCount b) {
            a.total_ += b.total_;
            a.num_ += b.num_;
            return a;
        }
    };
    AvgCount initial = new AvgCount(0, 0);
    AvgCount result = rdd.aggregate(initial, addAndCount, combine);
    System.out.println(result.avg());
    // Shut down the context to release resources.
    sc.stop();
}
Also used: JavaSparkContext (org.apache.spark.api.java.JavaSparkContext), Function2 (org.apache.spark.api.java.function.Function2), SparkConf (org.apache.spark.SparkConf)
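
The AvgRegistrator referenced in the Kryo configuration above is likewise not shown. A minimal sketch of what such a registrator could look like, assuming it only needs to register AvgCount; Spark's KryoRegistrator interface defines the single method registerClasses:

import com.esotericsoftware.kryo.Kryo;

import org.apache.spark.serializer.KryoRegistrator;

// Registers application classes with Kryo so instances serialize
// compactly instead of carrying full class-name metadata.
public class AvgRegistrator implements KryoRegistrator {
    @Override
    public void registerClasses(Kryo kryo) {
        kryo.register(AvgCount.class);
    }
}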

Aggregations

Classes used together with Function2 across these examples, with usage counts:

Function2 (org.apache.spark.api.java.function.Function2): 13
JavaSparkContext (org.apache.spark.api.java.JavaSparkContext): 10
Tuple2 (scala.Tuple2): 7
ArrayList (java.util.ArrayList): 6
SparkConf (org.apache.spark.SparkConf): 5
FlatMapFunction (org.apache.spark.api.java.function.FlatMapFunction): 5
Function (org.apache.spark.api.java.function.Function): 4
PairFunction (org.apache.spark.api.java.function.PairFunction): 4
IOException (java.io.IOException): 3
Map (java.util.Map): 3
ESDriver (org.apache.sdap.mudrod.driver.ESDriver): 3
JavaRDD (org.apache.spark.api.java.JavaRDD): 3
Arrays (java.util.Arrays): 2
Iterator (java.util.Iterator): 2
List (java.util.List): 2
JavaPairRDD (org.apache.spark.api.java.JavaPairRDD): 2
TxRunnable (co.cask.cdap.api.TxRunnable): 1
DatasetContext (co.cask.cdap.api.data.DatasetContext): 1
StructuredRecord (co.cask.cdap.api.data.format.StructuredRecord): 1
Schema (co.cask.cdap.api.data.schema.Schema): 1