Use of org.apache.spark.api.java.function.Function2 in project incubator-sdap-mudrod by apache.
The processSessionInParallel method of class SessionStatistic.
public void processSessionInParallel() throws InterruptedException, IOException {
  List<String> sessions = this.getSessions();
  JavaRDD<String> sessionRDD = spark.sc.parallelize(sessions, partition);
  // Count sessions per partition, then sum the counts across partitions.
  int sessionCount = sessionRDD.mapPartitions(new FlatMapFunction<Iterator<String>, Integer>() {
    @Override
    public Iterator<Integer> call(Iterator<String> arg0) throws Exception {
      // Create one Elasticsearch client per partition rather than per session.
      ESDriver tmpES = new ESDriver(props);
      tmpES.createBulkProcessor();
      List<Integer> sessionNums = new ArrayList<>();
      sessionNums.add(0);
      while (arg0.hasNext()) {
        String s = arg0.next();
        Integer sessionNum = processSession(tmpES, s);
        sessionNums.add(sessionNum);
      }
      tmpES.destroyBulkProcessor();
      tmpES.close();
      return sessionNums.iterator();
    }
  }).reduce(new Function2<Integer, Integer, Integer>() {
    @Override
    public Integer call(Integer a, Integer b) {
      return a + b;
    }
  });
  LOG.info("Final session count: {}", sessionCount);
}
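Since Spark's Function2 and FlatMapFunction are functional interfaces, the same per-partition pattern can be written with Java 8 lambdas. A minimal sketch, assuming a Spark 2.x JavaSparkContext jsc plus the props, partition, and processSession members from the surrounding class:

// Hypothetical lambda form of the pattern above; jsc, props, partition,
// and processSession are assumed from the enclosing class.
int count = jsc.parallelize(sessions, partition)
    .mapPartitions((Iterator<String> it) -> {
      ESDriver es = new ESDriver(props); // one client per partition, not per record
      es.createBulkProcessor();
      List<Integer> nums = new ArrayList<>();
      while (it.hasNext()) {
        nums.add(processSession(es, it.next()));
      }
      es.destroyBulkProcessor();
      es.close();
      return nums.iterator();
    })
    .reduce(Integer::sum);

Using mapPartitions instead of map is the key design choice here: the expensive ESDriver setup and teardown are amortized over a whole partition instead of being paid for every session.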
Use of org.apache.spark.api.java.function.Function2 in project learning-spark by databricks.
The main method of class BasicAvg.
public static void main(String[] args) throws Exception {
  // Use the first argument as the Spark master, defaulting to local mode.
  String master = args.length > 0 ? args[0] : "local";
  JavaSparkContext sc = new JavaSparkContext(master, "basicavg", System.getenv("SPARK_HOME"), System.getenv("JARS"));
  JavaRDD<Integer> rdd = sc.parallelize(Arrays.asList(1, 2, 3, 4));
  // Fold each element into the running (total, count) accumulator.
  Function2<AvgCount, Integer, AvgCount> addAndCount = new Function2<AvgCount, Integer, AvgCount>() {
    @Override
    public AvgCount call(AvgCount a, Integer x) {
      a.total_ += x;
      a.num_ += 1;
      return a;
    }
  };
  // Merge accumulators produced by different partitions.
  Function2<AvgCount, AvgCount, AvgCount> combine = new Function2<AvgCount, AvgCount, AvgCount>() {
    @Override
    public AvgCount call(AvgCount a, AvgCount b) {
      a.total_ += b.total_;
      a.num_ += b.num_;
      return a;
    }
  };
  AvgCount initial = new AvgCount(0, 0);
  AvgCount result = rdd.aggregate(initial, addAndCount, combine);
  System.out.println(result.avg());
  sc.stop();
}
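Both this example and the Kryo variant below rely on an AvgCount accumulator defined elsewhere in the repository. A minimal sketch consistent with the members used above (total_, num_, avg()); the real class may differ in detail:

// Hypothetical sketch of the AvgCount helper assumed by these examples.
// It must be Serializable so Spark can ship instances to executors.
class AvgCount implements java.io.Serializable {
  public int total_;
  public int num_;
  public AvgCount(int total, int num) {
    total_ = total;
    num_ = num;
  }
  public double avg() {
    return total_ / (double) num_;
  }
}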
Use of org.apache.spark.api.java.function.Function2 in project learning-spark by databricks.
The main method of class BasicAvgWithKryo.
public static void main(String[] args) throws Exception {
  // Use the first argument as the Spark master, defaulting to local mode.
  String master = args.length > 0 ? args[0] : "local";
  SparkConf conf = new SparkConf().setMaster(master).setAppName("basicavgwithkyro");
  // Switch Spark to Kryo serialization and register AvgCount via AvgRegistrator.
  conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer");
  conf.set("spark.kryo.registrator", AvgRegistrator.class.getName());
  JavaSparkContext sc = new JavaSparkContext(conf);
  JavaRDD<Integer> rdd = sc.parallelize(Arrays.asList(1, 2, 3, 4));
  // Fold each element into the running (total, count) accumulator.
  Function2<AvgCount, Integer, AvgCount> addAndCount = new Function2<AvgCount, Integer, AvgCount>() {
    @Override
    public AvgCount call(AvgCount a, Integer x) {
      a.total_ += x;
      a.num_ += 1;
      return a;
    }
  };
  // Merge accumulators produced by different partitions.
  Function2<AvgCount, AvgCount, AvgCount> combine = new Function2<AvgCount, AvgCount, AvgCount>() {
    @Override
    public AvgCount call(AvgCount a, AvgCount b) {
      a.total_ += b.total_;
      a.num_ += b.num_;
      return a;
    }
  };
  AvgCount initial = new AvgCount(0, 0);
  AvgCount result = rdd.aggregate(initial, addAndCount, combine);
  System.out.println(result.avg());
}
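The AvgRegistrator referenced in the Kryo configuration is likewise not shown in this snippet. A minimal sketch of such a registrator, assuming the AvgCount class above; the actual implementation in the repository may differ:

// Hypothetical Kryo registrator matching the spark.kryo.registrator setting.
import com.esotericsoftware.kryo.Kryo;
import com.esotericsoftware.kryo.serializers.FieldSerializer;
import org.apache.spark.serializer.KryoRegistrator;

public class AvgRegistrator implements KryoRegistrator {
  @Override
  public void registerClasses(Kryo kryo) {
    // Register AvgCount so Kryo serializes it field by field instead of
    // falling back to slower generic serialization.
    kryo.register(AvgCount.class, new FieldSerializer(kryo, AvgCount.class));
  }
}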