Use of co.cask.cdap.etl.spark.function.AggregatorAggregateFunction in project cdap by caskdata.
The aggregate method of the RDDCollection class.
@Override
public SparkCollection<Tuple2<Boolean, Object>> aggregate(StageInfo stageInfo, @Nullable Integer partitions) {
  PluginFunctionContext pluginFunctionContext = new PluginFunctionContext(stageInfo, sec);

  // Emit (groupKey, record) pairs using the aggregator plugin's group-by logic.
  PairFlatMapFunc<T, Object, T> groupByFunction = new AggregatorGroupByFunction<>(pluginFunctionContext);
  PairFlatMapFunction<T, Object, T> sparkGroupByFunction = Compat.convert(groupByFunction);
  JavaPairRDD<Object, T> keyedCollection = rdd.flatMapToPair(sparkGroupByFunction);

  // Group records that share a key, using the configured partition count if one was given.
  JavaPairRDD<Object, Iterable<T>> groupedCollection =
    partitions == null ? keyedCollection.groupByKey() : keyedCollection.groupByKey(partitions);

  // Apply the aggregator plugin's aggregate logic to each group, then wrap the
  // flat-mapped output back into a SparkCollection.
  FlatMapFunc<Tuple2<Object, Iterable<T>>, Tuple2<Boolean, Object>> aggregateFunction =
    new AggregatorAggregateFunction<>(pluginFunctionContext);
  FlatMapFunction<Tuple2<Object, Iterable<T>>, Tuple2<Boolean, Object>> sparkAggregateFunction =
    Compat.convert(aggregateFunction);
  return wrap(groupedCollection.flatMap(sparkAggregateFunction));
}
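
For context, the same flatMapToPair -> groupByKey -> flatMap shape can be written directly against the Spark Java API. The sketch below is illustrative only: the class WordCountSketch and its per-key sum are hypothetical stand-ins, not CDAP's plugin-driven AggregatorGroupByFunction / AggregatorAggregateFunction, and it assumes the Spark 2.x iterator-returning flat-map API (the CDAP code uses Compat.convert precisely to bridge Spark 1.x and 2.x function signatures).

import java.util.Arrays;
import java.util.Collections;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import scala.Tuple2;

// Illustrative sketch of the key -> group -> aggregate pattern used above.
public class WordCountSketch {
  public static void main(String[] args) {
    SparkConf conf = new SparkConf().setAppName("aggregate-sketch").setMaster("local[*]");
    try (JavaSparkContext jsc = new JavaSparkContext(conf)) {
      JavaRDD<String> records = jsc.parallelize(Arrays.asList("a", "b", "a", "c", "b", "a"));

      // Step 1: emit (key, record) pairs, mirroring the role of AggregatorGroupByFunction.
      JavaPairRDD<String, Integer> keyed =
        records.flatMapToPair(r -> Collections.singletonList(new Tuple2<>(r, 1)).iterator());

      // Step 2: group all records that share a key (a partition count could be passed here).
      JavaPairRDD<String, Iterable<Integer>> grouped = keyed.groupByKey();

      // Step 3: flat-map each group to zero or more outputs, mirroring the role of
      // AggregatorAggregateFunction. Here each group is simply summed.
      JavaRDD<Tuple2<String, Integer>> aggregated = grouped.flatMap(group -> {
        int sum = 0;
        for (Integer v : group._2()) {
          sum += v;
        }
        return Collections.singletonList(new Tuple2<>(group._1(), sum)).iterator();
      });

      aggregated.collect().forEach(t -> System.out.println(t._1() + " -> " + t._2()));
    }
  }
}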