Search in sources :

Example 1 with AggregatorMergePartitionFunction

Use of io.cdap.cdap.etl.spark.function.AggregatorMergePartitionFunction in project cdap by caskdata.

From the class BaseRDDCollection, method reduceAggregate.

/**
 * Builds an aggregated collection for the given stage by keying the underlying RDD,
 * combining the values of each key, and flat-mapping the combined pairs into
 * {@link RecordInfo} outputs.
 *
 * <p>The pipeline is: {@code flatMapToPair} with an {@link AggregatorReduceGroupByFunction},
 * then {@code combineByKey} with the initialize / merge-value / merge-partition functions,
 * then {@code flatMap} with an {@link AggregatorFinalizeFunction}. Every function shares one
 * {@link PluginFunctionContext} but receives its own cache from {@code functionCacheFactory}.
 *
 * @param stageSpec  specification of the stage, passed to the plugin function context
 * @param partitions number of partitions handed to {@code combineByKey}; when {@code null}
 *                   the overload without an explicit partition count is used
 * @param collector  statistics collector passed to the plugin function context
 * @return the wrapped result of the final {@code flatMap}
 */
@Override
public SparkCollection<RecordInfo<Object>> reduceAggregate(StageSpec stageSpec, @Nullable Integer partitions, StageStatisticsCollector collector) {
    PluginFunctionContext context = new PluginFunctionContext(stageSpec, sec, collector);

    // Stage 1: turn each input record into (key, record) pairs.
    PairFlatMapFunction<T, Object, T> keyingFunction =
        new AggregatorReduceGroupByFunction<>(context, functionCacheFactory.newCache());
    JavaPairRDD<Object, T> keyedRecords = rdd.flatMapToPair(keyingFunction);

    // Stage 2: the three callbacks required by combineByKey.
    Function<T, Object> createCombiner =
        new AggregatorInitializeFunction<>(context, functionCacheFactory.newCache());
    Function2<Object, T, Object> mergeValue =
        new AggregatorMergeValueFunction<>(context, functionCacheFactory.newCache());
    Function2<Object, Object, Object> mergeCombiners =
        new AggregatorMergePartitionFunction<>(context, functionCacheFactory.newCache());

    // Only pass a partition count when the caller supplied one.
    JavaPairRDD<Object, Object> combined;
    if (partitions == null) {
        combined = keyedRecords.combineByKey(createCombiner, mergeValue, mergeCombiners);
    } else {
        combined = keyedRecords.combineByKey(createCombiner, mergeValue, mergeCombiners, partitions);
    }

    // Stage 3: expand each (key, combined value) pair into output records.
    FlatMapFunction<Tuple2<Object, Object>, RecordInfo<Object>> finalizeFunction =
        new AggregatorFinalizeFunction<>(context, functionCacheFactory.newCache());
    return wrap(combined.flatMap(finalizeFunction));
}
Also used : RecordInfo(io.cdap.cdap.etl.common.RecordInfo) AggregatorReduceGroupByFunction(io.cdap.cdap.etl.spark.function.AggregatorReduceGroupByFunction) AggregatorInitializeFunction(io.cdap.cdap.etl.spark.function.AggregatorInitializeFunction) PluginFunctionContext(io.cdap.cdap.etl.spark.function.PluginFunctionContext) AggregatorMergeValueFunction(io.cdap.cdap.etl.spark.function.AggregatorMergeValueFunction) Tuple2(scala.Tuple2) AggregatorFinalizeFunction(io.cdap.cdap.etl.spark.function.AggregatorFinalizeFunction) AggregatorMergePartitionFunction(io.cdap.cdap.etl.spark.function.AggregatorMergePartitionFunction)

Aggregations

RecordInfo (io.cdap.cdap.etl.common.RecordInfo): 1, AggregatorFinalizeFunction (io.cdap.cdap.etl.spark.function.AggregatorFinalizeFunction): 1, AggregatorInitializeFunction (io.cdap.cdap.etl.spark.function.AggregatorInitializeFunction): 1, AggregatorMergePartitionFunction (io.cdap.cdap.etl.spark.function.AggregatorMergePartitionFunction): 1, AggregatorMergeValueFunction (io.cdap.cdap.etl.spark.function.AggregatorMergeValueFunction): 1, AggregatorReduceGroupByFunction (io.cdap.cdap.etl.spark.function.AggregatorReduceGroupByFunction): 1, PluginFunctionContext (io.cdap.cdap.etl.spark.function.PluginFunctionContext): 1, Tuple2 (scala.Tuple2): 1