Search in sources :

Example 6 with BatchJoinerRuntimeContext

Use of co.cask.cdap.etl.api.batch.BatchJoinerRuntimeContext in the project cdap by caskdata.

The method getTransformation of the class MapReduceTransformExecutorFactory.

/**
 * Builds the tracked transformation for the stage described by {@code stageSpec}.
 *
 * <p>Stages whose plugin type is {@code BatchAggregator.PLUGIN_TYPE} or {@code BatchJoiner.PLUGIN_TYPE}
 * have their plugin instantiated and initialized here, and are wrapped in a mapper-side or reducer-side
 * transformation depending on {@code isMapPhase}. Every other plugin type is obtained through
 * {@code getInitializedTransformation} and wrapped in a plain {@link TrackedTransform}. When the stage's
 * data tracer is enabled and the stage is a batch source running in the map phase, that transformation is
 * first wrapped in a {@code LimitingTransform} bounded by {@code numberOfRecordsPreview}.
 *
 * @param stageSpec specification supplying the stage name and plugin type
 * @param <IN> input record type of the returned transform
 * @param <OUT> output record type of the returned transform
 * @return the tracked transformation for the stage
 * @throws Exception propagated from the calls made while creating and initializing the stage
 */
@SuppressWarnings("unchecked")
private <IN, OUT> TrackedTransform<IN, OUT> getTransformation(StageSpec stageSpec) throws Exception {
    DefaultMacroEvaluator evaluator = new DefaultMacroEvaluator(arguments, taskContext.getLogicalStartTime(), taskContext, taskContext.getNamespace());
    String nameOfStage = stageSpec.getName();
    String typeOfPlugin = stageSpec.getPluginType();
    StageMetrics perStageMetrics = new DefaultStageMetrics(metrics, nameOfStage);
    TaskAttemptContext hadoopTaskContext = (TaskAttemptContext) taskContext.getHadoopContext();

    // Stage statistics are only gathered when the pipeline contains a condition.
    StageStatisticsCollector statsCollector;
    if (isPipelineContainsCondition) {
        statsCollector = new MapReduceStageStatisticsCollector(nameOfStage, hadoopTaskContext);
    } else {
        statsCollector = new NoopStageStatisticsCollector();
    }

    if (BatchAggregator.PLUGIN_TYPE.equals(typeOfPlugin)) {
        BatchAggregator<?, ?, ?> aggregator = pluginInstantiator.newPluginInstance(nameOfStage, evaluator);
        BatchRuntimeContext aggregatorContext = createRuntimeContext(stageSpec);
        aggregator.initialize(aggregatorContext);
        if (!isMapPhase) {
            // reduce side: aggregate the grouped values
            return getTrackedAggregateStep(
                new ReducerAggregatorTransformation(aggregator, mapOutputKeyClassName, mapOutputValClassName),
                perStageMetrics, taskContext.getDataTracer(nameOfStage), statsCollector);
        }
        // map side: emit the group keys
        return getTrackedEmitKeyStep(
            new MapperAggregatorTransformation(aggregator, mapOutputKeyClassName, mapOutputValClassName),
            perStageMetrics, taskContext.getDataTracer(nameOfStage), statsCollector);
    }

    if (BatchJoiner.PLUGIN_TYPE.equals(typeOfPlugin)) {
        BatchJoiner<?, ?, ?> joiner = pluginInstantiator.newPluginInstance(nameOfStage, evaluator);
        BatchJoinerRuntimeContext joinerContext = createRuntimeContext(stageSpec);
        joiner.initialize(joinerContext);
        if (!isMapPhase) {
            // reduce side: merge the joined records; the reducer is told how many input schemas there are
            return getTrackedMergeStep(
                new ReducerJoinerTransformation(joiner, mapOutputKeyClassName, mapOutputValClassName,
                                                joinerContext.getInputSchemas().size()),
                perStageMetrics, taskContext.getDataTracer(nameOfStage), statsCollector);
        }
        // map side: emit the join keys
        return getTrackedEmitKeyStep(
            new MapperJoinerTransformation(joiner, mapOutputKeyClassName, mapOutputValClassName),
            perStageMetrics, taskContext.getDataTracer(nameOfStage), statsCollector);
    }

    // Any other plugin type goes through the generic initialization path.
    Transformation stageTransformation = getInitializedTransformation(stageSpec);
    if (taskContext.getDataTracer(nameOfStage).isEnabled()
        && BatchSource.PLUGIN_TYPE.equals(typeOfPlugin)
        && isMapPhase) {
        // a traced batch source in the map phase is capped at the preview record count
        stageTransformation = new LimitingTransform(stageTransformation, numberOfRecordsPreview);
    }

    // we emit metrics for records into alert publishers when the actual alerts are published,
    // not when we write the alerts to the temporary dataset
    String inputRecordsMetric;
    if (AlertPublisher.PLUGIN_TYPE.equals(typeOfPlugin)) {
        inputRecordsMetric = null;
    } else {
        inputRecordsMetric = Constants.Metrics.RECORDS_IN;
    }
    return new TrackedTransform<>(stageTransformation, perStageMetrics, inputRecordsMetric, Constants.Metrics.RECORDS_OUT, taskContext.getDataTracer(nameOfStage), statsCollector);
}
Also used : BatchJoinerRuntimeContext(co.cask.cdap.etl.api.batch.BatchJoinerRuntimeContext) TrackedTransform(co.cask.cdap.etl.common.TrackedTransform) NoopStageStatisticsCollector(co.cask.cdap.etl.common.NoopStageStatisticsCollector) Transformation(co.cask.cdap.etl.api.Transformation) LimitingTransform(co.cask.cdap.etl.common.preview.LimitingTransform) TaskAttemptContext(org.apache.hadoop.mapreduce.TaskAttemptContext) BatchJoiner(co.cask.cdap.etl.api.batch.BatchJoiner) NoopStageStatisticsCollector(co.cask.cdap.etl.common.NoopStageStatisticsCollector) StageStatisticsCollector(co.cask.cdap.etl.common.StageStatisticsCollector) BatchRuntimeContext(co.cask.cdap.etl.api.batch.BatchRuntimeContext) DefaultMacroEvaluator(co.cask.cdap.etl.common.DefaultMacroEvaluator) StageMetrics(co.cask.cdap.etl.api.StageMetrics) DefaultStageMetrics(co.cask.cdap.etl.common.DefaultStageMetrics) DefaultStageMetrics(co.cask.cdap.etl.common.DefaultStageMetrics)

Aggregations

BatchJoinerRuntimeContext (co.cask.cdap.etl.api.batch.BatchJoinerRuntimeContext)6 DefaultMacroEvaluator (co.cask.cdap.etl.common.DefaultMacroEvaluator)4 BatchJoiner (co.cask.cdap.etl.api.batch.BatchJoiner)3 MacroEvaluator (co.cask.cdap.api.macro.MacroEvaluator)2 ErrorRecord (co.cask.cdap.etl.api.ErrorRecord)2 StageMetrics (co.cask.cdap.etl.api.StageMetrics)2 Transformation (co.cask.cdap.etl.api.Transformation)2 BatchRuntimeContext (co.cask.cdap.etl.api.batch.BatchRuntimeContext)2 Windower (co.cask.cdap.etl.api.streaming.Windower)2 DefaultStageMetrics (co.cask.cdap.etl.common.DefaultStageMetrics)2 NoopStageStatisticsCollector (co.cask.cdap.etl.common.NoopStageStatisticsCollector)2 StageStatisticsCollector (co.cask.cdap.etl.common.StageStatisticsCollector)2 TrackedTransform (co.cask.cdap.etl.common.TrackedTransform)2 LimitingTransform (co.cask.cdap.etl.common.preview.LimitingTransform)2 BatchSinkFunction (co.cask.cdap.etl.spark.function.BatchSinkFunction)2 JoinFlattenFunction (co.cask.cdap.etl.spark.function.JoinFlattenFunction)2 LeftJoinFlattenFunction (co.cask.cdap.etl.spark.function.LeftJoinFlattenFunction)2 OuterJoinFlattenFunction (co.cask.cdap.etl.spark.function.OuterJoinFlattenFunction)2 PluginFunctionContext (co.cask.cdap.etl.spark.function.PluginFunctionContext)2 HashMap (java.util.HashMap)2