Search in sources :

Example 6 with NoopStageStatisticsCollector

use of co.cask.cdap.etl.common.NoopStageStatisticsCollector in project cdap by caskdata.

the class MapReduceTransformExecutorFactory method getTransformation.

@SuppressWarnings("unchecked")
private <IN, OUT> TrackedTransform<IN, OUT> getTransformation(StageSpec stageSpec) throws Exception {
    DefaultMacroEvaluator macroEvaluator = new DefaultMacroEvaluator(arguments, taskContext.getLogicalStartTime(), taskContext, taskContext.getNamespace());
    String stageName = stageSpec.getName();
    String pluginType = stageSpec.getPluginType();
    StageMetrics stageMetrics = new DefaultStageMetrics(metrics, stageName);
    TaskAttemptContext taskAttemptContext = (TaskAttemptContext) taskContext.getHadoopContext();
    StageStatisticsCollector collector = isPipelineContainsCondition ? new MapReduceStageStatisticsCollector(stageName, taskAttemptContext) : new NoopStageStatisticsCollector();
    if (BatchAggregator.PLUGIN_TYPE.equals(pluginType)) {
        BatchAggregator<?, ?, ?> batchAggregator = pluginInstantiator.newPluginInstance(stageName, macroEvaluator);
        BatchRuntimeContext runtimeContext = createRuntimeContext(stageSpec);
        batchAggregator.initialize(runtimeContext);
        if (isMapPhase) {
            return getTrackedEmitKeyStep(new MapperAggregatorTransformation(batchAggregator, mapOutputKeyClassName, mapOutputValClassName), stageMetrics, taskContext.getDataTracer(stageName), collector);
        } else {
            return getTrackedAggregateStep(new ReducerAggregatorTransformation(batchAggregator, mapOutputKeyClassName, mapOutputValClassName), stageMetrics, taskContext.getDataTracer(stageName), collector);
        }
    } else if (BatchJoiner.PLUGIN_TYPE.equals(pluginType)) {
        BatchJoiner<?, ?, ?> batchJoiner = pluginInstantiator.newPluginInstance(stageName, macroEvaluator);
        BatchJoinerRuntimeContext runtimeContext = createRuntimeContext(stageSpec);
        batchJoiner.initialize(runtimeContext);
        if (isMapPhase) {
            return getTrackedEmitKeyStep(new MapperJoinerTransformation(batchJoiner, mapOutputKeyClassName, mapOutputValClassName), stageMetrics, taskContext.getDataTracer(stageName), collector);
        } else {
            return getTrackedMergeStep(new ReducerJoinerTransformation(batchJoiner, mapOutputKeyClassName, mapOutputValClassName, runtimeContext.getInputSchemas().size()), stageMetrics, taskContext.getDataTracer(stageName), collector);
        }
    }
    Transformation transformation = getInitializedTransformation(stageSpec);
    boolean isLimitingSource = taskContext.getDataTracer(stageName).isEnabled() && BatchSource.PLUGIN_TYPE.equals(pluginType) && isMapPhase;
    transformation = isLimitingSource ? new LimitingTransform(transformation, numberOfRecordsPreview) : transformation;
    // we emit metrics for records into alert publishers when the actual alerts are published,
    // not when we write the alerts to the temporary dataset
    String recordsInMetric = AlertPublisher.PLUGIN_TYPE.equals(pluginType) ? null : Constants.Metrics.RECORDS_IN;
    return new TrackedTransform<>(transformation, stageMetrics, recordsInMetric, Constants.Metrics.RECORDS_OUT, taskContext.getDataTracer(stageName), collector);
}
Also used : BatchJoinerRuntimeContext(co.cask.cdap.etl.api.batch.BatchJoinerRuntimeContext) TrackedTransform(co.cask.cdap.etl.common.TrackedTransform) NoopStageStatisticsCollector(co.cask.cdap.etl.common.NoopStageStatisticsCollector) Transformation(co.cask.cdap.etl.api.Transformation) LimitingTransform(co.cask.cdap.etl.common.preview.LimitingTransform) TaskAttemptContext(org.apache.hadoop.mapreduce.TaskAttemptContext) BatchJoiner(co.cask.cdap.etl.api.batch.BatchJoiner) NoopStageStatisticsCollector(co.cask.cdap.etl.common.NoopStageStatisticsCollector) StageStatisticsCollector(co.cask.cdap.etl.common.StageStatisticsCollector) BatchRuntimeContext(co.cask.cdap.etl.api.batch.BatchRuntimeContext) DefaultMacroEvaluator(co.cask.cdap.etl.common.DefaultMacroEvaluator) StageMetrics(co.cask.cdap.etl.api.StageMetrics) DefaultStageMetrics(co.cask.cdap.etl.common.DefaultStageMetrics) DefaultStageMetrics(co.cask.cdap.etl.common.DefaultStageMetrics)

Aggregations

NoopStageStatisticsCollector (co.cask.cdap.etl.common.NoopStageStatisticsCollector)6 DefaultMacroEvaluator (co.cask.cdap.etl.common.DefaultMacroEvaluator)4 StageStatisticsCollector (co.cask.cdap.etl.common.StageStatisticsCollector)3 PluginFunctionContext (co.cask.cdap.etl.spark.function.PluginFunctionContext)3 TxRunnable (co.cask.cdap.api.TxRunnable)2 DatasetContext (co.cask.cdap.api.data.DatasetContext)2 MacroEvaluator (co.cask.cdap.api.macro.MacroEvaluator)2 StageMetrics (co.cask.cdap.etl.api.StageMetrics)2 BatchJoinerRuntimeContext (co.cask.cdap.etl.api.batch.BatchJoinerRuntimeContext)2 BasicArguments (co.cask.cdap.etl.common.BasicArguments)2 DefaultStageMetrics (co.cask.cdap.etl.common.DefaultStageMetrics)2 PipelineRuntime (co.cask.cdap.etl.common.PipelineRuntime)2 SparkPipelineRuntime (co.cask.cdap.etl.spark.SparkPipelineRuntime)2 BatchSinkFunction (co.cask.cdap.etl.spark.function.BatchSinkFunction)2 TaskAttemptContext (org.apache.hadoop.mapreduce.TaskAttemptContext)2 PluginContext (co.cask.cdap.api.plugin.PluginContext)1 Alert (co.cask.cdap.etl.api.Alert)1 ErrorRecord (co.cask.cdap.etl.api.ErrorRecord)1 TransformContext (co.cask.cdap.etl.api.TransformContext)1 Transformation (co.cask.cdap.etl.api.Transformation)1