Search in sources :

Example 6 with StageStatisticsCollector

use of co.cask.cdap.etl.common.StageStatisticsCollector in project cdap by caskdata.

the class MapReduceTransformExecutorFactory method getTransformation.

@SuppressWarnings("unchecked")
private <IN, OUT> TrackedTransform<IN, OUT> getTransformation(StageSpec stageSpec) throws Exception {
    DefaultMacroEvaluator macroEvaluator = new DefaultMacroEvaluator(arguments, taskContext.getLogicalStartTime(), taskContext, taskContext.getNamespace());
    String stageName = stageSpec.getName();
    String pluginType = stageSpec.getPluginType();
    StageMetrics stageMetrics = new DefaultStageMetrics(metrics, stageName);
    TaskAttemptContext taskAttemptContext = (TaskAttemptContext) taskContext.getHadoopContext();
    StageStatisticsCollector collector = isPipelineContainsCondition ? new MapReduceStageStatisticsCollector(stageName, taskAttemptContext) : new NoopStageStatisticsCollector();
    if (BatchAggregator.PLUGIN_TYPE.equals(pluginType)) {
        BatchAggregator<?, ?, ?> batchAggregator = pluginInstantiator.newPluginInstance(stageName, macroEvaluator);
        BatchRuntimeContext runtimeContext = createRuntimeContext(stageSpec);
        batchAggregator.initialize(runtimeContext);
        if (isMapPhase) {
            return getTrackedEmitKeyStep(new MapperAggregatorTransformation(batchAggregator, mapOutputKeyClassName, mapOutputValClassName), stageMetrics, taskContext.getDataTracer(stageName), collector);
        } else {
            return getTrackedAggregateStep(new ReducerAggregatorTransformation(batchAggregator, mapOutputKeyClassName, mapOutputValClassName), stageMetrics, taskContext.getDataTracer(stageName), collector);
        }
    } else if (BatchJoiner.PLUGIN_TYPE.equals(pluginType)) {
        BatchJoiner<?, ?, ?> batchJoiner = pluginInstantiator.newPluginInstance(stageName, macroEvaluator);
        BatchJoinerRuntimeContext runtimeContext = createRuntimeContext(stageSpec);
        batchJoiner.initialize(runtimeContext);
        if (isMapPhase) {
            return getTrackedEmitKeyStep(new MapperJoinerTransformation(batchJoiner, mapOutputKeyClassName, mapOutputValClassName), stageMetrics, taskContext.getDataTracer(stageName), collector);
        } else {
            return getTrackedMergeStep(new ReducerJoinerTransformation(batchJoiner, mapOutputKeyClassName, mapOutputValClassName, runtimeContext.getInputSchemas().size()), stageMetrics, taskContext.getDataTracer(stageName), collector);
        }
    }
    Transformation transformation = getInitializedTransformation(stageSpec);
    boolean isLimitingSource = taskContext.getDataTracer(stageName).isEnabled() && BatchSource.PLUGIN_TYPE.equals(pluginType) && isMapPhase;
    transformation = isLimitingSource ? new LimitingTransform(transformation, numberOfRecordsPreview) : transformation;
    // we emit metrics for records into alert publishers when the actual alerts are published,
    // not when we write the alerts to the temporary dataset
    String recordsInMetric = AlertPublisher.PLUGIN_TYPE.equals(pluginType) ? null : Constants.Metrics.RECORDS_IN;
    return new TrackedTransform<>(transformation, stageMetrics, recordsInMetric, Constants.Metrics.RECORDS_OUT, taskContext.getDataTracer(stageName), collector);
}
Also used : BatchJoinerRuntimeContext(co.cask.cdap.etl.api.batch.BatchJoinerRuntimeContext) TrackedTransform(co.cask.cdap.etl.common.TrackedTransform) NoopStageStatisticsCollector(co.cask.cdap.etl.common.NoopStageStatisticsCollector) Transformation(co.cask.cdap.etl.api.Transformation) LimitingTransform(co.cask.cdap.etl.common.preview.LimitingTransform) TaskAttemptContext(org.apache.hadoop.mapreduce.TaskAttemptContext) BatchJoiner(co.cask.cdap.etl.api.batch.BatchJoiner) NoopStageStatisticsCollector(co.cask.cdap.etl.common.NoopStageStatisticsCollector) StageStatisticsCollector(co.cask.cdap.etl.common.StageStatisticsCollector) BatchRuntimeContext(co.cask.cdap.etl.api.batch.BatchRuntimeContext) DefaultMacroEvaluator(co.cask.cdap.etl.common.DefaultMacroEvaluator) StageMetrics(co.cask.cdap.etl.api.StageMetrics) DefaultStageMetrics(co.cask.cdap.etl.common.DefaultStageMetrics) DefaultStageMetrics(co.cask.cdap.etl.common.DefaultStageMetrics)

Aggregations

StageStatisticsCollector (co.cask.cdap.etl.common.StageStatisticsCollector)6 DefaultMacroEvaluator (co.cask.cdap.etl.common.DefaultMacroEvaluator)3 NoopStageStatisticsCollector (co.cask.cdap.etl.common.NoopStageStatisticsCollector)3 HashMap (java.util.HashMap)3 StageMetrics (co.cask.cdap.etl.api.StageMetrics)2 BatchJoinerRuntimeContext (co.cask.cdap.etl.api.batch.BatchJoinerRuntimeContext)2 DefaultStageMetrics (co.cask.cdap.etl.common.DefaultStageMetrics)2 PipelinePluginContext (co.cask.cdap.etl.common.plugin.PipelinePluginContext)2 SparkStageStatisticsCollector (co.cask.cdap.etl.spark.SparkStageStatisticsCollector)2 StageSpec (co.cask.cdap.etl.spec.StageSpec)2 Map (java.util.Map)2 TaskAttemptContext (org.apache.hadoop.mapreduce.TaskAttemptContext)2 MacroEvaluator (co.cask.cdap.api.macro.MacroEvaluator)1 Alert (co.cask.cdap.etl.api.Alert)1 ErrorRecord (co.cask.cdap.etl.api.ErrorRecord)1 TransformContext (co.cask.cdap.etl.api.TransformContext)1 Transformation (co.cask.cdap.etl.api.Transformation)1 BatchJoiner (co.cask.cdap.etl.api.batch.BatchJoiner)1 BatchRuntimeContext (co.cask.cdap.etl.api.batch.BatchRuntimeContext)1 SparkCompute (co.cask.cdap.etl.api.batch.SparkCompute)1