Search in sources :

Example 1 with StageMetrics

use of co.cask.cdap.etl.api.StageMetrics in project cdap by caskdata.

the class MapReduceTransformExecutorFactory method getMultiOutputTransform.

private <IN, ERROR> TrackedMultiOutputTransform<IN, ERROR> getMultiOutputTransform(StageSpec stageSpec) throws Exception {
    String stageName = stageSpec.getName();
    DefaultMacroEvaluator macroEvaluator = new DefaultMacroEvaluator(arguments, taskContext.getLogicalStartTime(), taskContext, taskContext.getNamespace());
    SplitterTransform<IN, ERROR> splitterTransform = pluginInstantiator.newPluginInstance(stageName, macroEvaluator);
    TransformContext transformContext = createRuntimeContext(stageSpec);
    splitterTransform.initialize(transformContext);
    StageMetrics stageMetrics = new DefaultStageMetrics(metrics, stageName);
    TaskAttemptContext taskAttemptContext = (TaskAttemptContext) taskContext.getHadoopContext();
    StageStatisticsCollector collector = isPipelineContainsCondition ? new MapReduceStageStatisticsCollector(stageName, taskAttemptContext) : new NoopStageStatisticsCollector();
    return new TrackedMultiOutputTransform<>(splitterTransform, stageMetrics, taskContext.getDataTracer(stageName), collector);
}
Also used : NoopStageStatisticsCollector(co.cask.cdap.etl.common.NoopStageStatisticsCollector) TaskAttemptContext(org.apache.hadoop.mapreduce.TaskAttemptContext) TrackedMultiOutputTransform(co.cask.cdap.etl.common.TrackedMultiOutputTransform) TransformContext(co.cask.cdap.etl.api.TransformContext) NoopStageStatisticsCollector(co.cask.cdap.etl.common.NoopStageStatisticsCollector) StageStatisticsCollector(co.cask.cdap.etl.common.StageStatisticsCollector) DefaultMacroEvaluator(co.cask.cdap.etl.common.DefaultMacroEvaluator) StageMetrics(co.cask.cdap.etl.api.StageMetrics) DefaultStageMetrics(co.cask.cdap.etl.common.DefaultStageMetrics) DefaultStageMetrics(co.cask.cdap.etl.common.DefaultStageMetrics)

Example 2 with StageMetrics

use of co.cask.cdap.etl.api.StageMetrics in project cdap by caskdata.

the class MapReduceTransformExecutorFactory method getTransformation.

@SuppressWarnings("unchecked")
@Override
protected TrackedTransform getTransformation(StageInfo stageInfo) throws Exception {
    DefaultMacroEvaluator macroEvaluator = new DefaultMacroEvaluator(taskContext.getWorkflowToken(), taskContext.getRuntimeArguments(), taskContext.getLogicalStartTime(), taskContext, taskContext.getNamespace());
    String stageName = stageInfo.getName();
    String pluginType = stageInfo.getPluginType();
    StageMetrics stageMetrics = new DefaultStageMetrics(metrics, stageName);
    if (BatchAggregator.PLUGIN_TYPE.equals(pluginType)) {
        BatchAggregator<?, ?, ?> batchAggregator = pluginInstantiator.newPluginInstance(stageName, macroEvaluator);
        BatchRuntimeContext runtimeContext = createRuntimeContext(stageInfo);
        batchAggregator.initialize(runtimeContext);
        if (isMapPhase) {
            return getTrackedEmitKeyStep(new MapperAggregatorTransformation(batchAggregator, mapOutputKeyClassName, mapOutputValClassName), stageMetrics, taskContext.getDataTracer(stageName));
        } else {
            return getTrackedAggregateStep(new ReducerAggregatorTransformation(batchAggregator, mapOutputKeyClassName, mapOutputValClassName), stageMetrics, taskContext.getDataTracer(stageName));
        }
    } else if (BatchJoiner.PLUGIN_TYPE.equals(pluginType)) {
        BatchJoiner<?, ?, ?> batchJoiner = pluginInstantiator.newPluginInstance(stageName, macroEvaluator);
        BatchJoinerRuntimeContext runtimeContext = createRuntimeContext(stageInfo);
        batchJoiner.initialize(runtimeContext);
        if (isMapPhase) {
            return getTrackedEmitKeyStep(new MapperJoinerTransformation(batchJoiner, mapOutputKeyClassName, mapOutputValClassName), stageMetrics, taskContext.getDataTracer(stageName));
        } else {
            return getTrackedMergeStep(new ReducerJoinerTransformation(batchJoiner, mapOutputKeyClassName, mapOutputValClassName, runtimeContext.getInputSchemas().size()), stageMetrics, taskContext.getDataTracer(stageName));
        }
    }
    Transformation transformation = getInitializedTransformation(stageInfo);
    boolean isLimitingSource = taskContext.getDataTracer(stageName).isEnabled() && BatchSource.PLUGIN_TYPE.equals(pluginType) && isMapPhase;
    return new TrackedTransform(isLimitingSource ? new LimitingTransform(transformation, numberOfRecordsPreview) : transformation, stageMetrics, taskContext.getDataTracer(stageName));
}
Also used : BatchJoinerRuntimeContext(co.cask.cdap.etl.api.batch.BatchJoinerRuntimeContext) TrackedTransform(co.cask.cdap.etl.common.TrackedTransform) Transformation(co.cask.cdap.etl.api.Transformation) LimitingTransform(co.cask.cdap.etl.common.preview.LimitingTransform) BatchJoiner(co.cask.cdap.etl.api.batch.BatchJoiner) BatchRuntimeContext(co.cask.cdap.etl.api.batch.BatchRuntimeContext) DefaultMacroEvaluator(co.cask.cdap.etl.common.DefaultMacroEvaluator) StageMetrics(co.cask.cdap.etl.api.StageMetrics) DefaultStageMetrics(co.cask.cdap.etl.common.DefaultStageMetrics) DefaultStageMetrics(co.cask.cdap.etl.common.DefaultStageMetrics)

Example 3 with StageMetrics

use of co.cask.cdap.etl.api.StageMetrics in project cdap by caskdata.

the class ETLWorker method initializeTransforms.

private void initializeTransforms(WorkerContext context, Map<String, TransformDetail> transformDetailMap, PipelinePhase pipeline) throws Exception {
    Set<StageInfo> transformInfos = pipeline.getStagesOfType(Transform.PLUGIN_TYPE);
    Preconditions.checkArgument(transformInfos != null);
    tranformIdToDatasetName = new HashMap<>(transformInfos.size());
    for (StageInfo transformInfo : transformInfos) {
        String transformName = transformInfo.getName();
        try {
            Transform<?, ?> transform = context.newPluginInstance(transformName);
            ;
            transform = new WrappedTransform<>(transform, Caller.DEFAULT);
            WorkerRealtimeContext transformContext = new WorkerRealtimeContext(context, metrics, new TxLookupProvider(context), transformInfo);
            LOG.debug("Transform Class : {}", transform.getClass().getName());
            transform.initialize(transformContext);
            StageMetrics stageMetrics = new DefaultStageMetrics(metrics, transformName);
            transformDetailMap.put(transformName, new TransformDetail(new TrackedTransform<>(transform, stageMetrics, context.getDataTracer(transformName)), pipeline.getStageOutputs(transformName)));
            if (transformInfo.getErrorDatasetName() != null) {
                tranformIdToDatasetName.put(transformName, transformInfo.getErrorDatasetName());
            }
        } catch (InstantiationException e) {
            LOG.error("Unable to instantiate Transform", e);
            Throwables.propagate(e);
        }
    }
}
Also used : TrackedTransform(co.cask.cdap.etl.common.TrackedTransform) StageInfo(co.cask.cdap.etl.planner.StageInfo) TxLookupProvider(co.cask.cdap.etl.common.TxLookupProvider) TransformDetail(co.cask.cdap.etl.common.TransformDetail) StageMetrics(co.cask.cdap.etl.api.StageMetrics) DefaultStageMetrics(co.cask.cdap.etl.common.DefaultStageMetrics) DefaultStageMetrics(co.cask.cdap.etl.common.DefaultStageMetrics)

Example 4 with StageMetrics

use of co.cask.cdap.etl.api.StageMetrics in project cdap by caskdata.

the class RDDCollection method publishAlerts.

@Override
public void publishAlerts(StageSpec stageSpec, StageStatisticsCollector collector) throws Exception {
    PluginFunctionContext pluginFunctionContext = new PluginFunctionContext(stageSpec, sec, collector);
    AlertPublisher alertPublisher = pluginFunctionContext.createPlugin();
    PipelineRuntime pipelineRuntime = new SparkPipelineRuntime(sec);
    AlertPublisherContext alertPublisherContext = new DefaultAlertPublisherContext(pipelineRuntime, stageSpec, sec.getMessagingContext(), sec.getAdmin());
    alertPublisher.initialize(alertPublisherContext);
    StageMetrics stageMetrics = new DefaultStageMetrics(sec.getMetrics(), stageSpec.getName());
    TrackedIterator<Alert> trackedAlerts = new TrackedIterator<>(((JavaRDD<Alert>) rdd).collect().iterator(), stageMetrics, Constants.Metrics.RECORDS_IN);
    alertPublisher.publish(trackedAlerts);
    alertPublisher.destroy();
}
Also used : PluginFunctionContext(co.cask.cdap.etl.spark.function.PluginFunctionContext) AlertPublisher(co.cask.cdap.etl.api.AlertPublisher) SparkPipelineRuntime(co.cask.cdap.etl.spark.SparkPipelineRuntime) PipelineRuntime(co.cask.cdap.etl.common.PipelineRuntime) SparkPipelineRuntime(co.cask.cdap.etl.spark.SparkPipelineRuntime) TrackedIterator(co.cask.cdap.etl.common.TrackedIterator) Alert(co.cask.cdap.etl.api.Alert) DefaultAlertPublisherContext(co.cask.cdap.etl.common.DefaultAlertPublisherContext) AlertPublisherContext(co.cask.cdap.etl.api.AlertPublisherContext) DefaultAlertPublisherContext(co.cask.cdap.etl.common.DefaultAlertPublisherContext) StageMetrics(co.cask.cdap.etl.api.StageMetrics) DefaultStageMetrics(co.cask.cdap.etl.common.DefaultStageMetrics) DefaultStageMetrics(co.cask.cdap.etl.common.DefaultStageMetrics) JavaRDD(org.apache.spark.api.java.JavaRDD)

Example 5 with StageMetrics

use of co.cask.cdap.etl.api.StageMetrics in project cdap by caskdata.

the class StreamingAlertPublishFunction method call.

@Override
public Void call(JavaRDD<Alert> data, Time batchTime) throws Exception {
    MacroEvaluator evaluator = new DefaultMacroEvaluator(new BasicArguments(sec), batchTime.milliseconds(), sec.getSecureStore(), sec.getNamespace());
    PluginContext pluginContext = new SparkPipelinePluginContext(sec.getPluginContext(), sec.getMetrics(), stageSpec.isStageLoggingEnabled(), stageSpec.isProcessTimingEnabled());
    String stageName = stageSpec.getName();
    AlertPublisher alertPublisher = pluginContext.newPluginInstance(stageName, evaluator);
    PipelineRuntime pipelineRuntime = new SparkPipelineRuntime(sec, batchTime.milliseconds());
    AlertPublisherContext alertPublisherContext = new DefaultAlertPublisherContext(pipelineRuntime, stageSpec, sec.getMessagingContext(), sec.getAdmin());
    alertPublisher.initialize(alertPublisherContext);
    StageMetrics stageMetrics = new DefaultStageMetrics(sec.getMetrics(), stageName);
    TrackedIterator<Alert> trackedAlerts = new TrackedIterator<>(data.collect().iterator(), stageMetrics, Constants.Metrics.RECORDS_IN);
    alertPublisher.publish(trackedAlerts);
    alertPublisher.destroy();
    return null;
}
Also used : MacroEvaluator(co.cask.cdap.api.macro.MacroEvaluator) DefaultMacroEvaluator(co.cask.cdap.etl.common.DefaultMacroEvaluator) AlertPublisher(co.cask.cdap.etl.api.AlertPublisher) PipelineRuntime(co.cask.cdap.etl.common.PipelineRuntime) SparkPipelineRuntime(co.cask.cdap.etl.spark.SparkPipelineRuntime) SparkPipelinePluginContext(co.cask.cdap.etl.spark.plugin.SparkPipelinePluginContext) PluginContext(co.cask.cdap.api.plugin.PluginContext) SparkPipelineRuntime(co.cask.cdap.etl.spark.SparkPipelineRuntime) TrackedIterator(co.cask.cdap.etl.common.TrackedIterator) SparkPipelinePluginContext(co.cask.cdap.etl.spark.plugin.SparkPipelinePluginContext) DefaultMacroEvaluator(co.cask.cdap.etl.common.DefaultMacroEvaluator) Alert(co.cask.cdap.etl.api.Alert) BasicArguments(co.cask.cdap.etl.common.BasicArguments) DefaultAlertPublisherContext(co.cask.cdap.etl.common.DefaultAlertPublisherContext) AlertPublisherContext(co.cask.cdap.etl.api.AlertPublisherContext) DefaultAlertPublisherContext(co.cask.cdap.etl.common.DefaultAlertPublisherContext) StageMetrics(co.cask.cdap.etl.api.StageMetrics) DefaultStageMetrics(co.cask.cdap.etl.common.DefaultStageMetrics) DefaultStageMetrics(co.cask.cdap.etl.common.DefaultStageMetrics)

Aggregations

StageMetrics (co.cask.cdap.etl.api.StageMetrics)8 DefaultStageMetrics (co.cask.cdap.etl.common.DefaultStageMetrics)8 DefaultMacroEvaluator (co.cask.cdap.etl.common.DefaultMacroEvaluator)5 Alert (co.cask.cdap.etl.api.Alert)3 AlertPublisher (co.cask.cdap.etl.api.AlertPublisher)3 AlertPublisherContext (co.cask.cdap.etl.api.AlertPublisherContext)3 BatchJoiner (co.cask.cdap.etl.api.batch.BatchJoiner)3 DefaultAlertPublisherContext (co.cask.cdap.etl.common.DefaultAlertPublisherContext)3 PipelineRuntime (co.cask.cdap.etl.common.PipelineRuntime)3 TrackedIterator (co.cask.cdap.etl.common.TrackedIterator)3 TrackedTransform (co.cask.cdap.etl.common.TrackedTransform)3 MacroEvaluator (co.cask.cdap.api.macro.MacroEvaluator)2 Transformation (co.cask.cdap.etl.api.Transformation)2 BatchJoinerRuntimeContext (co.cask.cdap.etl.api.batch.BatchJoinerRuntimeContext)2 BatchRuntimeContext (co.cask.cdap.etl.api.batch.BatchRuntimeContext)2 PostAction (co.cask.cdap.etl.api.batch.PostAction)2 NoopStageStatisticsCollector (co.cask.cdap.etl.common.NoopStageStatisticsCollector)2 StageStatisticsCollector (co.cask.cdap.etl.common.StageStatisticsCollector)2 LimitingTransform (co.cask.cdap.etl.common.preview.LimitingTransform)2 SparkPipelineRuntime (co.cask.cdap.etl.spark.SparkPipelineRuntime)2