use of co.cask.cdap.etl.api.Transformation in project cdap by caskdata.
the class MapReduceTransformExecutorFactory method getTransformation.
@SuppressWarnings("unchecked")
@Override
protected TrackedTransform getTransformation(StageInfo stageInfo) throws Exception {
DefaultMacroEvaluator macroEvaluator = new DefaultMacroEvaluator(taskContext.getWorkflowToken(), taskContext.getRuntimeArguments(), taskContext.getLogicalStartTime(), taskContext, taskContext.getNamespace());
String stageName = stageInfo.getName();
String pluginType = stageInfo.getPluginType();
StageMetrics stageMetrics = new DefaultStageMetrics(metrics, stageName);
if (BatchAggregator.PLUGIN_TYPE.equals(pluginType)) {
BatchAggregator<?, ?, ?> batchAggregator = pluginInstantiator.newPluginInstance(stageName, macroEvaluator);
BatchRuntimeContext runtimeContext = createRuntimeContext(stageInfo);
batchAggregator.initialize(runtimeContext);
if (isMapPhase) {
return getTrackedEmitKeyStep(new MapperAggregatorTransformation(batchAggregator, mapOutputKeyClassName, mapOutputValClassName), stageMetrics, taskContext.getDataTracer(stageName));
} else {
return getTrackedAggregateStep(new ReducerAggregatorTransformation(batchAggregator, mapOutputKeyClassName, mapOutputValClassName), stageMetrics, taskContext.getDataTracer(stageName));
}
} else if (BatchJoiner.PLUGIN_TYPE.equals(pluginType)) {
BatchJoiner<?, ?, ?> batchJoiner = pluginInstantiator.newPluginInstance(stageName, macroEvaluator);
BatchJoinerRuntimeContext runtimeContext = createRuntimeContext(stageInfo);
batchJoiner.initialize(runtimeContext);
if (isMapPhase) {
return getTrackedEmitKeyStep(new MapperJoinerTransformation(batchJoiner, mapOutputKeyClassName, mapOutputValClassName), stageMetrics, taskContext.getDataTracer(stageName));
} else {
return getTrackedMergeStep(new ReducerJoinerTransformation(batchJoiner, mapOutputKeyClassName, mapOutputValClassName, runtimeContext.getInputSchemas().size()), stageMetrics, taskContext.getDataTracer(stageName));
}
}
Transformation transformation = getInitializedTransformation(stageInfo);
boolean isLimitingSource = taskContext.getDataTracer(stageName).isEnabled() && BatchSource.PLUGIN_TYPE.equals(pluginType) && isMapPhase;
return new TrackedTransform(isLimitingSource ? new LimitingTransform(transformation, numberOfRecordsPreview) : transformation, stageMetrics, taskContext.getDataTracer(stageName));
}
use of co.cask.cdap.etl.api.Transformation in project cdap by caskdata.
the class ETLWorker method initializeSinks.
@SuppressWarnings("unchecked")
private void initializeSinks(WorkerContext context, Map<String, TransformDetail> transformationMap, PipelinePhase pipeline) throws Exception {
Set<StageInfo> sinkInfos = pipeline.getStagesOfType(RealtimeSink.PLUGIN_TYPE);
sinks = new HashMap<>(sinkInfos.size());
for (StageInfo sinkInfo : sinkInfos) {
String sinkName = sinkInfo.getName();
RealtimeSink sink = context.newPluginInstance(sinkName);
sink = new LoggedRealtimeSink(sinkName, sink);
WorkerRealtimeContext sinkContext = new WorkerRealtimeContext(context, metrics, new TxLookupProvider(context), sinkInfo);
LOG.debug("Sink Class : {}", sink.getClass().getName());
sink.initialize(sinkContext);
sink = new TrackedRealtimeSink(sink, new DefaultStageMetrics(metrics, sinkName));
Transformation identityTransformation = new Transformation() {
@Override
public void transform(Object input, Emitter emitter) throws Exception {
emitter.emit(input);
}
};
// we use identity transformation to simplify executing transformation in pipeline (similar to ETLMapreduce),
// since we want to emit metrics during write to sink and not during this transformation, we use NoOpMetrics.
TrackedTransform trackedTransform = new TrackedTransform(identityTransformation, new DefaultStageMetrics(metrics, sinkName), TrackedTransform.RECORDS_IN, null, context.getDataTracer(sinkName));
transformationMap.put(sinkInfo.getName(), new TransformDetail(trackedTransform, new HashSet<String>()));
sinks.put(sinkInfo.getName(), sink);
}
}
use of co.cask.cdap.etl.api.Transformation in project cdap by caskdata.
the class MapReduceTransformExecutorFactory method getTransformation.
@SuppressWarnings("unchecked")
private <IN, OUT> TrackedTransform<IN, OUT> getTransformation(StageSpec stageSpec) throws Exception {
DefaultMacroEvaluator macroEvaluator = new DefaultMacroEvaluator(arguments, taskContext.getLogicalStartTime(), taskContext, taskContext.getNamespace());
String stageName = stageSpec.getName();
String pluginType = stageSpec.getPluginType();
StageMetrics stageMetrics = new DefaultStageMetrics(metrics, stageName);
TaskAttemptContext taskAttemptContext = (TaskAttemptContext) taskContext.getHadoopContext();
StageStatisticsCollector collector = isPipelineContainsCondition ? new MapReduceStageStatisticsCollector(stageName, taskAttemptContext) : new NoopStageStatisticsCollector();
if (BatchAggregator.PLUGIN_TYPE.equals(pluginType)) {
BatchAggregator<?, ?, ?> batchAggregator = pluginInstantiator.newPluginInstance(stageName, macroEvaluator);
BatchRuntimeContext runtimeContext = createRuntimeContext(stageSpec);
batchAggregator.initialize(runtimeContext);
if (isMapPhase) {
return getTrackedEmitKeyStep(new MapperAggregatorTransformation(batchAggregator, mapOutputKeyClassName, mapOutputValClassName), stageMetrics, taskContext.getDataTracer(stageName), collector);
} else {
return getTrackedAggregateStep(new ReducerAggregatorTransformation(batchAggregator, mapOutputKeyClassName, mapOutputValClassName), stageMetrics, taskContext.getDataTracer(stageName), collector);
}
} else if (BatchJoiner.PLUGIN_TYPE.equals(pluginType)) {
BatchJoiner<?, ?, ?> batchJoiner = pluginInstantiator.newPluginInstance(stageName, macroEvaluator);
BatchJoinerRuntimeContext runtimeContext = createRuntimeContext(stageSpec);
batchJoiner.initialize(runtimeContext);
if (isMapPhase) {
return getTrackedEmitKeyStep(new MapperJoinerTransformation(batchJoiner, mapOutputKeyClassName, mapOutputValClassName), stageMetrics, taskContext.getDataTracer(stageName), collector);
} else {
return getTrackedMergeStep(new ReducerJoinerTransformation(batchJoiner, mapOutputKeyClassName, mapOutputValClassName, runtimeContext.getInputSchemas().size()), stageMetrics, taskContext.getDataTracer(stageName), collector);
}
}
Transformation transformation = getInitializedTransformation(stageSpec);
boolean isLimitingSource = taskContext.getDataTracer(stageName).isEnabled() && BatchSource.PLUGIN_TYPE.equals(pluginType) && isMapPhase;
transformation = isLimitingSource ? new LimitingTransform(transformation, numberOfRecordsPreview) : transformation;
// we emit metrics for records into alert publishers when the actual alerts are published,
// not when we write the alerts to the temporary dataset
String recordsInMetric = AlertPublisher.PLUGIN_TYPE.equals(pluginType) ? null : Constants.Metrics.RECORDS_IN;
return new TrackedTransform<>(transformation, stageMetrics, recordsInMetric, Constants.Metrics.RECORDS_OUT, taskContext.getDataTracer(stageName), collector);
}
Aggregations