Use of io.cdap.cdap.etl.api.batch.BatchReducibleAggregator in project cdap by caskdata.
From the class PipelinePhasePreparer, method prepare.
/**
 * Instantiates and prepares every stage of the given phase, then returns the Finishers that must be run once the
 * pipeline completes.
 *
 * <p>Stages are visited in the topological order of the phase DAG so that any arguments set by one stage are
 * visible to the stages that follow it. A stage for which no submitter is produced is skipped.
 *
 * @param phaseSpec the pipeline phase to prepare
 * @return list of finishers that should be run when the pipeline ends, in the order the stages were prepared
 * @throws IllegalStateException if an aggregator or joiner stage is backed by a plugin of an unsupported class
 */
public List<Finisher> prepare(PhaseSpec phaseSpec) throws TransactionFailureException, InstantiationException, IOException {
  PipelinePluginInstantiator instantiator = getPluginInstantiator(phaseSpec);
  PipelinePhase pipelinePhase = phaseSpec.getPhase();
  List<Finisher> pendingFinishers = new ArrayList<>();

  for (String stageName : pipelinePhase.getDag().getTopologicalOrder()) {
    StageSpec spec = pipelinePhase.getStage(stageName);
    SubmitterPlugin submitter = newSubmitterFor(instantiator, pipelinePhase, spec, stageName);
    if (submitter == null) {
      // nothing to prepare (and nothing to finish) for this stage
      continue;
    }
    submitter.prepareRun();
    pendingFinishers.add(submitter);
  }
  return pendingFinishers;
}

/**
 * Instantiates the plugin behind a single stage and wraps it in the submitter that matches its plugin type.
 *
 * @param pluginInstantiator used to create the plugin instance for the stage
 * @param phase the phase the stage belongs to; consulted to tell connector sources from connector sinks
 * @param stageSpec specification of the stage
 * @param stageName name of the stage as listed in the phase DAG
 * @return the submitter for the stage; may be null when the fallback {@code create} call yields none
 * @throws IllegalStateException if an aggregator or joiner stage is backed by a plugin of an unsupported class
 */
private SubmitterPlugin newSubmitterFor(PipelinePluginInstantiator pluginInstantiator, PipelinePhase phase, StageSpec stageSpec, String stageName) throws TransactionFailureException, InstantiationException, IOException {
  String type = stageSpec.getPluginType();
  // a connector acts as a source or as a sink depending on where it sits in this phase
  boolean connector = Constants.Connector.PLUGIN_TYPE.equals(type);
  boolean connectorSource = connector && phase.getSources().contains(stageName);
  boolean connectorSink = connector && phase.getSinks().contains(stageName);

  if (BatchSource.PLUGIN_TYPE.equals(type) || connectorSource) {
    BatchConfigurable<BatchSourceContext> source = pluginInstantiator.newPluginInstance(stageName, macroEvaluator);
    return createSource(source, stageSpec);
  }

  if (BatchSink.PLUGIN_TYPE.equals(type) || AlertPublisher.PLUGIN_TYPE.equals(type) || connectorSink) {
    BatchConfigurable<BatchSinkContext> sink = pluginInstantiator.newPluginInstance(stageName, macroEvaluator);
    return createSink(sink, stageSpec);
  }

  if (Transform.PLUGIN_TYPE.equals(type) || ErrorTransform.PLUGIN_TYPE.equals(type)) {
    Transform<?, ?> transformPlugin = pluginInstantiator.newPluginInstance(stageName, macroEvaluator);
    return createTransform(transformPlugin, stageSpec);
  }

  if (BatchAggregator.PLUGIN_TYPE.equals(type)) {
    // the aggregator plugin type can be backed by either of two plugin classes
    // NOTE(review): BatchAggregator is tested before BatchReducibleAggregator here, the reverse of other call
    // sites; this only matters if a plugin can be both -- confirm against the type hierarchy.
    Object instance = pluginInstantiator.newPluginInstance(stageName, macroEvaluator);
    if (instance instanceof BatchAggregator) {
      return createAggregator((BatchAggregator<?, ?, ?>) instance, stageSpec);
    }
    if (instance instanceof BatchReducibleAggregator) {
      return createReducibleAggregator((BatchReducibleAggregator<?, ?, ?, ?>) instance, stageSpec);
    }
    throw new IllegalStateException(String.format("Aggregator stage '%s' is of an unsupported class '%s'.", stageSpec.getName(), instance.getClass().getName()));
  }

  if (BatchJoiner.PLUGIN_TYPE.equals(type)) {
    // the joiner plugin type can likewise be backed by either of two plugin classes
    Object instance = pluginInstantiator.newPluginInstance(stageName, macroEvaluator);
    if (instance instanceof BatchJoiner) {
      return createJoiner((BatchJoiner<?, ?, ?>) instance, stageSpec);
    }
    if (instance instanceof BatchAutoJoiner) {
      BatchAutoJoiner autoJoiner = (BatchAutoJoiner) instance;
      // auto joiners are validated before a submitter is created for them
      validateAutoJoiner(autoJoiner, stageSpec);
      return createAutoJoiner(autoJoiner, stageSpec);
    }
    throw new IllegalStateException(String.format("Join stage '%s' is of an unsupported class '%s'.", stageSpec.getName(), instance.getClass().getName()));
  }

  if (SplitterTransform.PLUGIN_TYPE.equals(type)) {
    SplitterTransform<?, ?> splitter = pluginInstantiator.newPluginInstance(stageName, macroEvaluator);
    return createSplitterTransform(splitter, stageSpec);
  }

  // any other plugin type is handled by the generic create hook
  return create(pluginInstantiator, stageSpec);
}
Use of io.cdap.cdap.etl.api.batch.BatchReducibleAggregator in project cdap by caskdata.
From the class MapReduceTransformExecutorFactory, method getTransformation.
/**
 * Builds the tracked transformation for a stage.
 *
 * <p>Aggregator and joiner stages are handled here: the plugin is instantiated, initialized with a runtime
 * context, and wrapped in a mapper-side or reducer-side transformation depending on {@code isMapPhase}. Every
 * other plugin type is delegated to the superclass.
 *
 * @param stageSpec specification of the stage to build a transformation for
 * @return the tracked transformation for the stage
 * @throws Exception if the plugin cannot be instantiated or initialized, or if an auto-join definition is
 *   rejected by the failure collector
 */
@SuppressWarnings("unchecked")
@Override
protected <IN, OUT> TrackedTransform<IN, OUT> getTransformation(StageSpec stageSpec) throws Exception {
  String stageName = stageSpec.getName();
  String type = stageSpec.getPluginType();
  StageMetrics metricsForStage = new DefaultStageMetrics(metrics, stageName);
  TaskAttemptContext hadoopTaskContext = (TaskAttemptContext) taskContext.getHadoopContext();
  StageStatisticsCollector statsCollector;
  if (collectStageStatistics) {
    statsCollector = new MapReduceStageStatisticsCollector(stageName, hadoopTaskContext);
  } else {
    statsCollector = new NoopStageStatisticsCollector();
  }

  if (BatchAggregator.PLUGIN_TYPE.equals(type)) {
    Object instance = pluginInstantiator.newPluginInstance(stageName, macroEvaluator);
    BatchAggregator<?, ?, ?> aggregator;
    if (!(instance instanceof BatchReducibleAggregator)) {
      aggregator = (BatchAggregator<?, ?, ?>) instance;
    } else {
      // reducible aggregators are adapted to the BatchAggregator interface through a bridge
      BatchReducibleAggregator<?, ?, ?, ?> reducible = (BatchReducibleAggregator<?, ?, ?, ?>) instance;
      aggregator = new AggregatorBridge<>(reducible);
    }
    BatchRuntimeContext aggregatorContext = createRuntimeContext(stageSpec);
    aggregator.initialize(aggregatorContext);
    if (!isMapPhase) {
      return getTrackedAggregateStep(new ReducerAggregatorTransformation(aggregator, mapOutputKeyClassName, mapOutputValClassName), metricsForStage, getDataTracer(stageName), statsCollector);
    }
    return getTrackedEmitKeyStep(new MapperAggregatorTransformation(aggregator, mapOutputKeyClassName, mapOutputValClassName), metricsForStage, getDataTracer(stageName), statsCollector);
  }

  if (BatchJoiner.PLUGIN_TYPE.equals(type)) {
    Object instance = pluginInstantiator.newPluginInstance(stageName, macroEvaluator);
    // names of stages whose null-keyed records are to be filtered out on the map side; empty unless set below
    Set<String> nullKeyFilteredStages = new HashSet<>();
    BatchJoiner<?, ?, ?> joiner;
    if (!(instance instanceof BatchAutoJoiner)) {
      joiner = (BatchJoiner<?, ?, ?>) instance;
    } else {
      BatchAutoJoiner autoJoiner = (BatchAutoJoiner) instance;
      FailureCollector failures = new LoggingFailureCollector(stageName, stageSpec.getInputSchemas());
      DefaultAutoJoinerContext joinerContext = DefaultAutoJoinerContext.from(stageSpec.getInputSchemas(), failures);
      // the definition is expected to be non-null because PipelinePhasePreparer validates it at the start of the run
      JoinDefinition definition = autoJoiner.define(joinerContext);
      JoinCondition joinCondition = definition.getCondition();
      // deployment-time checks should already rule this out; it is re-checked here as a safeguard
      if (joinCondition.getOp() != JoinCondition.Op.KEY_EQUALITY) {
        failures.addFailure(String.format("Join stage '%s' uses a %s condition, which is not supported with the MapReduce engine.", stageName, joinCondition.getOp()), "Switch to a different execution engine.");
      }
      failures.getOrThrowException();
      joiner = new JoinerBridge(stageName, autoJoiner, definition);
      // a non-null-safe key equality join is the same as dropping null-keyed records that come from optional stages
      if (joinCondition.getOp() == JoinCondition.Op.KEY_EQUALITY && !((JoinCondition.OnKeys) joinCondition).isNullSafe()) {
        for (JoinStage joinStage : definition.getStages()) {
          if (!joinStage.isRequired()) {
            nullKeyFilteredStages.add(joinStage.getStageName());
          }
        }
      }
    }
    BatchJoinerRuntimeContext joinerRuntimeContext = createRuntimeContext(stageSpec);
    joiner.initialize(joinerRuntimeContext);
    if (!isMapPhase) {
      return getTrackedMergeStep(new ReducerJoinerTransformation(joiner, mapOutputKeyClassName, mapOutputValClassName, joinerRuntimeContext.getInputSchemas().size()), metricsForStage, getDataTracer(stageName), statsCollector);
    }
    return getTrackedEmitKeyStep(new MapperJoinerTransformation(joiner, mapOutputKeyClassName, mapOutputValClassName, nullKeyFilteredStages), metricsForStage, getDataTracer(stageName), statsCollector);
  }

  // neither an aggregator nor a joiner: let the superclass build the transformation
  return super.getTransformation(stageSpec);
}
Use of io.cdap.cdap.etl.api.batch.BatchReducibleAggregator in project cdap by caskdata.
From the class PipelinePluginContext, method wrapPlugin.
/**
 * Wraps a plugin instance in the wrapper class that corresponds to its runtime type.
 *
 * <p>Every wrapper receives the {@link Caller} obtained for the plugin id; most also receive an
 * {@link OperationTimer}, which is metrics-backed when {@code processTimingEnabled} is set and a no-op otherwise.
 * Plugins that match none of the known types are handed to {@code wrapUnknownPlugin}.
 *
 * @param pluginId id of the plugin, used to look up its caller and to scope its stage metrics
 * @param plugin the plugin instance to wrap
 * @return the wrapped plugin
 */
private Object wrapPlugin(String pluginId, Object plugin) {
  Caller pluginCaller = getCaller(pluginId);
  StageMetrics pluginMetrics = new DefaultStageMetrics(metrics, pluginId);
  OperationTimer timer;
  if (processTimingEnabled) {
    timer = new MetricsOperationTimer(pluginMetrics);
  } else {
    timer = NoOpOperationTimer.INSTANCE;
  }

  // NOTE(review): the order of these checks is kept exactly as it was; ErrorTransform is tested before Transform
  // and BatchReducibleAggregator before BatchAggregator, presumably so the more specific type wins -- confirm.
  if (plugin instanceof Action) {
    return new WrappedAction((Action) plugin, pluginCaller);
  }
  if (plugin instanceof BatchSource) {
    return new WrappedBatchSource<>((BatchSource) plugin, pluginCaller, timer);
  }
  if (plugin instanceof BatchSink) {
    return new WrappedBatchSink<>((BatchSink) plugin, pluginCaller, timer);
  }
  if (plugin instanceof ErrorTransform) {
    return new WrappedErrorTransform<>((ErrorTransform) plugin, pluginCaller, timer);
  }
  if (plugin instanceof Transform) {
    return new WrappedTransform<>((Transform) plugin, pluginCaller, timer);
  }
  if (plugin instanceof BatchReducibleAggregator) {
    return new WrappedReduceAggregator<>((BatchReducibleAggregator) plugin, pluginCaller, timer);
  }
  if (plugin instanceof BatchAggregator) {
    return new WrappedBatchAggregator<>((BatchAggregator) plugin, pluginCaller, timer);
  }
  if (plugin instanceof BatchJoiner) {
    return new WrappedBatchJoiner<>((BatchJoiner) plugin, pluginCaller, timer);
  }
  if (plugin instanceof PostAction) {
    return new WrappedPostAction((PostAction) plugin, pluginCaller);
  }
  if (plugin instanceof SplitterTransform) {
    return new WrappedSplitterTransform<>((SplitterTransform) plugin, pluginCaller, timer);
  }
  return wrapUnknownPlugin(pluginId, plugin, pluginCaller);
}
Aggregations