Use of io.cdap.cdap.etl.proto.v2.spec.StageSpec in project cdap by caskdata.
Class SparkPipelineRunner, method handleJoin.
/**
 * Dispatches a joiner stage to the join implementation matching the plugin's type.
 *
 * <p>A {@link BatchJoiner} is initialized with a batch runtime context and its stage is always recorded as a
 * shuffler. An {@link AutoJoiner} is asked for its {@link JoinDefinition}; its stage is recorded as a shuffler
 * only when none of the join stages is a broadcast stage.
 *
 * @param inputDataCollections collections keyed by name, handed unchanged to the join implementation
 * @param pipelinePhase phase used to look up the inputs of this stage and their specs
 * @param pluginFunctionContext used to create the runtime context for a {@link BatchJoiner}
 * @param stageSpec spec of the joiner stage; only its name is read here
 * @param functionCacheFactory handed to the batch join implementation
 * @param plugin the joiner plugin instance
 * @param numPartitions handed unchanged to the join implementation
 * @param collector handed to the batch join implementation
 * @param shufflers set that the stage name is added to when the stage is treated as a shuffler
 * @return the collection produced by the selected join implementation
 * @throws IllegalStateException if the plugin is neither a {@link BatchJoiner} nor an {@link AutoJoiner}
 * @throws Exception propagated from the plugin or from the join implementation
 */
protected SparkCollection<Object> handleJoin(Map<String, SparkCollection<Object>> inputDataCollections,
                                             PipelinePhase pipelinePhase,
                                             PluginFunctionContext pluginFunctionContext,
                                             StageSpec stageSpec,
                                             FunctionCache.Factory functionCacheFactory,
                                             Object plugin,
                                             Integer numPartitions,
                                             StageStatisticsCollector collector,
                                             Set<String> shufflers) throws Exception {
  String stageName = stageSpec.getName();

  if (plugin instanceof BatchJoiner) {
    BatchJoiner<Object, Object, Object> batchJoiner = (BatchJoiner<Object, Object, Object>) plugin;
    BatchJoinerRuntimeContext runtimeContext = pluginFunctionContext.createBatchRuntimeContext();
    batchJoiner.initialize(runtimeContext);
    shufflers.add(stageName);
    return handleJoin(batchJoiner, inputDataCollections, stageSpec, functionCacheFactory, numPartitions, collector);
  }

  if (!(plugin instanceof AutoJoiner)) {
    // should never happen unless there is a bug in the code. should have failed during deployment
    throw new IllegalStateException(String.format("Stage '%s' is an unknown joiner type %s", stageName, plugin.getClass().getName()));
  }

  AutoJoiner autoJoiner = (AutoJoiner) plugin;

  // map every input stage to the schema of the port it uses to write to this stage (null when there is no port)
  Map<String, Schema> schemasByInput = new HashMap<>();
  for (String inputName : pipelinePhase.getStageInputs(stageName)) {
    Port portToThisStage = pipelinePhase.getStage(inputName).getOutputPorts().get(stageName);
    schemasByInput.put(inputName, portToThisStage == null ? null : portToThisStage.getSchema());
  }

  FailureCollector failures = new LoggingFailureCollector(stageName, schemasByInput);
  AutoJoinerContext joinerContext = DefaultAutoJoinerContext.from(schemasByInput, failures);
  // the definition will always be non-null because
  // it is checked by PipelinePhasePreparer at the start of the run.
  JoinDefinition definition = autoJoiner.define(joinerContext);
  failures.getOrThrowException();

  boolean hasBroadcastStage = definition.getStages().stream().anyMatch(JoinStage::isBroadcast);
  if (!hasBroadcastStage) {
    shufflers.add(stageName);
  }
  return handleAutoJoin(stageName, definition, inputDataCollections, numPartitions);
}
Use of io.cdap.cdap.etl.proto.v2.spec.StageSpec in project cdap by caskdata.
Class PipelinePlannerTest, method testConditionsOnBranches.
/**
 * Plans a pipeline in which a source fans out to two branches, each ending in its own condition, and checks
 * the resulting phases and phase connections.
 */
@Test
public void testConditionsOnBranches() {
  /*
                  |-- true --> n2
        |--> c1 --|
   n1 --|         |-- false --> n3
        |
        |                |-- true --> n5
        |--> n4 --> c2 --|
                         |-- false --> n6
   */
  Set<StageSpec> stageSpecs = ImmutableSet.of(
    StageSpec.builder("c1", CONDITION).build(),
    StageSpec.builder("c2", CONDITION).build(),
    StageSpec.builder("n1", NODE).build(),
    StageSpec.builder("n2", NODE).build(),
    StageSpec.builder("n3", NODE).build(),
    StageSpec.builder("n4", NODE).build(),
    StageSpec.builder("n5", NODE).build(),
    StageSpec.builder("n6", NODE).build());
  Set<Connection> connections = ImmutableSet.of(
    new Connection("n1", "c1"),
    new Connection("n1", "n4"),
    new Connection("c1", "n2", true),
    new Connection("c1", "n3", false),
    new Connection("n4", "c2"),
    new Connection("c2", "n5", true),
    new Connection("c2", "n6", false));
  Set<String> pluginTypes = ImmutableSet.of(NODE.getType(), Constants.Connector.PLUGIN_TYPE, CONDITION.getType());
  Set<String> noTypes = ImmutableSet.of();
  PipelinePlanner planner = new PipelinePlanner(pluginTypes, noTypes, noTypes, noTypes, noTypes);
  PipelineSpec pipelineSpec = PipelineSpec.builder().addStages(stageSpecs).addConnections(connections).build();

  Map<String, PipelinePhase> expectedPhases = new HashMap<>();
  Set<Connection> expectedPhaseConnections = new HashSet<>();

  // the two conditions, and for each one the node reached on its true branch and on its false branch
  String[] conditionNames = {"c1", "c2"};
  String[][] branchTargets = {{"n2", "n3"}, {"n5", "n6"}};

  // every condition is a phase of its own, named after the condition
  for (String conditionName : conditionNames) {
    PipelinePhase conditionPhase = PipelinePhase.builder(pluginTypes)
      .addStage(StageSpec.builder(conditionName, CONDITION).build())
      .build();
    expectedPhases.put(conditionName, conditionPhase);
  }

  // the data phase before the conditions: n1 and n4, writing to one connector sink per condition
  PipelinePhase dataPhase = PipelinePhase.builder(pluginTypes)
    .addStage(StageSpec.builder("n1", NODE).build())
    .addStage(StageSpec.builder("n4", NODE).build())
    .addStage(StageSpec.builder("c1.connector", connectorSpec("c1.connector", Constants.Connector.SINK_TYPE)).build())
    .addStage(StageSpec.builder("c2.connector", connectorSpec("c2.connector", Constants.Connector.SINK_TYPE)).build())
    .addConnection("n1", "n4")
    .addConnection("n1", "c1.connector")
    .addConnection("n4", "c2.connector")
    .build();
  Dag dataPhaseDag = new Dag(ImmutableSet.of(
    new Connection("n1", "n4"), new Connection("n1", "c1"), new Connection("n4", "c2")));
  String dataPhaseName = PipelinePlanner.getPhaseName(dataPhaseDag);
  expectedPhases.put(dataPhaseName, dataPhase);
  // [n1, n4, c1, c2] -> [c1] and [n1, n4, c1, c2] -> [c2]
  for (String conditionName : conditionNames) {
    expectedPhaseConnections.add(new Connection(dataPhaseName, conditionName));
  }

  // one phase per branch of each condition: [cX] -- true/false --> [cX -> nY],
  // reading from the condition's connector as a source
  for (int i = 0; i < conditionNames.length; i++) {
    String conditionName = conditionNames[i];
    String connectorName = conditionName + ".connector";
    for (int branch = 0; branch < branchTargets[i].length; branch++) {
      String targetNode = branchTargets[i][branch];
      // index 0 holds the node on the true branch, index 1 the node on the false branch
      boolean onTrueBranch = branch == 0;
      PipelinePhase branchPhase = PipelinePhase.builder(pluginTypes)
        .addStage(StageSpec.builder(connectorName, connectorSpec(connectorName, Connector.SOURCE_TYPE)).build())
        .addStage(StageSpec.builder(targetNode, NODE).build())
        .addConnection(connectorName, targetNode)
        .build();
      Dag branchDag = new Dag(ImmutableSet.of(new Connection(conditionName, targetNode)));
      String branchPhaseName = PipelinePlanner.getPhaseName(branchDag);
      expectedPhases.put(branchPhaseName, branchPhase);
      expectedPhaseConnections.add(new Connection(conditionName, branchPhaseName, onTrueBranch));
    }
  }

  PipelinePlan expected = new PipelinePlan(expectedPhases, expectedPhaseConnections);
  PipelinePlan actual = planner.plan(pipelineSpec);
  Assert.assertEquals(expected, actual);
}
Use of io.cdap.cdap.etl.proto.v2.spec.StageSpec in project cdap by caskdata.
Class PipelinePlannerTest, method testConditionsToConditions.
/**
 * Plans a pipeline in which one condition leads directly to two further conditions and checks the resulting
 * phases and phase connections.
 */
@Test
public void testConditionsToConditions() {
  /*
   n1 - condition1 -- true --> condition2 -- true --> n2
            |
            |-- false --> condition3 -- true --> n3
   */
  Set<StageSpec> stageSpecs = ImmutableSet.of(
    StageSpec.builder("n1", NODE).build(),
    StageSpec.builder("n2", NODE).build(),
    StageSpec.builder("condition1", CONDITION1).build(),
    StageSpec.builder("n3", NODE).build(),
    StageSpec.builder("condition2", CONDITION2).build(),
    StageSpec.builder("condition3", CONDITION3).build());
  Set<Connection> connections = ImmutableSet.of(
    new Connection("n1", "condition1"),
    new Connection("condition1", "condition2", true),
    new Connection("condition1", "condition3", false),
    new Connection("condition2", "n2", true),
    new Connection("condition3", "n3", true));
  Set<String> pluginTypes = ImmutableSet.of(NODE.getType(), REDUCE.getType(), Constants.Connector.PLUGIN_TYPE, CONDITION1.getType(), CONDITION2.getType(), CONDITION3.getType(), CONDITION4.getType(), CONDITION5.getType());
  Set<String> reduceTypes = ImmutableSet.of(REDUCE.getType());
  Set<String> noTypes = ImmutableSet.of();
  PipelinePlanner planner = new PipelinePlanner(pluginTypes, reduceTypes, noTypes, noTypes, noTypes);
  PipelineSpec pipelineSpec = PipelineSpec.builder().addStages(stageSpecs).addConnections(connections).build();

  String connectorName = "condition1.connector";
  Map<String, PipelinePhase> expectedPhases = new HashMap<>();

  // n1 -- condition1.connector (connector used as a sink)
  PipelinePhase sourcePhase = PipelinePhase.builder(pluginTypes)
    .addStage(StageSpec.builder("n1", NODE).build())
    .addStage(StageSpec.builder(connectorName, connectorSpec(connectorName, Constants.Connector.SINK_TYPE)).build())
    .addConnection("n1", connectorName)
    .build();
  String sourcePhaseName = PipelinePlanner.getPhaseName(
    new Dag(ImmutableSet.of(new Connection("n1", "condition1"))));
  expectedPhases.put(sourcePhaseName, sourcePhase);

  // each of the three conditions is a phase of its own, named after the condition
  String condition1PhaseName = "condition1";
  expectedPhases.put(condition1PhaseName,
                     PipelinePhase.builder(pluginTypes)
                       .addStage(StageSpec.builder("condition1", CONDITION1).build())
                       .build());
  String condition2PhaseName = "condition2";
  expectedPhases.put(condition2PhaseName,
                     PipelinePhase.builder(pluginTypes)
                       .addStage(StageSpec.builder("condition2", CONDITION2).build())
                       .build());
  String condition3PhaseName = "condition3";
  expectedPhases.put(condition3PhaseName,
                     PipelinePhase.builder(pluginTypes)
                       .addStage(StageSpec.builder("condition3", CONDITION3).build())
                       .build());

  // condition1.connector -- n2 (connector used as a source); the phase is named from condition2 -> n2
  PipelinePhase n2Phase = PipelinePhase.builder(pluginTypes)
    .addStage(StageSpec.builder(connectorName, connectorSpec(connectorName, Constants.Connector.SOURCE_TYPE)).build())
    .addStage(StageSpec.builder("n2", NODE).build())
    .addConnection(connectorName, "n2")
    .build();
  String n2PhaseName = PipelinePlanner.getPhaseName(
    new Dag(ImmutableSet.of(new Connection("condition2", "n2"))));
  expectedPhases.put(n2PhaseName, n2Phase);

  // condition1.connector -- n3 (connector used as a source); the phase is named from condition3 -> n3
  PipelinePhase n3Phase = PipelinePhase.builder(pluginTypes)
    .addStage(StageSpec.builder(connectorName, connectorSpec(connectorName, Constants.Connector.SOURCE_TYPE)).build())
    .addStage(StageSpec.builder("n3", NODE).build())
    .addConnection(connectorName, "n3")
    .build();
  String n3PhaseName = PipelinePlanner.getPhaseName(
    new Dag(ImmutableSet.of(new Connection("condition3", "n3"))));
  expectedPhases.put(n3PhaseName, n3Phase);

  Set<Connection> expectedPhaseConnections = new HashSet<Connection>(ImmutableSet.of(
    new Connection(sourcePhaseName, condition1PhaseName),
    new Connection(condition1PhaseName, condition2PhaseName, true),
    new Connection(condition1PhaseName, condition3PhaseName, false),
    new Connection(condition2PhaseName, n2PhaseName, true),
    new Connection(condition3PhaseName, n3PhaseName, true)));

  PipelinePlan expected = new PipelinePlan(expectedPhases, expectedPhaseConnections);
  PipelinePlan actual = planner.plan(pipelineSpec);
  Assert.assertEquals(expected, actual);
}
Use of io.cdap.cdap.etl.proto.v2.spec.StageSpec in project cdap by caskdata.
Class PipelinePlannerTest, method testSimpleCondition.
/**
 * Plans a linear pipeline that ends in a single condition with a true and a false branch and checks the
 * resulting phases and phase connections.
 */
@Test
public void testSimpleCondition() {
  /*
   n1 - n2 - condition -- true --> n3
                 |
                 |-- false --> n4
   */
  Set<StageSpec> stageSpecs = ImmutableSet.of(
    StageSpec.builder("n1", NODE).build(),
    StageSpec.builder("n2", NODE).build(),
    StageSpec.builder("condition", CONDITION).build(),
    StageSpec.builder("n3", NODE).build(),
    StageSpec.builder("n4", NODE).build());
  Set<Connection> connections = ImmutableSet.of(
    new Connection("n1", "n2"),
    new Connection("n2", "condition"),
    new Connection("condition", "n3", true),
    new Connection("condition", "n4", false));
  Set<String> pluginTypes = ImmutableSet.of(NODE.getType(), REDUCE.getType(), Constants.Connector.PLUGIN_TYPE, CONDITION.getType());
  Set<String> reduceTypes = ImmutableSet.of(REDUCE.getType());
  Set<String> noTypes = ImmutableSet.of();
  PipelinePlanner planner = new PipelinePlanner(pluginTypes, reduceTypes, noTypes, noTypes, noTypes);
  PipelineSpec pipelineSpec = PipelineSpec.builder().addStages(stageSpecs).addConnections(connections).build();

  String connectorName = "condition.connector";
  Map<String, PipelinePhase> expectedPhases = new HashMap<>();

  // n1 -- n2 -- condition.connector (connector used as a sink)
  PipelinePhase upstreamPhase = PipelinePhase.builder(pluginTypes)
    .addStage(StageSpec.builder("n1", NODE).build())
    .addStage(StageSpec.builder("n2", NODE).build())
    .addStage(StageSpec.builder(connectorName, connectorSpec(connectorName, Constants.Connector.SINK_TYPE)).build())
    .addConnection("n1", "n2")
    .addConnection("n2", connectorName)
    .build();
  String upstreamPhaseName = PipelinePlanner.getPhaseName(
    new Dag(ImmutableSet.of(new Connection("n1", "n2"), new Connection("n2", "condition"))));
  expectedPhases.put(upstreamPhaseName, upstreamPhase);

  // the condition is a phase of its own, named after the condition
  String conditionPhaseName = "condition";
  expectedPhases.put(conditionPhaseName,
                     PipelinePhase.builder(pluginTypes)
                       .addStage(StageSpec.builder("condition", CONDITION).build())
                       .build());

  // condition.connector -- n3 (connector used as a source)
  PipelinePhase n3Phase = PipelinePhase.builder(pluginTypes)
    .addStage(StageSpec.builder(connectorName, connectorSpec(connectorName, Constants.Connector.SOURCE_TYPE)).build())
    .addStage(StageSpec.builder("n3", NODE).build())
    .addConnection(connectorName, "n3")
    .build();
  String n3PhaseName = PipelinePlanner.getPhaseName(
    new Dag(ImmutableSet.of(new Connection("condition", "n3"))));
  expectedPhases.put(n3PhaseName, n3Phase);

  // condition.connector -- n4 (connector used as a source)
  PipelinePhase n4Phase = PipelinePhase.builder(pluginTypes)
    .addStage(StageSpec.builder(connectorName, connectorSpec(connectorName, Constants.Connector.SOURCE_TYPE)).build())
    .addStage(StageSpec.builder("n4", NODE).build())
    .addConnection(connectorName, "n4")
    .build();
  String n4PhaseName = PipelinePlanner.getPhaseName(
    new Dag(ImmutableSet.of(new Connection("condition", "n4"))));
  expectedPhases.put(n4PhaseName, n4Phase);

  Set<Connection> expectedPhaseConnections = new HashSet<Connection>(ImmutableSet.of(
    new Connection(upstreamPhaseName, conditionPhaseName),
    new Connection(conditionPhaseName, n3PhaseName, true),
    new Connection(conditionPhaseName, n4PhaseName, false)));

  PipelinePlan expected = new PipelinePlan(expectedPhases, expectedPhaseConnections);
  PipelinePlan actual = planner.plan(pipelineSpec);
  Assert.assertEquals(expected, actual);
}
Use of io.cdap.cdap.etl.proto.v2.spec.StageSpec in project cdap by cdapio.
Class TransformExecutorFactory, method getPipeStage.
/**
 * Builds the {@link PipeStage} for a single stage of the given pipeline phase.
 *
 * <p>A sink stage is built directly from its spec. Any other stage gets a {@link PipeEmitter} that is wired
 * to the already-built pipe stages of its outputs, and is then wrapped according to its plugin type.
 *
 * @param pipeline the phase that contains the stage
 * @param stageName name of the stage to build
 * @param pipeStages pipe stages keyed by stage name, used to look up the outputs of this stage
 * @return the pipe stage for {@code stageName}
 * @throws Exception propagated from the helpers that create the sink stage or the transformations
 */
private PipeStage getPipeStage(PipelinePhase pipeline, String stageName, Map<String, PipeStage> pipeStages) throws Exception {
  StageSpec stageSpec = pipeline.getStage(stageName);
  String pluginType = stageSpec.getPluginType();

  // ending stages don't use a PipeEmitter
  if (pipeline.getSinks().contains(stageName)) {
    return getSinkPipeStage(stageSpec);
  }

  boolean stageIsConnector = Constants.Connector.PLUGIN_TYPE.equals(pluginType);

  // The emitter holds all output PipeStages it needs to write to and wraps any output it gets into a
  // RecordInfo. ConnectorSources require a special emitter since they need to build RecordInfo from the
  // temporary dataset.
  PipeEmitter.Builder emitterBuilder;
  if (stageIsConnector && pipeline.getSources().contains(stageName)) {
    emitterBuilder = ConnectorSourceEmitter.builder(stageName);
  } else {
    emitterBuilder = PipeEmitter.builder(stageName);
  }

  Map<String, StageSpec.Port> outputPorts = stageSpec.getOutputPorts();
  for (String outputName : pipeline.getStageOutputs(stageName)) {
    StageSpec outputSpec = pipeline.getStage(outputName);
    String outputType = outputSpec.getPluginType();
    PipeStage consumer = pipeStages.get(outputName);

    if (ErrorTransform.PLUGIN_TYPE.equals(outputType)) {
      emitterBuilder.addErrorConsumer(consumer);
      continue;
    }
    if (AlertPublisher.PLUGIN_TYPE.equals(outputType)) {
      emitterBuilder.addAlertConsumer(consumer);
      continue;
    }
    if (stageIsConnector) {
      // connectors only have a single output
      emitterBuilder.addOutputConsumer(consumer);
      continue;
    }

    // if the output is a connector like agg5.connector, the outputPorts will contain the original 'agg5' as
    // a key, but not 'agg5.connector' so we need to lookup the original stage from the connector's plugin spec
    String portLookupName;
    if (Constants.Connector.PLUGIN_TYPE.equals(outputType)) {
      portLookupName = outputSpec.getPlugin().getProperties().get(Constants.Connector.ORIGINAL_NAME);
    } else {
      portLookupName = outputName;
    }

    String port = null;
    if (outputPorts.containsKey(portLookupName)) {
      port = outputPorts.get(portLookupName).getPort();
    }

    if (port == null) {
      emitterBuilder.addOutputConsumer(consumer);
    } else {
      emitterBuilder.addOutputConsumer(consumer, port);
    }
  }

  PipeEmitter emitter = emitterBuilder.build();
  if (SplitterTransform.PLUGIN_TYPE.equals(pluginType)) {
    // this is a SplitterTransform, needs to emit records to the right outputs based on port
    return new MultiOutputTransformPipeStage<>(stageName, getMultiOutputTransform(stageSpec), emitter);
  }
  return new UnwrapPipeStage<>(stageName, getTransformation(stageSpec), emitter);
}
Aggregations