Use of io.cdap.cdap.etl.common.PipelinePhase in project cdap by caskdata.
Class PipelinePlannerTest, method testConditionsOnBranches.
@Test
public void testConditionsOnBranches() {
  /*
                        |-- true --> n2
              |--> c1 --|
         n1 --|         |-- false --> n3
              |
              |                |-- true --> n5
              |--> n4 --> c2 --|
                               |-- false --> n6
   */
  // ---- input pipeline ----
  Set<StageSpec> inputStages = ImmutableSet.of(
    StageSpec.builder("c1", CONDITION).build(),
    StageSpec.builder("c2", CONDITION).build(),
    StageSpec.builder("n1", NODE).build(),
    StageSpec.builder("n2", NODE).build(),
    StageSpec.builder("n3", NODE).build(),
    StageSpec.builder("n4", NODE).build(),
    StageSpec.builder("n5", NODE).build(),
    StageSpec.builder("n6", NODE).build());
  Set<Connection> inputConnections = ImmutableSet.of(
    new Connection("n1", "c1"),
    new Connection("n1", "n4"),
    new Connection("c1", "n2", true),
    new Connection("c1", "n3", false),
    new Connection("n4", "c2"),
    new Connection("c2", "n5", true),
    new Connection("c2", "n6", false));
  Set<String> pluginTypes = ImmutableSet.of(NODE.getType(), Constants.Connector.PLUGIN_TYPE, CONDITION.getType());
  Set<String> reduceTypes = ImmutableSet.of();
  Set<String> noTypes = Collections.<String>emptySet();
  PipelinePlanner planner = new PipelinePlanner(pluginTypes, reduceTypes, noTypes, noTypes, noTypes);
  PipelineSpec pipelineSpec = PipelineSpec.builder()
    .addStages(inputStages)
    .addConnections(inputConnections)
    .build();

  // ---- expected plan ----
  Map<String, PipelinePhase> expectedPhases = new HashMap<>();
  Set<Connection> expectedPhaseConnections = new HashSet<>();

  // every condition stage is expected to be a phase on its own, named after the stage
  expectedPhases.put("c1", PipelinePhase.builder(pluginTypes)
    .addStage(StageSpec.builder("c1", CONDITION).build())
    .build());
  expectedPhases.put("c2", PipelinePhase.builder(pluginTypes)
    .addStage(StageSpec.builder("c2", CONDITION).build())
    .build());

  // head phase: n1 and n4, writing into a sink connector in front of each condition
  PipelinePhase headPhase = PipelinePhase.builder(pluginTypes)
    .addStage(StageSpec.builder("n1", NODE).build())
    .addStage(StageSpec.builder("n4", NODE).build())
    .addStage(StageSpec.builder("c1.connector", connectorSpec("c1.connector", Constants.Connector.SINK_TYPE)).build())
    .addStage(StageSpec.builder("c2.connector", connectorSpec("c2.connector", Constants.Connector.SINK_TYPE)).build())
    .addConnection("n1", "n4")
    .addConnection("n1", "c1.connector")
    .addConnection("n4", "c2.connector")
    .build();
  // the phase name is derived from the dag expressed with the original (non-connector) stage names
  String headPhaseName = PipelinePlanner.getPhaseName(new Dag(ImmutableSet.of(
    new Connection("n1", "n4"),
    new Connection("n1", "c1"),
    new Connection("n4", "c2"))));
  expectedPhases.put(headPhaseName, headPhase);
  // [n1, n4, c1, c2] -> [c1]
  expectedPhaseConnections.add(new Connection(headPhaseName, "c1"));
  // [n1, n4, c1, c2] -> [c2]
  expectedPhaseConnections.add(new Connection(headPhaseName, "c2"));

  // [c1] -- true --> [c1 -> n2]
  PipelinePhase c1TruePhase = PipelinePhase.builder(pluginTypes)
    .addStage(StageSpec.builder("c1.connector", connectorSpec("c1.connector", Connector.SOURCE_TYPE)).build())
    .addStage(StageSpec.builder("n2", NODE).build())
    .addConnection("c1.connector", "n2")
    .build();
  String c1TrueName = PipelinePlanner.getPhaseName(new Dag(ImmutableSet.of(new Connection("c1", "n2"))));
  expectedPhases.put(c1TrueName, c1TruePhase);
  expectedPhaseConnections.add(new Connection("c1", c1TrueName, true));

  // [c1] -- false --> [c1 -> n3]
  PipelinePhase c1FalsePhase = PipelinePhase.builder(pluginTypes)
    .addStage(StageSpec.builder("c1.connector", connectorSpec("c1.connector", Connector.SOURCE_TYPE)).build())
    .addStage(StageSpec.builder("n3", NODE).build())
    .addConnection("c1.connector", "n3")
    .build();
  String c1FalseName = PipelinePlanner.getPhaseName(new Dag(ImmutableSet.of(new Connection("c1", "n3"))));
  expectedPhases.put(c1FalseName, c1FalsePhase);
  expectedPhaseConnections.add(new Connection("c1", c1FalseName, false));

  // [c2] -- true --> [c2 -> n5]
  PipelinePhase c2TruePhase = PipelinePhase.builder(pluginTypes)
    .addStage(StageSpec.builder("c2.connector", connectorSpec("c2.connector", Connector.SOURCE_TYPE)).build())
    .addStage(StageSpec.builder("n5", NODE).build())
    .addConnection("c2.connector", "n5")
    .build();
  String c2TrueName = PipelinePlanner.getPhaseName(new Dag(ImmutableSet.of(new Connection("c2", "n5"))));
  expectedPhases.put(c2TrueName, c2TruePhase);
  expectedPhaseConnections.add(new Connection("c2", c2TrueName, true));

  // [c2] -- false --> [c2 -> n6]
  PipelinePhase c2FalsePhase = PipelinePhase.builder(pluginTypes)
    .addStage(StageSpec.builder("c2.connector", connectorSpec("c2.connector", Connector.SOURCE_TYPE)).build())
    .addStage(StageSpec.builder("n6", NODE).build())
    .addConnection("c2.connector", "n6")
    .build();
  String c2FalseName = PipelinePlanner.getPhaseName(new Dag(ImmutableSet.of(new Connection("c2", "n6"))));
  expectedPhases.put(c2FalseName, c2FalsePhase);
  expectedPhaseConnections.add(new Connection("c2", c2FalseName, false));

  // ---- compare against what the planner produces ----
  PipelinePlan expected = new PipelinePlan(expectedPhases, expectedPhaseConnections);
  PipelinePlan actual = planner.plan(pipelineSpec);
  Assert.assertEquals(expected, actual);
}
Use of io.cdap.cdap.etl.common.PipelinePhase in project cdap by caskdata.
Class PipelinePlannerTest, method testSimpleCondition.
@Test
public void testSimpleCondition() {
  /*
        n1 - n2 - condition - n3
                      |
                      |---- n4
   */
  // ---- input pipeline ----
  Set<StageSpec> inputStages = ImmutableSet.of(
    StageSpec.builder("n1", NODE).build(),
    StageSpec.builder("n2", NODE).build(),
    StageSpec.builder("condition", CONDITION).build(),
    StageSpec.builder("n3", NODE).build(),
    StageSpec.builder("n4", NODE).build());
  Set<Connection> inputConnections = ImmutableSet.of(
    new Connection("n1", "n2"),
    new Connection("n2", "condition"),
    new Connection("condition", "n3", true),
    new Connection("condition", "n4", false));
  Set<String> pluginTypes = ImmutableSet.of(
    NODE.getType(), REDUCE.getType(), Constants.Connector.PLUGIN_TYPE, CONDITION.getType());
  Set<String> reduceTypes = ImmutableSet.of(REDUCE.getType());
  Set<String> noTypes = ImmutableSet.of();
  PipelinePlanner planner = new PipelinePlanner(pluginTypes, reduceTypes, noTypes, noTypes, noTypes);
  PipelineSpec pipelineSpec = PipelineSpec.builder()
    .addStages(inputStages)
    .addConnections(inputConnections)
    .build();

  // ---- expected phases ----
  // Phase names are derived from the dag written with the original stage names, while the
  // phase contents use the connector that stands in for the condition.

  // n1 -- n2 -- condition.connector (sink)
  PipelinePhase upstreamPhase = PipelinePhase.builder(pluginTypes)
    .addStage(StageSpec.builder("n1", NODE).build())
    .addStage(StageSpec.builder("n2", NODE).build())
    .addStage(StageSpec.builder("condition.connector",
                                connectorSpec("condition.connector", Constants.Connector.SINK_TYPE)).build())
    .addConnection("n1", "n2")
    .addConnection("n2", "condition.connector")
    .build();
  String upstreamName = PipelinePlanner.getPhaseName(new Dag(ImmutableSet.of(
    new Connection("n1", "n2"),
    new Connection("n2", "condition"))));

  // the condition on its own
  PipelinePhase conditionPhase = PipelinePhase.builder(pluginTypes)
    .addStage(StageSpec.builder("condition", CONDITION).build())
    .build();
  String conditionName = "condition";

  // condition.connector (source) -- n3
  PipelinePhase truePhase = PipelinePhase.builder(pluginTypes)
    .addStage(StageSpec.builder("condition.connector",
                                connectorSpec("condition.connector", Constants.Connector.SOURCE_TYPE)).build())
    .addStage(StageSpec.builder("n3", NODE).build())
    .addConnection("condition.connector", "n3")
    .build();
  String trueName = PipelinePlanner.getPhaseName(new Dag(ImmutableSet.of(new Connection("condition", "n3"))));

  // condition.connector (source) -- n4
  PipelinePhase falsePhase = PipelinePhase.builder(pluginTypes)
    .addStage(StageSpec.builder("condition.connector",
                                connectorSpec("condition.connector", Constants.Connector.SOURCE_TYPE)).build())
    .addStage(StageSpec.builder("n4", NODE).build())
    .addConnection("condition.connector", "n4")
    .build();
  String falseName = PipelinePlanner.getPhaseName(new Dag(ImmutableSet.of(new Connection("condition", "n4"))));

  Map<String, PipelinePhase> expectedPhases = new HashMap<>();
  expectedPhases.put(upstreamName, upstreamPhase);
  expectedPhases.put(conditionName, conditionPhase);
  expectedPhases.put(trueName, truePhase);
  expectedPhases.put(falseName, falsePhase);

  // ---- expected connections between phases ----
  Set<Connection> expectedPhaseConnections = new HashSet<>();
  expectedPhaseConnections.add(new Connection(upstreamName, conditionName));
  expectedPhaseConnections.add(new Connection(conditionName, trueName, true));
  expectedPhaseConnections.add(new Connection(conditionName, falseName, false));

  PipelinePlan expected = new PipelinePlan(expectedPhases, expectedPhaseConnections);
  PipelinePlan actual = planner.plan(pipelineSpec);
  Assert.assertEquals(expected, actual);
}
Use of io.cdap.cdap.etl.common.PipelinePhase in project cdap by caskdata.
Class PipelinePlannerTest, method testConditionsToConditions.
@Test
public void testConditionsToConditions() {
  /*
        n1 - c1----c2---n2
             |
             |-----c3---n3
   */
  // ---- input pipeline ----
  Set<StageSpec> inputStages = ImmutableSet.of(
    StageSpec.builder("n1", NODE).build(),
    StageSpec.builder("n2", NODE).build(),
    StageSpec.builder("condition1", CONDITION1).build(),
    StageSpec.builder("n3", NODE).build(),
    StageSpec.builder("condition2", CONDITION2).build(),
    StageSpec.builder("condition3", CONDITION3).build());
  Set<Connection> inputConnections = ImmutableSet.of(
    new Connection("n1", "condition1"),
    new Connection("condition1", "condition2", true),
    new Connection("condition1", "condition3", false),
    new Connection("condition2", "n2", true),
    new Connection("condition3", "n3", true));
  Set<String> pluginTypes = ImmutableSet.of(
    NODE.getType(), REDUCE.getType(), Constants.Connector.PLUGIN_TYPE,
    CONDITION1.getType(), CONDITION2.getType(), CONDITION3.getType(),
    CONDITION4.getType(), CONDITION5.getType());
  Set<String> reduceTypes = ImmutableSet.of(REDUCE.getType());
  Set<String> noTypes = ImmutableSet.of();
  PipelinePlanner planner = new PipelinePlanner(pluginTypes, reduceTypes, noTypes, noTypes, noTypes);
  PipelineSpec pipelineSpec = PipelineSpec.builder()
    .addStages(inputStages)
    .addConnections(inputConnections)
    .build();

  // ---- expected phases ----

  // n1 -- condition1.connector (sink)
  PipelinePhase upstreamPhase = PipelinePhase.builder(pluginTypes)
    .addStage(StageSpec.builder("n1", NODE).build())
    .addStage(StageSpec.builder("condition1.connector",
                                connectorSpec("condition1.connector", Constants.Connector.SINK_TYPE)).build())
    .addConnection("n1", "condition1.connector")
    .build();
  String upstreamName = PipelinePlanner.getPhaseName(
    new Dag(ImmutableSet.of(new Connection("n1", "condition1"))));

  // the three conditions, each as a phase of its own, named after the stage
  PipelinePhase condition1Phase = PipelinePhase.builder(pluginTypes)
    .addStage(StageSpec.builder("condition1", CONDITION1).build())
    .build();
  String condition1Name = "condition1";

  PipelinePhase condition2Phase = PipelinePhase.builder(pluginTypes)
    .addStage(StageSpec.builder("condition2", CONDITION2).build())
    .build();
  String condition2Name = "condition2";

  PipelinePhase condition3Phase = PipelinePhase.builder(pluginTypes)
    .addStage(StageSpec.builder("condition3", CONDITION3).build())
    .build();
  String condition3Name = "condition3";

  // condition1.connector (source) -- n2
  // The expected phase reads from condition1's connector even though the phase name is derived
  // from the condition2 -> n2 edge.
  PipelinePhase n2Phase = PipelinePhase.builder(pluginTypes)
    .addStage(StageSpec.builder("condition1.connector",
                                connectorSpec("condition1.connector", Constants.Connector.SOURCE_TYPE)).build())
    .addStage(StageSpec.builder("n2", NODE).build())
    .addConnection("condition1.connector", "n2")
    .build();
  String n2PhaseName = PipelinePlanner.getPhaseName(
    new Dag(ImmutableSet.of(new Connection("condition2", "n2"))));

  // condition1.connector (source) -- n3
  // Same as above: the connector is condition1's, the name comes from condition3 -> n3.
  PipelinePhase n3Phase = PipelinePhase.builder(pluginTypes)
    .addStage(StageSpec.builder("condition1.connector",
                                connectorSpec("condition1.connector", Constants.Connector.SOURCE_TYPE)).build())
    .addStage(StageSpec.builder("n3", NODE).build())
    .addConnection("condition1.connector", "n3")
    .build();
  String n3PhaseName = PipelinePlanner.getPhaseName(
    new Dag(ImmutableSet.of(new Connection("condition3", "n3"))));

  Map<String, PipelinePhase> expectedPhases = new HashMap<>();
  expectedPhases.put(upstreamName, upstreamPhase);
  expectedPhases.put(condition1Name, condition1Phase);
  expectedPhases.put(condition2Name, condition2Phase);
  expectedPhases.put(condition3Name, condition3Phase);
  expectedPhases.put(n2PhaseName, n2Phase);
  expectedPhases.put(n3PhaseName, n3Phase);

  // ---- expected connections between phases ----
  Set<Connection> expectedPhaseConnections = new HashSet<>();
  expectedPhaseConnections.add(new Connection(upstreamName, condition1Name));
  expectedPhaseConnections.add(new Connection(condition1Name, condition2Name, true));
  expectedPhaseConnections.add(new Connection(condition1Name, condition3Name, false));
  expectedPhaseConnections.add(new Connection(condition2Name, n2PhaseName, true));
  expectedPhaseConnections.add(new Connection(condition3Name, n3PhaseName, true));

  PipelinePlan expected = new PipelinePlan(expectedPhases, expectedPhaseConnections);
  PipelinePlan actual = planner.plan(pipelineSpec);
  Assert.assertEquals(expected, actual);
}
Use of io.cdap.cdap.etl.common.PipelinePhase in project cdap by caskdata.
Class PipelinePlanner, method dagToPipeline.
/**
 * Builds a PipelinePhase from a Dag by walking its nodes in topological order, registering each
 * node's outgoing connections and adding one stage per node. Nodes recognised as connectors get a
 * synthesized connector plugin spec; all other nodes use the spec looked up in {@code specs}.
 *
 * @param dag the dag to convert
 * @param connectors connector nodes across all dags; a stage name that is a key here is treated as a
 *                   connector and the mapped value is recorded as its original name
 * @param specs specifications for every stage, keyed by stage name
 * @param conditionConnectors a stage name appearing among the values of this map is also treated as a
 *                            connector; when it is not a key of {@code connectors}, the stage name itself
 *                            is recorded as the original name
 * @return the converted dag
 */
private PipelinePhase dagToPipeline(Dag dag, Map<String, String> connectors, Map<String, StageSpec> specs, Map<String, String> conditionConnectors) {
  PipelinePhase.Builder builder = PipelinePhase.builder(supportedPluginTypes);

  for (String node : dag.getTopologicalOrder()) {
    Set<String> nodeOutputs = dag.getNodeOutputs(node);
    if (!nodeOutputs.isEmpty()) {
      builder.addConnections(node, nodeOutputs);
    }

    String mappedOriginal = connectors.get(node);
    boolean isConnector = mappedOriginal != null || conditionConnectors.containsValue(node);
    if (!isConnector) {
      // regular plugin stage: use its spec as-is
      builder.addStage(specs.get(node));
      continue;
    }

    // a connector that is a source of this dag reads data, any other connector writes it
    String connectorType;
    if (dag.getSources().contains(node)) {
      connectorType = Constants.Connector.SOURCE_TYPE;
    } else {
      connectorType = Constants.Connector.SINK_TYPE;
    }

    String recordedOriginal;
    if (mappedOriginal != null) {
      recordedOriginal = mappedOriginal;
    } else {
      recordedOriginal = node;
    }

    PluginSpec connectorSpec = new PluginSpec(
      Constants.Connector.PLUGIN_TYPE,
      "connector",
      ImmutableMap.of(Constants.Connector.ORIGINAL_NAME, recordedOriginal,
                      Constants.Connector.TYPE, connectorType),
      null);
    builder.addStage(StageSpec.builder(node, connectorSpec).build());
  }

  return builder.build();
}
Use of io.cdap.cdap.etl.common.PipelinePhase in project cdap by caskdata.
Class PipelinePhasePreparer, method prepare.
/**
 * Prepare all the stages in the given phase and return Finishers that must be run when the pipeline completes.
 *
 * Stages are visited in the topological order of the phase's dag. For each stage a plugin instance is
 * created and wrapped in a SubmitterPlugin chosen by plugin type; connector stages are handled as a
 * source when they are among the phase's sources and as a sink when they are among its sinks. Every
 * non-null SubmitterPlugin has {@code prepareRun()} called on it and is then added to the returned list.
 *
 * NOTE(review): the list is only returned once every stage has been prepared, so if a stage throws,
 * the finishers collected so far are not handed back by this method — confirm the caller accounts for that.
 *
 * @param phaseSpec the pipeline phase to prepare
 * @return list of finishers that should be run when the pipeline ends, in the order the stages were prepared
 * @throws TransactionFailureException declared by the signature; presumably propagated from stage
 *                                     preparation — TODO confirm
 * @throws InstantiationException declared by the signature; presumably raised when a plugin cannot be
 *                                instantiated — TODO confirm
 * @throws IOException declared by the signature; presumably propagated from stage preparation — TODO confirm
 * @throws IllegalStateException if an aggregator or joiner stage's plugin is not an instance of one of the
 *                               classes handled here
 */
public List<Finisher> prepare(PhaseSpec phaseSpec) throws TransactionFailureException, InstantiationException, IOException {
  PipelinePluginInstantiator pluginInstantiator = getPluginInstantiator(phaseSpec);
  PipelinePhase phase = phaseSpec.getPhase();
  List<Finisher> finishers = new ArrayList<>();
  // call prepareRun on each stage in order so that any arguments set by a stage will be visible to subsequent stages
  for (String stageName : phase.getDag().getTopologicalOrder()) {
    StageSpec stageSpec = phase.getStage(stageName);
    String pluginType = stageSpec.getPluginType();
    // a connector has no source/sink plugin type of its own; its role comes from its position in the phase
    boolean isConnector = Constants.Connector.PLUGIN_TYPE.equals(pluginType);
    boolean isConnectorSource = isConnector && phase.getSources().contains(stageName);
    boolean isConnectorSink = isConnector && phase.getSinks().contains(stageName);
    SubmitterPlugin submitterPlugin;
    if (BatchSource.PLUGIN_TYPE.equals(pluginType) || isConnectorSource) {
      BatchConfigurable<BatchSourceContext> batchSource = pluginInstantiator.newPluginInstance(stageName, macroEvaluator);
      submitterPlugin = createSource(batchSource, stageSpec);
    } else if (BatchSink.PLUGIN_TYPE.equals(pluginType) || AlertPublisher.PLUGIN_TYPE.equals(pluginType) || isConnectorSink) {
      BatchConfigurable<BatchSinkContext> batchSink = pluginInstantiator.newPluginInstance(stageName, macroEvaluator);
      submitterPlugin = createSink(batchSink, stageSpec);
    } else if (Transform.PLUGIN_TYPE.equals(pluginType) || ErrorTransform.PLUGIN_TYPE.equals(pluginType)) {
      Transform<?, ?> transform = pluginInstantiator.newPluginInstance(stageName, macroEvaluator);
      submitterPlugin = createTransform(transform, stageSpec);
    } else if (BatchAggregator.PLUGIN_TYPE.equals(pluginType)) {
      // the aggregator plugin type is served by two different classes, so instantiate as Object and dispatch
      Object plugin = pluginInstantiator.newPluginInstance(stageName, macroEvaluator);
      if (plugin instanceof BatchAggregator) {
        // wildcard cast rather than a raw type, matching the BatchJoiner cast below
        BatchAggregator<?, ?, ?> aggregator = (BatchAggregator<?, ?, ?>) plugin;
        submitterPlugin = createAggregator(aggregator, stageSpec);
      } else if (plugin instanceof BatchReducibleAggregator) {
        BatchReducibleAggregator<?, ?, ?, ?> aggregator = (BatchReducibleAggregator<?, ?, ?, ?>) plugin;
        submitterPlugin = createReducibleAggregator(aggregator, stageSpec);
      } else {
        throw new IllegalStateException(String.format("Aggregator stage '%s' is of an unsupported class '%s'.", stageSpec.getName(), plugin.getClass().getName()));
      }
    } else if (BatchJoiner.PLUGIN_TYPE.equals(pluginType)) {
      // likewise, the joiner plugin type is served by two different classes
      Object plugin = pluginInstantiator.newPluginInstance(stageName, macroEvaluator);
      if (plugin instanceof BatchJoiner) {
        BatchJoiner<?, ?, ?> batchJoiner = (BatchJoiner<?, ?, ?>) plugin;
        submitterPlugin = createJoiner(batchJoiner, stageSpec);
      } else if (plugin instanceof BatchAutoJoiner) {
        BatchAutoJoiner batchJoiner = (BatchAutoJoiner) plugin;
        validateAutoJoiner(batchJoiner, stageSpec);
        submitterPlugin = createAutoJoiner(batchJoiner, stageSpec);
      } else {
        throw new IllegalStateException(String.format("Join stage '%s' is of an unsupported class '%s'.", stageSpec.getName(), plugin.getClass().getName()));
      }
    } else if (SplitterTransform.PLUGIN_TYPE.equals(pluginType)) {
      SplitterTransform<?, ?> splitterTransform = pluginInstantiator.newPluginInstance(stageName, macroEvaluator);
      submitterPlugin = createSplitterTransform(splitterTransform, stageSpec);
    } else {
      // any other plugin type goes to the generic factory, which may yield null (see the check below)
      submitterPlugin = create(pluginInstantiator, stageSpec);
    }
    // stages with no submitter plugin are neither prepared nor finished
    if (submitterPlugin != null) {
      submitterPlugin.prepareRun();
      finishers.add(submitterPlugin);
    }
  }
  return finishers;
}
Aggregations