use of io.cdap.cdap.etl.proto.v2.spec.StageSpec in project cdap by caskdata.
the class PipelinePhase method registerPlugins.
/**
 * Registers all the plugins with the given pluginConfigurer by calling {@link PluginConfigurer#usePluginClass(String,
 * String, String, PluginProperties, PluginSelector)}.
 *
 * @param pluginConfigurer the {@link PluginConfigurer} with which the plugins in this {@link PipelinePhase} need to
 *   be registered
 * @param runtimeConfigurer the runtime configurer that provides runtime arguments for better macro resolution, or
 *   null if this is the initial deploy
 * @param namespace the namespace in which the app is deployed
 */
public void registerPlugins(PluginConfigurer pluginConfigurer, @Nullable RuntimeConfigurer runtimeConfigurer,
                            String namespace) {
  MacroParserOptions options = MacroParserOptions.builder()
    .skipInvalidMacros()
    .setEscaping(false)
    .setFunctionWhitelist(ConnectionMacroEvaluator.FUNCTION_NAME)
    .build();
  MacroEvaluator runtimeEvaluator = null;
  if (runtimeConfigurer != null) {
    Map<String, MacroEvaluator> evaluators = Collections.singletonMap(
      ConnectionMacroEvaluator.FUNCTION_NAME, new ConnectionMacroEvaluator(namespace, runtimeConfigurer));
    runtimeEvaluator = new DefaultMacroEvaluator(new BasicArguments(runtimeConfigurer.getRuntimeArguments()),
                                                 evaluators,
                                                 Collections.singleton(ConnectionMacroEvaluator.FUNCTION_NAME));
  }
  for (StageSpec stageSpec : stagesByName.values()) {
    // we don't need to register connectors, only source, sink, and transform plugins
    if (stageSpec.getPluginType().equals(Constants.Connector.PLUGIN_TYPE)) {
      continue;
    }
    PluginSpec pluginSpec = stageSpec.getPlugin();
    ArtifactVersion version = pluginSpec.getArtifact().getVersion();
    ArtifactSelector artifactSelector = new ArtifactSelector(pluginSpec.getArtifact().getScope(),
                                                             pluginSpec.getArtifact().getName(),
                                                             new ArtifactVersionRange(version, true, version, true));
    Map<String, String> prop = pluginSpec.getProperties();
    pluginConfigurer.usePluginClass(pluginSpec.getType(), pluginSpec.getName(), stageSpec.getName(),
                                    PluginProperties.builder()
                                      .addAll(runtimeConfigurer == null
                                                ? prop
                                                : pluginConfigurer.evaluateMacros(prop, runtimeEvaluator, options))
                                      .build(),
                                    artifactSelector);
  }
}
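For context, a minimal caller sketch; the configurePhase wrapper and its parameter names are illustrative and not part of PipelinePhase. Passing null for the RuntimeConfigurer corresponds to the initial-deploy branch above, where plugin properties are registered as-is and macros stay unresolved.
// Illustrative only, not taken from the CDAP sources.
void configurePhase(PipelinePhase phase, PluginConfigurer pluginConfigurer, String namespace) {
  // null RuntimeConfigurer: initial deploy, no runtime arguments available yet
  phase.registerPlugins(pluginConfigurer, null, namespace);
}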
use of io.cdap.cdap.etl.proto.v2.spec.StageSpec in project cdap by caskdata.
the class TransformRunner method getSinkWriter.
// this is needed because we need to write to the context differently depending on the number of outputs
private OutputWriter<Object, Object> getSinkWriter(MapReduceTaskContext<Object, Object> context,
                                                   PipelinePhase pipelinePhase, Configuration hConf) {
  Set<StageSpec> reducers = pipelinePhase.getStagesOfType(BatchAggregator.PLUGIN_TYPE, BatchJoiner.PLUGIN_TYPE);
  JobContext hadoopContext = context.getHadoopContext();
  if (!reducers.isEmpty() && hadoopContext instanceof Mapper.Context) {
    return new SingleOutputWriter<>(context);
  }
  String sinkOutputsStr = hConf.get(ETLMapReduce.SINK_OUTPUTS_KEY);
  // should never happen, this is set in initialize
  Preconditions.checkNotNull(sinkOutputsStr, "Sink outputs not found in Hadoop conf.");
  Map<String, SinkOutput> sinkOutputs = GSON.fromJson(sinkOutputsStr, ETLMapReduce.SINK_OUTPUTS_TYPE);
  return hasSingleOutput(sinkOutputs) ? new SingleOutputWriter<>(context)
                                      : new MultiOutputWriter<>(context, sinkOutputs);
}
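The hasSingleOutput helper referenced above is not included in this excerpt. The sketch below shows one plausible shape, assuming the decision rests only on the number of configured sinks; the real TransformRunner helper may also take error datasets into account.
// Plausible sketch of the missing helper: a single configured sink means the simpler
// SingleOutputWriter can be used; otherwise each record must be routed to its sink.
private boolean hasSingleOutput(Map<String, SinkOutput> sinkOutputs) {
  return sinkOutputs.size() == 1;
}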
use of io.cdap.cdap.etl.proto.v2.spec.StageSpec in project cdap by caskdata.
the class FieldLineageProcessorTest method testGeneratedOperations.
@Test
public void testGeneratedOperations() throws Exception {
  // src -> transform1 -> transform2 -> sink
  Schema srcSchema = Schema.recordOf("srcSchema",
                                     Schema.Field.of("body", Schema.of(Schema.Type.STRING)),
                                     Schema.Field.of("offset", Schema.of(Schema.Type.INT)));
  Schema transform1Schema = Schema.recordOf("trans1Schema",
                                            Schema.Field.of("body", Schema.of(Schema.Type.STRING)));
  Schema transform2Schema = Schema.recordOf("trans2Schema",
                                            Schema.Field.of("id", Schema.of(Schema.Type.INT)),
                                            Schema.Field.of("name", Schema.of(Schema.Type.STRING)));
  Set<StageSpec> stageSpecs = ImmutableSet.of(
    StageSpec.builder("src", DUMMY_PLUGIN).addOutput(srcSchema, "transform1").build(),
    StageSpec.builder("transform1", DUMMY_PLUGIN).addInputSchema("src", srcSchema)
      .addOutput(transform1Schema, "transform2").build(),
    StageSpec.builder("transform2", DUMMY_PLUGIN).addInputSchema("transform1", transform1Schema)
      .addOutput(transform2Schema, "sink").build(),
    StageSpec.builder("sink", DUMMY_PLUGIN).addInputSchema("transform2", transform2Schema).build());
  Set<Connection> connections = ImmutableSet.of(new Connection("src", "transform1"),
                                                new Connection("transform1", "transform2"),
                                                new Connection("transform2", "sink"));
  PipelineSpec pipelineSpec = PipelineSpec.builder().addStages(stageSpecs).addConnections(connections).build();
  FieldLineageProcessor processor = new FieldLineageProcessor(pipelineSpec);
  Map<String, List<FieldOperation>> fieldOperations = ImmutableMap.of(
    "src", Collections.singletonList(
      new FieldReadOperation("Read", "1st operation", EndPoint.of("file"), ImmutableList.of("body", "offset"))),
    "transform1", Collections.emptyList(),
    "transform2", Collections.emptyList(),
    "sink", Collections.singletonList(
      new FieldWriteOperation("Write", "4th operation", EndPoint.of("sink"), ImmutableList.of("id", "name"))));
  Set<Operation> operations = processor.validateAndConvert(fieldOperations);
  Set<Operation> expected = ImmutableSet.of(
    new ReadOperation("src.Read", "1st operation", EndPoint.of("file"), ImmutableList.of("body", "offset")),
    new TransformOperation("transform1.Transform", "",
                           ImmutableList.of(InputField.of("src.Read", "body"), InputField.of("src.Read", "offset")),
                           "body"),
    new TransformOperation("transform2.Transform", "",
                           ImmutableList.of(InputField.of("transform1.Transform", "body")),
                           ImmutableList.of("id", "name")),
    new WriteOperation("sink.Write", "4th operation", EndPoint.of("sink"),
                       ImmutableList.of(InputField.of("transform2.Transform", "id"),
                                        InputField.of("transform2.Transform", "name"))));
  Assert.assertEquals(expected, operations);
}
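The DUMMY_PLUGIN constant used by the StageSpec builders is defined elsewhere in the test class. A plausible definition is sketched below; the plugin type, name, and artifact coordinates are placeholders rather than values taken from the test.
// Assumed fixture, for illustration only; the lineage logic does not depend on these values.
private static final PluginSpec DUMMY_PLUGIN =
  new PluginSpec("batchsource", "mock", Collections.emptyMap(),
                 new ArtifactId("dummy", new ArtifactVersion("1.0.0"), ArtifactScope.USER));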
use of io.cdap.cdap.etl.proto.v2.spec.StageSpec in project cdap by caskdata.
the class PipelinePlannerTest method testConditionsOnBranches.
@Test
public void testConditionsOnBranches() {
  /*
                 |-- true --> n2
       |--> c1 --|
  n1 --|         |-- false --> n3
       |
       |                |-- true --> n5
       |--> n4 --> c2 --|
                        |-- false --> n6
  */
Set<StageSpec> stageSpecs = ImmutableSet.of(StageSpec.builder("c1", CONDITION).build(), StageSpec.builder("c2", CONDITION).build(), StageSpec.builder("n1", NODE).build(), StageSpec.builder("n2", NODE).build(), StageSpec.builder("n3", NODE).build(), StageSpec.builder("n4", NODE).build(), StageSpec.builder("n5", NODE).build(), StageSpec.builder("n6", NODE).build());
Set<Connection> connections = ImmutableSet.of(new Connection("n1", "c1"), new Connection("n1", "n4"), new Connection("c1", "n2", true), new Connection("c1", "n3", false), new Connection("n4", "c2"), new Connection("c2", "n5", true), new Connection("c2", "n6", false));
Set<String> pluginTypes = ImmutableSet.of(NODE.getType(), Constants.Connector.PLUGIN_TYPE, CONDITION.getType());
Set<String> reduceTypes = ImmutableSet.of();
PipelinePlanner planner = new PipelinePlanner(pluginTypes, reduceTypes, Collections.<String>emptySet(), Collections.<String>emptySet(), Collections.<String>emptySet());
PipelineSpec pipelineSpec = PipelineSpec.builder().addStages(stageSpecs).addConnections(connections).build();
Map<String, PipelinePhase> phases = new HashMap<>();
Set<Connection> phaseConnections = new HashSet<>();
for (String condition : ImmutableList.of("c1", "c2")) {
phases.put(condition, PipelinePhase.builder(pluginTypes).addStage(StageSpec.builder(condition, CONDITION).build()).build());
}
PipelinePhase phase = PipelinePhase.builder(pluginTypes).addStage(StageSpec.builder("n1", NODE).build()).addStage(StageSpec.builder("n4", NODE).build()).addStage(StageSpec.builder("c1.connector", connectorSpec("c1.connector", Constants.Connector.SINK_TYPE)).build()).addStage(StageSpec.builder("c2.connector", connectorSpec("c2.connector", Constants.Connector.SINK_TYPE)).build()).addConnection("n1", "n4").addConnection("n1", "c1.connector").addConnection("n4", "c2.connector").build();
Dag nonConnectorDag = new Dag(ImmutableSet.of(new Connection("n1", "n4"), new Connection("n1", "c1"), new Connection("n4", "c2")));
String phaseName = PipelinePlanner.getPhaseName(nonConnectorDag);
phases.put(phaseName, phase);
// [n1, n4, c1, c2] -> [c1]
phaseConnections.add(new Connection(phaseName, "c1"));
// [n1, n4, c1, c2] -> [c2]
phaseConnections.add(new Connection(phaseName, "c2"));
// [c1] -- true --> [c1 -> n2]
phase = PipelinePhase.builder(pluginTypes).addStage(StageSpec.builder("c1.connector", connectorSpec("c1.connector", Connector.SOURCE_TYPE)).build()).addStage(StageSpec.builder("n2", NODE).build()).addConnection("c1.connector", "n2").build();
nonConnectorDag = new Dag(ImmutableSet.of(new Connection("c1", "n2")));
phaseName = PipelinePlanner.getPhaseName(nonConnectorDag);
phases.put(phaseName, phase);
phaseConnections.add(new Connection("c1", phaseName, true));
// [c1] -- false --> [c1 -> n3]
phase = PipelinePhase.builder(pluginTypes).addStage(StageSpec.builder("c1.connector", connectorSpec("c1.connector", Connector.SOURCE_TYPE)).build()).addStage(StageSpec.builder("n3", NODE).build()).addConnection("c1.connector", "n3").build();
nonConnectorDag = new Dag(ImmutableSet.of(new Connection("c1", "n3")));
phaseName = PipelinePlanner.getPhaseName(nonConnectorDag);
phases.put(phaseName, phase);
phaseConnections.add(new Connection("c1", phaseName, false));
// [c2] -- true --> [c2 -> n5]
phase = PipelinePhase.builder(pluginTypes).addStage(StageSpec.builder("c2.connector", connectorSpec("c2.connector", Connector.SOURCE_TYPE)).build()).addStage(StageSpec.builder("n5", NODE).build()).addConnection("c2.connector", "n5").build();
nonConnectorDag = new Dag(ImmutableSet.of(new Connection("c2", "n5")));
phaseName = PipelinePlanner.getPhaseName(nonConnectorDag);
phases.put(phaseName, phase);
phaseConnections.add(new Connection("c2", phaseName, true));
// [c2] -- false --> [c2 -> n6]
phase = PipelinePhase.builder(pluginTypes).addStage(StageSpec.builder("c2.connector", connectorSpec("c2.connector", Connector.SOURCE_TYPE)).build()).addStage(StageSpec.builder("n6", NODE).build()).addConnection("c2.connector", "n6").build();
nonConnectorDag = new Dag(ImmutableSet.of(new Connection("c2", "n6")));
phaseName = PipelinePlanner.getPhaseName(nonConnectorDag);
phases.put(phaseName, phase);
phaseConnections.add(new Connection("c2", phaseName, false));
PipelinePlan expected = new PipelinePlan(phases, phaseConnections);
PipelinePlan actual = planner.plan(pipelineSpec);
Assert.assertEquals(expected, actual);
}
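The connectorSpec helper used above is not shown in this excerpt, nor is the ARTIFACT_ID constant it would need (a plausible form of which is sketched after the next test). One possible shape follows; the property keys are illustrative placeholders, not confirmed constants from the CDAP codebase.
// Hypothetical helper that builds a PluginSpec for a generated connector stage.
// The "originalName"/"type" property keys and ARTIFACT_ID are assumptions for illustration.
private static PluginSpec connectorSpec(String originalName, String type) {
  return new PluginSpec(Constants.Connector.PLUGIN_TYPE, "connector",
                        ImmutableMap.of("originalName", originalName, "type", type), ARTIFACT_ID);
}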
use of io.cdap.cdap.etl.proto.v2.spec.StageSpec in project cdap by caskdata.
the class PipelinePlannerTest method testSimpleCondition.
@Test
public void testSimpleCondition() {
  /*
      n1 - n2 - condition - n3
                     |
                     |---- n4
  */
Set<StageSpec> stageSpecs = ImmutableSet.of(StageSpec.builder("n1", NODE).build(), StageSpec.builder("n2", NODE).build(), StageSpec.builder("condition", CONDITION).build(), StageSpec.builder("n3", NODE).build(), StageSpec.builder("n4", NODE).build());
Set<Connection> connections = ImmutableSet.of(new Connection("n1", "n2"), new Connection("n2", "condition"), new Connection("condition", "n3", true), new Connection("condition", "n4", false));
Set<String> pluginTypes = ImmutableSet.of(NODE.getType(), REDUCE.getType(), Constants.Connector.PLUGIN_TYPE, CONDITION.getType());
Set<String> reduceTypes = ImmutableSet.of(REDUCE.getType());
Set<String> emptySet = ImmutableSet.of();
PipelinePlanner planner = new PipelinePlanner(pluginTypes, reduceTypes, emptySet, emptySet, emptySet);
PipelineSpec pipelineSpec = PipelineSpec.builder().addStages(stageSpecs).addConnections(connections).build();
Map<String, PipelinePhase> phases = new HashMap<>();
/*
n1--n2--condition.connector
*/
PipelinePhase phase1 = PipelinePhase.builder(pluginTypes).addStage(StageSpec.builder("n1", NODE).build()).addStage(StageSpec.builder("n2", NODE).build()).addStage(StageSpec.builder("condition.connector", connectorSpec("condition.connector", Constants.Connector.SINK_TYPE)).build()).addConnection("n1", "n2").addConnection("n2", "condition.connector").build();
Dag controlPhaseDag = new Dag(ImmutableSet.of(new Connection("n1", "n2"), new Connection("n2", "condition")));
String phase1Name = PipelinePlanner.getPhaseName(controlPhaseDag);
phases.put(phase1Name, phase1);
/*
condition
*/
PipelinePhase phase2 = PipelinePhase.builder(pluginTypes).addStage(StageSpec.builder("condition", CONDITION).build()).build();
String phase2Name = "condition";
phases.put(phase2Name, phase2);
/*
condition.connector -- n3
*/
PipelinePhase phase3 = PipelinePhase.builder(pluginTypes).addStage(StageSpec.builder("condition.connector", connectorSpec("condition.connector", Constants.Connector.SOURCE_TYPE)).build()).addStage(StageSpec.builder("n3", NODE).build()).addConnection("condition.connector", "n3").build();
controlPhaseDag = new Dag(ImmutableSet.of(new Connection("condition", "n3")));
String phase3Name = PipelinePlanner.getPhaseName(controlPhaseDag);
phases.put(phase3Name, phase3);
/*
condition.connector -- n4
*/
PipelinePhase phase4 = PipelinePhase.builder(pluginTypes).addStage(StageSpec.builder("condition.connector", connectorSpec("condition.connector", Constants.Connector.SOURCE_TYPE)).build()).addStage(StageSpec.builder("n4", NODE).build()).addConnection("condition.connector", "n4").build();
controlPhaseDag = new Dag(ImmutableSet.of(new Connection("condition", "n4")));
String phase4Name = PipelinePlanner.getPhaseName(controlPhaseDag);
phases.put(phase4Name, phase4);
Set<Connection> phaseConnections = new HashSet<>();
phaseConnections.add(new Connection(phase1Name, phase2Name));
phaseConnections.add(new Connection(phase2Name, phase3Name, true));
phaseConnections.add(new Connection(phase2Name, phase4Name, false));
PipelinePlan expected = new PipelinePlan(phases, phaseConnections);
PipelinePlan actual = planner.plan(pipelineSpec);
Assert.assertEquals(expected, actual);
}
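Both planner tests also reference NODE, REDUCE, and CONDITION plugin specs defined elsewhere in the test class. A minimal sketch of such fixtures follows, assuming a shared dummy ARTIFACT_ID; the type and name strings are placeholders, not values confirmed by the tests.
// Assumed test fixtures, for illustration only.
private static final ArtifactId ARTIFACT_ID =
  new ArtifactId("dummy", new ArtifactVersion("1.0.0"), ArtifactScope.USER);
private static final PluginSpec NODE = new PluginSpec("node", "mock", Collections.emptyMap(), ARTIFACT_ID);
private static final PluginSpec REDUCE = new PluginSpec("reduce", "mock", Collections.emptyMap(), ARTIFACT_ID);
private static final PluginSpec CONDITION = new PluginSpec("condition", "mock", Collections.emptyMap(), ARTIFACT_ID);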