Search in sources:

Example 1 with StageSpec

use of io.cdap.cdap.etl.proto.v2.spec.StageSpec in project cdap by caskdata.

From the class PipelinePhase, the method registerPlugins:

/**
 * Registers every plugin in this {@link PipelinePhase} with the given configurer by calling
 * {@link PluginConfigurer#usePluginClass(String, String, String, PluginProperties, PluginSelector)}.
 *
 * @param pluginConfigurer the {@link PluginConfigurer} that the plugins in this {@link PipelinePhase}
 *                         should be registered with
 * @param runtimeConfigurer runtime configurer that supplies runtime arguments for better macro
 *                          resolution, or null if this is the initial deploy
 * @param namespace the namespace the app is deployed in
 */
public void registerPlugins(PluginConfigurer pluginConfigurer, @Nullable RuntimeConfigurer runtimeConfigurer, String namespace) {
    MacroParserOptions options = MacroParserOptions.builder()
        .skipInvalidMacros()
        .setEscaping(false)
        .setFunctionWhitelist(ConnectionMacroEvaluator.FUNCTION_NAME)
        .build();
    // only available at runtime; stays null on initial deploy, in which case macros are left as-is
    MacroEvaluator macroEvaluator = null;
    if (runtimeConfigurer != null) {
        Map<String, MacroEvaluator> evaluators = Collections.singletonMap(
            ConnectionMacroEvaluator.FUNCTION_NAME, new ConnectionMacroEvaluator(namespace, runtimeConfigurer));
        macroEvaluator = new DefaultMacroEvaluator(
            new BasicArguments(runtimeConfigurer.getRuntimeArguments()),
            evaluators,
            Collections.singleton(ConnectionMacroEvaluator.FUNCTION_NAME));
    }
    for (StageSpec stage : stagesByName.values()) {
        // we don't need to register connectors, only source, sink and transform plugins
        if (stage.getPluginType().equals(Constants.Connector.PLUGIN_TYPE)) {
            continue;
        }
        PluginSpec plugin = stage.getPlugin();
        // pin the selector to the exact artifact version recorded in the spec (inclusive range [v, v])
        ArtifactVersion artifactVersion = plugin.getArtifact().getVersion();
        ArtifactSelector selector = new ArtifactSelector(
            plugin.getArtifact().getScope(),
            plugin.getArtifact().getName(),
            new ArtifactVersionRange(artifactVersion, true, artifactVersion, true));
        Map<String, String> properties = plugin.getProperties();
        // at runtime, evaluate connection macros in the plugin properties before registering
        Map<String, String> resolvedProperties = runtimeConfigurer == null
            ? properties
            : pluginConfigurer.evaluateMacros(properties, macroEvaluator, options);
        pluginConfigurer.usePluginClass(plugin.getType(), plugin.getName(), stage.getName(),
                                        PluginProperties.builder().addAll(resolvedProperties).build(), selector);
    }
}
Also used : MacroEvaluator(io.cdap.cdap.api.macro.MacroEvaluator) ArtifactVersionRange(io.cdap.cdap.api.artifact.ArtifactVersionRange) MacroParserOptions(io.cdap.cdap.api.macro.MacroParserOptions) PluginSpec(io.cdap.cdap.etl.proto.v2.spec.PluginSpec) ArtifactVersion(io.cdap.cdap.api.artifact.ArtifactVersion) StageSpec(io.cdap.cdap.etl.proto.v2.spec.StageSpec)

Example 2 with StageSpec

use of io.cdap.cdap.etl.proto.v2.spec.StageSpec in project cdap by caskdata.

From the class TransformRunner, the method getSinkWriter:

// this is needed because we need to write to the context differently depending on the number of outputs
private OutputWriter<Object, Object> getSinkWriter(MapReduceTaskContext<Object, Object> context, PipelinePhase pipelinePhase, Configuration hConf) {
    JobContext hadoopContext = context.getHadoopContext();
    Set<StageSpec> reducers = pipelinePhase.getStagesOfType(BatchAggregator.PLUGIN_TYPE, BatchJoiner.PLUGIN_TYPE);
    // presumably the mapper side of an aggregate/join phase has a single output — TODO confirm
    if (!reducers.isEmpty() && hadoopContext instanceof Mapper.Context) {
        return new SingleOutputWriter<>(context);
    }
    String serializedSinkOutputs = hConf.get(ETLMapReduce.SINK_OUTPUTS_KEY);
    // should never happen, this is set in initialize
    Preconditions.checkNotNull(serializedSinkOutputs, "Sink outputs not found in Hadoop conf.");
    Map<String, SinkOutput> sinkOutputs = GSON.fromJson(serializedSinkOutputs, ETLMapReduce.SINK_OUTPUTS_TYPE);
    if (hasSingleOutput(sinkOutputs)) {
        return new SingleOutputWriter<>(context);
    }
    return new MultiOutputWriter<>(context, sinkOutputs);
}
Also used : Mapper(org.apache.hadoop.mapreduce.Mapper) StageSpec(io.cdap.cdap.etl.proto.v2.spec.StageSpec) JobContext(org.apache.hadoop.mapreduce.JobContext)

Example 3 with StageSpec

use of io.cdap.cdap.etl.proto.v2.spec.StageSpec in project cdap by caskdata.

From the class FieldLineageProcessorTest, the method testGeneratedOperations:

@Test
public void testGeneratedOperations() throws Exception {
    // pipeline under test: src -> transform1 -> transform2 -> sink
    Schema srcSchema = Schema.recordOf("srcSchema",
                                       Schema.Field.of("body", Schema.of(Schema.Type.STRING)),
                                       Schema.Field.of("offset", Schema.of(Schema.Type.INT)));
    Schema transform1Schema = Schema.recordOf("trans1Schema",
                                              Schema.Field.of("body", Schema.of(Schema.Type.STRING)));
    Schema transform2Schema = Schema.recordOf("trans2Schema",
                                              Schema.Field.of("id", Schema.of(Schema.Type.INT)),
                                              Schema.Field.of("name", Schema.of(Schema.Type.STRING)));
    StageSpec srcStage = StageSpec.builder("src", DUMMY_PLUGIN)
        .addOutput(srcSchema, "transform1")
        .build();
    StageSpec transform1Stage = StageSpec.builder("transform1", DUMMY_PLUGIN)
        .addInputSchema("src", srcSchema)
        .addOutput(transform1Schema, "transform2")
        .build();
    StageSpec transform2Stage = StageSpec.builder("transform2", DUMMY_PLUGIN)
        .addInputSchema("transform1", transform1Schema)
        .addOutput(transform2Schema, "sink")
        .build();
    StageSpec sinkStage = StageSpec.builder("sink", DUMMY_PLUGIN)
        .addInputSchema("transform2", transform2Schema)
        .build();
    Set<StageSpec> stageSpecs = ImmutableSet.of(srcStage, transform1Stage, transform2Stage, sinkStage);
    Set<Connection> connections = ImmutableSet.of(new Connection("src", "transform1"),
                                                  new Connection("transform1", "transform2"),
                                                  new Connection("transform2", "sink"));
    PipelineSpec pipelineSpec = PipelineSpec.builder().addStages(stageSpecs).addConnections(connections).build();
    FieldLineageProcessor processor = new FieldLineageProcessor(pipelineSpec);
    // only src and sink record explicit field operations; the transforms record none
    Map<String, List<FieldOperation>> fieldOperations = ImmutableMap.of(
        "src", Collections.singletonList(
            new FieldReadOperation("Read", "1st operation", EndPoint.of("file"), ImmutableList.of("body", "offset"))),
        "transform1", Collections.emptyList(),
        "transform2", Collections.emptyList(),
        "sink", Collections.singletonList(
            new FieldWriteOperation("Write", "4th operation", EndPoint.of("sink"), ImmutableList.of("id", "name"))));
    Set<Operation> actual = processor.validateAndConvert(fieldOperations);
    // expected: operation names are prefixed with the stage name, and the stages that recorded no
    // operations get a generated ".Transform" operation wired between their inputs and outputs
    Set<Operation> expected = ImmutableSet.of(
        new ReadOperation("src.Read", "1st operation", EndPoint.of("file"), ImmutableList.of("body", "offset")),
        new TransformOperation("transform1.Transform", "",
                               ImmutableList.of(InputField.of("src.Read", "body"), InputField.of("src.Read", "offset")),
                               "body"),
        new TransformOperation("transform2.Transform", "",
                               ImmutableList.of(InputField.of("transform1.Transform", "body")),
                               ImmutableList.of("id", "name")),
        new WriteOperation("sink.Write", "4th operation", EndPoint.of("sink"),
                           ImmutableList.of(InputField.of("transform2.Transform", "id"),
                                            InputField.of("transform2.Transform", "name"))));
    Assert.assertEquals(expected, actual);
}
Also used : ReadOperation(io.cdap.cdap.api.lineage.field.ReadOperation) FieldReadOperation(io.cdap.cdap.etl.api.lineage.field.FieldReadOperation) Schema(io.cdap.cdap.api.data.schema.Schema) Connection(io.cdap.cdap.etl.proto.Connection) ReadOperation(io.cdap.cdap.api.lineage.field.ReadOperation) FieldOperation(io.cdap.cdap.etl.api.lineage.field.FieldOperation) FieldWriteOperation(io.cdap.cdap.etl.api.lineage.field.FieldWriteOperation) TransformOperation(io.cdap.cdap.api.lineage.field.TransformOperation) FieldReadOperation(io.cdap.cdap.etl.api.lineage.field.FieldReadOperation) Operation(io.cdap.cdap.api.lineage.field.Operation) WriteOperation(io.cdap.cdap.api.lineage.field.WriteOperation) TransformOperation(io.cdap.cdap.api.lineage.field.TransformOperation) FieldWriteOperation(io.cdap.cdap.etl.api.lineage.field.FieldWriteOperation) FieldReadOperation(io.cdap.cdap.etl.api.lineage.field.FieldReadOperation) FieldWriteOperation(io.cdap.cdap.etl.api.lineage.field.FieldWriteOperation) WriteOperation(io.cdap.cdap.api.lineage.field.WriteOperation) PipelineSpec(io.cdap.cdap.etl.proto.v2.spec.PipelineSpec) StageSpec(io.cdap.cdap.etl.proto.v2.spec.StageSpec) ImmutableList(com.google.common.collect.ImmutableList) List(java.util.List) Test(org.junit.Test)

Example 4 with StageSpec

use of io.cdap.cdap.etl.proto.v2.spec.StageSpec in project cdap by caskdata.

From the class PipelinePlannerTest, the method testConditionsOnBranches:

// Verifies that a pipeline with two conditions on separate branches is split into the expected
// phases: one phase for everything upstream of the conditions, one single-stage phase per
// condition, and one phase per condition branch, with connector stages bridging the phases.
@Test
public void testConditionsOnBranches() {
    /*
                        |-- true --> n2
              |--> c1 --|
         n1 --|         |-- false --> n3
              |
              |                |-- true --> n5
              |--> n4 --> c2 --|
                               |-- false --> n6
     */
    Set<StageSpec> stageSpecs = ImmutableSet.of(StageSpec.builder("c1", CONDITION).build(), StageSpec.builder("c2", CONDITION).build(), StageSpec.builder("n1", NODE).build(), StageSpec.builder("n2", NODE).build(), StageSpec.builder("n3", NODE).build(), StageSpec.builder("n4", NODE).build(), StageSpec.builder("n5", NODE).build(), StageSpec.builder("n6", NODE).build());
    Set<Connection> connections = ImmutableSet.of(new Connection("n1", "c1"), new Connection("n1", "n4"), new Connection("c1", "n2", true), new Connection("c1", "n3", false), new Connection("n4", "c2"), new Connection("c2", "n5", true), new Connection("c2", "n6", false));
    Set<String> pluginTypes = ImmutableSet.of(NODE.getType(), Constants.Connector.PLUGIN_TYPE, CONDITION.getType());
    // no reduce/aggregate types in this test, so no extra phase splits from reducers
    Set<String> reduceTypes = ImmutableSet.of();
    PipelinePlanner planner = new PipelinePlanner(pluginTypes, reduceTypes, Collections.<String>emptySet(), Collections.<String>emptySet(), Collections.<String>emptySet());
    PipelineSpec pipelineSpec = PipelineSpec.builder().addStages(stageSpecs).addConnections(connections).build();
    // hand-build the expected plan: phases keyed by phase name, plus the connections between phases
    Map<String, PipelinePhase> phases = new HashMap<>();
    Set<Connection> phaseConnections = new HashSet<>();
    // each condition becomes its own single-stage phase, named after the condition
    for (String condition : ImmutableList.of("c1", "c2")) {
        phases.put(condition, PipelinePhase.builder(pluginTypes).addStage(StageSpec.builder(condition, CONDITION).build()).build());
    }
    // upstream phase [n1, n4]: condition inputs are captured via connector sink stages
    // (c1.connector, c2.connector) so the branches can read them back later
    PipelinePhase phase = PipelinePhase.builder(pluginTypes).addStage(StageSpec.builder("n1", NODE).build()).addStage(StageSpec.builder("n4", NODE).build()).addStage(StageSpec.builder("c1.connector", connectorSpec("c1.connector", Constants.Connector.SINK_TYPE)).build()).addStage(StageSpec.builder("c2.connector", connectorSpec("c2.connector", Constants.Connector.SINK_TYPE)).build()).addConnection("n1", "n4").addConnection("n1", "c1.connector").addConnection("n4", "c2.connector").build();
    // phase names are derived from the dag WITHOUT connector stages — note the Dag below uses
    // "c1"/"c2", not "c1.connector"/"c2.connector"
    Dag nonConnectorDag = new Dag(ImmutableSet.of(new Connection("n1", "n4"), new Connection("n1", "c1"), new Connection("n4", "c2")));
    String phaseName = PipelinePlanner.getPhaseName(nonConnectorDag);
    phases.put(phaseName, phase);
    // [n1, n4, c1, c2] -> [c1]
    phaseConnections.add(new Connection(phaseName, "c1"));
    // [n1, n4, c1, c2] -> [c2]
    phaseConnections.add(new Connection(phaseName, "c2"));
    // [c1] -- true --> [c1 -> n2]
    phase = PipelinePhase.builder(pluginTypes).addStage(StageSpec.builder("c1.connector", connectorSpec("c1.connector", Connector.SOURCE_TYPE)).build()).addStage(StageSpec.builder("n2", NODE).build()).addConnection("c1.connector", "n2").build();
    nonConnectorDag = new Dag(ImmutableSet.of(new Connection("c1", "n2")));
    phaseName = PipelinePlanner.getPhaseName(nonConnectorDag);
    phases.put(phaseName, phase);
    phaseConnections.add(new Connection("c1", phaseName, true));
    // [c1] -- false --> [c1 -> n3]
    phase = PipelinePhase.builder(pluginTypes).addStage(StageSpec.builder("c1.connector", connectorSpec("c1.connector", Connector.SOURCE_TYPE)).build()).addStage(StageSpec.builder("n3", NODE).build()).addConnection("c1.connector", "n3").build();
    nonConnectorDag = new Dag(ImmutableSet.of(new Connection("c1", "n3")));
    phaseName = PipelinePlanner.getPhaseName(nonConnectorDag);
    phases.put(phaseName, phase);
    phaseConnections.add(new Connection("c1", phaseName, false));
    // [c2] -- true --> [c2 -> n5]
    phase = PipelinePhase.builder(pluginTypes).addStage(StageSpec.builder("c2.connector", connectorSpec("c2.connector", Connector.SOURCE_TYPE)).build()).addStage(StageSpec.builder("n5", NODE).build()).addConnection("c2.connector", "n5").build();
    nonConnectorDag = new Dag(ImmutableSet.of(new Connection("c2", "n5")));
    phaseName = PipelinePlanner.getPhaseName(nonConnectorDag);
    phases.put(phaseName, phase);
    phaseConnections.add(new Connection("c2", phaseName, true));
    // [c2] -- false --> [c2 -> n6]
    phase = PipelinePhase.builder(pluginTypes).addStage(StageSpec.builder("c2.connector", connectorSpec("c2.connector", Connector.SOURCE_TYPE)).build()).addStage(StageSpec.builder("n6", NODE).build()).addConnection("c2.connector", "n6").build();
    nonConnectorDag = new Dag(ImmutableSet.of(new Connection("c2", "n6")));
    phaseName = PipelinePlanner.getPhaseName(nonConnectorDag);
    phases.put(phaseName, phase);
    phaseConnections.add(new Connection("c2", phaseName, false));
    // the planner's output must match the hand-built plan exactly
    PipelinePlan expected = new PipelinePlan(phases, phaseConnections);
    PipelinePlan actual = planner.plan(pipelineSpec);
    Assert.assertEquals(expected, actual);
}
Also used : HashMap(java.util.HashMap) Connection(io.cdap.cdap.etl.proto.Connection) PipelineSpec(io.cdap.cdap.etl.proto.v2.spec.PipelineSpec) PipelinePhase(io.cdap.cdap.etl.common.PipelinePhase) StageSpec(io.cdap.cdap.etl.proto.v2.spec.StageSpec) HashSet(java.util.HashSet) Test(org.junit.Test)

Example 5 with StageSpec

use of io.cdap.cdap.etl.proto.v2.spec.StageSpec in project cdap by caskdata.

From the class PipelinePlannerTest, the method testSimpleCondition:

// Verifies that a single condition splits the pipeline into four phases: the stages upstream of
// the condition, the condition itself, and one phase per condition branch.
@Test
public void testSimpleCondition() {
    /*
      n1 - n2 - condition - n3
                      |
                      |---- n4
     */
    Set<StageSpec> stageSpecs = ImmutableSet.of(
        StageSpec.builder("n1", NODE).build(),
        StageSpec.builder("n2", NODE).build(),
        StageSpec.builder("condition", CONDITION).build(),
        StageSpec.builder("n3", NODE).build(),
        StageSpec.builder("n4", NODE).build());
    Set<Connection> connections = ImmutableSet.of(
        new Connection("n1", "n2"),
        new Connection("n2", "condition"),
        new Connection("condition", "n3", true),
        new Connection("condition", "n4", false));
    Set<String> pluginTypes = ImmutableSet.of(NODE.getType(), REDUCE.getType(), Constants.Connector.PLUGIN_TYPE, CONDITION.getType());
    Set<String> reduceTypes = ImmutableSet.of(REDUCE.getType());
    Set<String> none = ImmutableSet.of();
    PipelinePlanner planner = new PipelinePlanner(pluginTypes, reduceTypes, none, none, none);
    PipelineSpec pipelineSpec = PipelineSpec.builder().addStages(stageSpecs).addConnections(connections).build();

    // hand-build the expected plan, phase by phase
    Map<String, PipelinePhase> expectedPhases = new HashMap<>();
    // upstream of the condition: n1--n2--condition.connector (connector sink captures the input)
    PipelinePhase upstreamPhase = PipelinePhase.builder(pluginTypes)
        .addStage(StageSpec.builder("n1", NODE).build())
        .addStage(StageSpec.builder("n2", NODE).build())
        .addStage(StageSpec.builder("condition.connector", connectorSpec("condition.connector", Constants.Connector.SINK_TYPE)).build())
        .addConnection("n1", "n2")
        .addConnection("n2", "condition.connector")
        .build();
    // phase names come from the dag without connector stages, hence "condition" not "condition.connector"
    String upstreamName = PipelinePlanner.getPhaseName(
        new Dag(ImmutableSet.of(new Connection("n1", "n2"), new Connection("n2", "condition"))));
    expectedPhases.put(upstreamName, upstreamPhase);
    // the condition is its own single-stage phase, named after the condition
    PipelinePhase conditionPhase = PipelinePhase.builder(pluginTypes)
        .addStage(StageSpec.builder("condition", CONDITION).build())
        .build();
    String conditionName = "condition";
    expectedPhases.put(conditionName, conditionPhase);
    // true branch: condition.connector -- n3
    PipelinePhase trueBranchPhase = PipelinePhase.builder(pluginTypes)
        .addStage(StageSpec.builder("condition.connector", connectorSpec("condition.connector", Constants.Connector.SOURCE_TYPE)).build())
        .addStage(StageSpec.builder("n3", NODE).build())
        .addConnection("condition.connector", "n3")
        .build();
    String trueBranchName = PipelinePlanner.getPhaseName(
        new Dag(ImmutableSet.of(new Connection("condition", "n3"))));
    expectedPhases.put(trueBranchName, trueBranchPhase);
    // false branch: condition.connector -- n4
    PipelinePhase falseBranchPhase = PipelinePhase.builder(pluginTypes)
        .addStage(StageSpec.builder("condition.connector", connectorSpec("condition.connector", Constants.Connector.SOURCE_TYPE)).build())
        .addStage(StageSpec.builder("n4", NODE).build())
        .addConnection("condition.connector", "n4")
        .build();
    String falseBranchName = PipelinePlanner.getPhaseName(
        new Dag(ImmutableSet.of(new Connection("condition", "n4"))));
    expectedPhases.put(falseBranchName, falseBranchPhase);

    // wire the phases: upstream -> condition, then condition -> each branch with its truth value
    Set<Connection> phaseConnections = new HashSet<>();
    phaseConnections.add(new Connection(upstreamName, conditionName));
    phaseConnections.add(new Connection(conditionName, trueBranchName, true));
    phaseConnections.add(new Connection(conditionName, falseBranchName, false));

    PipelinePlan expected = new PipelinePlan(expectedPhases, phaseConnections);
    PipelinePlan actual = planner.plan(pipelineSpec);
    Assert.assertEquals(expected, actual);
}
Also used : HashMap(java.util.HashMap) Connection(io.cdap.cdap.etl.proto.Connection) PipelineSpec(io.cdap.cdap.etl.proto.v2.spec.PipelineSpec) PipelinePhase(io.cdap.cdap.etl.common.PipelinePhase) StageSpec(io.cdap.cdap.etl.proto.v2.spec.StageSpec) HashSet(java.util.HashSet) Test(org.junit.Test)

Aggregations

StageSpec (io.cdap.cdap.etl.proto.v2.spec.StageSpec)37 HashMap (java.util.HashMap)21 PipelinePhase (io.cdap.cdap.etl.common.PipelinePhase)15 HashSet (java.util.HashSet)12 Map (java.util.Map)12 MacroEvaluator (io.cdap.cdap.api.macro.MacroEvaluator)10 DefaultMacroEvaluator (io.cdap.cdap.etl.common.DefaultMacroEvaluator)10 Connection (io.cdap.cdap.etl.proto.Connection)9 Schema (io.cdap.cdap.api.data.schema.Schema)8 PipelineRuntime (io.cdap.cdap.etl.common.PipelineRuntime)8 ArrayList (java.util.ArrayList)8 BatchPhaseSpec (io.cdap.cdap.etl.batch.BatchPhaseSpec)7 PipelineSpec (io.cdap.cdap.etl.proto.v2.spec.PipelineSpec)7 Test (org.junit.Test)7 BasicArguments (io.cdap.cdap.etl.common.BasicArguments)6 PipelinePluginContext (io.cdap.cdap.etl.common.plugin.PipelinePluginContext)6 PluginContext (io.cdap.cdap.api.plugin.PluginContext)5 BatchJoiner (io.cdap.cdap.etl.api.batch.BatchJoiner)4 FieldOperation (io.cdap.cdap.etl.api.lineage.field.FieldOperation)4 List (java.util.List)4