Search in sources :

Example 11 with PipelinePhase

use of io.cdap.cdap.etl.common.PipelinePhase in project cdap by cdapio.

the class MultiSinkFunction method initializeBranchExecutors.

/**
 * Creates one transform executor per "group source" and records which upstream outputs feed each one.
 *
 * <p>Assigns the {@code emitter}, {@code executorFactory}, {@code branchExecutors} and
 * {@code inputConnections} fields.
 *
 * @throws IllegalStateException if the executor factory fails to create an executor for a branch
 */
private void initializeBranchExecutors() {
    emitter = new DefaultEmitter<>();
    PipelinePluginInstantiator instantiator = new PipelinePluginInstantiator(
            pipelineRuntime.getPluginContext(),
            pipelineRuntime.getMetrics(),
            phaseSpec,
            new SingleConnectorFactory());
    MacroEvaluator evaluator = new DefaultMacroEvaluator(
            pipelineRuntime.getArguments(),
            pipelineRuntime.getLogicalStartTime(),
            pipelineRuntime.getSecureStore(),
            pipelineRuntime.getServiceDiscoverer(),
            pipelineRuntime.getNamespace());
    executorFactory = new SparkTransformExecutorFactory(
            instantiator, evaluator, null, collectors, dataTracers, pipelineRuntime, emitter);

    /*
       Given this dag:

            |--> t1 --> k1
       s1 --|
            |--> k2
                 ^
           s2 ---|

       the group consists of t1, k1, and k2.
     */
    PipelinePhase fullPhase = phaseSpec.getPhase();
    branchExecutors = new HashMap<>();
    inputConnections = new HashMap<>();

    for (String candidate : group) {
        // A stage is treated as a group "source" only when at least one of its inputs lies outside the group;
        // stages whose inputs all come from inside the group (or that have no inputs) are skipped.
        boolean fedOnlyFromGroup = Sets.difference(fullPhase.getStageInputs(candidate), group).isEmpty();
        if (fedOnlyFromGroup) {
            continue;
        }

        // The branch is the part of the pipeline that starts at the group source.
        // In the dag above the two branches are t1 -> k1, and k2.
        // subsetFrom() throws when the new "source" is also a sink, because a Dag cannot be a single node,
        // so a single-stage branch is assembled by hand instead.
        PipelinePhase branch = fullPhase.getSinks().contains(candidate)
                ? PipelinePhase.builder(fullPhase.getPluginTypes()).addStage(fullPhase.getStage(candidate)).build()
                : fullPhase.subsetFrom(Collections.singleton(candidate));

        try {
            branchExecutors.put(candidate, executorFactory.create(branch));
        } catch (Exception e) {
            String message = String.format(
                    "Unable to get subset of pipeline starting from stage %s. "
                            + "This indicates a planning error. Please report this bug and turn off stage "
                            + "consolidation by setting %s to false in the runtime arguments.",
                    candidate, Constants.CONSOLIDATE_STAGES);
            throw new IllegalStateException(message, e);
        }

        /*
          Map every possible input onto the group sources it feeds, so that incoming records can be
          routed to the right branch executor.

          Example with a splitter:

                           |port a --> k1
             s --> split --|
                           |port b --> k2

          k1 and k2 are in the same group, so the map contains:

            { stageName: split, port: a, type: output } -> [k1]
            { stageName: split, port: b, type: output } -> [k2]

          Example with an error collector:

                               |--> k1
            s1 --> transform --|
                      |        |--> k2
                      |
                      |--> error collector --> k3

          k1, k2, k3, and the error collector are in the same group, so the map contains:

            { stageName: transform, type: output } -> [k1, k2]
            { stageName: transform, type: error } -> [k3]
       */
        // An error transform consumes the error records of its inputs; every other plugin type consumes output.
        final RecordType recordType;
        if (ErrorTransform.PLUGIN_TYPE.equals(fullPhase.getStage(candidate).getPluginType())) {
            recordType = RecordType.ERROR;
        } else {
            recordType = RecordType.OUTPUT;
        }

        for (String upstream : fullPhase.getStageInputs(candidate)) {
            // the port of the upstream stage through which it is connected to this group source
            String port = fullPhase.getStage(upstream).getOutputPorts().get(candidate).getPort();
            inputConnections
                    .computeIfAbsent(new InputInfo(upstream, recordType, port), unused -> new HashSet<>())
                    .add(candidate);
        }
    }
}
Also used : DefaultMacroEvaluator(io.cdap.cdap.etl.common.DefaultMacroEvaluator) MacroEvaluator(io.cdap.cdap.api.macro.MacroEvaluator) SingleConnectorFactory(io.cdap.cdap.etl.batch.connector.SingleConnectorFactory) SparkTransformExecutorFactory(io.cdap.cdap.etl.spark.SparkTransformExecutorFactory) RecordType(io.cdap.cdap.etl.common.RecordType) PipelinePhase(io.cdap.cdap.etl.common.PipelinePhase) DefaultMacroEvaluator(io.cdap.cdap.etl.common.DefaultMacroEvaluator) PipelinePluginInstantiator(io.cdap.cdap.etl.batch.PipelinePluginInstantiator)

Example 12 with PipelinePhase

use of io.cdap.cdap.etl.common.PipelinePhase in project cdap by cdapio.

the class PipelineAction method run.

/**
 * Instantiates the action plugin for the first stage of the phase, runs it unless the data tracer for
 * that stage is enabled, and then copies the arguments added to the pipeline runtime into the workflow token.
 *
 * @throws IllegalStateException if the context has no workflow token
 * @throws Exception anything raised while instantiating or running the action
 */
@Override
public void run() throws Exception {
    CustomActionContext ctx = getContext();
    String serializedPhaseSpec = ctx.getSpecification().getProperties().get(Constants.PIPELINEID);
    BatchPhaseSpec phaseSpec = GSON.fromJson(serializedPhaseSpec, BatchPhaseSpec.class);

    // the phase is iterated and only its first stage is used
    StageSpec stageSpec = phaseSpec.getPhase().iterator().next();
    String stageName = stageSpec.getName();

    PluginContext pluginContext = new PipelinePluginContext(
            ctx, metrics, phaseSpec.isStageLoggingEnabled(), phaseSpec.isProcessTimingEnabled());
    PipelineRuntime pipelineRuntime = new PipelineRuntime(ctx, metrics);
    DefaultMacroEvaluator macroEvaluator = new DefaultMacroEvaluator(
            pipelineRuntime.getArguments(), ctx.getLogicalStartTime(), ctx, ctx, ctx.getNamespace());
    Action action = pluginContext.newPluginInstance(stageName, macroEvaluator);
    ActionContext actionContext = new BasicActionContext(ctx, pipelineRuntime, stageSpec);

    // NOTE(review): the action is skipped while the stage's data tracer is enabled,
    // presumably so that traced (preview-style) runs have no side effects; confirm.
    boolean tracingEnabled = ctx.getDataTracer(stageName).isEnabled();
    if (!tracingEnabled) {
        action.run(actionContext);
    }

    WorkflowToken workflowToken = ctx.getWorkflowToken();
    if (workflowToken == null) {
        throw new IllegalStateException("WorkflowToken cannot be null when action is executed through Workflow.");
    }

    // publish every argument added to the pipeline runtime through the workflow token
    Map<String, String> addedArguments = pipelineRuntime.getArguments().getAddedArguments();
    for (Map.Entry<String, String> added : addedArguments.entrySet()) {
        workflowToken.put(added.getKey(), added.getValue());
    }
}
Also used : Action(io.cdap.cdap.etl.api.action.Action) AbstractCustomAction(io.cdap.cdap.api.customaction.AbstractCustomAction) CustomAction(io.cdap.cdap.api.customaction.CustomAction) PipelineRuntime(io.cdap.cdap.etl.common.PipelineRuntime) PipelinePluginContext(io.cdap.cdap.etl.common.plugin.PipelinePluginContext) PluginContext(io.cdap.cdap.api.plugin.PluginContext) WorkflowToken(io.cdap.cdap.api.workflow.WorkflowToken) CustomActionContext(io.cdap.cdap.api.customaction.CustomActionContext) ActionContext(io.cdap.cdap.etl.api.action.ActionContext) PipelinePhase(io.cdap.cdap.etl.common.PipelinePhase) StageSpec(io.cdap.cdap.etl.proto.v2.spec.StageSpec) DefaultMacroEvaluator(io.cdap.cdap.etl.common.DefaultMacroEvaluator) CustomActionContext(io.cdap.cdap.api.customaction.CustomActionContext) BatchPhaseSpec(io.cdap.cdap.etl.batch.BatchPhaseSpec) HashMap(java.util.HashMap) Map(java.util.Map) PipelinePluginContext(io.cdap.cdap.etl.common.plugin.PipelinePluginContext)

Example 13 with PipelinePhase

use of io.cdap.cdap.etl.common.PipelinePhase in project cdap by cdapio.

the class PipelinePlannerTest method testMultipleConditions.

/**
 * Plans a pipeline that contains five condition stages and asserts that the planner produces the
 * expected thirteen phases and the expected connections between them.
 */
@Test
public void testMultipleConditions() {
    /*
      n1 - n2 - condition1 - n3 - n4 - condition2 - n5 - condition3 - n6
                    |                       |                   |
                    |--n10                  |---condition4 - n8 |------n7
                                                  |
                                                  |----condition5 - n9
     */
    Set<StageSpec> stageSpecs = ImmutableSet.of(
            StageSpec.builder("n1", NODE).build(),
            StageSpec.builder("n2", NODE).build(),
            StageSpec.builder("condition1", CONDITION1).build(),
            StageSpec.builder("n3", NODE).build(),
            StageSpec.builder("n4", NODE).build(),
            StageSpec.builder("condition2", CONDITION2).build(),
            StageSpec.builder("n5", NODE).build(),
            StageSpec.builder("condition3", CONDITION3).build(),
            StageSpec.builder("n6", NODE).build(),
            StageSpec.builder("condition4", CONDITION4).build(),
            StageSpec.builder("n7", NODE).build(),
            StageSpec.builder("condition5", CONDITION5).build(),
            StageSpec.builder("n8", NODE).build(),
            StageSpec.builder("n9", NODE).build(),
            StageSpec.builder("n10", NODE).build());
    Set<Connection> connections = ImmutableSet.of(
            new Connection("n1", "n2"),
            new Connection("n2", "condition1"),
            new Connection("condition1", "n3", true),
            new Connection("condition1", "n10", false),
            new Connection("n3", "n4"),
            new Connection("n4", "condition2"),
            new Connection("condition2", "n5", true),
            new Connection("n5", "condition3"),
            new Connection("condition3", "n6", true),
            new Connection("condition3", "n7", false),
            new Connection("condition2", "condition4", false),
            new Connection("condition4", "n8", true),
            new Connection("condition4", "condition5", false),
            new Connection("condition5", "n9", true));
    Set<String> pluginTypes = ImmutableSet.of(
            NODE.getType(),
            REDUCE.getType(),
            Constants.Connector.PLUGIN_TYPE,
            CONDITION1.getType(),
            CONDITION2.getType(),
            CONDITION3.getType(),
            CONDITION4.getType(),
            CONDITION5.getType());
    Set<String> reduceTypes = ImmutableSet.of(REDUCE.getType());
    Set<String> none = ImmutableSet.of();
    PipelinePlanner planner = new PipelinePlanner(pluginTypes, reduceTypes, none, none, none);
    PipelineSpec pipelineSpec = PipelineSpec.builder().addStages(stageSpecs).addConnections(connections).build();

    Map<String, PipelinePhase> expectedPhases = new HashMap<>();

    // phase 1: n1--n2--condition1.connector
    String phase1Name = PipelinePlanner.getPhaseName(new Dag(ImmutableSet.of(
            new Connection("n1", "n2"),
            new Connection("n2", "condition1"))));
    expectedPhases.put(phase1Name, PipelinePhase.builder(pluginTypes)
            .addStage(StageSpec.builder("n1", NODE).build())
            .addStage(StageSpec.builder("n2", NODE).build())
            .addStage(StageSpec.builder("condition1.connector",
                    connectorSpec("condition1.connector", Constants.Connector.SINK_TYPE)).build())
            .addConnection("n1", "n2")
            .addConnection("n2", "condition1.connector")
            .build());

    // phase 2: condition1
    String phase2Name = "condition1";
    expectedPhases.put(phase2Name, PipelinePhase.builder(pluginTypes)
            .addStage(StageSpec.builder("condition1", CONDITION1).build())
            .build());

    // phase 3: condition1.connector -- n3 - n4 - condition2.connector
    String phase3Name = PipelinePlanner.getPhaseName(new Dag(ImmutableSet.of(
            new Connection("condition1", "n3"),
            new Connection("n3", "n4"),
            new Connection("n4", "condition2"))));
    expectedPhases.put(phase3Name, PipelinePhase.builder(pluginTypes)
            .addStage(StageSpec.builder("condition1.connector",
                    connectorSpec("condition1.connector", Constants.Connector.SOURCE_TYPE)).build())
            .addStage(StageSpec.builder("condition2.connector",
                    connectorSpec("condition2.connector", Constants.Connector.SINK_TYPE)).build())
            .addStage(StageSpec.builder("n3", NODE).build())
            .addStage(StageSpec.builder("n4", NODE).build())
            .addConnection("condition1.connector", "n3")
            .addConnection("n3", "n4")
            .addConnection("n4", "condition2.connector")
            .build());

    // phase 4: condition1.connector -- n10
    String phase4Name = PipelinePlanner.getPhaseName(new Dag(ImmutableSet.of(
            new Connection("condition1", "n10"))));
    expectedPhases.put(phase4Name, PipelinePhase.builder(pluginTypes)
            .addStage(StageSpec.builder("condition1.connector",
                    connectorSpec("condition1.connector", Constants.Connector.SOURCE_TYPE)).build())
            .addStage(StageSpec.builder("n10", NODE).build())
            .addConnection("condition1.connector", "n10")
            .build());

    // phase 5: condition2
    String phase5Name = "condition2";
    expectedPhases.put(phase5Name, PipelinePhase.builder(pluginTypes)
            .addStage(StageSpec.builder("condition2", CONDITION2).build())
            .build());

    // phase 6: condition2.connector -- n5 -- condition3.connector
    String phase6Name = PipelinePlanner.getPhaseName(new Dag(ImmutableSet.of(
            new Connection("condition2", "n5"),
            new Connection("n5", "condition3"))));
    expectedPhases.put(phase6Name, PipelinePhase.builder(pluginTypes)
            .addStage(StageSpec.builder("condition2.connector",
                    connectorSpec("condition2.connector", Constants.Connector.SOURCE_TYPE)).build())
            .addStage(StageSpec.builder("n5", NODE).build())
            .addStage(StageSpec.builder("condition3.connector",
                    connectorSpec("condition3.connector", Constants.Connector.SINK_TYPE)).build())
            .addConnection("condition2.connector", "n5")
            .addConnection("n5", "condition3.connector")
            .build());

    // phase 7: condition3
    String phase7Name = "condition3";
    expectedPhases.put(phase7Name, PipelinePhase.builder(pluginTypes)
            .addStage(StageSpec.builder("condition3", CONDITION3).build())
            .build());

    // phase 8: condition3.connector -- n6
    String phase8Name = PipelinePlanner.getPhaseName(new Dag(ImmutableSet.of(
            new Connection("condition3", "n6"))));
    expectedPhases.put(phase8Name, PipelinePhase.builder(pluginTypes)
            .addStage(StageSpec.builder("n6", NODE).build())
            .addStage(StageSpec.builder("condition3.connector",
                    connectorSpec("condition3.connector", Constants.Connector.SOURCE_TYPE)).build())
            .addConnection("condition3.connector", "n6")
            .build());

    // phase 9: condition3.connector -- n7
    String phase9Name = PipelinePlanner.getPhaseName(new Dag(ImmutableSet.of(
            new Connection("condition3", "n7"))));
    expectedPhases.put(phase9Name, PipelinePhase.builder(pluginTypes)
            .addStage(StageSpec.builder("n7", NODE).build())
            .addStage(StageSpec.builder("condition3.connector",
                    connectorSpec("condition3.connector", Constants.Connector.SOURCE_TYPE)).build())
            .addConnection("condition3.connector", "n7")
            .build());

    // phase 10: condition4
    String phase10Name = "condition4";
    expectedPhases.put(phase10Name, PipelinePhase.builder(pluginTypes)
            .addStage(StageSpec.builder("condition4", CONDITION4).build())
            .build());

    // phase 11: condition4(condition2.connector) -- n8
    String phase11Name = PipelinePlanner.getPhaseName(new Dag(ImmutableSet.of(
            new Connection("condition4", "n8"))));
    expectedPhases.put(phase11Name, PipelinePhase.builder(pluginTypes)
            .addStage(StageSpec.builder("n8", NODE).build())
            .addStage(StageSpec.builder("condition2.connector",
                    connectorSpec("condition2.connector", Constants.Connector.SOURCE_TYPE)).build())
            .addConnection("condition2.connector", "n8")
            .build());

    // phase 12: condition5
    String phase12Name = "condition5";
    expectedPhases.put(phase12Name, PipelinePhase.builder(pluginTypes)
            .addStage(StageSpec.builder("condition5", CONDITION5).build())
            .build());

    // phase 13: condition5(condition2.connector) -- n9
    String phase13Name = PipelinePlanner.getPhaseName(new Dag(ImmutableSet.of(
            new Connection("condition5", "n9"))));
    expectedPhases.put(phase13Name, PipelinePhase.builder(pluginTypes)
            .addStage(StageSpec.builder("n9", NODE).build())
            .addStage(StageSpec.builder("condition2.connector",
                    connectorSpec("condition2.connector", Constants.Connector.SOURCE_TYPE)).build())
            .addConnection("condition2.connector", "n9")
            .build());

    // connections between the phases; the boolean marks the branch of the condition that is followed
    Set<Connection> expectedConnections = new HashSet<>();
    expectedConnections.add(new Connection(phase1Name, phase2Name));
    expectedConnections.add(new Connection(phase2Name, phase3Name, true));
    expectedConnections.add(new Connection(phase2Name, phase4Name, false));
    expectedConnections.add(new Connection(phase3Name, phase5Name));
    expectedConnections.add(new Connection(phase5Name, phase6Name, true));
    expectedConnections.add(new Connection(phase6Name, phase7Name));
    expectedConnections.add(new Connection(phase7Name, phase8Name, true));
    expectedConnections.add(new Connection(phase7Name, phase9Name, false));
    expectedConnections.add(new Connection(phase5Name, phase10Name, false));
    expectedConnections.add(new Connection(phase10Name, phase11Name, true));
    expectedConnections.add(new Connection(phase10Name, phase12Name, false));
    expectedConnections.add(new Connection(phase12Name, phase13Name, true));

    Assert.assertEquals(new PipelinePlan(expectedPhases, expectedConnections), planner.plan(pipelineSpec));
}
Also used : HashMap(java.util.HashMap) Connection(io.cdap.cdap.etl.proto.Connection) PipelineSpec(io.cdap.cdap.etl.proto.v2.spec.PipelineSpec) PipelinePhase(io.cdap.cdap.etl.common.PipelinePhase) StageSpec(io.cdap.cdap.etl.proto.v2.spec.StageSpec) HashSet(java.util.HashSet) Test(org.junit.Test)

Example 14 with PipelinePhase

use of io.cdap.cdap.etl.common.PipelinePhase in project cdap by cdapio.

the class PipelinePlannerTest method testSimpleCondition.

/**
 * Plans a pipeline with a single condition stage that has a true branch and a false branch, and asserts
 * that the planner produces the expected four phases and the connections between them.
 */
@Test
public void testSimpleCondition() {
    /*
      n1 - n2 - condition - n3
                      |
                      |---- n4
     */
    Set<StageSpec> stageSpecs = ImmutableSet.of(
            StageSpec.builder("n1", NODE).build(),
            StageSpec.builder("n2", NODE).build(),
            StageSpec.builder("condition", CONDITION).build(),
            StageSpec.builder("n3", NODE).build(),
            StageSpec.builder("n4", NODE).build());
    Set<Connection> connections = ImmutableSet.of(
            new Connection("n1", "n2"),
            new Connection("n2", "condition"),
            new Connection("condition", "n3", true),
            new Connection("condition", "n4", false));
    Set<String> pluginTypes = ImmutableSet.of(
            NODE.getType(),
            REDUCE.getType(),
            Constants.Connector.PLUGIN_TYPE,
            CONDITION.getType());
    Set<String> reduceTypes = ImmutableSet.of(REDUCE.getType());
    Set<String> none = ImmutableSet.of();
    PipelinePlanner planner = new PipelinePlanner(pluginTypes, reduceTypes, none, none, none);
    PipelineSpec pipelineSpec = PipelineSpec.builder().addStages(stageSpecs).addConnections(connections).build();

    Map<String, PipelinePhase> expectedPhases = new HashMap<>();

    // phase 1: n1--n2--condition.connector
    String phase1Name = PipelinePlanner.getPhaseName(new Dag(ImmutableSet.of(
            new Connection("n1", "n2"),
            new Connection("n2", "condition"))));
    expectedPhases.put(phase1Name, PipelinePhase.builder(pluginTypes)
            .addStage(StageSpec.builder("n1", NODE).build())
            .addStage(StageSpec.builder("n2", NODE).build())
            .addStage(StageSpec.builder("condition.connector",
                    connectorSpec("condition.connector", Constants.Connector.SINK_TYPE)).build())
            .addConnection("n1", "n2")
            .addConnection("n2", "condition.connector")
            .build());

    // phase 2: condition
    String phase2Name = "condition";
    expectedPhases.put(phase2Name, PipelinePhase.builder(pluginTypes)
            .addStage(StageSpec.builder("condition", CONDITION).build())
            .build());

    // phase 3: condition.connector -- n3
    String phase3Name = PipelinePlanner.getPhaseName(new Dag(ImmutableSet.of(
            new Connection("condition", "n3"))));
    expectedPhases.put(phase3Name, PipelinePhase.builder(pluginTypes)
            .addStage(StageSpec.builder("condition.connector",
                    connectorSpec("condition.connector", Constants.Connector.SOURCE_TYPE)).build())
            .addStage(StageSpec.builder("n3", NODE).build())
            .addConnection("condition.connector", "n3")
            .build());

    // phase 4: condition.connector -- n4
    String phase4Name = PipelinePlanner.getPhaseName(new Dag(ImmutableSet.of(
            new Connection("condition", "n4"))));
    expectedPhases.put(phase4Name, PipelinePhase.builder(pluginTypes)
            .addStage(StageSpec.builder("condition.connector",
                    connectorSpec("condition.connector", Constants.Connector.SOURCE_TYPE)).build())
            .addStage(StageSpec.builder("n4", NODE).build())
            .addConnection("condition.connector", "n4")
            .build());

    // connections between the phases; the boolean marks the branch of the condition that is followed
    Set<Connection> expectedConnections = new HashSet<>();
    expectedConnections.add(new Connection(phase1Name, phase2Name));
    expectedConnections.add(new Connection(phase2Name, phase3Name, true));
    expectedConnections.add(new Connection(phase2Name, phase4Name, false));

    Assert.assertEquals(new PipelinePlan(expectedPhases, expectedConnections), planner.plan(pipelineSpec));
}
Also used : HashMap(java.util.HashMap) Connection(io.cdap.cdap.etl.proto.Connection) PipelineSpec(io.cdap.cdap.etl.proto.v2.spec.PipelineSpec) PipelinePhase(io.cdap.cdap.etl.common.PipelinePhase) StageSpec(io.cdap.cdap.etl.proto.v2.spec.StageSpec) HashSet(java.util.HashSet) Test(org.junit.Test)

Example 15 with PipelinePhase

use of io.cdap.cdap.etl.common.PipelinePhase in project cdap by cdapio.

the class PipelinePlannerTest method testGeneratePlan.

/**
 * Plans a pipeline with several reduce-type stages, marked (r) in the diagram, and asserts that the
 * planner produces the expected six phases and the expected connections between them.
 */
@Test
public void testGeneratePlan() {
    /*
             |--- n2(r) ----------|
             |                    |                                    |-- n10
        n1 --|--- n3(r) --- n5 ---|--- n6 --- n7(r) --- n8 --- n9(r) --|
             |                    |                                    |-- n11
             |--- n4(r) ----------|
     */
    // spec of the pipeline under test
    Schema schema = Schema.recordOf("stuff", Schema.Field.of("x", Schema.of(Schema.Type.INT)));
    Set<StageSpec> stageSpecs = ImmutableSet.of(
            StageSpec.builder("n1", NODE).addOutput(schema, "n2", "n3", "n4").build(),
            StageSpec.builder("n2", REDUCE).addInputSchema("n1", schema).addOutput(schema, "n6").build(),
            StageSpec.builder("n3", REDUCE).addInputSchema("n1", schema).addOutput(schema, "n5").build(),
            StageSpec.builder("n4", REDUCE).addInputSchema("n1", schema).addOutput(schema, "n6").build(),
            StageSpec.builder("n5", NODE).addInputSchema("n3", schema).addOutput(schema, "n6").build(),
            StageSpec.builder("n6", NODE)
                    .addInputSchemas(ImmutableMap.of("n2", schema, "n5", schema, "n4", schema))
                    .addOutput(schema, "n7")
                    .build(),
            StageSpec.builder("n7", REDUCE).addInputSchema("n6", schema).addOutput(schema, "n8").build(),
            StageSpec.builder("n8", NODE).addInputSchema("n7", schema).addOutput(schema, "n9").build(),
            StageSpec.builder("n9", REDUCE).addInputSchema("n8", schema).addOutput(schema, "n10", "n11").build(),
            StageSpec.builder("n10", NODE).addInputSchema("n9", schema).build(),
            StageSpec.builder("n11", NODE).addInputSchema("n9", schema).build());
    Set<Connection> connections = ImmutableSet.of(
            new Connection("n1", "n2"),
            new Connection("n1", "n3"),
            new Connection("n1", "n4"),
            new Connection("n2", "n6"),
            new Connection("n3", "n5"),
            new Connection("n4", "n6"),
            new Connection("n5", "n6"),
            new Connection("n6", "n7"),
            new Connection("n7", "n8"),
            new Connection("n8", "n9"),
            new Connection("n9", "n10"),
            new Connection("n9", "n11"));
    Set<String> pluginTypes = ImmutableSet.of(NODE.getType(), REDUCE.getType(), Constants.Connector.PLUGIN_TYPE);
    Set<String> reduceTypes = ImmutableSet.of(REDUCE.getType());
    Set<String> none = ImmutableSet.of();
    PipelinePlanner planner = new PipelinePlanner(pluginTypes, reduceTypes, none, none, none);
    PipelineSpec pipelineSpec = PipelineSpec.builder().addStages(stageSpecs).addConnections(connections).build();

    Map<String, PipelinePhase> expectedPhases = new HashMap<>();

    // n6 appears with the same spec in phases 2, 3 and 4
    StageSpec n6InPhase = StageSpec.builder("n6", NODE)
            .addInputSchema("n2", schema)
            .addInputSchema("n4", schema)
            .addInputSchema("n5", schema)
            .addOutput(schema, "n7")
            .build();

    /*
        phase1:
        n1 --> n1.out.connector
     */
    PipelinePhase phase1 = PipelinePhase.builder(pluginTypes)
            .addStage(StageSpec.builder("n1", NODE).addOutput(schema, "n2", "n3", "n4").build())
            .addStage(StageSpec.builder("n1.out.connector",
                    connectorSpec("n1.out.connector", Constants.Connector.SINK_TYPE)).build())
            .addConnections("n1", ImmutableSet.of("n1.out.connector"))
            .build();
    String phase1Name = PipelinePlanner.getPhaseName(phase1.getDag());
    expectedPhases.put(phase1Name, phase1);

    /*
        phase2:
        n1.out.connector --- n2(r) --- n6 --- n7.connector
     */
    PipelinePhase phase2 = PipelinePhase.builder(pluginTypes)
            .addStage(StageSpec.builder("n2", REDUCE).addInputSchema("n1", schema).addOutput(schema, "n6").build())
            .addStage(n6InPhase)
            .addStage(StageSpec.builder("n1.out.connector",
                    connectorSpec("n1.out.connector", Constants.Connector.SOURCE_TYPE)).build())
            .addStage(StageSpec.builder("n7.connector",
                    connectorSpec("n7", Constants.Connector.SINK_TYPE)).build())
            .addConnection("n1.out.connector", "n2")
            .addConnection("n2", "n6")
            .addConnection("n6", "n7.connector")
            .build();
    String phase2Name = PipelinePlanner.getPhaseName(phase2.getDag());
    expectedPhases.put(phase2Name, phase2);

    /*
        phase3:
        n1.out.connector --- n3(r) --- n5 --- n6 --- n7.connector
     */
    PipelinePhase phase3 = PipelinePhase.builder(pluginTypes)
            .addStage(StageSpec.builder("n5", NODE).addInputSchema("n3", schema).addOutput(schema, "n6").build())
            .addStage(n6InPhase)
            .addStage(StageSpec.builder("n3", REDUCE).addInputSchema("n1", schema).addOutput(schema, "n5").build())
            .addStage(StageSpec.builder("n1.out.connector",
                    connectorSpec("n1.out.connector", Constants.Connector.SOURCE_TYPE)).build())
            .addStage(StageSpec.builder("n7.connector",
                    connectorSpec("n7", Constants.Connector.SINK_TYPE)).build())
            .addConnection("n1.out.connector", "n3")
            .addConnection("n3", "n5")
            .addConnection("n5", "n6")
            .addConnection("n6", "n7.connector")
            .build();
    String phase3Name = PipelinePlanner.getPhaseName(phase3.getDag());
    expectedPhases.put(phase3Name, phase3);

    /*
        phase4:
        n1.out.connector --- n4(r) --- n6 --- n7.connector
     */
    PipelinePhase phase4 = PipelinePhase.builder(pluginTypes)
            .addStage(StageSpec.builder("n4", REDUCE).addInputSchema("n1", schema).addOutput(schema, "n6").build())
            .addStage(n6InPhase)
            .addStage(StageSpec.builder("n1.out.connector",
                    connectorSpec("n1.out.connector", Constants.Connector.SOURCE_TYPE)).build())
            .addStage(StageSpec.builder("n7.connector",
                    connectorSpec("n7", Constants.Connector.SINK_TYPE)).build())
            .addConnection("n1.out.connector", "n4")
            .addConnection("n4", "n6")
            .addConnection("n6", "n7.connector")
            .build();
    String phase4Name = PipelinePlanner.getPhaseName(phase4.getDag());
    expectedPhases.put(phase4Name, phase4);

    /*
        phase5:
        n7.connector --- n7(r) --- n8 --- n9.connector
     */
    PipelinePhase phase5 = PipelinePhase.builder(pluginTypes)
            .addStage(StageSpec.builder("n8", NODE).addInputSchema("n7", schema).addOutput(schema, "n9").build())
            .addStage(StageSpec.builder("n7", REDUCE).addInputSchema("n6", schema).addOutput(schema, "n8").build())
            .addStage(StageSpec.builder("n7.connector",
                    connectorSpec("n7", Constants.Connector.SOURCE_TYPE)).build())
            .addStage(StageSpec.builder("n9.connector",
                    connectorSpec("n9", Constants.Connector.SINK_TYPE)).build())
            .addConnection("n7.connector", "n7")
            .addConnection("n7", "n8")
            .addConnection("n8", "n9.connector")
            .build();
    String phase5Name = PipelinePlanner.getPhaseName(phase5.getDag());
    expectedPhases.put(phase5Name, phase5);

    /*
        phase6:
                                 |-- n10
        n9.connector --- n9(r) --|
                                 |-- n11
     */
    PipelinePhase phase6 = PipelinePhase.builder(pluginTypes)
            .addStage(StageSpec.builder("n10", NODE).addInputSchema("n9", schema).build())
            .addStage(StageSpec.builder("n11", NODE).addInputSchema("n9", schema).build())
            .addStage(StageSpec.builder("n9", REDUCE)
                    .addInputSchema("n8", schema)
                    .addOutput(schema, "n10", "n11")
                    .build())
            .addStage(StageSpec.builder("n9.connector",
                    connectorSpec("n9", Constants.Connector.SOURCE_TYPE)).build())
            .addConnection("n9.connector", "n9")
            .addConnection("n9", "n10")
            .addConnection("n9", "n11")
            .build();
    String phase6Name = PipelinePlanner.getPhaseName(phase6.getDag());
    expectedPhases.put(phase6Name, phase6);

    // phase 1 fans out to phases 2-4, which all lead into phase 5, followed by phase 6
    Set<Connection> expectedConnections = new HashSet<>();
    expectedConnections.add(new Connection(phase1Name, phase2Name));
    expectedConnections.add(new Connection(phase1Name, phase3Name));
    expectedConnections.add(new Connection(phase1Name, phase4Name));
    expectedConnections.add(new Connection(phase2Name, phase5Name));
    expectedConnections.add(new Connection(phase3Name, phase5Name));
    expectedConnections.add(new Connection(phase4Name, phase5Name));
    expectedConnections.add(new Connection(phase5Name, phase6Name));

    Assert.assertEquals(new PipelinePlan(expectedPhases, expectedConnections), planner.plan(pipelineSpec));
}
Also used : HashMap(java.util.HashMap) Schema(io.cdap.cdap.api.data.schema.Schema) Connection(io.cdap.cdap.etl.proto.Connection) PipelineSpec(io.cdap.cdap.etl.proto.v2.spec.PipelineSpec) PipelinePhase(io.cdap.cdap.etl.common.PipelinePhase) StageSpec(io.cdap.cdap.etl.proto.v2.spec.StageSpec) HashSet(java.util.HashSet) Test(org.junit.Test)

Aggregations

PipelinePhase (io.cdap.cdap.etl.common.PipelinePhase): 30; StageSpec (io.cdap.cdap.etl.proto.v2.spec.StageSpec): 28; HashMap (java.util.HashMap): 20; HashSet (java.util.HashSet): 16; Connection (io.cdap.cdap.etl.proto.Connection): 14; PipelineSpec (io.cdap.cdap.etl.proto.v2.spec.PipelineSpec): 12; Test (org.junit.Test): 12; DefaultMacroEvaluator (io.cdap.cdap.etl.common.DefaultMacroEvaluator): 8; MacroEvaluator (io.cdap.cdap.api.macro.MacroEvaluator): 6; BatchPhaseSpec (io.cdap.cdap.etl.batch.BatchPhaseSpec): 6; Map (java.util.Map): 6; ImmutableSet (com.google.common.collect.ImmutableSet): 4; PluginContext (io.cdap.cdap.api.plugin.PluginContext): 4; WorkflowToken (io.cdap.cdap.api.workflow.WorkflowToken): 4; BasicArguments (io.cdap.cdap.etl.common.BasicArguments): 4; PipelineRuntime (io.cdap.cdap.etl.common.PipelineRuntime): 4; PipelinePluginContext (io.cdap.cdap.etl.common.plugin.PipelinePluginContext): 4; ArrayList (java.util.ArrayList): 4; Set (java.util.Set): 4; PipelinePluginInstantiator (io.cdap.cdap.etl.batch.PipelinePluginInstantiator): 3