Search in sources :

Example 11 with StageSpec

use of co.cask.cdap.etl.spec.StageSpec in project cdap by caskdata.

the class PipelinePlannerTest method testMultipleActionConditions.

/**
 * Plans a pipeline that mixes action, condition and regular stages and checks that the
 * resulting {@link PipelinePlan} equals a hand-built plan (phases plus phase connections).
 */
@Test
public void testMultipleActionConditions() {
    /*
                                                   |-- n2 -- a3
            |-- a1 --|        |-- n0 -- n1 -- c1 --|                          |-- a5 --|
        a0--|        |-- c0 --|                    |-- n3 -- c2 -- n8 -- a4 --|        |-- a7
            |-- a2 --|        |                                               |-- a6 --|
                              |        |-- n4 -- n5 -- c4 -- c5 -- n9
                              |-- c3 --|
                                       |              |-- a8
                                       |-- n6 -- n7 --|
                                                      |-- a9
     */
    Set<StageSpec> stageSpecs = ImmutableSet.of(
        StageSpec.builder("a0", ACTION).build(),
        StageSpec.builder("a1", ACTION).build(),
        StageSpec.builder("a2", ACTION).build(),
        StageSpec.builder("a3", ACTION).build(),
        StageSpec.builder("a4", ACTION).build(),
        StageSpec.builder("a5", ACTION).build(),
        StageSpec.builder("a6", ACTION).build(),
        StageSpec.builder("a7", ACTION).build(),
        StageSpec.builder("a8", ACTION).build(),
        StageSpec.builder("a9", ACTION).build(),
        StageSpec.builder("c0", CONDITION).build(),
        StageSpec.builder("c1", CONDITION).build(),
        StageSpec.builder("c2", CONDITION).build(),
        StageSpec.builder("c3", CONDITION).build(),
        StageSpec.builder("c4", CONDITION).build(),
        StageSpec.builder("c5", CONDITION).build(),
        StageSpec.builder("n0", NODE).build(),
        StageSpec.builder("n1", NODE).build(),
        StageSpec.builder("n2", NODE).build(),
        StageSpec.builder("n3", NODE).build(),
        StageSpec.builder("n4", NODE).build(),
        StageSpec.builder("n5", NODE).build(),
        StageSpec.builder("n6", NODE).build(),
        StageSpec.builder("n7", NODE).build(),
        StageSpec.builder("n8", NODE).build(),
        StageSpec.builder("n9", NODE).build());
    Set<Connection> connections = ImmutableSet.of(
        new Connection("a0", "a1"),
        new Connection("a0", "a2"),
        new Connection("a1", "c0"),
        new Connection("a2", "c0"),
        new Connection("c0", "n0", true),
        new Connection("c0", "c3", false),
        new Connection("n0", "n1"),
        new Connection("n1", "c1"),
        new Connection("c1", "n2", true),
        new Connection("c1", "n3", false),
        new Connection("n2", "a3"),
        new Connection("n3", "c2"),
        new Connection("c2", "n8", true),
        new Connection("n8", "a4"),
        new Connection("a4", "a5"),
        new Connection("a4", "a6"),
        new Connection("a5", "a7"),
        new Connection("a6", "a7"),
        new Connection("c3", "n4", true),
        new Connection("c3", "n6", false),
        new Connection("n4", "n5"),
        new Connection("n5", "c4"),
        new Connection("c4", "c5", true),
        new Connection("c5", "n9", true),
        new Connection("n6", "n7"),
        new Connection("n7", "a8"),
        new Connection("n7", "a9"));
    Set<String> pluginTypes = ImmutableSet.of(NODE.getType(), ACTION.getType(), Constants.Connector.PLUGIN_TYPE, CONDITION.getType());
    Set<String> reduceTypes = ImmutableSet.of();
    Set<String> emptySet = ImmutableSet.of();
    Set<String> actionTypes = ImmutableSet.of(ACTION.getType());
    PipelinePlanner planner = new PipelinePlanner(pluginTypes, reduceTypes, emptySet, actionTypes, emptySet);
    PipelineSpec pipelineSpec = PipelineSpec.builder().addStages(stageSpecs).addConnections(connections).build();
    Map<String, PipelinePhase> phases = new HashMap<>();
    Set<Connection> phaseConnections = new HashSet<>();
    // connections whose two ends are both actions/conditions are expected to show up
    // unchanged as phase connections
    phaseConnections.add(new Connection("a0", "a1"));
    phaseConnections.add(new Connection("a0", "a2"));
    phaseConnections.add(new Connection("a1", "c0"));
    phaseConnections.add(new Connection("a2", "c0"));
    phaseConnections.add(new Connection("a4", "a5"));
    phaseConnections.add(new Connection("a4", "a6"));
    phaseConnections.add(new Connection("a5", "a7"));
    phaseConnections.add(new Connection("a6", "a7"));
    phaseConnections.add(new Connection("c0", "c3", false));
    phaseConnections.add(new Connection("c4", "c5", true));
    // every action and every condition is expected to be a single-stage phase named after the stage
    for (String action : ImmutableList.of("a0", "a1", "a2", "a3", "a4", "a5", "a6", "a7", "a8", "a9")) {
        phases.put(action, PipelinePhase.builder(pluginTypes).addStage(StageSpec.builder(action, ACTION).build()).build());
    }
    for (String condition : ImmutableList.of("c0", "c1", "c2", "c3", "c4", "c5")) {
        phases.put(condition, PipelinePhase.builder(pluginTypes).addStage(StageSpec.builder(condition, CONDITION).build()).build());
    }
    // [c0] --true-->  [c0 -- n0 -- n1 -- c1]
    PipelinePhase phase = PipelinePhase.builder(pluginTypes)
        .addStage(StageSpec.builder("n0", NODE).build())
        .addStage(StageSpec.builder("n1", NODE).build())
        .addStage(StageSpec.builder("c1.connector", connectorSpec("c1.connector", Constants.Connector.SINK_TYPE)).build())
        .addConnection("n0", "n1")
        .addConnection("n1", "c1.connector")
        .build();
    // the phase name is derived from the dag that still contains the control nodes
    Dag controlPhaseDag = new Dag(ImmutableSet.of(new Connection("c0", "n0"), new Connection("n0", "n1"), new Connection("n1", "c1")));
    String phaseName = PipelinePlanner.getPhaseName(controlPhaseDag);
    phases.put(phaseName, phase);
    phaseConnections.add(new Connection("c0", phaseName, true));
    // [c0 -- n0 -- n1 -- c1] --> [c1]
    phaseConnections.add(new Connection(phaseName, "c1"));
    // [c1] --true--> [c1 -- n2 -- a3]
    phase = PipelinePhase.builder(pluginTypes)
        .addStage(StageSpec.builder("c1.connector", connectorSpec("c1.connector", Constants.Connector.SOURCE_TYPE)).build())
        .addStage(StageSpec.builder("n2", NODE).build())
        .addConnection("c1.connector", "n2")
        .build();
    controlPhaseDag = new Dag(ImmutableSet.of(new Connection("c1", "n2"), new Connection("n2", "a3")));
    phaseName = PipelinePlanner.getPhaseName(controlPhaseDag);
    phases.put(phaseName, phase);
    phaseConnections.add(new Connection("c1", phaseName, true));
    // [c1 -- n2 -- a3] -- [a3]
    phaseConnections.add(new Connection(phaseName, "a3"));
    // [c1] --false--> [c1 -- n3 -- c2]
    phase = PipelinePhase.builder(pluginTypes)
        .addStage(StageSpec.builder("c1.connector", connectorSpec("c1.connector", Constants.Connector.SOURCE_TYPE)).build())
        .addStage(StageSpec.builder("n3", NODE).build())
        .addStage(StageSpec.builder("c2.connector", connectorSpec("c2.connector", Constants.Connector.SINK_TYPE)).build())
        .addConnection("c1.connector", "n3")
        .addConnection("n3", "c2.connector")
        .build();
    controlPhaseDag = new Dag(ImmutableSet.of(new Connection("c1", "n3"), new Connection("n3", "c2")));
    phaseName = PipelinePlanner.getPhaseName(controlPhaseDag);
    phases.put(phaseName, phase);
    phaseConnections.add(new Connection("c1", phaseName, false));
    // [c1.connector -- n3 -- c2.connector] --> [c2]
    phaseConnections.add(new Connection(phaseName, "c2"));
    // [c2] --true--> [c2 -- n8 -- a4]
    phase = PipelinePhase.builder(pluginTypes)
        .addStage(StageSpec.builder("c2.connector", connectorSpec("c2.connector", Constants.Connector.SOURCE_TYPE)).build())
        .addStage(StageSpec.builder("n8", NODE).build())
        .addConnection("c2.connector", "n8")
        .build();
    controlPhaseDag = new Dag(ImmutableSet.of(new Connection("c2", "n8"), new Connection("n8", "a4")));
    phaseName = PipelinePlanner.getPhaseName(controlPhaseDag);
    phases.put(phaseName, phase);
    phaseConnections.add(new Connection("c2", phaseName, true));
    // [c2 -- n8 -- a4] --> [a4]
    phaseConnections.add(new Connection(phaseName, "a4"));
    // [c3] --true--> [c3 -- n4 -- n5 -- c4]
    phase = PipelinePhase.builder(pluginTypes)
        .addStage(StageSpec.builder("n4", NODE).build())
        .addStage(StageSpec.builder("n5", NODE).build())
        .addStage(StageSpec.builder("c4.connector", connectorSpec("c4.connector", Constants.Connector.SINK_TYPE)).build())
        .addConnection("n4", "n5")
        .addConnection("n5", "c4.connector")
        .build();
    controlPhaseDag = new Dag(ImmutableSet.of(new Connection("c3", "n4"), new Connection("n4", "n5"), new Connection("n5", "c4")));
    phaseName = PipelinePlanner.getPhaseName(controlPhaseDag);
    phases.put(phaseName, phase);
    phaseConnections.add(new Connection("c3", phaseName, true));
    // [c3 -- n4 -- n5 -- c4] --> c4
    phaseConnections.add(new Connection(phaseName, "c4"));
    // [c5] --true--> [c5 (via c4.connector) -- n9]
    phase = PipelinePhase.builder(pluginTypes)
        .addStage(StageSpec.builder("c4.connector", connectorSpec("c4.connector", Constants.Connector.SOURCE_TYPE)).build())
        .addStage(StageSpec.builder("n9", NODE).build())
        .addConnection("c4.connector", "n9")
        .build();
    controlPhaseDag = new Dag(ImmutableSet.of(new Connection("c5", "n9")));
    phaseName = PipelinePlanner.getPhaseName(controlPhaseDag);
    phases.put(phaseName, phase);
    phaseConnections.add(new Connection("c5", phaseName, true));
    // [c3] --false--> [c3 -- n6 -- n7 -- a8, a9]
    phase = PipelinePhase.builder(pluginTypes)
        .addStage(StageSpec.builder("n6", NODE).build())
        .addStage(StageSpec.builder("n7", NODE).build())
        .addConnection("n6", "n7")
        .build();
    controlPhaseDag = new Dag(ImmutableSet.of(new Connection("c3", "n6"), new Connection("n6", "n7"), new Connection("n7", "a8"), new Connection("n7", "a9")));
    phaseName = PipelinePlanner.getPhaseName(controlPhaseDag);
    phases.put(phaseName, phase);
    phaseConnections.add(new Connection("c3", phaseName, false));
    // [c3 -- n6 -- n7 -- a8, a9] --> [a8]
    // [c3 -- n6 -- n7 -- a8, a9] --> [a9]
    phaseConnections.add(new Connection(phaseName, "a8"));
    phaseConnections.add(new Connection(phaseName, "a9"));
    PipelinePlan expected = new PipelinePlan(phases, phaseConnections);
    PipelinePlan actual = planner.plan(pipelineSpec);
    Assert.assertEquals(expected, actual);
}
Also used : HashMap(java.util.HashMap) Connection(co.cask.cdap.etl.proto.Connection) PipelineSpec(co.cask.cdap.etl.spec.PipelineSpec) PipelinePhase(co.cask.cdap.etl.common.PipelinePhase) StageSpec(co.cask.cdap.etl.spec.StageSpec) HashSet(java.util.HashSet) Test(org.junit.Test)

Example 12 with StageSpec

use of co.cask.cdap.etl.spec.StageSpec in project cdap by caskdata.

the class PipelinePlannerTest method testMultipleConditions.

/**
 * Plans a pipeline containing five condition stages (some of them chained directly to each
 * other) and compares the planner's output with a hand-built {@link PipelinePlan}.
 */
@Test
public void testMultipleConditions() throws Exception {
    /*
      n1 - n2 - condition1 - n3 - n4 - condition2 - n5 - condition3 - n6
                    |                       |                   |
                    |--n10                  |---condition4 - n8 |------n7
                                                  |
                                                  |----condition5 - n9
     */
    Set<StageSpec> stageSpecs = ImmutableSet.of(
        StageSpec.builder("n1", NODE).build(),
        StageSpec.builder("n2", NODE).build(),
        StageSpec.builder("condition1", CONDITION1).build(),
        StageSpec.builder("n3", NODE).build(),
        StageSpec.builder("n4", NODE).build(),
        StageSpec.builder("condition2", CONDITION2).build(),
        StageSpec.builder("n5", NODE).build(),
        StageSpec.builder("condition3", CONDITION3).build(),
        StageSpec.builder("n6", NODE).build(),
        StageSpec.builder("condition4", CONDITION4).build(),
        StageSpec.builder("n7", NODE).build(),
        StageSpec.builder("condition5", CONDITION5).build(),
        StageSpec.builder("n8", NODE).build(),
        StageSpec.builder("n9", NODE).build(),
        StageSpec.builder("n10", NODE).build());
    Set<Connection> connections = ImmutableSet.of(
        new Connection("n1", "n2"),
        new Connection("n2", "condition1"),
        new Connection("condition1", "n3", true),
        new Connection("condition1", "n10", false),
        new Connection("n3", "n4"),
        new Connection("n4", "condition2"),
        new Connection("condition2", "n5", true),
        new Connection("n5", "condition3"),
        new Connection("condition3", "n6", true),
        new Connection("condition3", "n7", false),
        new Connection("condition2", "condition4", false),
        new Connection("condition4", "n8", true),
        new Connection("condition4", "condition5", false),
        new Connection("condition5", "n9", true));
    Set<String> pluginTypes = ImmutableSet.of(
        NODE.getType(),
        REDUCE.getType(),
        Constants.Connector.PLUGIN_TYPE,
        CONDITION1.getType(),
        CONDITION2.getType(),
        CONDITION3.getType(),
        CONDITION4.getType(),
        CONDITION5.getType());
    Set<String> reduceTypes = ImmutableSet.of(REDUCE.getType());
    Set<String> emptySet = ImmutableSet.of();
    PipelinePlanner planner = new PipelinePlanner(pluginTypes, reduceTypes, emptySet, emptySet, emptySet);
    PipelineSpec pipelineSpec = PipelineSpec.builder().addStages(stageSpecs).addConnections(connections).build();

    Map<String, PipelinePhase> expectedPhases = new HashMap<>();

    // n1 -- n2 -- condition1.connector
    String headName = PipelinePlanner.getPhaseName(
        new Dag(ImmutableSet.of(new Connection("n1", "n2"), new Connection("n2", "condition1"))));
    expectedPhases.put(headName, PipelinePhase.builder(pluginTypes)
        .addStage(StageSpec.builder("n1", NODE).build())
        .addStage(StageSpec.builder("n2", NODE).build())
        .addStage(StageSpec.builder("condition1.connector", connectorSpec("condition1.connector", Constants.Connector.SINK_TYPE)).build())
        .addConnection("n1", "n2")
        .addConnection("n2", "condition1.connector")
        .build());

    // condition1 on its own
    String cond1Name = "condition1";
    expectedPhases.put(cond1Name, PipelinePhase.builder(pluginTypes)
        .addStage(StageSpec.builder("condition1", CONDITION1).build())
        .build());

    // condition1.connector -- n3 -- n4 -- condition2.connector
    String cond1TrueName = PipelinePlanner.getPhaseName(
        new Dag(ImmutableSet.of(new Connection("condition1", "n3"), new Connection("n3", "n4"), new Connection("n4", "condition2"))));
    expectedPhases.put(cond1TrueName, PipelinePhase.builder(pluginTypes)
        .addStage(StageSpec.builder("condition1.connector", connectorSpec("condition1.connector", Constants.Connector.SOURCE_TYPE)).build())
        .addStage(StageSpec.builder("condition2.connector", connectorSpec("condition2.connector", Constants.Connector.SINK_TYPE)).build())
        .addStage(StageSpec.builder("n3", NODE).build())
        .addStage(StageSpec.builder("n4", NODE).build())
        .addConnection("condition1.connector", "n3")
        .addConnection("n3", "n4")
        .addConnection("n4", "condition2.connector")
        .build());

    // condition1.connector -- n10
    String cond1FalseName = PipelinePlanner.getPhaseName(
        new Dag(ImmutableSet.of(new Connection("condition1", "n10"))));
    expectedPhases.put(cond1FalseName, PipelinePhase.builder(pluginTypes)
        .addStage(StageSpec.builder("condition1.connector", connectorSpec("condition1.connector", Constants.Connector.SOURCE_TYPE)).build())
        .addStage(StageSpec.builder("n10", NODE).build())
        .addConnection("condition1.connector", "n10")
        .build());

    // condition2 on its own
    String cond2Name = "condition2";
    expectedPhases.put(cond2Name, PipelinePhase.builder(pluginTypes)
        .addStage(StageSpec.builder("condition2", CONDITION2).build())
        .build());

    // condition2.connector -- n5 -- condition3.connector
    String cond2TrueName = PipelinePlanner.getPhaseName(
        new Dag(ImmutableSet.of(new Connection("condition2", "n5"), new Connection("n5", "condition3"))));
    expectedPhases.put(cond2TrueName, PipelinePhase.builder(pluginTypes)
        .addStage(StageSpec.builder("condition2.connector", connectorSpec("condition2.connector", Constants.Connector.SOURCE_TYPE)).build())
        .addStage(StageSpec.builder("n5", NODE).build())
        .addStage(StageSpec.builder("condition3.connector", connectorSpec("condition3.connector", Constants.Connector.SINK_TYPE)).build())
        .addConnection("condition2.connector", "n5")
        .addConnection("n5", "condition3.connector")
        .build());

    // condition3 on its own
    String cond3Name = "condition3";
    expectedPhases.put(cond3Name, PipelinePhase.builder(pluginTypes)
        .addStage(StageSpec.builder("condition3", CONDITION3).build())
        .build());

    // condition3.connector -- n6
    String cond3TrueName = PipelinePlanner.getPhaseName(
        new Dag(ImmutableSet.of(new Connection("condition3", "n6"))));
    expectedPhases.put(cond3TrueName, PipelinePhase.builder(pluginTypes)
        .addStage(StageSpec.builder("n6", NODE).build())
        .addStage(StageSpec.builder("condition3.connector", connectorSpec("condition3.connector", Constants.Connector.SOURCE_TYPE)).build())
        .addConnection("condition3.connector", "n6")
        .build());

    // condition3.connector -- n7
    String cond3FalseName = PipelinePlanner.getPhaseName(
        new Dag(ImmutableSet.of(new Connection("condition3", "n7"))));
    expectedPhases.put(cond3FalseName, PipelinePhase.builder(pluginTypes)
        .addStage(StageSpec.builder("n7", NODE).build())
        .addStage(StageSpec.builder("condition3.connector", connectorSpec("condition3.connector", Constants.Connector.SOURCE_TYPE)).build())
        .addConnection("condition3.connector", "n7")
        .build());

    // condition4 on its own
    String cond4Name = "condition4";
    expectedPhases.put(cond4Name, PipelinePhase.builder(pluginTypes)
        .addStage(StageSpec.builder("condition4", CONDITION4).build())
        .build());

    // condition4 (reads from condition2.connector) -- n8
    String cond4TrueName = PipelinePlanner.getPhaseName(
        new Dag(ImmutableSet.of(new Connection("condition4", "n8"))));
    expectedPhases.put(cond4TrueName, PipelinePhase.builder(pluginTypes)
        .addStage(StageSpec.builder("n8", NODE).build())
        .addStage(StageSpec.builder("condition2.connector", connectorSpec("condition2.connector", Constants.Connector.SOURCE_TYPE)).build())
        .addConnection("condition2.connector", "n8")
        .build());

    // condition5 on its own
    String cond5Name = "condition5";
    expectedPhases.put(cond5Name, PipelinePhase.builder(pluginTypes)
        .addStage(StageSpec.builder("condition5", CONDITION5).build())
        .build());

    // condition5 (reads from condition2.connector) -- n9
    String cond5TrueName = PipelinePlanner.getPhaseName(
        new Dag(ImmutableSet.of(new Connection("condition5", "n9"))));
    expectedPhases.put(cond5TrueName, PipelinePhase.builder(pluginTypes)
        .addStage(StageSpec.builder("n9", NODE).build())
        .addStage(StageSpec.builder("condition2.connector", connectorSpec("condition2.connector", Constants.Connector.SOURCE_TYPE)).build())
        .addConnection("condition2.connector", "n9")
        .build());

    // control flow between the expected phases
    Set<Connection> expectedConnections = new HashSet<>();
    expectedConnections.add(new Connection(headName, cond1Name));
    expectedConnections.add(new Connection(cond1Name, cond1TrueName, true));
    expectedConnections.add(new Connection(cond1Name, cond1FalseName, false));
    expectedConnections.add(new Connection(cond1TrueName, cond2Name));
    expectedConnections.add(new Connection(cond2Name, cond2TrueName, true));
    expectedConnections.add(new Connection(cond2TrueName, cond3Name));
    expectedConnections.add(new Connection(cond3Name, cond3TrueName, true));
    expectedConnections.add(new Connection(cond3Name, cond3FalseName, false));
    expectedConnections.add(new Connection(cond2Name, cond4Name, false));
    expectedConnections.add(new Connection(cond4Name, cond4TrueName, true));
    expectedConnections.add(new Connection(cond4Name, cond5Name, false));
    expectedConnections.add(new Connection(cond5Name, cond5TrueName, true));

    PipelinePlan expected = new PipelinePlan(expectedPhases, expectedConnections);
    PipelinePlan actual = planner.plan(pipelineSpec);
    Assert.assertEquals(expected, actual);
}
Also used : HashMap(java.util.HashMap) Connection(co.cask.cdap.etl.proto.Connection) PipelineSpec(co.cask.cdap.etl.spec.PipelineSpec) PipelinePhase(co.cask.cdap.etl.common.PipelinePhase) StageSpec(co.cask.cdap.etl.spec.StageSpec) HashSet(java.util.HashSet) Test(org.junit.Test)

Example 13 with StageSpec

use of co.cask.cdap.etl.spec.StageSpec in project cdap by caskdata.

the class PipelinePlannerTest method testConditionsToConditions.

/**
 * Plans a pipeline in which one condition feeds two further conditions directly and compares
 * the planner's output with a hand-built {@link PipelinePlan}.
 */
@Test
public void testConditionsToConditions() throws Exception {
    /*
      n1 - condition1 ---- condition2 --- n2
               |
               |---------- condition3 --- n3
     */
    Set<StageSpec> stageSpecs = ImmutableSet.of(
        StageSpec.builder("n1", NODE).build(),
        StageSpec.builder("n2", NODE).build(),
        StageSpec.builder("condition1", CONDITION1).build(),
        StageSpec.builder("n3", NODE).build(),
        StageSpec.builder("condition2", CONDITION2).build(),
        StageSpec.builder("condition3", CONDITION3).build());
    Set<Connection> connections = ImmutableSet.of(
        new Connection("n1", "condition1"),
        new Connection("condition1", "condition2", true),
        new Connection("condition1", "condition3", false),
        new Connection("condition2", "n2", true),
        new Connection("condition3", "n3", true));
    Set<String> pluginTypes = ImmutableSet.of(
        NODE.getType(),
        REDUCE.getType(),
        Constants.Connector.PLUGIN_TYPE,
        CONDITION1.getType(),
        CONDITION2.getType(),
        CONDITION3.getType(),
        CONDITION4.getType(),
        CONDITION5.getType());
    Set<String> reduceTypes = ImmutableSet.of(REDUCE.getType());
    Set<String> emptySet = ImmutableSet.of();
    PipelinePlanner planner = new PipelinePlanner(pluginTypes, reduceTypes, emptySet, emptySet, emptySet);
    PipelineSpec pipelineSpec = PipelineSpec.builder().addStages(stageSpecs).addConnections(connections).build();

    Map<String, PipelinePhase> expectedPhases = new HashMap<>();

    // n1 -- condition1.connector
    String headName = PipelinePlanner.getPhaseName(
        new Dag(ImmutableSet.of(new Connection("n1", "condition1"))));
    expectedPhases.put(headName, PipelinePhase.builder(pluginTypes)
        .addStage(StageSpec.builder("n1", NODE).build())
        .addStage(StageSpec.builder("condition1.connector", connectorSpec("condition1.connector", Constants.Connector.SINK_TYPE)).build())
        .addConnection("n1", "condition1.connector")
        .build());

    // each condition is a phase of its own, named after the condition stage
    String cond1Name = "condition1";
    expectedPhases.put(cond1Name, PipelinePhase.builder(pluginTypes)
        .addStage(StageSpec.builder("condition1", CONDITION1).build())
        .build());
    String cond2Name = "condition2";
    expectedPhases.put(cond2Name, PipelinePhase.builder(pluginTypes)
        .addStage(StageSpec.builder("condition2", CONDITION2).build())
        .build());
    String cond3Name = "condition3";
    expectedPhases.put(cond3Name, PipelinePhase.builder(pluginTypes)
        .addStage(StageSpec.builder("condition3", CONDITION3).build())
        .build());

    // condition1.connector -- n2 (phase is named after the condition2 -> n2 dag)
    String cond2TrueName = PipelinePlanner.getPhaseName(
        new Dag(ImmutableSet.of(new Connection("condition2", "n2"))));
    expectedPhases.put(cond2TrueName, PipelinePhase.builder(pluginTypes)
        .addStage(StageSpec.builder("condition1.connector", connectorSpec("condition1.connector", Constants.Connector.SOURCE_TYPE)).build())
        .addStage(StageSpec.builder("n2", NODE).build())
        .addConnection("condition1.connector", "n2")
        .build());

    // condition1.connector -- n3 (phase is named after the condition3 -> n3 dag)
    String cond3TrueName = PipelinePlanner.getPhaseName(
        new Dag(ImmutableSet.of(new Connection("condition3", "n3"))));
    expectedPhases.put(cond3TrueName, PipelinePhase.builder(pluginTypes)
        .addStage(StageSpec.builder("condition1.connector", connectorSpec("condition1.connector", Constants.Connector.SOURCE_TYPE)).build())
        .addStage(StageSpec.builder("n3", NODE).build())
        .addConnection("condition1.connector", "n3")
        .build());

    // control flow between the expected phases
    Set<Connection> expectedConnections = new HashSet<>();
    expectedConnections.add(new Connection(headName, cond1Name));
    expectedConnections.add(new Connection(cond1Name, cond2Name, true));
    expectedConnections.add(new Connection(cond1Name, cond3Name, false));
    expectedConnections.add(new Connection(cond2Name, cond2TrueName, true));
    expectedConnections.add(new Connection(cond3Name, cond3TrueName, true));

    PipelinePlan expected = new PipelinePlan(expectedPhases, expectedConnections);
    PipelinePlan actual = planner.plan(pipelineSpec);
    Assert.assertEquals(expected, actual);
}
Also used : HashMap(java.util.HashMap) Connection(co.cask.cdap.etl.proto.Connection) PipelineSpec(co.cask.cdap.etl.spec.PipelineSpec) PipelinePhase(co.cask.cdap.etl.common.PipelinePhase) StageSpec(co.cask.cdap.etl.spec.StageSpec) HashSet(java.util.HashSet) Test(org.junit.Test)

Example 14 with StageSpec

use of co.cask.cdap.etl.spec.StageSpec in project cdap by caskdata.

the class PipelinePlanner method plan.

/**
 * Create an execution plan for the given logical pipeline. This is used for batch pipelines.
 * Though it may eventually be useful to mark windowing points for realtime pipelines.
 *
 * A plan consists of one or more phases, with connections between phases.
 * A connection between phases indicates control flow, and not necessarily
 * data flow. This class assumes that it receives a valid pipeline spec.
 * That is, the pipeline has no cycles, all its nodes have unique names,
 * sources don't have any input, sinks don't have any output,
 * everything else has both an input and an output, etc.
 *
 * We start by inserting connector nodes into the logical dag,
 * which are used to mark boundaries between mapreduce jobs.
 * Each connector represents a node where we will need to write to a local dataset.
 *
 * Next, the logical pipeline is broken up into phases,
 * using the connectors as sinks in one phase, and a source in another.
 * After this point, connections between phases do not indicate data flow, but control flow.
 *
 * If the spec has no connections at all, every stage must be an action; the returned plan
 * then has one single-stage phase per action and no phase connections.
 *
 * @param spec the pipeline spec, representing a logical pipeline
 * @return the execution plan
 * @throws IllegalStateException if the spec has no connections but contains a stage whose
 *   plugin type is not one of the action types
 */
public PipelinePlan plan(PipelineSpec spec) {
    // go through the stages and examine their plugin type to determine which stages are reduce stages
    Set<String> reduceNodes = new HashSet<>();
    Set<String> isolationNodes = new HashSet<>();
    Set<String> actionNodes = new HashSet<>();
    Set<String> multiPortNodes = new HashSet<>();
    Set<String> allNodes = new HashSet<>();
    // Map to hold the connection information from condition nodes to the first stage
    // they connect to. Condition information also includes whether the stage is connected
    // on the 'true' branch or the 'false' branch
    Map<String, ConditionBranches> conditionBranches = new HashMap<>();
    // for every condition stage: the stages it connects to, and the stages that connect to it
    Map<String, Set<String>> conditionOutputs = new HashMap<>();
    Map<String, Set<String>> conditionInputs = new HashMap<>();
    // stage name -> stage spec, for every stage in the pipeline
    Map<String, StageSpec> specs = new HashMap<>();
    for (StageSpec stage : spec.getStages()) {
        String pluginType = stage.getPlugin().getType();
        allNodes.add(stage.getName());
        if (reduceTypes.contains(pluginType)) {
            reduceNodes.add(stage.getName());
        }
        if (isolationTypes.contains(pluginType)) {
            isolationNodes.add(stage.getName());
        }
        if (actionTypes.contains(pluginType)) {
            // Collect all Action nodes from spec
            actionNodes.add(stage.getName());
        }
        if (multiPortTypes.contains(pluginType)) {
            multiPortNodes.add(stage.getName());
        }
        if (Condition.PLUGIN_TYPE.equals(pluginType)) {
            // both branches start out unknown; they are filled in while walking the connections below
            conditionBranches.put(stage.getName(), new ConditionBranches(null, null));
            conditionOutputs.put(stage.getName(), new HashSet<String>());
            conditionInputs.put(stage.getName(), new HashSet<String>());
        }
        specs.put(stage.getName(), stage);
    }
    // Special case for action nodes when there is no connection between them
    if (spec.getConnections().isEmpty()) {
        // All nodes should be actions
        if (!actionNodes.containsAll(allNodes)) {
            throw new IllegalStateException("No connections are specified.");
        }
        // one single-stage phase per action, keyed by the action's stage name, and no phase connections
        Map<String, PipelinePhase> phases = new HashMap<>();
        for (String actionNode : actionNodes) {
            PipelinePhase.Builder phaseBuilder = PipelinePhase.builder(supportedPluginTypes);
            PipelinePhase actionPhase = phaseBuilder.addStage(specs.get(actionNode)).build();
            phases.put(actionNode, actionPhase);
        }
        return new PipelinePlan(phases, new HashSet<Connection>());
    }
    // Set representing control nodes (Conditions and Actions)
    Set<String> controlNodes = Sets.union(actionNodes, conditionBranches.keySet());
    // for a condition fed directly by another condition: child condition -> parent condition
    Map<String, String> conditionChildToParent = new HashMap<>();
    for (Connection connection : spec.getConnections()) {
        // record the outputs and inputs of every condition stage
        if (conditionBranches.containsKey(connection.getFrom())) {
            conditionOutputs.get(connection.getFrom()).add(connection.getTo());
        }
        if (conditionBranches.containsKey(connection.getTo())) {
            conditionInputs.get(connection.getTo()).add(connection.getFrom());
        }
        if (conditionBranches.containsKey(connection.getFrom())) {
            if (conditionBranches.containsKey(connection.getTo())) {
                // conditions are chained
                conditionChildToParent.put(connection.getTo(), connection.getFrom());
            }
            // Outgoing connection from condition
            ConditionBranches branches = conditionBranches.get(connection.getFrom());
            String trueOutput;
            String falseOutput;
            // NOTE(review): getCondition() is used directly as the if-condition here; this looks
            // like it relies on every connection leaving a condition having a non-null condition
            // value - confirm against Connection.
            if (connection.getCondition()) {
                // 'true' branch: set the true output and keep whatever false output is already known
                trueOutput = connection.getTo();
                falseOutput = branches.getFalseOutput();
            } else {
                // 'false' branch: set the false output and keep whatever true output is already known
                trueOutput = branches.getTrueOutput();
                falseOutput = connection.getTo();
            }
            // store a new ConditionBranches that merges this connection with the previously known branch
            conditionBranches.put(connection.getFrom(), new ConditionBranches(trueOutput, falseOutput));
        }
    }
    // passed to split(...) together with the node sets; later handed to dagToPipeline
    // NOTE(review): presumably populated by split(...) with the inserted connector nodes - confirm.
    Map<String, String> connectorNodes = new HashMap<>();
    // now split the logical pipeline into pipeline phases, using the connectors as split points
    Set<Dag> splittedDag = split(spec.getConnections(), conditionBranches.keySet(), reduceNodes, isolationNodes, actionNodes, multiPortNodes, connectorNodes);
    Map<String, String> controlConnectors = getConnectorsAssociatedWithConditions(conditionBranches.keySet(), conditionChildToParent, conditionInputs, conditionOutputs, actionNodes);
    // index the sub-dags by their phase name
    Map<String, Dag> subdags = new HashMap<>();
    for (Dag subdag : splittedDag) {
        subdags.put(getPhaseName(subdag), subdag);
    }
    // build connections between phases and convert dags to PipelinePhase.
    Set<Connection> phaseConnections = new HashSet<>();
    Map<String, PipelinePhase> phases = new HashMap<>();
    for (Map.Entry<String, Dag> dagEntry1 : subdags.entrySet()) {
        String dag1Name = dagEntry1.getKey();
        Dag dag1 = dagEntry1.getValue();
        // convert the dag to a PipelinePhase
        // add a separate pipeline phase for each control node in the subdag
        Set<String> dag1ControlNodes = Sets.intersection(controlNodes, dag1.getNodes());
        for (String dag1ControlNode : dag1ControlNodes) {
            // a control node can appear in several sub-dags; only create its phase once
            if (!phases.containsKey(dag1ControlNode)) {
                phases.put(dag1ControlNode, PipelinePhase.builder(supportedPluginTypes).addStage(specs.get(dag1ControlNode)).build());
            }
        }
        // if there are non-control nodes in the subdag, add a pipeline phase for it
        if (!controlNodes.containsAll(dag1.getNodes())) {
            // the updated dag replaces conditions with the corresponding connector if applicable.
            Dag updatedDag = getUpdatedDag(dag1, controlConnectors);
            // Remove any control nodes from this dag
            if (!Sets.intersection(updatedDag.getNodes(), controlNodes).isEmpty()) {
                Set<String> nodes = Sets.difference(updatedDag.getNodes(), controlNodes);
                updatedDag = updatedDag.createSubDag(nodes);
            }
            phases.put(dag1Name, dagToPipeline(updatedDag, connectorNodes, specs, controlConnectors));
        }
        // from the control phase to this dag (or to another control phase)
        for (String controlSource : Sets.intersection(controlNodes, dag1.getSources())) {
            ConditionBranches branches = conditionBranches.get(controlSource);
            // null when the control source has no entry in conditionBranches (i.e. it is not a condition);
            // otherwise true if this dag contains the condition's 'true' output, false if it does not
            Boolean condition = branches == null ? null : dag1.getNodes().contains(branches.getTrueOutput());
            for (String output : dag1.getNodeOutputs(controlSource)) {
                if (controlNodes.contains(output)) {
                    // control source -> control node, add a phase connection between the control phases
                    phaseConnections.add(new Connection(controlSource, output, condition));
                } else {
                    // control source -> non-control nodes, add a phase connection from the control phase to this dag
                    phaseConnections.add(new Connection(controlSource, dag1Name, condition));
                }
            }
        }
        // from this dag to the control phase
        for (String controlSink : Sets.intersection(controlNodes, dag1.getSinks())) {
            for (String input : dag1.getNodeInputs(controlSink)) {
                if (controlNodes.contains(input)) {
                    // control node -> control-sink, add a phase connection between the control phases
                    ConditionBranches branches = conditionBranches.get(input);
                    // same rule as above: null when the input is not a condition, otherwise whether
                    // this dag contains the condition's 'true' output
                    Boolean condition = branches == null ? null : dag1.getNodes().contains(branches.getTrueOutput());
                    phaseConnections.add(new Connection(input, controlSink, condition));
                } else {
                    // non-control node -> control-sink, add a phase connection from this dag to the control phase
                    phaseConnections.add(new Connection(dag1Name, controlSink));
                }
            }
        }
        // find connected subdags (they have a source that is a sink in dag1)
        Set<String> nonControlSinks = Sets.difference(dag1.getSinks(), controlNodes);
        for (Map.Entry<String, Dag> dagEntry2 : subdags.entrySet()) {
            String dag2Name = dagEntry2.getKey();
            Dag dag2 = dagEntry2.getValue();
            // never connect a dag to itself
            if (dag1Name.equals(dag2Name)) {
                continue;
            }
            if (!Sets.intersection(nonControlSinks, dag2.getSources()).isEmpty()) {
                phaseConnections.add(new Connection(dag1Name, dag2Name));
            }
        }
    }
    return new PipelinePlan(phases, phaseConnections);
}
Also used : ImmutableSet(com.google.common.collect.ImmutableSet) Set(java.util.Set) TreeSet(java.util.TreeSet) HashSet(java.util.HashSet) HashMap(java.util.HashMap) StageSpec(co.cask.cdap.etl.spec.StageSpec) HashSet(java.util.HashSet) Connection(co.cask.cdap.etl.proto.Connection) PipelinePhase(co.cask.cdap.etl.common.PipelinePhase) ImmutableMap(com.google.common.collect.ImmutableMap) HashMap(java.util.HashMap) Map(java.util.Map)

Example 15 with StageSpec

use of co.cask.cdap.etl.spec.StageSpec in project cdap by caskdata.

the class PipelinePlanner method dagToPipeline.

/**
 * Builds a {@link PipelinePhase} out of a single dag.
 *
 * <p>Nodes are visited in the dag's topological order. For each node, its outgoing connections (if any)
 * are registered on the phase first, and then the node itself is added as a stage: nodes listed in
 * {@code connectors} become stages of type {@code Constants.CONNECTOR_TYPE}, and every other node becomes
 * a stage whose type is the plugin type recorded in its {@link StageSpec}.
 *
 * @param pipelineSpec the overall pipeline spec; supplies the stage-logging and process-timing flags
 * @param dag the dag to convert
 * @param connectors names of the connector nodes across all dags
 * @param specs stage specifications keyed by stage name; looked up for every non-connector node
 * @return the phase built from the dag
 */
private PipelinePhase dagToPipeline(PipelineSpec pipelineSpec, Dag dag, Set<String> connectors, Map<String, StageSpec> specs) {
    PipelinePhase.Builder builder = PipelinePhase.builder(supportedPluginTypes);
    for (String node : dag.getTopologicalOrder()) {
        // register outgoing edges before the stage itself
        Set<String> downstream = dag.getNodeOutputs(node);
        if (!downstream.isEmpty()) {
            builder.addConnections(node, downstream);
        }
        if (connectors.contains(node)) {
            // connector nodes carry no plugin spec; they only need a name and the connector type
            builder.addStage(StageInfo.builder(node, Constants.CONNECTOR_TYPE).build());
        } else {
            // every other node is described by its stage spec
            StageSpec stageSpec = specs.get(node);
            String type = stageSpec.getPlugin().getType();
            builder.addStage(
                StageInfo.builder(node, type)
                    .addInputs(stageSpec.getInputs())
                    .addInputSchemas(stageSpec.getInputSchemas())
                    .addOutputs(stageSpec.getOutputs())
                    .setOutputSchema(stageSpec.getOutputSchema())
                    .setErrorSchema(stageSpec.getErrorSchema())
                    .setErrorDatasetName(stageSpec.getErrorDatasetName())
                    .setStageLoggingEnabled(pipelineSpec.isStageLoggingEnabled())
                    .setProcessTimingEnabled(pipelineSpec.isProcessTimingEnabled())
                    .build());
        }
    }
    return builder.build();
}
Also used : PipelinePhase(co.cask.cdap.etl.common.PipelinePhase) StageSpec(co.cask.cdap.etl.spec.StageSpec)

Aggregations

StageSpec (co.cask.cdap.etl.spec.StageSpec): 27, HashMap (java.util.HashMap): 20, PipelinePhase (co.cask.cdap.etl.common.PipelinePhase): 15, Map (java.util.Map): 10, PipelineRuntime (co.cask.cdap.etl.common.PipelineRuntime): 8, BatchPhaseSpec (co.cask.cdap.etl.batch.BatchPhaseSpec): 7, DefaultMacroEvaluator (co.cask.cdap.etl.common.DefaultMacroEvaluator): 7, Connection (co.cask.cdap.etl.proto.Connection): 7, HashSet (java.util.HashSet): 7, MacroEvaluator (co.cask.cdap.api.macro.MacroEvaluator): 6, PipelinePluginContext (co.cask.cdap.etl.common.plugin.PipelinePluginContext): 5, PipelineSpec (co.cask.cdap.etl.spec.PipelineSpec): 5, TransactionPolicy (co.cask.cdap.api.annotation.TransactionPolicy): 4, PluginContext (co.cask.cdap.api.plugin.PluginContext): 4, WorkflowToken (co.cask.cdap.api.workflow.WorkflowToken): 4, LinkedHashMap (java.util.LinkedHashMap): 4, Test (org.junit.Test): 4, DatasetContext (co.cask.cdap.api.data.DatasetContext): 2, PartitionedFileSet (co.cask.cdap.api.dataset.lib.PartitionedFileSet): 2, SparkClientContext (co.cask.cdap.api.spark.SparkClientContext): 2