Search in sources :

Example 81 with Connection

use of io.cdap.cdap.etl.proto.Connection in project cdap by cdapio.

the class LineageOperationProcessorTest method testMergeOperationsNonRepeat.

/**
 * Builds a pipeline in which two source branches join at one sink and checks that
 * {@code LineageOperationsProcessor.process()} returns exactly the expected set of operations,
 * including one {@code n3,n4.merge.<field>} transform for each of id, name, body and offset.
 */
@Test
public void testMergeOperationsNonRepeat() {
    // Pipeline shape:
    //
    //   n1 -> n3 --|
    //              |--> n5
    //   n2 -> n4 --|
    //
    // Field operations registered per stage:
    //   n1: read  (id, name)
    //   n2: read  (body, offset)
    //   n3: id -> id, name -> name   (two identity transforms)
    //   n4: body -> (id, name)
    //   n5: write (id, name)
    EndPoint firstSource = EndPoint.of("default", "n1");
    EndPoint secondSource = EndPoint.of("default", "n2");
    EndPoint sink = EndPoint.of("default", "n5");

    Set<Connection> pipelineConnections = new HashSet<>(Arrays.asList(
        new Connection("n1", "n3"),
        new Connection("n3", "n5"),
        new Connection("n2", "n4"),
        new Connection("n4", "n5")));

    // n3 is the only stage with more than one operation, so it gets a growable list.
    List<FieldOperation> identityOperations = new ArrayList<>();
    identityOperations.add(
        new FieldTransformOperation("identity1", "identity", Collections.singletonList("id"), "id"));
    identityOperations.add(
        new FieldTransformOperation("identity2", "identity", Collections.singletonList("name"), "name"));

    Map<String, List<FieldOperation>> operationsByStage = new HashMap<>();
    operationsByStage.put("n1", Collections.singletonList(
        new FieldReadOperation("read1", "read description", firstSource, "id", "name")));
    operationsByStage.put("n2", Collections.singletonList(
        new FieldReadOperation("read2", "read description", secondSource, "body", "offset")));
    operationsByStage.put("n3", identityOperations);
    operationsByStage.put("n4", Collections.singletonList(
        new FieldTransformOperation("generate", "generate", Collections.singletonList("body"), "id", "name")));
    operationsByStage.put("n5", Collections.singletonList(
        new FieldWriteOperation("write", "write", sink, "id", "name")));

    LineageOperationsProcessor lineageProcessor =
        new LineageOperationsProcessor(pipelineConnections, operationsByStage, Collections.emptySet());

    // Every expected operation name carries its stage name as a prefix.
    String mergeDescription = "Merged stages: n3,n4";
    Set<Operation> expectedOperations = new HashSet<>();

    // Reads from the two sources.
    expectedOperations.add(new ReadOperation("n1.read1", "read description", firstSource, "id", "name"));
    expectedOperations.add(new ReadOperation("n2.read2", "read description", secondSource, "body", "offset"));

    // Per-branch transforms, each fed by the read of its own branch.
    expectedOperations.add(new TransformOperation("n3.identity1", "identity",
        Collections.singletonList(InputField.of("n1.read1", "id")), "id"));
    expectedOperations.add(new TransformOperation("n3.identity2", "identity",
        Collections.singletonList(InputField.of("n1.read1", "name")), "name"));
    expectedOperations.add(new TransformOperation("n4.generate", "generate",
        Collections.singletonList(InputField.of("n2.read2", "body")), "id", "name"));

    // Merge transforms: id and name take inputs from both branches, body and offset only from n2's read.
    expectedOperations.add(new TransformOperation("n3,n4.merge.id", mergeDescription,
        Arrays.asList(InputField.of("n3.identity1", "id"), InputField.of("n4.generate", "id")), "id"));
    expectedOperations.add(new TransformOperation("n3,n4.merge.name", mergeDescription,
        Arrays.asList(InputField.of("n3.identity2", "name"), InputField.of("n4.generate", "name")), "name"));
    expectedOperations.add(new TransformOperation("n3,n4.merge.body", mergeDescription,
        Collections.singletonList(InputField.of("n2.read2", "body")), "body"));
    expectedOperations.add(new TransformOperation("n3,n4.merge.offset", mergeDescription,
        Collections.singletonList(InputField.of("n2.read2", "offset")), "offset"));

    // The sink write consumes the merged id and name fields.
    expectedOperations.add(new WriteOperation("n5.write", "write", sink,
        Arrays.asList(InputField.of("n3,n4.merge.id", "id"), InputField.of("n3,n4.merge.name", "name"))));

    Set<Operation> actualOperations = lineageProcessor.process();
    Assert.assertEquals(expectedOperations, actualOperations);
}
Also used : ReadOperation(io.cdap.cdap.api.lineage.field.ReadOperation) FieldReadOperation(io.cdap.cdap.etl.api.lineage.field.FieldReadOperation) HashMap(java.util.HashMap) Connection(io.cdap.cdap.etl.proto.Connection) EndPoint(io.cdap.cdap.api.lineage.field.EndPoint) ReadOperation(io.cdap.cdap.api.lineage.field.ReadOperation) FieldOperation(io.cdap.cdap.etl.api.lineage.field.FieldOperation) FieldTransformOperation(io.cdap.cdap.etl.api.lineage.field.FieldTransformOperation) WriteOperation(io.cdap.cdap.api.lineage.field.WriteOperation) FieldWriteOperation(io.cdap.cdap.etl.api.lineage.field.FieldWriteOperation) TransformOperation(io.cdap.cdap.api.lineage.field.TransformOperation) FieldReadOperation(io.cdap.cdap.etl.api.lineage.field.FieldReadOperation) Operation(io.cdap.cdap.api.lineage.field.Operation) FieldTransformOperation(io.cdap.cdap.etl.api.lineage.field.FieldTransformOperation) TransformOperation(io.cdap.cdap.api.lineage.field.TransformOperation) FieldWriteOperation(io.cdap.cdap.etl.api.lineage.field.FieldWriteOperation) FieldReadOperation(io.cdap.cdap.etl.api.lineage.field.FieldReadOperation) WriteOperation(io.cdap.cdap.api.lineage.field.WriteOperation) FieldWriteOperation(io.cdap.cdap.etl.api.lineage.field.FieldWriteOperation) FieldTransformOperation(io.cdap.cdap.etl.api.lineage.field.FieldTransformOperation) ArrayList(java.util.ArrayList) List(java.util.List) ImmutableList(com.google.common.collect.ImmutableList) FieldOperation(io.cdap.cdap.etl.api.lineage.field.FieldOperation) HashSet(java.util.HashSet) Test(org.junit.Test)

Example 82 with Connection

use of io.cdap.cdap.etl.proto.Connection in project cdap by cdapio.

the class DagTest method testTopologicalOrder.

/**
 * Exercises {@code Dag.getTopologicalOrder()} on four graphs: a straight chain, a diamond,
 * a two-source fan-in, and a denser graph. Where more than one ordering is acceptable, only
 * fixed positions and relative order (via {@code assertBefore}) are asserted.
 */
@Test
public void testTopologicalOrder() {
    // Straight chain: n1 -> n2 -> n3 -> n4, so the full order is asserted.
    Dag chain = new Dag(ImmutableSet.of(
        new Connection("n1", "n2"),
        new Connection("n2", "n3"),
        new Connection("n3", "n4")));
    Assert.assertEquals(ImmutableList.of("n1", "n2", "n3", "n4"), chain.getTopologicalOrder());

    /*
             |--- n2 ---|
        n1 --|          |-- n4
             |--- n3 ---|
     */
    Dag diamond = new Dag(ImmutableSet.of(
        new Connection("n1", "n2"),
        new Connection("n1", "n3"),
        new Connection("n2", "n4"),
        new Connection("n3", "n4")));
    // Either n1, n2, n3, n4 or n1, n3, n2, n4 is acceptable.
    List<String> diamondOrder = diamond.getTopologicalOrder();
    Assert.assertEquals("n1", diamondOrder.get(0));
    Assert.assertEquals("n4", diamondOrder.get(3));
    assertBefore(diamondOrder, "n1", "n2");
    assertBefore(diamondOrder, "n1", "n3");

    /*
        n1 --|
             |--- n3
        n2 --|
     */
    Dag fanIn = new Dag(ImmutableSet.of(
        new Connection("n1", "n3"),
        new Connection("n2", "n3")));
    // Either n1, n2, n3 or n2, n1, n3 is acceptable.
    List<String> fanInOrder = fanIn.getTopologicalOrder();
    Assert.assertEquals("n3", fanInOrder.get(2));
    assertBefore(fanInOrder, "n1", "n3");
    assertBefore(fanInOrder, "n2", "n3");

    /*
                                     |--- n3
             |--- n2 ----------------|
        n1 --|       |               |--- n5
             |--------- n4 ----------|
             |              |        |
             |---------------- n6 ---|

        vertical arrows are pointing down
     */
    Dag dense = new Dag(ImmutableSet.of(
        new Connection("n1", "n2"),
        new Connection("n1", "n4"),
        new Connection("n1", "n6"),
        new Connection("n2", "n3"),
        new Connection("n2", "n4"),
        new Connection("n2", "n5"),
        new Connection("n4", "n3"),
        new Connection("n4", "n5"),
        new Connection("n4", "n6"),
        new Connection("n6", "n3"),
        new Connection("n6", "n5")));
    // The first four positions are asserted exactly; n3 and n5 only need to follow n6.
    List<String> denseOrder = dense.getTopologicalOrder();
    Assert.assertEquals("n1", denseOrder.get(0));
    Assert.assertEquals("n2", denseOrder.get(1));
    Assert.assertEquals("n4", denseOrder.get(2));
    Assert.assertEquals("n6", denseOrder.get(3));
    assertBefore(denseOrder, "n6", "n3");
    assertBefore(denseOrder, "n6", "n5");
}
Also used : Connection(io.cdap.cdap.etl.proto.Connection) Test(org.junit.Test)

Example 83 with Connection

use of io.cdap.cdap.etl.proto.Connection in project cdap by cdapio.

the class DagTest method testSplitByControlNodes.

/**
 * Checks {@code Dag.splitByControlNodes(...)} on four small graphs. In every call the first
 * argument holds the nodes named {@code c*} (conditions) and the second the nodes named
 * {@code a*} (actions); the returned set of sub-dags is compared with a hand-built expected set.
 */
@Test
public void testSplitByControlNodes() {
    // Naming convention used below: action nodes are a0, a1, ...; condition nodes are c0, c1, ...

    // Scenario 1: condition at the start, with one branch leading into an action.
    //
    //   c1 --> a1 --> n1 --> n2
    //   |
    //   | --> n3 --> n4 --> a2
    Dag conditionFirst = new Dag(ImmutableSet.of(
        new Connection("c1", "a1"),
        new Connection("a1", "n1"),
        new Connection("n1", "n2"),
        new Connection("c1", "n3"),
        new Connection("n3", "n4"),
        new Connection("n4", "a2")));
    Set<Dag> conditionFirstSplit =
        conditionFirst.splitByControlNodes(ImmutableSet.of("c1"), ImmutableSet.of("a1", "a2"));
    Set<Dag> conditionFirstExpected = new HashSet<>();
    conditionFirstExpected.add(new Dag(ImmutableSet.of(new Connection("c1", "a1"))));
    conditionFirstExpected.add(new Dag(ImmutableSet.of(
        new Connection("a1", "n1"),
        new Connection("n1", "n2"))));
    conditionFirstExpected.add(new Dag(ImmutableSet.of(
        new Connection("c1", "n3"),
        new Connection("n3", "n4"),
        new Connection("n4", "a2"))));
    Assert.assertEquals(conditionFirstExpected, conditionFirstSplit);

    // Scenario 2: condition at the end, with both of its branches leading into actions.
    //
    //   n0 --> n1 --> c0 --> n2 --> c1 --> a1
    //                               |
    //                               |--> a2
    Dag conditionLast = new Dag(ImmutableSet.of(
        new Connection("n0", "n1"),
        new Connection("n1", "c0"),
        new Connection("c0", "n2"),
        new Connection("n2", "c1"),
        new Connection("c1", "a1"),
        new Connection("c1", "a2")));
    Set<Dag> conditionLastSplit =
        conditionLast.splitByControlNodes(ImmutableSet.of("c0", "c1"), ImmutableSet.of("a1", "a2"));
    Set<Dag> conditionLastExpected = new HashSet<>();
    conditionLastExpected.add(new Dag(ImmutableSet.of(
        new Connection("n0", "n1"),
        new Connection("n1", "c0"))));
    conditionLastExpected.add(new Dag(ImmutableSet.of(
        new Connection("c0", "n2"),
        new Connection("n2", "c1"))));
    conditionLastExpected.add(new Dag(ImmutableSet.of(new Connection("c1", "a2"))));
    conditionLastExpected.add(new Dag(ImmutableSet.of(new Connection("c1", "a1"))));
    Assert.assertEquals(conditionLastExpected, conditionLastSplit);

    // Scenario 3: actions at the start, feeding directly into a condition.
    //
    //   a1 --|
    //        |--> a2 --> c1 --> n0 --> n1
    //   a0 --|
    Dag actionsIntoCondition = new Dag(ImmutableSet.of(
        new Connection("a0", "a2"),
        new Connection("a1", "a2"),
        new Connection("a2", "c1"),
        new Connection("c1", "n0"),
        new Connection("n0", "n1")));
    Set<Dag> actionsIntoConditionSplit =
        actionsIntoCondition.splitByControlNodes(ImmutableSet.of("c1"), ImmutableSet.of("a0", "a1", "a2"));
    Set<Dag> actionsIntoConditionExpected = new HashSet<>();
    actionsIntoConditionExpected.add(new Dag(ImmutableSet.of(
        new Connection("a0", "a2"),
        new Connection("a1", "a2"))));
    actionsIntoConditionExpected.add(new Dag(ImmutableSet.of(new Connection("a2", "c1"))));
    actionsIntoConditionExpected.add(new Dag(ImmutableSet.of(
        new Connection("c1", "n0"),
        new Connection("n0", "n1"))));
    Assert.assertEquals(actionsIntoConditionExpected, actionsIntoConditionSplit);

    // Scenario 4: actions at the start, reaching the condition through another plugin (n0).
    //
    //   a1 --|
    //        |--> n0 --> c1 --> n1
    //   a0 --|
    Dag actionsViaPlugin = new Dag(ImmutableSet.of(
        new Connection("a0", "n0"),
        new Connection("a1", "n0"),
        new Connection("n0", "c1"),
        new Connection("c1", "n1")));
    Set<Dag> actionsViaPluginSplit =
        actionsViaPlugin.splitByControlNodes(ImmutableSet.of("c1"), ImmutableSet.of("a0", "a1"));
    Set<Dag> actionsViaPluginExpected = new HashSet<>();
    actionsViaPluginExpected.add(new Dag(ImmutableSet.of(
        new Connection("a0", "n0"),
        new Connection("a1", "n0"),
        new Connection("n0", "c1"))));
    actionsViaPluginExpected.add(new Dag(ImmutableSet.of(new Connection("c1", "n1"))));
    Assert.assertEquals(actionsViaPluginExpected, actionsViaPluginSplit);
}
Also used : Connection(io.cdap.cdap.etl.proto.Connection) HashSet(java.util.HashSet) Test(org.junit.Test)

Example 84 with Connection

use of io.cdap.cdap.etl.proto.Connection in project cdap by cdapio.

the class DagTest method testComplicatedSplitByControl.

/**
 * Runs {@code Dag.splitByControlNodes(...)} on a larger graph containing six nodes named
 * {@code c*} (passed as the first argument) and ten nodes named {@code a*} (passed as the
 * second), and compares the result with thirteen hand-built sub-dags.
 */
@Test
public void testComplicatedSplitByControl() {
    /*
                                                   |-- n2 -- a3
            |-- a1 --|        |-- n0 -- n1 -- c1 --|                          |-- a5 --|
        a0--|        |-- c0 --|                    |-- n3 -- c2 -- n8 -- a4 --|        |-- a7
            |-- a2 --|        |                                               |-- a6 --|
                              |        |-- n4 -- n5 -- c4 -- c5 -- n9
                              |-- c3 --|
                                       |              |-- a8
                                       |-- n6 -- n7 --|
                                                      |-- a9
     */
    Dag pipeline = new Dag(ImmutableSet.of(
        // a0 fans out to a1/a2, which rejoin at c0
        new Connection("a0", "a1"),
        new Connection("a0", "a2"),
        new Connection("a1", "c0"),
        new Connection("a2", "c0"),
        // c0 branches to n0 and c3
        new Connection("c0", "n0"),
        new Connection("c0", "c3"),
        // upper branch: n0 -> n1 -> c1, then n2 -> a3 and n3 -> c2 -> n8 -> a4
        new Connection("n0", "n1"),
        new Connection("n1", "c1"),
        new Connection("c1", "n2"),
        new Connection("c1", "n3"),
        new Connection("n2", "a3"),
        new Connection("n3", "c2"),
        new Connection("c2", "n8"),
        new Connection("n8", "a4"),
        // a4 fans out to a5/a6, which rejoin at a7
        new Connection("a4", "a5"),
        new Connection("a4", "a6"),
        new Connection("a5", "a7"),
        new Connection("a6", "a7"),
        // lower branch: c3 -> n4 -> n5 -> c4 -> c5 -> n9 and c3 -> n6 -> n7 -> a8/a9
        new Connection("c3", "n4"),
        new Connection("c3", "n6"),
        new Connection("n4", "n5"),
        new Connection("n5", "c4"),
        new Connection("c4", "c5"),
        new Connection("c5", "n9"),
        new Connection("n6", "n7"),
        new Connection("n7", "a8"),
        new Connection("n7", "a9")));

    Set<Dag> splitResult = pipeline.splitByControlNodes(
        ImmutableSet.of("c0", "c1", "c2", "c3", "c4", "c5"),
        ImmutableSet.of("a0", "a1", "a2", "a3", "a4", "a5", "a6", "a7", "a8", "a9"));

    Set<Dag> expectedSplit = ImmutableSet.of(
        new Dag(ImmutableSet.of(
            new Connection("a0", "a1"),
            new Connection("a0", "a2"))),
        new Dag(ImmutableSet.of(
            new Connection("a1", "c0"),
            new Connection("a2", "c0"))),
        new Dag(ImmutableSet.of(
            new Connection("c0", "n0"),
            new Connection("n0", "n1"),
            new Connection("n1", "c1"))),
        new Dag(ImmutableSet.of(
            new Connection("c0", "c3"))),
        new Dag(ImmutableSet.of(
            new Connection("c1", "n2"),
            new Connection("n2", "a3"))),
        new Dag(ImmutableSet.of(
            new Connection("c1", "n3"),
            new Connection("n3", "c2"))),
        new Dag(ImmutableSet.of(
            new Connection("c2", "n8"),
            new Connection("n8", "a4"))),
        new Dag(ImmutableSet.of(
            new Connection("a4", "a5"),
            new Connection("a4", "a6"))),
        new Dag(ImmutableSet.of(
            new Connection("a5", "a7"),
            new Connection("a6", "a7"))),
        new Dag(ImmutableSet.of(
            new Connection("c3", "n4"),
            new Connection("n4", "n5"),
            new Connection("n5", "c4"))),
        new Dag(ImmutableSet.of(
            new Connection("c3", "n6"),
            new Connection("n6", "n7"),
            new Connection("n7", "a8"),
            new Connection("n7", "a9"))),
        new Dag(ImmutableSet.of(
            new Connection("c4", "c5"))),
        new Dag(ImmutableSet.of(
            new Connection("c5", "n9"))));

    Assert.assertEquals(expectedSplit, splitResult);
}
Also used : Connection(io.cdap.cdap.etl.proto.Connection) Test(org.junit.Test)

Example 85 with Connection

use of io.cdap.cdap.etl.proto.Connection in project cdap by cdapio.

the class DagTest method testSubset.

/**
 * Checks {@code Dag.subsetFrom(start)} and {@code Dag.subsetFrom(start, stopNodes)} against
 * hand-built expected dags, first on a small graph with several sources and then on a wider
 * graph whose branches converge on n6.
 */
@Test
public void testSubset() {
    /*
        n1 -- n2
              |
              v
        n3 -- n4 --- n8
              ^
              |
        n5-------- n6 -- n7
     */
    Dag multiSourceDag = new Dag(ImmutableSet.of(
        new Connection("n1", "n2"),
        new Connection("n2", "n4"),
        new Connection("n3", "n4"),
        new Connection("n4", "n8"),
        new Connection("n5", "n4"),
        new Connection("n5", "n6"),
        new Connection("n6", "n7")));

    // Subsets without stop nodes, one per starting node.
    Assert.assertEquals(
        new Dag(ImmutableSet.of(
            new Connection("n1", "n2"),
            new Connection("n2", "n4"),
            new Connection("n4", "n8"))),
        multiSourceDag.subsetFrom("n1"));
    Assert.assertEquals(
        new Dag(ImmutableSet.of(
            new Connection("n2", "n4"),
            new Connection("n4", "n8"))),
        multiSourceDag.subsetFrom("n2"));
    Assert.assertEquals(
        new Dag(ImmutableSet.of(
            new Connection("n3", "n4"),
            new Connection("n4", "n8"))),
        multiSourceDag.subsetFrom("n3"));
    Assert.assertEquals(
        new Dag(ImmutableSet.of(
            new Connection("n4", "n8"),
            new Connection("n5", "n4"),
            new Connection("n5", "n6"),
            new Connection("n6", "n7"))),
        multiSourceDag.subsetFrom("n5"));
    Assert.assertEquals(
        new Dag(ImmutableSet.of(new Connection("n6", "n7"))),
        multiSourceDag.subsetFrom("n6"));

    // Subsets with stop nodes: the expected dags end at the stop nodes.
    Assert.assertEquals(
        new Dag(ImmutableSet.of(new Connection("n1", "n2"))),
        multiSourceDag.subsetFrom("n1", ImmutableSet.of("n2")));
    Assert.assertEquals(
        new Dag(ImmutableSet.of(
            new Connection("n5", "n4"),
            new Connection("n5", "n6"))),
        multiSourceDag.subsetFrom("n5", ImmutableSet.of("n4", "n6")));

    /*
             |--- n2 ----------|
             |                 |                              |-- n10
        n1 --|--- n3 --- n5 ---|--- n6 --- n7 --- n8 --- n9 --|
             |                 |                              |-- n11
             |--- n4 ----------|

     */
    Dag convergingDag = new Dag(ImmutableSet.of(
        new Connection("n1", "n2"),
        new Connection("n1", "n3"),
        new Connection("n1", "n4"),
        new Connection("n2", "n6"),
        new Connection("n3", "n5"),
        new Connection("n4", "n6"),
        new Connection("n5", "n6"),
        new Connection("n6", "n7"),
        new Connection("n7", "n8"),
        new Connection("n8", "n9"),
        new Connection("n9", "n10"),
        new Connection("n9", "n11")));

    // The stop sets below also name nodes that are absent from the expected dag (n4, n1).
    Assert.assertEquals(
        new Dag(ImmutableSet.of(
            new Connection("n3", "n5"),
            new Connection("n5", "n6"),
            new Connection("n6", "n7"),
            new Connection("n7", "n8"),
            new Connection("n8", "n9"))),
        convergingDag.subsetFrom("n3", ImmutableSet.of("n4", "n9")));
    Assert.assertEquals(
        new Dag(ImmutableSet.of(
            new Connection("n2", "n6"),
            new Connection("n6", "n7"),
            new Connection("n7", "n8"))),
        convergingDag.subsetFrom("n2", ImmutableSet.of("n4", "n8", "n1")));
}
Also used : Connection(io.cdap.cdap.etl.proto.Connection) Test(org.junit.Test)

Aggregations

Connection (io.cdap.cdap.etl.proto.Connection) 96, Test (org.junit.Test) 78, HashSet (java.util.HashSet) 70, HashMap (java.util.HashMap) 44, ArrayList (java.util.ArrayList) 32, Operation (io.cdap.cdap.api.lineage.field.Operation) 28, FieldOperation (io.cdap.cdap.etl.api.lineage.field.FieldOperation) 28, List (java.util.List) 28, ImmutableList (com.google.common.collect.ImmutableList) 26, ReadOperation (io.cdap.cdap.api.lineage.field.ReadOperation) 26, TransformOperation (io.cdap.cdap.api.lineage.field.TransformOperation) 26, WriteOperation (io.cdap.cdap.api.lineage.field.WriteOperation) 26, FieldReadOperation (io.cdap.cdap.etl.api.lineage.field.FieldReadOperation) 26, FieldWriteOperation (io.cdap.cdap.etl.api.lineage.field.FieldWriteOperation) 26, FieldTransformOperation (io.cdap.cdap.etl.api.lineage.field.FieldTransformOperation) 24, EndPoint (io.cdap.cdap.api.lineage.field.EndPoint) 20, StageSpec (io.cdap.cdap.etl.proto.v2.spec.StageSpec) 18, PipelinePhase (io.cdap.cdap.etl.common.PipelinePhase) 16, PipelineSpec (io.cdap.cdap.etl.proto.v2.spec.PipelineSpec) 14, FieldLineageInfo (io.cdap.cdap.data2.metadata.lineage.field.FieldLineageInfo) 8