Search in sources :

Example 66 with Connection

use of io.cdap.cdap.etl.proto.Connection in project cdap by caskdata.

the class LineageOperationProcessorTest method testSimpleJoinWithRenameJoinKeys.

/**
 * Verifies the operations produced for a join stage (n3) that joins two sources (n1, n2) on
 * their key fields and then renames both join keys before writing to a sink (n4).
 *
 * <pre>
 *   customer -> (id, name)------------
 *                                     |
 *                                   JOIN  ------->(id_from_customer, id_from_purchase, name, item)
 *                                     |
 *   purchase -> (customer_id, item)---
 * </pre>
 */
@Test
public void testSimpleJoinWithRenameJoinKeys() {
    // Stage wiring: both sources feed the join, which feeds the sink.
    Set<Connection> connections = new HashSet<>(Arrays.asList(
        new Connection("n1", "n3"),
        new Connection("n2", "n3"),
        new Connection("n3", "n4")));

    EndPoint customerEndPoint = EndPoint.of("default", "customer");
    EndPoint purchaseEndPoint = EndPoint.of("default", "purchase");
    EndPoint joinedEndPoint = EndPoint.of("default", "customer_purchase");

    // Field operations recorded by each stage, keyed by stage name.
    Map<String, List<FieldOperation>> operationsByStage = new HashMap<>();

    List<FieldOperation> customerRead = Collections.singletonList(
        new FieldReadOperation("ReadCustomer", "read description", customerEndPoint, "id", "name"));
    operationsByStage.put("n1", customerRead);

    List<FieldOperation> purchaseRead = Collections.singletonList(
        new FieldReadOperation("ReadPurchase", "read description", pEndPointFields(purchaseEndPoint), "customer_id", "item"));
    operationsByStage.put("n2", purchaseRead);

    // The join stage: join on the keys, rename each key, and pass the remaining fields through.
    List<FieldOperation> joinStageOperations = new ArrayList<>();
    joinStageOperations.add(new FieldTransformOperation("Join", "Join Operation",
        Arrays.asList("n1.id", "n2.customer_id"), Arrays.asList("id", "customer_id")));
    joinStageOperations.add(new FieldTransformOperation("Rename id", "Rename id",
        Collections.singletonList("id"), "id_from_customer"));
    joinStageOperations.add(new FieldTransformOperation("Rename customer_id", "Rename customer_id",
        Collections.singletonList("customer_id"), "id_from_purchase"));
    joinStageOperations.add(new FieldTransformOperation("Identity name", "Identity Operation",
        Collections.singletonList("n1.name"), Collections.singletonList("name")));
    joinStageOperations.add(new FieldTransformOperation("Identity item", "Identity Operation",
        Collections.singletonList("n2.item"), Collections.singletonList("item")));
    operationsByStage.put("n3", joinStageOperations);

    List<FieldOperation> sinkWrite = Collections.singletonList(
        new FieldWriteOperation("Write", "write description", joinedEndPoint,
            "id_from_customer", "id_from_purchase", "name", "item"));
    operationsByStage.put("n4", sinkWrite);

    // n3 is passed to the processor as the only entry of its third (stage-name set) argument.
    LineageOperationsProcessor processor =
        new LineageOperationsProcessor(connections, operationsByStage, Collections.singleton("n3"));
    Set<Operation> processedOperations = processor.process();

    // Expected output: every operation name is prefixed with its stage name, and every input
    // field is attributed to the operation that produced it.
    Operation readCustomer =
        new ReadOperation("n1.ReadCustomer", "read description", customerEndPoint, "id", "name");
    Operation readPurchase =
        new ReadOperation("n2.ReadPurchase", "read description", purchaseEndPoint, "customer_id", "item");
    Operation join = new TransformOperation("n3.Join", "Join Operation",
        Arrays.asList(InputField.of("n1.ReadCustomer", "id"), InputField.of("n2.ReadPurchase", "customer_id")),
        "id", "customer_id");
    Operation renameId = new TransformOperation("n3.Rename id", "Rename id",
        Collections.singletonList(InputField.of("n3.Join", "id")), "id_from_customer");
    Operation renameCustomerId = new TransformOperation("n3.Rename customer_id", "Rename customer_id",
        Collections.singletonList(InputField.of("n3.Join", "customer_id")), "id_from_purchase");
    Operation identityName = new TransformOperation("n3.Identity name", "Identity Operation",
        Collections.singletonList(InputField.of("n1.ReadCustomer", "name")), "name");
    Operation identityItem = new TransformOperation("n3.Identity item", "Identity Operation",
        Collections.singletonList(InputField.of("n2.ReadPurchase", "item")), "item");
    Operation write = new WriteOperation("n4.Write", "write description", joinedEndPoint,
        Arrays.asList(
            InputField.of("n3.Rename id", "id_from_customer"),
            InputField.of("n3.Rename customer_id", "id_from_purchase"),
            InputField.of("n3.Identity name", "name"),
            InputField.of("n3.Identity item", "item")));

    Set<Operation> expectedOperations = new HashSet<>();
    expectedOperations.add(readCustomer);
    expectedOperations.add(readPurchase);
    expectedOperations.add(join);
    expectedOperations.add(renameId);
    expectedOperations.add(renameCustomerId);
    expectedOperations.add(identityName);
    expectedOperations.add(identityItem);
    expectedOperations.add(write);

    Assert.assertEquals(expectedOperations, processedOperations);
}
Also used : ReadOperation(io.cdap.cdap.api.lineage.field.ReadOperation) FieldReadOperation(io.cdap.cdap.etl.api.lineage.field.FieldReadOperation) HashMap(java.util.HashMap) Connection(io.cdap.cdap.etl.proto.Connection) ArrayList(java.util.ArrayList) EndPoint(io.cdap.cdap.api.lineage.field.EndPoint) ReadOperation(io.cdap.cdap.api.lineage.field.ReadOperation) FieldOperation(io.cdap.cdap.etl.api.lineage.field.FieldOperation) FieldTransformOperation(io.cdap.cdap.etl.api.lineage.field.FieldTransformOperation) WriteOperation(io.cdap.cdap.api.lineage.field.WriteOperation) FieldWriteOperation(io.cdap.cdap.etl.api.lineage.field.FieldWriteOperation) TransformOperation(io.cdap.cdap.api.lineage.field.TransformOperation) FieldReadOperation(io.cdap.cdap.etl.api.lineage.field.FieldReadOperation) Operation(io.cdap.cdap.api.lineage.field.Operation) FieldTransformOperation(io.cdap.cdap.etl.api.lineage.field.FieldTransformOperation) TransformOperation(io.cdap.cdap.api.lineage.field.TransformOperation) FieldWriteOperation(io.cdap.cdap.etl.api.lineage.field.FieldWriteOperation) FieldReadOperation(io.cdap.cdap.etl.api.lineage.field.FieldReadOperation) WriteOperation(io.cdap.cdap.api.lineage.field.WriteOperation) FieldWriteOperation(io.cdap.cdap.etl.api.lineage.field.FieldWriteOperation) FieldTransformOperation(io.cdap.cdap.etl.api.lineage.field.FieldTransformOperation) ArrayList(java.util.ArrayList) List(java.util.List) ImmutableList(com.google.common.collect.ImmutableList) FieldOperation(io.cdap.cdap.etl.api.lineage.field.FieldOperation) HashSet(java.util.HashSet) Test(org.junit.Test)

Example 67 with Connection

use of io.cdap.cdap.etl.proto.Connection in project cdap by caskdata.

the class ConnectorDagTest method testMultipleNonNestedConditions.

/**
 * Verifies that a linear pipeline containing two condition nodes that are not nested inside
 * one another is split into three dags, with each condition node appearing as the end of one
 * dag and the start of the next.
 */
@Test
public void testMultipleNonNestedConditions() {
    /*
       n1-c1-n2-n3-c2-n4
     */
    Set<Connection> connections = ImmutableSet.of(new Connection("n1", "c1"), new Connection("c1", "n2"), new Connection("n2", "n3"), new Connection("n3", "c2"), new Connection("c2", "n4"));
    Set<String> conditions = new HashSet<>(Arrays.asList("c1", "c2"));
    // No reduce, isolation or multi-port nodes in this scenario.
    Set<String> reduceNodes = new HashSet<>();
    Set<String> isolationNodes = new HashSet<>();
    Set<String> multiPortNodes = new HashSet<>();
    Set<Dag> actual = PipelinePlanner.split(connections, conditions, reduceNodes, isolationNodes, EMPTY_ACTIONS, multiPortNodes, EMPTY_CONNECTORS);
    // Expected pieces: n1-c1, c1-n2-n3-c2, c2-n4
    Dag dag1 = new Dag(ImmutableSet.of(new Connection("n1", "c1")));
    Dag dag2 = new Dag(ImmutableSet.of(new Connection("c1", "n2"), new Connection("n2", "n3"), new Connection("n3", "c2")));
    Dag dag3 = new Dag(ImmutableSet.of(new Connection("c2", "n4")));
    Set<Dag> expected = ImmutableSet.of(dag1, dag2, dag3);
    // JUnit's contract is assertEquals(expected, actual); keeping that order makes the
    // failure message label the two values correctly.
    Assert.assertEquals(expected, actual);
}
Also used : Connection(io.cdap.cdap.etl.proto.Connection) HashSet(java.util.HashSet) Test(org.junit.Test)

Example 68 with Connection

use of io.cdap.cdap.etl.proto.Connection in project cdap by caskdata.

the class ConnectorDagTest method testSimpleCondition.

/**
 * Verifies that a pipeline with a single condition node that branches to two sinks is split
 * into three dags: the part leading up to the condition, and one dag per branch.
 */
@Test
public void testSimpleCondition() {
    /*
      file - csv - condition - sink1
                      |
                      |-------sink2
     */
    Set<Connection> connections = ImmutableSet.of(new Connection("file", "csv"), new Connection("csv", "condition"), new Connection("condition", "sink1"), new Connection("condition", "sink2"));
    Set<String> conditions = Collections.singleton("condition");
    // No reduce, isolation or multi-port nodes in this scenario.
    Set<String> reduceNodes = new HashSet<>();
    Set<String> isolationNodes = new HashSet<>();
    Set<String> multiPortNodes = new HashSet<>();
    Set<Dag> actual = PipelinePlanner.split(connections, conditions, reduceNodes, isolationNodes, EMPTY_ACTIONS, multiPortNodes, EMPTY_CONNECTORS);
    // Expected pieces: file-csv-condition, condition-sink1, condition-sink2
    Dag dag1 = new Dag(ImmutableSet.of(new Connection("file", "csv"), new Connection("csv", "condition")));
    Dag dag2 = new Dag(ImmutableSet.of(new Connection("condition", "sink1")));
    Dag dag3 = new Dag(ImmutableSet.of(new Connection("condition", "sink2")));
    Set<Dag> expected = ImmutableSet.of(dag1, dag2, dag3);
    // JUnit's contract is assertEquals(expected, actual); keeping that order makes the
    // failure message label the two values correctly.
    Assert.assertEquals(expected, actual);
}
Also used : Connection(io.cdap.cdap.etl.proto.Connection) HashSet(java.util.HashSet) Test(org.junit.Test)

Example 69 with Connection

use of io.cdap.cdap.etl.proto.Connection in project cdap by caskdata.

the class ConnectorDagTest method testSplitDag.

/**
 * Verifies {@code ConnectorDag.split()} after {@code insertConnectors()} for three graph
 * shapes, each with a different arrangement of reduce nodes (marked {@code (r)} in the
 * diagrams below).
 */
@Test
public void testSplitDag() {
    // ---------------------------------------------------------------------------------------
    // Scenario 1: fan-out into three reduce nodes, followed by two more reduce nodes in series
    // ---------------------------------------------------------------------------------------
    /*
             |--- n2(r) ----------|
             |                    |                                    |-- n10
        n1 --|--- n3(r) --- n5 ---|--- n6 --- n7(r) --- n8 --- n9(r) --|
             |                    |                                    |-- n11
             |--- n4(r) ----------|

        There should be a connector after n1, before n7, and before n9. This should result in subdags:

        n1 --> n1.out.connector

        n1.out.connector --> n2(r) --> n6 --> n7.connector

        n1.out.connector --> n3(r) --> n5 --> n6 --> n7.connector

        n1.out.connector --> n4(r) --> n6 --> n7.connector

        n7.connector --> n7 --> n8 --> n9.connector

                              |--> n10
        n9.connector --> n9 --|
                              |--> n11
     */
    ConnectorDag firstCdag = ConnectorDag.builder()
        .addConnection("n1", "n2")
        .addConnection("n1", "n3")
        .addConnection("n1", "n4")
        .addConnection("n2", "n6")
        .addConnection("n3", "n5")
        .addConnection("n4", "n6")
        .addConnection("n5", "n6")
        .addConnection("n6", "n7")
        .addConnection("n7", "n8")
        .addConnection("n8", "n9")
        .addConnection("n9", "n10")
        .addConnection("n9", "n11")
        .addReduceNodes("n2", "n3", "n4", "n7", "n9")
        .build();
    firstCdag.insertConnectors();
    Set<Dag> firstActual = new HashSet<>(firstCdag.split());

    Dag head = new Dag(ImmutableSet.of(
        new Connection("n1", "n1.out.connector")));
    Dag throughN2 = new Dag(ImmutableSet.of(
        new Connection("n1.out.connector", "n2"),
        new Connection("n2", "n6"),
        new Connection("n6", "n7.connector")));
    Dag throughN3 = new Dag(ImmutableSet.of(
        new Connection("n1.out.connector", "n3"),
        new Connection("n3", "n5"),
        new Connection("n5", "n6"),
        new Connection("n6", "n7.connector")));
    Dag throughN4 = new Dag(ImmutableSet.of(
        new Connection("n1.out.connector", "n4"),
        new Connection("n4", "n6"),
        new Connection("n6", "n7.connector")));
    Dag n7Segment = new Dag(ImmutableSet.of(
        new Connection("n7.connector", "n7"),
        new Connection("n7", "n8"),
        new Connection("n8", "n9.connector")));
    Dag n9Segment = new Dag(ImmutableSet.of(
        new Connection("n9.connector", "n9"),
        new Connection("n9", "n10"),
        new Connection("n9", "n11")));
    Set<Dag> firstExpected = ImmutableSet.of(head, throughN2, throughN3, throughN4, n7Segment, n9Segment);
    Assert.assertEquals(firstExpected, firstActual);

    // ---------------------------------------------------------------------------------------
    // Scenario 2: one reduce node feeding directly into another
    // ---------------------------------------------------------------------------------------
    /*
             |---> n2(r)
             |      |
        n1 --|      |
             |      v
             |---> n3(r) ---> n4

        n2 and n3 should have connectors inserted in front of them to become:

             |---> n2.connector ---> n2(r)
             |                        |
        n1 --|                        |
             |                        v
             |-------------------> n3.connector ---> n3(r) ---> n4
     */
    ConnectorDag secondCdag = ConnectorDag.builder()
        .addConnection("n1", "n2")
        .addConnection("n1", "n3")
        .addConnection("n2", "n3")
        .addConnection("n3", "n4")
        .addReduceNodes("n2", "n3")
        .build();
    secondCdag.insertConnectors();
    Set<Dag> secondActual = new HashSet<>(secondCdag.split());

    /*
             |--> n2.connector
        n1 --|
             |--> n3.connector
     */
    Dag sourceToConnectors = new Dag(ImmutableSet.of(
        new Connection("n1", "n2.connector"),
        new Connection("n1", "n3.connector")));
    /*
        n2.connector --> n2 --> n3.connector
     */
    Dag n2Segment = new Dag(ImmutableSet.of(
        new Connection("n2.connector", "n2"),
        new Connection("n2", "n3.connector")));
    /*
        n3.connector --> n3 --> n4
     */
    Dag n3Segment = new Dag(ImmutableSet.of(
        new Connection("n3.connector", "n3"),
        new Connection("n3", "n4")));
    Set<Dag> secondExpected = ImmutableSet.of(sourceToConnectors, n2Segment, n3Segment);
    Assert.assertEquals(secondExpected, secondActual);

    // ---------------------------------------------------------------------------------------
    // Scenario 3: multiple sources, where only the second reduce node needs a connector
    // ---------------------------------------------------------------------------------------
    /*
         n1 --> n2 --|
                     |--> n3(r) --> n4 --|
         n7 --> n8 --|                   |--> n5(r) --> n6
                                         |
         n9 -----------------------------|

        only n5 should have a connector inserted in front of it to become:

         n1 --> n2 --|
                     |--> n3(r) --> n4 --|
         n7 --> n8 --|                   |--> n5.connector --> n5(r) --> n6
                                         |
         n9 -----------------------------|
     */
    ConnectorDag thirdCdag = ConnectorDag.builder()
        .addConnection("n1", "n2")
        .addConnection("n2", "n3")
        .addConnection("n3", "n4")
        .addConnection("n4", "n5")
        .addConnection("n5", "n6")
        .addConnection("n7", "n8")
        .addConnection("n8", "n3")
        .addConnection("n9", "n5")
        .addReduceNodes("n3", "n5")
        .build();
    thirdCdag.insertConnectors();
    Set<Dag> thirdActual = new HashSet<>(thirdCdag.split());

    /*
         n1 --> n2 --|
                     |--> n3(r) --> n4 --|
         n7 --> n8 --|                   |--> n5.connector
     */
    Dag upstreamOfN5 = new Dag(ImmutableSet.of(
        new Connection("n1", "n2"),
        new Connection("n2", "n3"),
        new Connection("n3", "n4"),
        new Connection("n4", "n5.connector"),
        new Connection("n7", "n8"),
        new Connection("n8", "n3")));
    /*
                                         |--> n5.connector
                                         |
         n9 -----------------------------|
     */
    Dag n9ToConnector = new Dag(ImmutableSet.of(
        new Connection("n9", "n5.connector")));
    /*
         n5.connector --> n5(r) --> n6
     */
    Dag n5Segment = new Dag(ImmutableSet.of(
        new Connection("n5.connector", "n5"),
        new Connection("n5", "n6")));
    Set<Dag> thirdExpected = ImmutableSet.of(upstreamOfN5, n9ToConnector, n5Segment);
    Assert.assertEquals(thirdExpected, thirdActual);
}
Also used : Connection(io.cdap.cdap.etl.proto.Connection) HashSet(java.util.HashSet) Test(org.junit.Test)

Example 70 with Connection

use of io.cdap.cdap.etl.proto.Connection in project cdap by caskdata.

the class DagTest method testIdentitySplitByControl.

/**
 * Verifies that {@code splitByControlNodes} returns the original dag unchanged, as the single
 * element of the result set, for four graph shapes whose control nodes sit only at the edges
 * of the graph.
 */
@Test
public void testIdentitySplitByControl() {
    // The first argument is an empty set in every scenario; only the second argument varies.
    ImmutableSet<String> noConditions = ImmutableSet.<String>of();

    // Scenario 1: a single control node fanning out at the start.
    //
    //      |-- n0 --|
    // a0 --|        |-- n2
    //      |-- n1 --|
    Dag fanOut = new Dag(ImmutableSet.of(
        new Connection("a0", "n0"),
        new Connection("a0", "n1"),
        new Connection("n0", "n2"),
        new Connection("n1", "n2")));
    Set<Dag> fanOutActual = fanOut.splitByControlNodes(noConditions, ImmutableSet.of("a0"));
    Set<Dag> fanOutExpected = ImmutableSet.of(fanOut);
    Assert.assertEquals(fanOutExpected, fanOutActual);

    // Scenario 2: a control node heading one of two branches that merge.
    //
    // a0 -- n0 --|
    //            |-- n2
    //       n1 --|
    Dag oneBranch = new Dag(ImmutableSet.of(
        new Connection("a0", "n0"),
        new Connection("n0", "n2"),
        new Connection("n1", "n2")));
    Set<Dag> oneBranchActual = oneBranch.splitByControlNodes(noConditions, ImmutableSet.of("a0"));
    Set<Dag> oneBranchExpected = ImmutableSet.of(oneBranch);
    Assert.assertEquals(oneBranchExpected, oneBranchActual);

    // Scenario 3: control nodes at both ends of a straight line.
    //
    // a0 -- n0 -- a1
    Dag line = new Dag(ImmutableSet.of(
        new Connection("a0", "n0"),
        new Connection("n0", "a1")));
    Set<Dag> lineActual = line.splitByControlNodes(noConditions, ImmutableSet.of("a0", "a1"));
    Set<Dag> lineExpected = ImmutableSet.of(line);
    Assert.assertEquals(lineExpected, lineActual);

    // Scenario 4: one control node as a sink and another as a source.
    //
    // n0 --|
    //      |-- a0
    //  |---|
    // n1
    //  |---|
    //      |-- n2
    // a1 --|
    Dag mixed = new Dag(ImmutableSet.of(
        new Connection("n0", "a0"),
        new Connection("n1", "a0"),
        new Connection("n1", "n2"),
        new Connection("a1", "n2")));
    Set<Dag> mixedActual = mixed.splitByControlNodes(noConditions, ImmutableSet.of("a0", "a1"));
    Set<Dag> mixedExpected = ImmutableSet.of(mixed);
    Assert.assertEquals(mixedExpected, mixedActual);
}
Also used : Connection(io.cdap.cdap.etl.proto.Connection) HashSet(java.util.HashSet) Test(org.junit.Test)

Aggregations

Connection (io.cdap.cdap.etl.proto.Connection): 96, Test (org.junit.Test): 78, HashSet (java.util.HashSet): 70, HashMap (java.util.HashMap): 44, ArrayList (java.util.ArrayList): 32, Operation (io.cdap.cdap.api.lineage.field.Operation): 28, FieldOperation (io.cdap.cdap.etl.api.lineage.field.FieldOperation): 28, List (java.util.List): 28, ImmutableList (com.google.common.collect.ImmutableList): 26, ReadOperation (io.cdap.cdap.api.lineage.field.ReadOperation): 26, TransformOperation (io.cdap.cdap.api.lineage.field.TransformOperation): 26, WriteOperation (io.cdap.cdap.api.lineage.field.WriteOperation): 26, FieldReadOperation (io.cdap.cdap.etl.api.lineage.field.FieldReadOperation): 26, FieldWriteOperation (io.cdap.cdap.etl.api.lineage.field.FieldWriteOperation): 26, FieldTransformOperation (io.cdap.cdap.etl.api.lineage.field.FieldTransformOperation): 24, EndPoint (io.cdap.cdap.api.lineage.field.EndPoint): 20, StageSpec (io.cdap.cdap.etl.proto.v2.spec.StageSpec): 18, PipelinePhase (io.cdap.cdap.etl.common.PipelinePhase): 16, PipelineSpec (io.cdap.cdap.etl.proto.v2.spec.PipelineSpec): 14, FieldLineageInfo (io.cdap.cdap.data2.metadata.lineage.field.FieldLineageInfo): 8