use of io.cdap.cdap.etl.proto.Connection in project cdap by caskdata.
the class LineageOperationProcessorTest method testSimpleJoinWithRenameJoinKeys.
/**
 * Checks the operations produced for a two-source join in which both join keys are renamed
 * after the join, while the remaining fields pass through unchanged.
 */
@Test
public void testSimpleJoinWithRenameJoinKeys() {
  // Both sources (n1, n2) feed the join stage (n3), which feeds the sink (n4).
  Set<Connection> pipelineConnections = new HashSet<>(Arrays.asList(
    new Connection("n1", "n3"),
    new Connection("n2", "n3"),
    new Connection("n3", "n4")));

  EndPoint customerEndPoint = EndPoint.of("default", "customer");
  EndPoint purchaseEndPoint = EndPoint.of("default", "purchase");
  EndPoint joinedEndPoint = EndPoint.of("default", "customer_purchase");

  // customer -> (id, name) ------------
  //                                    |
  //                                  JOIN -------> (id_from_customer, id_from_purchase, name, item)
  //                                    |
  // purchase -> (customer_id, item) ---

  FieldOperation readCustomer =
    new FieldReadOperation("ReadCustomer", "read description", customerEndPoint, "id", "name");
  FieldOperation readPurchase =
    new FieldReadOperation("ReadPurchase", "read description", purchaseEndPoint, "customer_id", "item");
  FieldOperation writeJoined =
    new FieldWriteOperation("Write", "write description", joinedEndPoint,
                            "id_from_customer", "id_from_purchase", "name", "item");

  // Operations recorded by the join stage: the join itself, one rename per key, and
  // identity operations for the two non-key fields.
  List<FieldOperation> joinStageOperations = new ArrayList<>();
  joinStageOperations.add(
    new FieldTransformOperation("Join", "Join Operation",
                                Arrays.asList("n1.id", "n2.customer_id"),
                                Arrays.asList("id", "customer_id")));
  joinStageOperations.add(
    new FieldTransformOperation("Rename id", "Rename id",
                                Collections.singletonList("id"), "id_from_customer"));
  joinStageOperations.add(
    new FieldTransformOperation("Rename customer_id", "Rename customer_id",
                                Collections.singletonList("customer_id"), "id_from_purchase"));
  joinStageOperations.add(
    new FieldTransformOperation("Identity name", "Identity Operation",
                                Collections.singletonList("n1.name"),
                                Collections.singletonList("name")));
  joinStageOperations.add(
    new FieldTransformOperation("Identity item", "Identity Operation",
                                Collections.singletonList("n2.item"),
                                Collections.singletonList("item")));

  Map<String, List<FieldOperation>> operationsByStage = new HashMap<>();
  operationsByStage.put("n1", Collections.singletonList(readCustomer));
  operationsByStage.put("n2", Collections.singletonList(readPurchase));
  operationsByStage.put("n3", joinStageOperations);
  operationsByStage.put("n4", Collections.singletonList(writeJoined));

  // n3 is passed as the only member of the processor's third argument.
  LineageOperationsProcessor lineageProcessor =
    new LineageOperationsProcessor(pipelineConnections, operationsByStage, Collections.singleton("n3"));
  Set<Operation> processedOperations = lineageProcessor.process();

  // Expected origins of each field as it flows through the pipeline.
  InputField customerId = InputField.of("n1.ReadCustomer", "id");
  InputField customerName = InputField.of("n1.ReadCustomer", "name");
  InputField purchaseCustomerId = InputField.of("n2.ReadPurchase", "customer_id");
  InputField purchaseItem = InputField.of("n2.ReadPurchase", "item");
  InputField joinedId = InputField.of("n3.Join", "id");
  InputField joinedCustomerId = InputField.of("n3.Join", "customer_id");

  Set<Operation> expectedOperations = new HashSet<>();
  expectedOperations.add(
    new ReadOperation("n1.ReadCustomer", "read description", customerEndPoint, "id", "name"));
  expectedOperations.add(
    new ReadOperation("n2.ReadPurchase", "read description", purchaseEndPoint, "customer_id", "item"));
  expectedOperations.add(
    new TransformOperation("n3.Join", "Join Operation",
                           Arrays.asList(customerId, purchaseCustomerId), "id", "customer_id"));
  expectedOperations.add(
    new TransformOperation("n3.Rename id", "Rename id",
                           Collections.singletonList(joinedId), "id_from_customer"));
  expectedOperations.add(
    new TransformOperation("n3.Rename customer_id", "Rename customer_id",
                           Collections.singletonList(joinedCustomerId), "id_from_purchase"));
  expectedOperations.add(
    new TransformOperation("n3.Identity name", "Identity Operation",
                           Collections.singletonList(customerName), "name"));
  expectedOperations.add(
    new TransformOperation("n3.Identity item", "Identity Operation",
                           Collections.singletonList(purchaseItem), "item"));
  expectedOperations.add(
    new WriteOperation("n4.Write", "write description", joinedEndPoint,
                       Arrays.asList(InputField.of("n3.Rename id", "id_from_customer"),
                                     InputField.of("n3.Rename customer_id", "id_from_purchase"),
                                     InputField.of("n3.Identity name", "name"),
                                     InputField.of("n3.Identity item", "item"))));

  Assert.assertEquals(expectedOperations, processedOperations);
}
use of io.cdap.cdap.etl.proto.Connection in project cdap by caskdata.
the class ConnectorDagTest method testMultipleNonNestedConditions.
/**
 * Splits a linear pipeline that contains two non-nested condition nodes and checks that the
 * result is three dags, with each condition ending one dag and starting the next.
 */
@Test
public void testMultipleNonNestedConditions() {
  /*
      n1 - c1 - n2 - n3 - c2 - n4
   */
  Set<Connection> connections = ImmutableSet.of(
    new Connection("n1", "c1"),
    new Connection("c1", "n2"),
    new Connection("n2", "n3"),
    new Connection("n3", "c2"),
    new Connection("c2", "n4"));
  Set<String> conditions = new HashSet<>(Arrays.asList("c1", "c2"));
  // No reduce, isolation, or multi-port nodes take part in this scenario.
  Set<String> reduceNodes = new HashSet<>();
  Set<String> isolationNodes = new HashSet<>();
  Set<String> multiPortNodes = new HashSet<>();

  Set<Dag> actual = PipelinePlanner.split(connections, conditions, reduceNodes, isolationNodes,
                                          EMPTY_ACTIONS, multiPortNodes, EMPTY_CONNECTORS);

  Dag dag1 = new Dag(ImmutableSet.of(new Connection("n1", "c1")));
  Dag dag2 = new Dag(ImmutableSet.of(new Connection("c1", "n2"),
                                     new Connection("n2", "n3"),
                                     new Connection("n3", "c2")));
  Dag dag3 = new Dag(ImmutableSet.of(new Connection("c2", "n4")));
  Set<Dag> expected = ImmutableSet.of(dag1, dag2, dag3);

  // JUnit's signature is assertEquals(expected, actual); passing them in that order keeps
  // the failure message's "expected" and "but was" labels attached to the right values.
  Assert.assertEquals(expected, actual);
}
use of io.cdap.cdap.etl.proto.Connection in project cdap by caskdata.
the class ConnectorDagTest method testSimpleCondition.
/**
 * Splits a pipeline with a single condition node that has two outgoing branches and checks
 * that the result is three dags: everything up to the condition, plus one dag per branch.
 */
@Test
public void testSimpleCondition() {
  /*
      file - csv - condition - sink1
                       |
                       |------- sink2
   */
  Set<Connection> connections = ImmutableSet.of(
    new Connection("file", "csv"),
    new Connection("csv", "condition"),
    new Connection("condition", "sink1"),
    new Connection("condition", "sink2"));
  Set<String> conditions = Collections.singleton("condition");
  // No reduce, isolation, or multi-port nodes take part in this scenario.
  Set<String> reduceNodes = new HashSet<>();
  Set<String> isolationNodes = new HashSet<>();
  Set<String> multiPortNodes = new HashSet<>();

  Set<Dag> actual = PipelinePlanner.split(connections, conditions, reduceNodes, isolationNodes,
                                          EMPTY_ACTIONS, multiPortNodes, EMPTY_CONNECTORS);

  Dag dag1 = new Dag(ImmutableSet.of(new Connection("file", "csv"),
                                     new Connection("csv", "condition")));
  Dag dag2 = new Dag(ImmutableSet.of(new Connection("condition", "sink1")));
  Dag dag3 = new Dag(ImmutableSet.of(new Connection("condition", "sink2")));
  Set<Dag> expected = ImmutableSet.of(dag1, dag2, dag3);

  // JUnit's signature is assertEquals(expected, actual); passing them in that order keeps
  // the failure message's "expected" and "but was" labels attached to the right values.
  Assert.assertEquals(expected, actual);
}
use of io.cdap.cdap.etl.proto.Connection in project cdap by caskdata.
the class ConnectorDagTest method testSplitDag.
/**
 * Exercises {@code ConnectorDag.split()} after {@code insertConnectors()} on three graph
 * shapes, comparing the resulting sub-dags with hand-built expectations. Nodes marked (r)
 * in the diagrams are the ones registered through {@code addReduceNodes}.
 */
@Test
public void testSplitDag() {
  /*
   * Scenario 1: fan-out into reduce nodes, then a chain with two more reduce nodes.
   *
   *           |--- n2(r) ----------|
   *           |                    |                                    |-- n10
   *      n1 --|--- n3(r) --- n5 ---|--- n6 --- n7(r) --- n8 --- n9(r) --|
   *           |                    |                                    |-- n11
   *           |--- n4(r) ----------|
   *
   * A connector is expected after n1, before n7, and before n9, giving the sub-dags:
   *
   *      n1 --> n1.out.connector
   *      n1.out.connector --> n2(r) --> n6 --> n7.connector
   *      n1.out.connector --> n3(r) --> n5 --> n6 --> n7.connector
   *      n1.out.connector --> n4(r) --> n6 --> n7.connector
   *      n7.connector --> n7 --> n8 --> n9.connector
   *                            |--> n10
   *      n9.connector --> n9 --|
   *                            |--> n11
   */
  ConnectorDag fanOutDag = ConnectorDag.builder()
    .addConnection("n1", "n2")
    .addConnection("n1", "n3")
    .addConnection("n1", "n4")
    .addConnection("n2", "n6")
    .addConnection("n3", "n5")
    .addConnection("n4", "n6")
    .addConnection("n5", "n6")
    .addConnection("n6", "n7")
    .addConnection("n7", "n8")
    .addConnection("n8", "n9")
    .addConnection("n9", "n10")
    .addConnection("n9", "n11")
    .addReduceNodes("n2", "n3", "n4", "n7", "n9")
    .build();
  fanOutDag.insertConnectors();
  Set<Dag> fanOutActual = new HashSet<>(fanOutDag.split());

  Dag sourceToConnector = new Dag(ImmutableSet.of(
    new Connection("n1", "n1.out.connector")));
  Dag viaN2 = new Dag(ImmutableSet.of(
    new Connection("n1.out.connector", "n2"),
    new Connection("n2", "n6"),
    new Connection("n6", "n7.connector")));
  Dag viaN3 = new Dag(ImmutableSet.of(
    new Connection("n1.out.connector", "n3"),
    new Connection("n3", "n5"),
    new Connection("n5", "n6"),
    new Connection("n6", "n7.connector")));
  Dag viaN4 = new Dag(ImmutableSet.of(
    new Connection("n1.out.connector", "n4"),
    new Connection("n4", "n6"),
    new Connection("n6", "n7.connector")));
  Dag n7ToN9Connector = new Dag(ImmutableSet.of(
    new Connection("n7.connector", "n7"),
    new Connection("n7", "n8"),
    new Connection("n8", "n9.connector")));
  Dag n9ToSinks = new Dag(ImmutableSet.of(
    new Connection("n9.connector", "n9"),
    new Connection("n9", "n10"),
    new Connection("n9", "n11")));
  Set<Dag> fanOutExpected =
    ImmutableSet.of(sourceToConnector, viaN2, viaN3, viaN4, n7ToN9Connector, n9ToSinks);
  Assert.assertEquals(fanOutExpected, fanOutActual);

  /*
   * Scenario 2: one reduce node feeding another.
   *
   *           |---> n2(r)
   *           |      |
   *      n1 --|      |
   *           |      v
   *           |---> n3(r) ---> n4
   *
   * n2 and n3 should each get a connector in front of them:
   *
   *           |---> n2.connector ---> n2(r)
   *           |                        |
   *      n1 --|                        |
   *           |                        v
   *           |-------------------> n3.connector ---> n3(r) ---> n4
   */
  ConnectorDag chainedReduceDag = ConnectorDag.builder()
    .addConnection("n1", "n2")
    .addConnection("n1", "n3")
    .addConnection("n2", "n3")
    .addConnection("n3", "n4")
    .addReduceNodes("n2", "n3")
    .build();
  chainedReduceDag.insertConnectors();
  Set<Dag> chainedReduceActual = new HashSet<>(chainedReduceDag.split());

  /*
   *           |--> n2.connector
   *      n1 --|
   *           |--> n3.connector
   */
  Dag n1ToConnectors = new Dag(ImmutableSet.of(
    new Connection("n1", "n2.connector"),
    new Connection("n1", "n3.connector")));
  /*
   *      n2.connector --> n2 --> n3.connector
   */
  Dag n2Segment = new Dag(ImmutableSet.of(
    new Connection("n2.connector", "n2"),
    new Connection("n2", "n3.connector")));
  /*
   *      n3.connector --> n3 --> n4
   */
  Dag n3Segment = new Dag(ImmutableSet.of(
    new Connection("n3.connector", "n3"),
    new Connection("n3", "n4")));
  Set<Dag> chainedReduceExpected = ImmutableSet.of(n1ToConnectors, n2Segment, n3Segment);
  Assert.assertEquals(chainedReduceExpected, chainedReduceActual);

  /*
   * Scenario 3: several sources converging on two reduce nodes.
   *
   *      n1 --> n2 --|
   *                  |--> n3(r) --> n4 --|
   *      n7 --> n8 --|                   |--> n5(r) --> n6
   *                                      |
   *      n9 -----------------------------|
   *
   * Only n5 should get a connector in front of it:
   *
   *      n1 --> n2 --|
   *                  |--> n3(r) --> n4 --|
   *      n7 --> n8 --|                   |--> n5.connector --> n5(r) --> n6
   *                                      |
   *      n9 -----------------------------|
   */
  ConnectorDag multiSourceDag = ConnectorDag.builder()
    .addConnection("n1", "n2")
    .addConnection("n2", "n3")
    .addConnection("n3", "n4")
    .addConnection("n4", "n5")
    .addConnection("n5", "n6")
    .addConnection("n7", "n8")
    .addConnection("n8", "n3")
    .addConnection("n9", "n5")
    .addReduceNodes("n3", "n5")
    .build();
  multiSourceDag.insertConnectors();
  Set<Dag> multiSourceActual = new HashSet<>(multiSourceDag.split());

  /*
   *      n1 --> n2 --|
   *                  |--> n3(r) --> n4 --|
   *      n7 --> n8 --|                   |--> n5.connector
   */
  Dag throughN3 = new Dag(ImmutableSet.of(
    new Connection("n1", "n2"),
    new Connection("n2", "n3"),
    new Connection("n3", "n4"),
    new Connection("n4", "n5.connector"),
    new Connection("n7", "n8"),
    new Connection("n8", "n3")));
  /*
   *                                      |--> n5.connector
   *                                      |
   *      n9 -----------------------------|
   */
  Dag n9ToConnector = new Dag(ImmutableSet.of(
    new Connection("n9", "n5.connector")));
  /*
   *      n5.connector --> n5(r) --> n6
   */
  Dag n5Segment = new Dag(ImmutableSet.of(
    new Connection("n5.connector", "n5"),
    new Connection("n5", "n6")));
  Set<Dag> multiSourceExpected = ImmutableSet.of(throughN3, n9ToConnector, n5Segment);
  Assert.assertEquals(multiSourceExpected, multiSourceActual);
}
use of io.cdap.cdap.etl.proto.Connection in project cdap by caskdata.
the class DagTest method testIdentitySplitByControl.
/**
 * Covers graph shapes for which {@code splitByControlNodes} is expected to return a set
 * holding only the original dag, i.e. no split takes place.
 * NOTE(review): the first argument is always empty and the second holds the "a*" nodes;
 * presumably these are condition nodes and action nodes respectively. Confirm against Dag.
 */
@Test
public void testIdentitySplitByControl() {
  //       |-- n0 --|
  // a0 --|          |-- n2
  //       |-- n1 --|
  Dag fanOutFromControl = new Dag(ImmutableSet.of(
    new Connection("a0", "n0"),
    new Connection("a0", "n1"),
    new Connection("n0", "n2"),
    new Connection("n1", "n2")));
  Set<Dag> fanOutActual =
    fanOutFromControl.splitByControlNodes(ImmutableSet.<String>of(), ImmutableSet.of("a0"));
  Set<Dag> fanOutExpected = new HashSet<>();
  fanOutExpected.add(fanOutFromControl);
  Assert.assertEquals(fanOutExpected, fanOutActual);

  // a0 -- n0 --|
  //            |-- n2
  //       n1 --|
  Dag controlOnOneBranch = new Dag(ImmutableSet.of(
    new Connection("a0", "n0"),
    new Connection("n0", "n2"),
    new Connection("n1", "n2")));
  Set<Dag> oneBranchActual =
    controlOnOneBranch.splitByControlNodes(ImmutableSet.<String>of(), ImmutableSet.of("a0"));
  Set<Dag> oneBranchExpected = new HashSet<>();
  oneBranchExpected.add(controlOnOneBranch);
  Assert.assertEquals(oneBranchExpected, oneBranchActual);

  // a0 -- n0 -- a1
  Dag controlAtBothEnds = new Dag(ImmutableSet.of(
    new Connection("a0", "n0"),
    new Connection("n0", "a1")));
  Set<Dag> bothEndsActual =
    controlAtBothEnds.splitByControlNodes(ImmutableSet.<String>of(), ImmutableSet.of("a0", "a1"));
  Set<Dag> bothEndsExpected = new HashSet<>();
  bothEndsExpected.add(controlAtBothEnds);
  Assert.assertEquals(bothEndsExpected, bothEndsActual);

  // n0 --|
  //      |-- a0
  // n1 --|
  //      |-- n2
  // a1 --|
  // (n1 feeds both a0 and n2)
  Dag sharedMiddleNode = new Dag(ImmutableSet.of(
    new Connection("n0", "a0"),
    new Connection("n1", "a0"),
    new Connection("n1", "n2"),
    new Connection("a1", "n2")));
  Set<Dag> sharedMiddleActual =
    sharedMiddleNode.splitByControlNodes(ImmutableSet.<String>of(), ImmutableSet.of("a0", "a1"));
  Set<Dag> sharedMiddleExpected = new HashSet<>();
  sharedMiddleExpected.add(sharedMiddleNode);
  Assert.assertEquals(sharedMiddleExpected, sharedMiddleActual);
}
Aggregations