use of org.apache.beam.runners.dataflow.worker.graph.Edges.Edge in project beam by apache.
the class MapTaskToNetworkFunctionTest method testFlatten.
/**
 * Translates a map task in which two reads feed one flatten and checks the shape of the
 * resulting network.
 */
@Test
public void testFlatten() {
  // Topology under test:
  //
  //   ReadA --\
  //            |--> Flatten
  //   ReadB --/

  // First source and its single output.
  InstructionOutput outA = createInstructionOutput("ReadA.out");
  ParallelInstruction sourceA = createParallelInstruction("ReadA", outA);
  sourceA.setRead(new ReadInstruction());

  // Second source and its single output.
  InstructionOutput outB = createInstructionOutput("ReadB.out");
  ParallelInstruction sourceB = createParallelInstruction("ReadB", outB);
  sourceB.setRead(new ReadInstruction());

  // The flatten consumes both read outputs.
  FlattenInstruction flattenSpec = new FlattenInstruction();
  flattenSpec.setInputs(
      ImmutableList.of(
          createInstructionInput(0, 0), // ReadA.out
          createInstructionInput(1, 0))); // ReadB.out
  InstructionOutput mergedOut = createInstructionOutput("Flatten.out");
  ParallelInstruction merge = createParallelInstruction("Flatten", mergedOut);
  merge.setFlatten(flattenSpec);

  MapTask task = new MapTask();
  task.setInstructions(ImmutableList.of(sourceA, sourceB, merge));
  task.setFactory(Transport.getJsonFactory());

  MapTaskToNetworkFunction translator =
      new MapTaskToNetworkFunction(IdGenerators.decrementingLongs());
  Network<Node, Edge> graph = translator.apply(task);

  // Three instructions plus three outputs, connected by five edges.
  assertNetworkProperties(graph);
  assertEquals(6, graph.nodes().size());
  assertEquals(5, graph.edges().size());

  // Each read must be followed by exactly its own output node.
  InstructionOutputNode outANode = getOnlySuccessor(graph, get(graph, sourceA));
  assertEquals(outA, outANode.getInstructionOutput());

  InstructionOutputNode outBNode = getOnlySuccessor(graph, get(graph, sourceB));
  assertEquals(outB, outBNode.getInstructionOutput());

  // Both read outputs must fan in to the very same consumer set.
  assertEquals(graph.successors(outANode), graph.successors(outBNode));

  // Following either read output leads to the flatten, whose only successor is its output.
  ParallelInstructionNode mergeNode = getOnlySuccessor(graph, outANode);
  InstructionOutputNode mergedOutNode = getOnlySuccessor(graph, mergeNode);
  assertEquals(mergedOut, mergedOutNode.getInstructionOutput());
}
use of org.apache.beam.runners.dataflow.worker.graph.Edges.Edge in project beam by apache.
the class MapTaskToNetworkFunctionTest method testPartialGroupByKey.
/**
 * Translates a linear Read --> PartialGroupByKey --> Write map task and checks that the resulting
 * network has the expected size and that each instruction is chained to the next through its
 * output node.
 */
@Test
public void testPartialGroupByKey() {
  // Read --> PGBK --> Write
  InstructionOutput readOutput = createInstructionOutput("Read.out");
  ParallelInstruction read = createParallelInstruction("Read", readOutput);
  read.setRead(new ReadInstruction());

  PartialGroupByKeyInstruction pgbkInstruction = new PartialGroupByKeyInstruction();
  // Read.out
  pgbkInstruction.setInput(createInstructionInput(0, 0));
  InstructionOutput pgbkOutput = createInstructionOutput("PGBK.out");
  ParallelInstruction pgbk = createParallelInstruction("PGBK", pgbkOutput);
  pgbk.setPartialGroupByKey(pgbkInstruction);

  WriteInstruction writeInstruction = new WriteInstruction();
  // PGBK.out
  writeInstruction.setInput(createInstructionInput(1, 0));
  ParallelInstruction write = createParallelInstruction("Write");
  write.setWrite(writeInstruction);

  MapTask mapTask = new MapTask();
  mapTask.setInstructions(ImmutableList.of(read, pgbk, write));
  mapTask.setFactory(Transport.getJsonFactory());

  Network<Node, Edge> network =
      new MapTaskToNetworkFunction(IdGenerators.decrementingLongs()).apply(mapTask);

  // Three instruction nodes plus two output nodes (the write has no output), four edges.
  assertNetworkProperties(network);
  assertEquals(5, network.nodes().size());
  assertEquals(4, network.edges().size());

  ParallelInstructionNode readNode = get(network, read);
  InstructionOutputNode readOutputNode = getOnlySuccessor(network, readNode);
  assertEquals(readOutput, readOutputNode.getInstructionOutput());

  ParallelInstructionNode pgbkNode = getOnlySuccessor(network, readOutputNode);
  InstructionOutputNode pgbkOutputNode = getOnlySuccessor(network, pgbkNode);
  assertEquals(pgbkOutput, pgbkOutputNode.getInstructionOutput());

  // Assert on the node actually reached through the network. Checking the locally constructed
  // `write` instruction would be vacuous because it can never be null.
  ParallelInstructionNode writeNode = getOnlySuccessor(network, pgbkOutputNode);
  assertNotNull(writeNode);
}
use of org.apache.beam.runners.dataflow.worker.graph.Edges.Edge in project beam by apache.
the class MapTaskToNetworkFunctionTest method testWrite.
/**
 * Translates a two-instruction Read --> Write map task and checks the size and wiring of the
 * resulting network.
 */
@Test
public void testWrite() {
  // Read --> Write
  InstructionOutput sourceOutput = createInstructionOutput("Read.out");
  ParallelInstruction source = createParallelInstruction("Read", sourceOutput);
  source.setRead(new ReadInstruction());

  // The sink consumes Read.out.
  WriteInstruction sinkSpec = new WriteInstruction();
  sinkSpec.setInput(createInstructionInput(0, 0));
  ParallelInstruction sink = createParallelInstruction("Write");
  sink.setWrite(sinkSpec);

  MapTask task = new MapTask();
  task.setInstructions(ImmutableList.of(source, sink));
  task.setFactory(Transport.getJsonFactory());

  MapTaskToNetworkFunction translator =
      new MapTaskToNetworkFunction(IdGenerators.decrementingLongs());
  Network<Node, Edge> graph = translator.apply(task);

  // Two instruction nodes and one output node, joined by two edges.
  assertNetworkProperties(graph);
  assertEquals(3, graph.nodes().size());
  assertEquals(2, graph.edges().size());

  // The read is followed only by its own output node.
  InstructionOutputNode sourceOutputNode = getOnlySuccessor(graph, get(graph, source));
  assertEquals(sourceOutput, sourceOutputNode.getInstructionOutput());

  // The read output is followed by exactly one instruction node.
  ParallelInstructionNode sinkNode = getOnlySuccessor(graph, sourceOutputNode);
  assertNotNull(sinkNode);
}
use of org.apache.beam.runners.dataflow.worker.graph.Edges.Edge in project beam by apache.
the class CreateRegisterFnOperationFunctionTest method testRunnerAndSdkToRunnerAndSdkGraph.
/**
 * Builds a graph in which a runner-side read and an SDK-side read share one output that fans out
 * to a runner-side ParDo and an SDK-side ParDo, applies {@code createRegisterFnOperation}, and
 * checks both the rewritten network and the two SDK subnetworks handed to the mocked
 * {@code registerFnOperationFunction}.
 */
@Test
public void testRunnerAndSdkToRunnerAndSdkGraph() {
  // RunnerSource --\   /--> RunnerParDo
  //                 out
  // CustomSource --/   \--> SdkParDo
  //
  // Should produce:
  // PortB --> out --\
  // RunnerSource --> out --> RunnerParDo
  //                   \--> PortA
  // PortA --> out --\
  // CustomSource --> out --> SdkParDo
  //                   \--> PortB
  Node firstSdkPortion = TestNode.create("FirstSdkPortion");
  Node secondSdkPortion = TestNode.create("SecondSdkPortion");
  @SuppressWarnings({"unchecked", "rawtypes"})
  ArgumentCaptor<MutableNetwork<Node, Edge>> networkCapture =
      ArgumentCaptor.forClass((Class) MutableNetwork.class);
  // Every SDK subnetwork passed to the mock is captured; the mock answers with the two stand-in
  // nodes in call order.
  when(registerFnOperationFunction.apply(networkCapture.capture()))
      .thenReturn(firstSdkPortion, secondSdkPortion);

  Node firstPort = TestNode.create("FirstPort");
  Node secondPort = TestNode.create("SecondPort");
  when(portSupplier.get()).thenReturn(firstPort, secondPort);

  Node runnerReadNode = createReadNode("RunnerRead", Nodes.ExecutionLocation.RUNNER_HARNESS);
  Edge runnerReadNodeEdge = DefaultEdge.create();
  Node sdkReadNode = createReadNode("SdkRead", Nodes.ExecutionLocation.SDK_HARNESS);
  Edge sdkReadNodeEdge = DefaultEdge.create();
  Node readNodeOut = createInstructionOutputNode("Read.out");
  Edge readNodeOutToRunnerEdge = DefaultEdge.create();
  Edge readNodeOutToSdkEdge = DefaultEdge.create();
  Node runnerParDoNode = createParDoNode("RunnerParDo", Nodes.ExecutionLocation.RUNNER_HARNESS);
  Edge runnerParDoNodeEdge = DefaultEdge.create();
  Node runnerParDoNodeOut = createInstructionOutputNode("RunnerParDo.out");
  Node sdkParDoNode = createParDoNode("SdkParDo", Nodes.ExecutionLocation.SDK_HARNESS);
  Edge sdkParDoNodeEdge = DefaultEdge.create();
  Node sdkParDoNodeOut = createInstructionOutputNode("SdkParDo.out");

  // Both reads feed Read.out, which fans out to RunnerParDo and SdkParDo; each ParDo has its own
  // output. Every node is registered explicitly before the edges are wired.
  MutableNetwork<Node, Edge> network = createEmptyNetwork();
  network.addNode(sdkReadNode);
  network.addNode(runnerReadNode);
  network.addNode(readNodeOut);
  network.addNode(runnerParDoNode);
  network.addNode(runnerParDoNodeOut);
  network.addNode(sdkParDoNode);
  network.addNode(sdkParDoNodeOut);
  network.addEdge(sdkReadNode, readNodeOut, sdkReadNodeEdge);
  network.addEdge(runnerReadNode, readNodeOut, runnerReadNodeEdge);
  network.addEdge(readNodeOut, runnerParDoNode, readNodeOutToRunnerEdge);
  network.addEdge(readNodeOut, sdkParDoNode, readNodeOutToSdkEdge);
  network.addEdge(runnerParDoNode, runnerParDoNodeOut, runnerParDoNodeEdge);
  network.addEdge(sdkParDoNode, sdkParDoNodeOut, sdkParDoNodeEdge);

  // Apply the function to a copy so the input network built above is left untouched.
  MutableNetwork<Node, Edge> appliedNetwork =
      createRegisterFnOperation.apply(Graphs.copyOf(network));
  assertNetworkMaintainsBipartiteStructure(appliedNetwork);

  // Node wiring is indeterministic, must be detected from generated graph.
  // The SDK portion with no incoming edges is treated as "A" (it feeds portA); the other one is
  // "B" (it is fed by portB).
  Node sdkPortionA;
  Node sdkPortionB;
  if (appliedNetwork.inDegree(firstSdkPortion) == 0) {
    sdkPortionA = firstSdkPortion;
    sdkPortionB = secondSdkPortion;
  } else {
    sdkPortionA = secondSdkPortion;
    sdkPortionB = firstSdkPortion;
  }
  Node portA = Iterables.getOnlyElement(appliedNetwork.successors(sdkPortionA));
  Node portB = Iterables.getOnlyElement(appliedNetwork.predecessors(sdkPortionB));

  // On each rewire between runner and SDK, we use a new output node
  Node newOutA = Iterables.getOnlyElement(appliedNetwork.successors(portA));
  Node newOutB = Iterables.getOnlyElement(appliedNetwork.predecessors(portB));

  // sdkPortionA -> portA -newOutA-> runnerParDoNode -> runnerParDoNodeOut
  //             runnerReadNode -newOutB-/
  //                                    \--> portB -> sdkPortionB
  assertThat(
      appliedNetwork.nodes(),
      containsInAnyOrder(
          runnerReadNode,
          firstSdkPortion,
          secondSdkPortion,
          portA,
          newOutA,
          portB,
          newOutB,
          runnerParDoNode,
          runnerParDoNodeOut));
  assertThat(appliedNetwork.successors(runnerReadNode), containsInAnyOrder(newOutB));
  assertThat(appliedNetwork.successors(newOutB), containsInAnyOrder(runnerParDoNode, portB));
  assertThat(appliedNetwork.successors(portB), containsInAnyOrder(sdkPortionB));
  assertThat(appliedNetwork.successors(sdkPortionA), containsInAnyOrder(portA));
  assertThat(appliedNetwork.successors(portA), containsInAnyOrder(newOutA));
  assertThat(appliedNetwork.successors(newOutA), containsInAnyOrder(runnerParDoNode));
  assertThat(appliedNetwork.successors(runnerParDoNode), containsInAnyOrder(runnerParDoNodeOut));
  // Edges between an SDK portion and its port must be happens-before edges.
  assertThat(
      appliedNetwork.edgesConnecting(sdkPortionA, portA),
      everyItem(Matchers.<Edges.Edge>instanceOf(HappensBeforeEdge.class)));
  assertThat(
      appliedNetwork.edgesConnecting(portB, sdkPortionB),
      everyItem(Matchers.<Edges.Edge>instanceOf(HappensBeforeEdge.class)));

  // Argument captor call order can be indeterministic
  // The captured subnetwork that contains the SDK read is "A".
  List<MutableNetwork<Node, Edge>> sdkSubnetworks = networkCapture.getAllValues();
  MutableNetwork<Node, Edge> sdkSubnetworkA;
  MutableNetwork<Node, Edge> sdkSubnetworkB;
  if (sdkSubnetworks.get(0).nodes().contains(sdkReadNode)) {
    sdkSubnetworkA = sdkSubnetworks.get(0);
    sdkSubnetworkB = sdkSubnetworks.get(1);
  } else {
    sdkSubnetworkA = sdkSubnetworks.get(1);
    sdkSubnetworkB = sdkSubnetworks.get(0);
  }
  assertNetworkMaintainsBipartiteStructure(sdkSubnetworkA);
  assertNetworkMaintainsBipartiteStructure(sdkSubnetworkB);

  //                         /-> portA
  // sdkReadNode -sdkNewOutA-> sdkParDoNode -> sdkParDoNodeOut
  Node sdkNewOutA = Iterables.getOnlyElement(sdkSubnetworkA.predecessors(portA));
  assertThat(
      sdkSubnetworkA.nodes(),
      containsInAnyOrder(sdkReadNode, portA, sdkNewOutA, sdkParDoNode, sdkParDoNodeOut));
  assertThat(sdkSubnetworkA.successors(sdkReadNode), containsInAnyOrder(sdkNewOutA));
  assertThat(sdkSubnetworkA.successors(sdkNewOutA), containsInAnyOrder(portA, sdkParDoNode));
  assertThat(sdkSubnetworkA.successors(sdkParDoNode), containsInAnyOrder(sdkParDoNodeOut));

  // portB -sdkNewOutB-> sdkParDoNode -> sdkParDoNodeOut
  Node sdkNewOutB = Iterables.getOnlyElement(sdkSubnetworkB.successors(portB));
  assertThat(
      sdkSubnetworkB.nodes(),
      containsInAnyOrder(portB, sdkNewOutB, sdkParDoNode, sdkParDoNodeOut));
  assertThat(sdkSubnetworkB.successors(portB), containsInAnyOrder(sdkNewOutB));
  assertThat(sdkSubnetworkB.successors(sdkNewOutB), containsInAnyOrder(sdkParDoNode));
  assertThat(sdkSubnetworkB.successors(sdkParDoNode), containsInAnyOrder(sdkParDoNodeOut));
}
use of org.apache.beam.runners.dataflow.worker.graph.Edges.Edge in project beam by apache.
the class CreateRegisterFnOperationFunctionTest method testRunnerToSdkToRunnerGraph.
/**
 * Builds a linear runner read --> SDK ParDo --> runner ParDo graph, applies
 * {@code createRegisterFnOperation}, and checks that the SDK ParDo is replaced by the node
 * returned from the mocked {@code registerFnOperationFunction}, bracketed by the two supplied
 * ports, and that the captured SDK subnetwork has the expected shape.
 */
@Test
public void testRunnerToSdkToRunnerGraph() {
  Node sdkPortion = TestNode.create("SdkPortion");
  @SuppressWarnings({"unchecked", "rawtypes"})
  ArgumentCaptor<MutableNetwork<Node, Edge>> networkCapture =
      ArgumentCaptor.forClass((Class) MutableNetwork.class);
  // The SDK subnetwork passed to the mock is captured; the mock answers with the stand-in node.
  when(registerFnOperationFunction.apply(networkCapture.capture())).thenReturn(sdkPortion);

  Node firstPort = TestNode.create("FirstPort");
  Node secondPort = TestNode.create("SecondPort");
  when(portSupplier.get()).thenReturn(firstPort, secondPort);

  Node readNode = createReadNode("Read", Nodes.ExecutionLocation.RUNNER_HARNESS);
  Edge readNodeEdge = DefaultEdge.create();
  Node readNodeOut = createInstructionOutputNode("Read.out");
  Edge readNodeOutEdge = DefaultEdge.create();
  Node sdkParDoNode = createParDoNode("SdkParDo", Nodes.ExecutionLocation.SDK_HARNESS);
  Edge sdkParDoNodeEdge = DefaultEdge.create();
  Node sdkParDoNodeOut = createInstructionOutputNode("SdkParDo.out");
  Edge sdkParDoNodeOutEdge = DefaultEdge.create();
  Node runnerParDoNode = createParDoNode("RunnerParDo", Nodes.ExecutionLocation.RUNNER_HARNESS);
  Edge runnerParDoNodeEdge = DefaultEdge.create();
  Node runnerParDoNodeOut = createInstructionOutputNode("RunnerParDo.out");

  // Read -out-> SdkParDo -out-> RunnerParDo
  // Every node is registered explicitly before the edges are wired.
  MutableNetwork<Node, Edge> network = createEmptyNetwork();
  network.addNode(readNode);
  network.addNode(readNodeOut);
  network.addNode(sdkParDoNode);
  network.addNode(sdkParDoNodeOut);
  network.addNode(runnerParDoNode);
  network.addNode(runnerParDoNodeOut);
  network.addEdge(readNode, readNodeOut, readNodeEdge);
  network.addEdge(readNodeOut, sdkParDoNode, readNodeOutEdge);
  network.addEdge(sdkParDoNode, sdkParDoNodeOut, sdkParDoNodeEdge);
  network.addEdge(sdkParDoNodeOut, runnerParDoNode, sdkParDoNodeOutEdge);
  network.addEdge(runnerParDoNode, runnerParDoNodeOut, runnerParDoNodeEdge);

  // Apply the function to a copy so the input network built above is left untouched.
  MutableNetwork<Node, Edge> appliedNetwork =
      createRegisterFnOperation.apply(Graphs.copyOf(network));
  assertNetworkMaintainsBipartiteStructure(appliedNetwork);

  // On each rewire between runner and SDK and vice versa, we use a new output node
  Node newOutA = Iterables.getOnlyElement(appliedNetwork.predecessors(firstPort));
  Node newOutB = Iterables.getOnlyElement(appliedNetwork.successors(secondPort));

  // readNode -newOutA-> firstPort --> sdkPortion --> secondPort -newOutB-> runnerParDoNode
  assertThat(
      appliedNetwork.nodes(),
      containsInAnyOrder(
          readNode,
          newOutA,
          firstPort,
          sdkPortion,
          secondPort,
          newOutB,
          runnerParDoNode,
          runnerParDoNodeOut));
  assertThat(appliedNetwork.successors(readNode), containsInAnyOrder(newOutA));
  assertThat(appliedNetwork.successors(newOutA), containsInAnyOrder(firstPort));
  assertThat(appliedNetwork.successors(firstPort), containsInAnyOrder(sdkPortion));
  assertThat(appliedNetwork.successors(sdkPortion), containsInAnyOrder(secondPort));
  assertThat(appliedNetwork.successors(secondPort), containsInAnyOrder(newOutB));
  assertThat(appliedNetwork.successors(newOutB), containsInAnyOrder(runnerParDoNode));
  assertThat(appliedNetwork.successors(runnerParDoNode), containsInAnyOrder(runnerParDoNodeOut));
  // Edges between the SDK portion and its ports must be happens-before edges.
  assertThat(
      appliedNetwork.edgesConnecting(firstPort, sdkPortion),
      everyItem(Matchers.<Edges.Edge>instanceOf(HappensBeforeEdge.class)));
  assertThat(
      appliedNetwork.edgesConnecting(sdkPortion, secondPort),
      everyItem(Matchers.<Edges.Edge>instanceOf(HappensBeforeEdge.class)));

  MutableNetwork<Node, Edge> sdkSubnetwork = networkCapture.getValue();
  assertNetworkMaintainsBipartiteStructure(sdkSubnetwork);

  Node sdkNewOutA = Iterables.getOnlyElement(sdkSubnetwork.successors(firstPort));
  Node sdkNewOutB = Iterables.getOnlyElement(sdkSubnetwork.predecessors(secondPort));

  // firstPort -sdkNewOutA-> sdkParDoNode -sdkNewOutB-> secondPort
  assertThat(
      sdkSubnetwork.nodes(),
      containsInAnyOrder(firstPort, sdkNewOutA, sdkParDoNode, sdkNewOutB, secondPort));
  assertThat(sdkSubnetwork.successors(firstPort), containsInAnyOrder(sdkNewOutA));
  assertThat(sdkSubnetwork.successors(sdkNewOutA), containsInAnyOrder(sdkParDoNode));
  assertThat(sdkSubnetwork.successors(sdkParDoNode), containsInAnyOrder(sdkNewOutB));
  assertThat(sdkSubnetwork.successors(sdkNewOutB), containsInAnyOrder(secondPort));
}
Aggregations