use of org.apache.beam.runners.dataflow.worker.graph.Edges.Edge in project beam by apache.
the class CreateRegisterFnOperationFunctionTest method testSdkToRunnerToSdkGraph.
/**
 * Applies {@code createRegisterFnOperation} to a {@code Read (SDK) -> ParDo (runner) -> ParDo (SDK)}
 * graph and checks both the rewritten top-level network and the two SDK subnetworks that were
 * handed to the mocked {@code registerFnOperationFunction}.
 *
 * <p>Expected top-level result:
 * {@code firstSdkPortion -> firstPort -> newOutA -> RunnerParDo -> newOutB -> secondPort ->
 * secondSdkPortion}.
 */
@Test
public void testSdkToRunnerToSdkGraph() {
  // Stub the collaborators: each call yields the next SDK portion / port node in sequence.
  Node firstSdkPortion = TestNode.create("FirstSdkPortion");
  Node secondSdkPortion = TestNode.create("SecondSdkPortion");
  @SuppressWarnings({"unchecked", "rawtypes"})
  ArgumentCaptor<MutableNetwork<Node, Edge>> networkCapture =
      ArgumentCaptor.forClass((Class) MutableNetwork.class);
  when(registerFnOperationFunction.apply(networkCapture.capture()))
      .thenReturn(firstSdkPortion, secondSdkPortion);
  Node firstPort = TestNode.create("FirstPort");
  Node secondPort = TestNode.create("SecondPort");
  when(portSupplier.get()).thenReturn(firstPort, secondPort);

  Node readNode = createReadNode("Read", Nodes.ExecutionLocation.SDK_HARNESS);
  Edge readNodeEdge = DefaultEdge.create();
  Node readNodeOut = createInstructionOutputNode("Read.out");
  Edge readNodeOutEdge = DefaultEdge.create();
  Node runnerParDoNode = createParDoNode("RunnerParDo", Nodes.ExecutionLocation.RUNNER_HARNESS);
  Edge runnerParDoNodeEdge = DefaultEdge.create();
  Node runnerParDoNodeOut = createInstructionOutputNode("RunnerParDo.out");
  Edge runnerParDoNodeOutEdge = DefaultEdge.create();
  Node sdkParDoNode = createParDoNode("SdkParDo", Nodes.ExecutionLocation.SDK_HARNESS);
  Edge sdkParDoNodeEdge = DefaultEdge.create();
  Node sdkParDoNodeOut = createInstructionOutputNode("SdkParDo.out");

  // Read -out-> RunnerParDo -out-> SdkParDo
  MutableNetwork<Node, Edge> network = createEmptyNetwork();
  network.addNode(readNode);
  network.addNode(readNodeOut);
  network.addNode(runnerParDoNode);
  network.addNode(runnerParDoNodeOut);
  // Register the SDK ParDo itself (previously its output node was added twice and the ParDo was
  // only present as a side effect of addEdge below).
  network.addNode(sdkParDoNode);
  network.addNode(sdkParDoNodeOut);
  network.addEdge(readNode, readNodeOut, readNodeEdge);
  network.addEdge(readNodeOut, runnerParDoNode, readNodeOutEdge);
  network.addEdge(runnerParDoNode, runnerParDoNodeOut, runnerParDoNodeEdge);
  network.addEdge(runnerParDoNodeOut, sdkParDoNode, runnerParDoNodeOutEdge);
  network.addEdge(sdkParDoNode, sdkParDoNodeOut, sdkParDoNodeEdge);

  MutableNetwork<Node, Edge> appliedNetwork =
      createRegisterFnOperation.apply(Graphs.copyOf(network));
  assertNetworkMaintainsBipartiteStructure(appliedNetwork);

  // On each rewire between runner and SDK, we use a new output node
  Node newOutA = Iterables.getOnlyElement(appliedNetwork.successors(firstPort));
  Node newOutB = Iterables.getOnlyElement(appliedNetwork.predecessors(secondPort));

  // firstSdkPortion -> firstPort -newOutA-> RunnerParDo -newOutB-> secondPort -> secondSdkPortion
  assertThat(
      appliedNetwork.nodes(),
      containsInAnyOrder(
          firstSdkPortion,
          firstPort,
          newOutA,
          runnerParDoNode,
          newOutB,
          secondPort,
          secondSdkPortion));
  assertThat(appliedNetwork.successors(firstSdkPortion), containsInAnyOrder(firstPort));
  assertThat(appliedNetwork.successors(firstPort), containsInAnyOrder(newOutA));
  assertThat(appliedNetwork.successors(newOutA), containsInAnyOrder(runnerParDoNode));
  assertThat(appliedNetwork.successors(runnerParDoNode), containsInAnyOrder(newOutB));
  assertThat(appliedNetwork.successors(newOutB), containsInAnyOrder(secondPort));
  assertThat(appliedNetwork.successors(secondPort), containsInAnyOrder(secondSdkPortion));
  // The links between an SDK portion and its port must be happens-before edges.
  assertThat(
      appliedNetwork.edgesConnecting(firstSdkPortion, firstPort),
      everyItem(Matchers.<Edges.Edge>instanceOf(HappensBeforeEdge.class)));
  assertThat(
      appliedNetwork.edgesConnecting(secondPort, secondSdkPortion),
      everyItem(Matchers.<Edges.Edge>instanceOf(HappensBeforeEdge.class)));

  // The order of the calls to create the SDK subnetworks is indeterminate
  List<MutableNetwork<Node, Edge>> sdkSubnetworks = networkCapture.getAllValues();
  MutableNetwork<Node, Edge> firstSdkSubnetwork;
  MutableNetwork<Node, Edge> secondSdkSubnetwork;
  // Identify the subnetworks by content: the one holding the Read node is the first SDK portion.
  if (sdkSubnetworks.get(0).nodes().contains(readNode)) {
    firstSdkSubnetwork = sdkSubnetworks.get(0);
    secondSdkSubnetwork = sdkSubnetworks.get(1);
  } else {
    firstSdkSubnetwork = sdkSubnetworks.get(1);
    secondSdkSubnetwork = sdkSubnetworks.get(0);
  }
  assertNetworkMaintainsBipartiteStructure(firstSdkSubnetwork);
  assertNetworkMaintainsBipartiteStructure(secondSdkSubnetwork);

  Node sdkNewOutA = Iterables.getOnlyElement(firstSdkSubnetwork.predecessors(firstPort));
  // readNode -sdkNewOutA-> firstPort
  assertThat(firstSdkSubnetwork.nodes(), containsInAnyOrder(readNode, sdkNewOutA, firstPort));
  assertThat(firstSdkSubnetwork.successors(readNode), containsInAnyOrder(sdkNewOutA));
  assertThat(firstSdkSubnetwork.successors(sdkNewOutA), containsInAnyOrder(firstPort));

  Node sdkNewOutB = Iterables.getOnlyElement(secondSdkSubnetwork.successors(secondPort));
  // secondPort -sdkNewOutB-> sdkParDoNode -> sdkParDoNodeOut
  assertThat(
      secondSdkSubnetwork.nodes(),
      containsInAnyOrder(secondPort, sdkNewOutB, sdkParDoNode, sdkParDoNodeOut));
  assertThat(secondSdkSubnetwork.successors(secondPort), containsInAnyOrder(sdkNewOutB));
  assertThat(secondSdkSubnetwork.successors(sdkNewOutB), containsInAnyOrder(sdkParDoNode));
  assertThat(secondSdkSubnetwork.successors(sdkParDoNode), containsInAnyOrder(sdkParDoNodeOut));
}
use of org.apache.beam.runners.dataflow.worker.graph.Edges.Edge in project beam by apache.
the class RemoveFlattenInstructionsFunctionTest method assertThatFlattenIsProperlyRemoved.
/**
 * Runs {@link RemoveFlattenInstructionsFunction} on {@code network} and asserts that no Flatten
 * node remains and that the root-to-leaf paths of the result match the original paths once each
 * Flatten and the node directly following it are stripped out.
 */
private void assertThatFlattenIsProperlyRemoved(MutableNetwork<Node, Edge> network) {
  // Snapshot the input before the function gets a chance to mutate it.
  Network<Node, Edge> snapshot = ImmutableNetwork.copyOf(network);
  network = new RemoveFlattenInstructionsFunction().apply(network);

  // No Flatten may survive the rewrite.
  for (Node remaining : network.nodes()) {
    assertFalse(isFlatten(remaining));
  }

  // Build the expected paths by hand: drop every Flatten together with its PCollection,
  // i.e. the element that immediately follows it on the path.
  List<List<Node>> expectedPaths = Networks.allPathsFromRootsToLeaves(snapshot);
  for (List<Node> expectedPath : expectedPaths) {
    for (Iterator<Node> cursor = expectedPath.iterator(); cursor.hasNext(); ) {
      if (!isFlatten(cursor.next())) {
        continue;
      }
      cursor.remove();
      cursor.next();
      cursor.remove();
    }
  }

  // Every path that used to exist must still exist (minus the Flatten and its PCollection).
  Object[] actualPaths = Networks.allPathsFromRootsToLeaves(network).toArray();
  assertThat(expectedPaths, containsInAnyOrder(actualPaths));
}
use of org.apache.beam.runners.dataflow.worker.graph.Edges.Edge in project beam by apache.
the class MapTaskToNetworkFunctionTest method assertNetworkProperties.
/**
 * Asserts structural invariants of {@code network}: it is directed, disallows self loops,
 * instruction nodes are followed only by instruction-output nodes (via {@link
 * MultiOutputInfoEdge}s for ParDos and {@link DefaultEdge}s otherwise), and instruction-output
 * nodes have at least one incoming edge and are followed only by instruction nodes via {@link
 * DefaultEdge}s.
 */
private static void assertNetworkProperties(Network<Node, Edge> network) {
  assertTrue(network.isDirected());
  assertFalse(network.allowsSelfLoops());

  for (Node node : network.nodes()) {
    if (node instanceof ParallelInstructionNode) {
      ParallelInstructionNode instructionNode = (ParallelInstructionNode) node;
      assertNull(instructionNode.getParallelInstruction().getOutputs());

      // ParDo instructions fan out through MultiOutputInfoEdges; everything else uses
      // DefaultEdges.
      boolean isParDo = instructionNode.getParallelInstruction().getParDo() != null;
      Class<?> expectedEdgeType = isParDo ? MultiOutputInfoEdge.class : DefaultEdge.class;

      for (Node successor : network.successors(node)) {
        assertThat(successor, instanceOf(InstructionOutputNode.class));
        for (Edge outgoing : network.edgesConnecting(node, successor)) {
          assertThat(outgoing, instanceOf(expectedEdgeType));
        }
      }
      continue;
    }

    if (node instanceof InstructionOutputNode) {
      assertThat(network.inDegree(node), greaterThanOrEqualTo(1));
      // Validate that all successors are instructions with DefaultEdge outgoing edges
      for (Node consumer : network.successors(node)) {
        assertThat(consumer, instanceOf(ParallelInstructionNode.class));
        for (Edge outgoing : network.edgesConnecting(node, consumer)) {
          assertThat(outgoing, instanceOf(DefaultEdge.class));
        }
      }
    }
  }
}
use of org.apache.beam.runners.dataflow.worker.graph.Edges.Edge in project beam by apache.
the class ReplacePgbkWithPrecombineFunctionTest method testPrecombinePgbkIsReplaced.
/**
 * Checks that {@link ReplacePgbkWithPrecombineFunction} keeps the shape of a
 * {@code out1 --> precombine_pgbk --> out2} network while swapping the partial-group-by-key
 * instruction for a ParDo that carries the same name and the value combining fn as its user fn.
 */
@Test
public void testPrecombinePgbkIsReplaced() throws Exception {
  // Network:
  // out1 --> precombine_pgbk --> out2
  Map<String, Object> valueCombiningFn = new HashMap<>();
  Node out1 = createInstructionOutputNode("out1");
  String pgbkName = "precombine_pgbk";
  Node precombinePgbk = createPrecombinePgbkNode(pgbkName, valueCombiningFn);
  Node out2 = createInstructionOutputNode("out2");

  MutableNetwork<Node, Edge> network = createEmptyNetwork();
  network.addNode(out1);
  network.addNode(precombinePgbk);
  network.addNode(out2);
  network.addEdge(out1, precombinePgbk, DefaultEdge.create());
  network.addEdge(precombinePgbk, out2, DefaultEdge.create());

  // Keep an immutable copy so the result can be compared against the untouched input.
  Network<Node, Edge> inputNetwork = ImmutableNetwork.copyOf(network);
  network = new ReplacePgbkWithPrecombineFunction().apply(network);

  // Assert that network has same structure (same number of nodes and paths).
  assertEquals(inputNetwork.nodes().size(), network.nodes().size());
  assertEquals(inputNetwork.edges().size(), network.edges().size());
  List<List<Node>> oldPaths = Networks.allPathsFromRootsToLeaves(inputNetwork);
  List<List<Node>> newPaths = Networks.allPathsFromRootsToLeaves(network);
  assertEquals(oldPaths.size(), newPaths.size());

  // Assert that the pgbk node has been replaced. Instruction nodes are counted so the test
  // cannot pass vacuously when the rewritten network contains no instruction node at all.
  int instructionNodeCount = 0;
  for (Node node : network.nodes()) {
    if (node instanceof ParallelInstructionNode) {
      instructionNodeCount++;
      ParallelInstructionNode createdCombineNode = (ParallelInstructionNode) node;
      ParallelInstruction parallelInstruction = createdCombineNode.getParallelInstruction();
      // JUnit's assertEquals takes (expected, actual).
      assertEquals(pgbkName, parallelInstruction.getName());
      assertNull(parallelInstruction.getPartialGroupByKey());
      assertNotNull(parallelInstruction.getParDo());
      ParDoInstruction parDoInstruction = parallelInstruction.getParDo();
      assertEquals(valueCombiningFn, parDoInstruction.getUserFn());
    }
  }
  // The input holds exactly one instruction node, so the output must as well.
  assertEquals(1, instructionNodeCount);
}
use of org.apache.beam.runners.dataflow.worker.graph.Edges.Edge in project beam by apache.
the class CloneAmbiguousFlattensFunctionTest method testNonAmbiguousFlattens.
/**
 * Verifies that cloning ambiguous flattens works in a network that also contains non-ambiguous
 * flattens: afterwards no flatten is ambiguous, the SDK and runner flattens are the very same
 * node instances as before, and the number of root-to-leaf paths is unchanged.
 */
@Test
public void testNonAmbiguousFlattens() throws Exception {
  //                  sdk2+out -\
  //                             sdk_flatten+out --> sdk3+out
  // sdk1+out ----\            /
  //               ambig_flatten+out
  // runner1+out -/            \-> runner2+out -\
  //                                             runner_flatten+out --> runner4+out
  //                                runner3+out -/
  MutableNetwork<Node, Edge> network = createEmptyNetwork();

  Node sdk1 = createSdkNode("sdk1");
  Node sdk2 = createSdkNode("sdk2");
  Node sdk3 = createSdkNode("sdk3");
  Node sdk1Out = createPCollection("sdk1.out");
  Node sdk2Out = createPCollection("sdk2.out");
  Node sdk3Out = createPCollection("sdk3.out");
  Node runner1 = createRunnerNode("runner1");
  Node runner2 = createRunnerNode("runner2");
  Node runner3 = createRunnerNode("runner3");
  Node runner4 = createRunnerNode("runner4");
  Node runner1Out = createPCollection("runner1.out");
  Node runner2Out = createPCollection("runner2.out");
  Node runner3Out = createPCollection("runner3.out");
  Node runner4Out = createPCollection("runner4.out");
  Node ambiguousFlatten = createFlatten("ambiguous_flatten", ExecutionLocation.AMBIGUOUS);
  Node ambiguousFlattenOut = createPCollection("ambiguous_flatten.out");
  Node sdkFlatten = createFlatten("sdk_flatten", ExecutionLocation.SDK_HARNESS);
  Node sdkFlattenOut = createPCollection("sdk_flatten.out");
  Node runnerFlatten = createFlatten("runner_flatten", ExecutionLocation.RUNNER_HARNESS);
  Node runnerFlattenOut = createPCollection("runner_flatten.out");

  // Register every node, in the same order as they are listed here.
  Node[] allNodes = {
    sdk1, sdk2, sdk3,
    sdk1Out, sdk2Out, sdk3Out,
    runner1, runner2, runner3, runner4,
    runner1Out, runner2Out, runner3Out, runner4Out,
    ambiguousFlatten, ambiguousFlattenOut,
    sdkFlatten, sdkFlattenOut,
    runnerFlatten, runnerFlattenOut
  };
  for (Node member : allNodes) {
    network.addNode(member);
  }

  // Each row is a {source, target} pair; a fresh DefaultEdge is created per connection.
  Node[][] connections = {
    // instruction -> its own output
    {sdk1, sdk1Out},
    {sdk2, sdk2Out},
    {sdk3, sdk3Out},
    {runner1, runner1Out},
    {runner2, runner2Out},
    {runner3, runner3Out},
    {runner4, runner4Out},
    {ambiguousFlatten, ambiguousFlattenOut},
    {sdkFlatten, sdkFlattenOut},
    {runnerFlatten, runnerFlattenOut},
    // output -> consuming instruction
    {sdk1Out, ambiguousFlatten},
    {runner1Out, ambiguousFlatten},
    {ambiguousFlattenOut, sdkFlatten},
    {sdk2Out, sdkFlatten},
    {sdkFlattenOut, sdk3},
    {ambiguousFlattenOut, runner2},
    {runner2Out, runnerFlatten},
    {runner3Out, runnerFlatten},
    {runnerFlattenOut, runner4}
  };
  for (Node[] connection : connections) {
    network.addEdge(connection[0], connection[1], DefaultEdge.create());
  }

  // Apply function and perform assertions. Paths are captured before the function runs.
  List<List<Node>> originalPaths = Networks.allPathsFromRootsToLeaves(network);
  network = new CloneAmbiguousFlattensFunction().apply(network);

  for (Node node : network.nodes()) {
    if (!(node instanceof ParallelInstructionNode)) {
      continue;
    }
    ParallelInstructionNode flattenCandidate = (ParallelInstructionNode) node;
    if (flattenCandidate.getParallelInstruction().getFlatten() == null) {
      continue;
    }

    assertTrue(
        "Ambiguous flatten not removed from network.",
        flattenCandidate.getExecutionLocation() != ExecutionLocation.AMBIGUOUS);

    // The non-ambiguous flattens must be the identical instances that were put in.
    String flattenName = flattenCandidate.getParallelInstruction().getName();
    if ("sdk_flatten".equals(flattenName)) {
      assertSame("SDK flatten has been incorrectly modified.", sdkFlatten, flattenCandidate);
    } else if ("runner_flatten".equals(flattenName)) {
      assertSame(
          "Runner flatten has been incorrectly modified.", runnerFlatten, flattenCandidate);
    }
  }

  int expectedPathCount = originalPaths.size();
  int actualPathCount = Networks.allPathsFromRootsToLeaves(network).size();
  assertEquals(expectedPathCount, actualPathCount);
}
Aggregations