Search in sources :

Example 56 with Node

use of org.apache.beam.runners.dataflow.worker.graph.Nodes.Node in project beam by apache.

the class CreateRegisterFnOperationFunctionTest method testSdkToRunnerToSdkGraph.

/**
 * Verifies the rewiring of a graph that alternates between execution locations:
 * {@code Read (SDK) -out-> RunnerParDo (runner) -out-> SdkParDo (SDK)}.
 *
 * <p>The mocked {@code registerFnOperationFunction} returns {@code firstSdkPortion} and then
 * {@code secondSdkPortion}; the mocked {@code portSupplier} returns {@code firstPort} and then
 * {@code secondPort}. The test checks the resulting network as well as the two SDK subnetworks
 * captured from the calls to {@code registerFnOperationFunction}.
 */
@Test
public void testSdkToRunnerToSdkGraph() {
    Node firstSdkPortion = TestNode.create("FirstSdkPortion");
    Node secondSdkPortion = TestNode.create("SecondSdkPortion");
    @SuppressWarnings({ "unchecked", "rawtypes" }) ArgumentCaptor<MutableNetwork<Node, Edge>> networkCapture = ArgumentCaptor.forClass((Class) MutableNetwork.class);
    when(registerFnOperationFunction.apply(networkCapture.capture())).thenReturn(firstSdkPortion, secondSdkPortion);
    Node firstPort = TestNode.create("FirstPort");
    Node secondPort = TestNode.create("SecondPort");
    when(portSupplier.get()).thenReturn(firstPort, secondPort);
    Node readNode = createReadNode("Read", Nodes.ExecutionLocation.SDK_HARNESS);
    Edge readNodeEdge = DefaultEdge.create();
    Node readNodeOut = createInstructionOutputNode("Read.out");
    Edge readNodeOutEdge = DefaultEdge.create();
    Node runnerParDoNode = createParDoNode("RunnerParDo", Nodes.ExecutionLocation.RUNNER_HARNESS);
    Edge runnerParDoNodeEdge = DefaultEdge.create();
    Node runnerParDoNodeOut = createInstructionOutputNode("RunnerParDo.out");
    Edge runnerParDoNodeOutEdge = DefaultEdge.create();
    Node sdkParDoNode = createParDoNode("SdkParDo", Nodes.ExecutionLocation.SDK_HARNESS);
    Edge sdkParDoNodeEdge = DefaultEdge.create();
    Node sdkParDoNodeOut = createInstructionOutputNode("SdkParDo.out");
    // Read -out-> RunnerParDo -out-> SdkParDo
    MutableNetwork<Node, Edge> network = createEmptyNetwork();
    // Register every node of the chain explicitly, one call per node.
    network.addNode(readNode);
    network.addNode(readNodeOut);
    network.addNode(runnerParDoNode);
    network.addNode(runnerParDoNodeOut);
    network.addNode(sdkParDoNode);
    network.addNode(sdkParDoNodeOut);
    network.addEdge(readNode, readNodeOut, readNodeEdge);
    network.addEdge(readNodeOut, runnerParDoNode, readNodeOutEdge);
    network.addEdge(runnerParDoNode, runnerParDoNodeOut, runnerParDoNodeEdge);
    network.addEdge(runnerParDoNodeOut, sdkParDoNode, runnerParDoNodeOutEdge);
    network.addEdge(sdkParDoNode, sdkParDoNodeOut, sdkParDoNodeEdge);
    // Apply the function under test to a copy so that the input network is left untouched.
    MutableNetwork<Node, Edge> appliedNetwork = createRegisterFnOperation.apply(Graphs.copyOf(network));
    assertNetworkMaintainsBipartiteStructure(appliedNetwork);
    // On each rewire between runner and SDK, we use a new output node
    Node newOutA = Iterables.getOnlyElement(appliedNetwork.successors(firstPort));
    Node newOutB = Iterables.getOnlyElement(appliedNetwork.predecessors(secondPort));
    // firstSdkPortion -> firstPort -newOutA-> RunnerParDo -newOutB-> secondPort -> secondSdkPortion
    assertThat(appliedNetwork.nodes(), containsInAnyOrder(firstSdkPortion, firstPort, newOutA, runnerParDoNode, newOutB, secondPort, secondSdkPortion));
    assertThat(appliedNetwork.successors(firstSdkPortion), containsInAnyOrder(firstPort));
    assertThat(appliedNetwork.successors(firstPort), containsInAnyOrder(newOutA));
    assertThat(appliedNetwork.successors(newOutA), containsInAnyOrder(runnerParDoNode));
    assertThat(appliedNetwork.successors(runnerParDoNode), containsInAnyOrder(newOutB));
    assertThat(appliedNetwork.successors(newOutB), containsInAnyOrder(secondPort));
    assertThat(appliedNetwork.successors(secondPort), containsInAnyOrder(secondSdkPortion));
    // The SDK portions are attached to their ports through HappensBeforeEdges only.
    assertThat(appliedNetwork.edgesConnecting(firstSdkPortion, firstPort), everyItem(Matchers.<Edges.Edge>instanceOf(HappensBeforeEdge.class)));
    assertThat(appliedNetwork.edgesConnecting(secondPort, secondSdkPortion), everyItem(Matchers.<Edges.Edge>instanceOf(HappensBeforeEdge.class)));
    // The order of the calls to create the SDK subnetworks is indeterminate, so identify the
    // first subnetwork as the one that contains the read node.
    List<MutableNetwork<Node, Edge>> sdkSubnetworks = networkCapture.getAllValues();
    MutableNetwork<Node, Edge> firstSdkSubnetwork;
    MutableNetwork<Node, Edge> secondSdkSubnetwork;
    if (sdkSubnetworks.get(0).nodes().contains(readNode)) {
        firstSdkSubnetwork = sdkSubnetworks.get(0);
        secondSdkSubnetwork = sdkSubnetworks.get(1);
    } else {
        firstSdkSubnetwork = sdkSubnetworks.get(1);
        secondSdkSubnetwork = sdkSubnetworks.get(0);
    }
    assertNetworkMaintainsBipartiteStructure(firstSdkSubnetwork);
    assertNetworkMaintainsBipartiteStructure(secondSdkSubnetwork);
    Node sdkNewOutA = Iterables.getOnlyElement(firstSdkSubnetwork.predecessors(firstPort));
    // readNode -sdkNewOutA-> firstPort
    assertThat(firstSdkSubnetwork.nodes(), containsInAnyOrder(readNode, sdkNewOutA, firstPort));
    assertThat(firstSdkSubnetwork.successors(readNode), containsInAnyOrder(sdkNewOutA));
    assertThat(firstSdkSubnetwork.successors(sdkNewOutA), containsInAnyOrder(firstPort));
    Node sdkNewOutB = Iterables.getOnlyElement(secondSdkSubnetwork.successors(secondPort));
    // secondPort -sdkNewOutB-> sdkParDoNode -> sdkParDoNodeOut
    assertThat(secondSdkSubnetwork.nodes(), containsInAnyOrder(secondPort, sdkNewOutB, sdkParDoNode, sdkParDoNodeOut));
    assertThat(secondSdkSubnetwork.successors(secondPort), containsInAnyOrder(sdkNewOutB));
    assertThat(secondSdkSubnetwork.successors(sdkNewOutB), containsInAnyOrder(sdkParDoNode));
    assertThat(secondSdkSubnetwork.successors(sdkParDoNode), containsInAnyOrder(sdkParDoNodeOut));
}
Also used : MutableNetwork(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.graph.MutableNetwork) InstructionOutputNode(org.apache.beam.runners.dataflow.worker.graph.Nodes.InstructionOutputNode) ParallelInstructionNode(org.apache.beam.runners.dataflow.worker.graph.Nodes.ParallelInstructionNode) Node(org.apache.beam.runners.dataflow.worker.graph.Nodes.Node) HappensBeforeEdge(org.apache.beam.runners.dataflow.worker.graph.Edges.HappensBeforeEdge) Edge(org.apache.beam.runners.dataflow.worker.graph.Edges.Edge) DefaultEdge(org.apache.beam.runners.dataflow.worker.graph.Edges.DefaultEdge) Test(org.junit.Test)

Example 57 with Node

use of org.apache.beam.runners.dataflow.worker.graph.Nodes.Node in project beam by apache.

the class RemoveFlattenInstructionsFunctionTest method assertThatFlattenIsProperlyRemoved.

/**
 * Applies {@code RemoveFlattenInstructionsFunction} to {@code network} and asserts that no
 * Flatten node remains and that every root-to-leaf path of the input still exists, minus each
 * Flatten and the node that directly followed it on the path.
 */
private void assertThatFlattenIsProperlyRemoved(MutableNetwork<Node, Edge> network) {
    // Snapshot the input first so it can be compared against the rewritten network.
    Network<Node, Edge> snapshot = ImmutableNetwork.copyOf(network);
    MutableNetwork<Node, Edge> rewritten = new RemoveFlattenInstructionsFunction().apply(network);

    // No Flatten may survive the rewrite.
    for (Node candidate : rewritten.nodes()) {
        assertFalse(isFlatten(candidate));
    }

    // Build the expected paths by hand: take the original paths and strip each Flatten together
    // with the element that follows it (its PCollection).
    List<List<Node>> expectedPaths = Networks.allPathsFromRootsToLeaves(snapshot);
    for (List<Node> expectedPath : expectedPaths) {
        for (Iterator<Node> cursor = expectedPath.iterator(); cursor.hasNext(); ) {
            if (!isFlatten(cursor.next())) {
                continue;
            }
            // Drop the Flatten itself ...
            cursor.remove();
            // ... then advance to, and drop, its PCollection.
            cursor.next();
            cursor.remove();
        }
    }

    // Every expected path must exist in the rewritten network, in any order.
    Object[] actualPaths = Networks.allPathsFromRootsToLeaves(rewritten).toArray();
    assertThat(expectedPaths, containsInAnyOrder(actualPaths));
}
Also used : Node(org.apache.beam.runners.dataflow.worker.graph.Nodes.Node) InstructionOutputNode(org.apache.beam.runners.dataflow.worker.graph.Nodes.InstructionOutputNode) ParallelInstructionNode(org.apache.beam.runners.dataflow.worker.graph.Nodes.ParallelInstructionNode) List(java.util.List) Edge(org.apache.beam.runners.dataflow.worker.graph.Edges.Edge) MultiOutputInfoEdge(org.apache.beam.runners.dataflow.worker.graph.Edges.MultiOutputInfoEdge) DefaultEdge(org.apache.beam.runners.dataflow.worker.graph.Edges.DefaultEdge)

Example 58 with Node

use of org.apache.beam.runners.dataflow.worker.graph.Nodes.Node in project beam by apache.

the class MapTaskToNetworkFunctionTest method assertNetworkProperties.

/**
 * Asserts the structural properties expected of {@code network}: it is directed, disallows
 * self loops, and alternates between instruction nodes and instruction output nodes with the
 * expected edge type on every connection.
 */
private static void assertNetworkProperties(Network<Node, Edge> network) {
    assertTrue(network.isDirected());
    assertFalse(network.allowsSelfLoops());
    for (Node current : network.nodes()) {
        if (current instanceof ParallelInstructionNode) {
            ParallelInstructionNode instructionNode = (ParallelInstructionNode) current;
            assertNull(instructionNode.getParallelInstruction().getOutputs());
            // Every successor must be an instruction output. Outgoing edges are DefaultEdges,
            // except for ParDo instructions, whose outgoing edges are MultiOutputInfoEdges.
            for (Node next : network.successors(current)) {
                assertThat(next, instanceOf(InstructionOutputNode.class));
                boolean isParDo = instructionNode.getParallelInstruction().getParDo() != null;
                Class<?> expectedEdgeType = isParDo ? MultiOutputInfoEdge.class : DefaultEdge.class;
                for (Edge connection : network.edgesConnecting(current, next)) {
                    assertThat(connection, instanceOf(expectedEdgeType));
                }
            }
        } else if (current instanceof InstructionOutputNode) {
            // An instruction output must have at least one incoming edge.
            assertThat(network.inDegree(current), greaterThanOrEqualTo(1));
            // Every successor must be an instruction reached through DefaultEdges.
            for (Node next : network.successors(current)) {
                assertThat(next, instanceOf(ParallelInstructionNode.class));
                for (Edge connection : network.edgesConnecting(current, next)) {
                    assertThat(connection, instanceOf(DefaultEdge.class));
                }
            }
        }
    }
}
Also used : InstructionOutputNode(org.apache.beam.runners.dataflow.worker.graph.Nodes.InstructionOutputNode) InstructionOutputNode(org.apache.beam.runners.dataflow.worker.graph.Nodes.InstructionOutputNode) ParallelInstructionNode(org.apache.beam.runners.dataflow.worker.graph.Nodes.ParallelInstructionNode) Node(org.apache.beam.runners.dataflow.worker.graph.Nodes.Node) ParallelInstructionNode(org.apache.beam.runners.dataflow.worker.graph.Nodes.ParallelInstructionNode) DefaultEdge(org.apache.beam.runners.dataflow.worker.graph.Edges.DefaultEdge) MultiOutputInfoEdge(org.apache.beam.runners.dataflow.worker.graph.Edges.MultiOutputInfoEdge) Edge(org.apache.beam.runners.dataflow.worker.graph.Edges.Edge) DefaultEdge(org.apache.beam.runners.dataflow.worker.graph.Edges.DefaultEdge) MultiOutputInfoEdge(org.apache.beam.runners.dataflow.worker.graph.Edges.MultiOutputInfoEdge)

Example 59 with Node

use of org.apache.beam.runners.dataflow.worker.graph.Nodes.Node in project beam by apache.

the class ReplacePgbkWithPrecombineFunctionTest method testPrecombinePgbkIsReplaced.

/**
 * Verifies that {@code ReplacePgbkWithPrecombineFunction} keeps the shape of the network
 * {@code out1 --> precombine_pgbk --> out2} while replacing the partial group-by-key
 * instruction with a ParDo instruction that carries the value combining function.
 */
@Test
public void testPrecombinePgbkIsReplaced() throws Exception {
    // Network:
    // out1 --> precombine_pgbk --> out2
    Map<String, Object> valueCombiningFn = new HashMap<>();
    Node out1 = createInstructionOutputNode("out1");
    String pgbkName = "precombine_pgbk";
    Node precombinePgbk = createPrecombinePgbkNode(pgbkName, valueCombiningFn);
    Node out2 = createInstructionOutputNode("out2");
    MutableNetwork<Node, Edge> network = createEmptyNetwork();
    network.addNode(out1);
    network.addNode(precombinePgbk);
    network.addNode(out2);
    network.addEdge(out1, precombinePgbk, DefaultEdge.create());
    network.addEdge(precombinePgbk, out2, DefaultEdge.create());
    // Snapshot the input so it can be compared against the rewritten network.
    Network<Node, Edge> inputNetwork = ImmutableNetwork.copyOf(network);
    network = new ReplacePgbkWithPrecombineFunction().apply(network);
    // Assert that network has same structure (same number of nodes, edges and paths).
    assertEquals(inputNetwork.nodes().size(), network.nodes().size());
    assertEquals(inputNetwork.edges().size(), network.edges().size());
    List<List<Node>> oldPaths = Networks.allPathsFromRootsToLeaves(inputNetwork);
    List<List<Node>> newPaths = Networks.allPathsFromRootsToLeaves(network);
    assertEquals(oldPaths.size(), newPaths.size());
    // Locate the first instruction node of the rewritten network.
    ParallelInstructionNode createdCombineNode = null;
    for (Node node : network.nodes()) {
        if (node instanceof ParallelInstructionNode) {
            createdCombineNode = (ParallelInstructionNode) node;
            break;
        }
    }
    // Fail explicitly instead of passing vacuously when no instruction node exists at all.
    assertNotNull("Expected a ParallelInstructionNode in the rewritten network.", createdCombineNode);
    // Assert that the pgbk node has been replaced (expected value first, actual value second).
    ParallelInstruction parallelInstruction = createdCombineNode.getParallelInstruction();
    assertEquals(pgbkName, parallelInstruction.getName());
    assertNull(parallelInstruction.getPartialGroupByKey());
    assertNotNull(parallelInstruction.getParDo());
    ParDoInstruction parDoInstruction = parallelInstruction.getParDo();
    assertEquals(valueCombiningFn, parDoInstruction.getUserFn());
}
Also used : HashMap(java.util.HashMap) InstructionOutputNode(org.apache.beam.runners.dataflow.worker.graph.Nodes.InstructionOutputNode) ParallelInstructionNode(org.apache.beam.runners.dataflow.worker.graph.Nodes.ParallelInstructionNode) Node(org.apache.beam.runners.dataflow.worker.graph.Nodes.Node) ParallelInstructionNode(org.apache.beam.runners.dataflow.worker.graph.Nodes.ParallelInstructionNode) ParallelInstruction(com.google.api.services.dataflow.model.ParallelInstruction) ParDoInstruction(com.google.api.services.dataflow.model.ParDoInstruction) List(java.util.List) Edge(org.apache.beam.runners.dataflow.worker.graph.Edges.Edge) DefaultEdge(org.apache.beam.runners.dataflow.worker.graph.Edges.DefaultEdge) Test(org.junit.Test)

Example 60 with Node

use of org.apache.beam.runners.dataflow.worker.graph.Nodes.Node in project beam by apache.

the class CloneAmbiguousFlattensFunctionTest method testNonAmbiguousFlattens.

/**
 * Checks that, for a network mixing ambiguous and non-ambiguous flattens, applying {@code
 * CloneAmbiguousFlattensFunction} leaves no flatten with an ambiguous execution location, keeps
 * the non-ambiguous flatten instances untouched, and preserves the number of root-to-leaf
 * paths.
 */
@Test
public void testNonAmbiguousFlattens() throws Exception {
    // Network under test:
    //
    //                         sdk2+out -\
    //                                    sdk_flatten+out --> sdk3+out
    // sdk1+out ----\                    /
    //               ambig_flatten+out -+
    // runner1+out -/                    \-> runner2+out -\
    //                                                     runner_flatten+out --> runner4+out
    //                                       runner3+out -/
    MutableNetwork<Node, Edge> network = createEmptyNetwork();

    // SDK instructions and their outputs.
    Node sdk1 = createSdkNode("sdk1");
    Node sdk2 = createSdkNode("sdk2");
    Node sdk3 = createSdkNode("sdk3");
    Node sdk1Out = createPCollection("sdk1.out");
    Node sdk2Out = createPCollection("sdk2.out");
    Node sdk3Out = createPCollection("sdk3.out");

    // Runner instructions and their outputs.
    Node runner1 = createRunnerNode("runner1");
    Node runner2 = createRunnerNode("runner2");
    Node runner3 = createRunnerNode("runner3");
    Node runner4 = createRunnerNode("runner4");
    Node runner1Out = createPCollection("runner1.out");
    Node runner2Out = createPCollection("runner2.out");
    Node runner3Out = createPCollection("runner3.out");
    Node runner4Out = createPCollection("runner4.out");

    // One flatten per execution location, each with its output.
    Node ambiguousFlatten = createFlatten("ambiguous_flatten", ExecutionLocation.AMBIGUOUS);
    Node ambiguousFlattenOut = createPCollection("ambiguous_flatten.out");
    Node sdkFlatten = createFlatten("sdk_flatten", ExecutionLocation.SDK_HARNESS);
    Node sdkFlattenOut = createPCollection("sdk_flatten.out");
    Node runnerFlatten = createFlatten("runner_flatten", ExecutionLocation.RUNNER_HARNESS);
    Node runnerFlattenOut = createPCollection("runner_flatten.out");

    // Register all nodes.
    network.addNode(sdk1);
    network.addNode(sdk2);
    network.addNode(sdk3);
    network.addNode(sdk1Out);
    network.addNode(sdk2Out);
    network.addNode(sdk3Out);
    network.addNode(runner1);
    network.addNode(runner2);
    network.addNode(runner3);
    network.addNode(runner4);
    network.addNode(runner1Out);
    network.addNode(runner2Out);
    network.addNode(runner3Out);
    network.addNode(runner4Out);
    network.addNode(ambiguousFlatten);
    network.addNode(ambiguousFlattenOut);
    network.addNode(sdkFlatten);
    network.addNode(sdkFlattenOut);
    network.addNode(runnerFlatten);
    network.addNode(runnerFlattenOut);

    // Connect every instruction to its own output.
    network.addEdge(sdk1, sdk1Out, DefaultEdge.create());
    network.addEdge(sdk2, sdk2Out, DefaultEdge.create());
    network.addEdge(sdk3, sdk3Out, DefaultEdge.create());
    network.addEdge(runner1, runner1Out, DefaultEdge.create());
    network.addEdge(runner2, runner2Out, DefaultEdge.create());
    network.addEdge(runner3, runner3Out, DefaultEdge.create());
    network.addEdge(runner4, runner4Out, DefaultEdge.create());
    network.addEdge(ambiguousFlatten, ambiguousFlattenOut, DefaultEdge.create());
    network.addEdge(sdkFlatten, sdkFlattenOut, DefaultEdge.create());
    network.addEdge(runnerFlatten, runnerFlattenOut, DefaultEdge.create());

    // Connect outputs to their consumers as drawn in the diagram above.
    network.addEdge(sdk1Out, ambiguousFlatten, DefaultEdge.create());
    network.addEdge(runner1Out, ambiguousFlatten, DefaultEdge.create());
    network.addEdge(ambiguousFlattenOut, sdkFlatten, DefaultEdge.create());
    network.addEdge(sdk2Out, sdkFlatten, DefaultEdge.create());
    network.addEdge(sdkFlattenOut, sdk3, DefaultEdge.create());
    network.addEdge(ambiguousFlattenOut, runner2, DefaultEdge.create());
    network.addEdge(runner2Out, runnerFlatten, DefaultEdge.create());
    network.addEdge(runner3Out, runnerFlatten, DefaultEdge.create());
    network.addEdge(runnerFlattenOut, runner4, DefaultEdge.create());

    // Record the paths before the rewrite, then apply the function under test.
    List<List<Node>> originalPaths = Networks.allPathsFromRootsToLeaves(network);
    network = new CloneAmbiguousFlattensFunction().apply(network);

    for (Node candidate : network.nodes()) {
        // Only flatten instructions are of interest here.
        if (!(candidate instanceof ParallelInstructionNode)) {
            continue;
        }
        ParallelInstructionNode flattenNode = (ParallelInstructionNode) candidate;
        if (flattenNode.getParallelInstruction().getFlatten() == null) {
            continue;
        }
        assertTrue("Ambiguous flatten not removed from network.", flattenNode.getExecutionLocation() != ExecutionLocation.AMBIGUOUS);
        // The non-ambiguous flattens must still be the very same instances that were added.
        if ("sdk_flatten".equals(flattenNode.getParallelInstruction().getName())) {
            assertSame("SDK flatten has been incorrectly modified.", sdkFlatten, flattenNode);
        } else if ("runner_flatten".equals(flattenNode.getParallelInstruction().getName())) {
            assertSame("Runner flatten has been incorrectly modified.", runnerFlatten, flattenNode);
        }
    }

    // The rewrite must not change the number of root-to-leaf paths.
    assertEquals(originalPaths.size(), Networks.allPathsFromRootsToLeaves(network).size());
}
Also used : Node(org.apache.beam.runners.dataflow.worker.graph.Nodes.Node) InstructionOutputNode(org.apache.beam.runners.dataflow.worker.graph.Nodes.InstructionOutputNode) ParallelInstructionNode(org.apache.beam.runners.dataflow.worker.graph.Nodes.ParallelInstructionNode) ParallelInstructionNode(org.apache.beam.runners.dataflow.worker.graph.Nodes.ParallelInstructionNode) List(java.util.List) Edge(org.apache.beam.runners.dataflow.worker.graph.Edges.Edge) DefaultEdge(org.apache.beam.runners.dataflow.worker.graph.Edges.DefaultEdge) Test(org.junit.Test)

Aggregations

Node (org.apache.beam.runners.dataflow.worker.graph.Nodes.Node)65 ParallelInstructionNode (org.apache.beam.runners.dataflow.worker.graph.Nodes.ParallelInstructionNode)64 InstructionOutputNode (org.apache.beam.runners.dataflow.worker.graph.Nodes.InstructionOutputNode)59 Edge (org.apache.beam.runners.dataflow.worker.graph.Edges.Edge)50 DefaultEdge (org.apache.beam.runners.dataflow.worker.graph.Edges.DefaultEdge)41 Test (org.junit.Test)40 ParallelInstruction (com.google.api.services.dataflow.model.ParallelInstruction)22 MultiOutputInfoEdge (org.apache.beam.runners.dataflow.worker.graph.Edges.MultiOutputInfoEdge)21 InstructionOutput (com.google.api.services.dataflow.model.InstructionOutput)17 List (java.util.List)10 OperationNode (org.apache.beam.runners.dataflow.worker.graph.Nodes.OperationNode)10 MapTask (com.google.api.services.dataflow.model.MapTask)9 ParDoInstruction (com.google.api.services.dataflow.model.ParDoInstruction)9 ReadInstruction (com.google.api.services.dataflow.model.ReadInstruction)9 RemoteGrpcPortNode (org.apache.beam.runners.dataflow.worker.graph.Nodes.RemoteGrpcPortNode)8 FlattenInstruction (com.google.api.services.dataflow.model.FlattenInstruction)7 MultiOutputInfo (com.google.api.services.dataflow.model.MultiOutputInfo)7 ArrayList (java.util.ArrayList)7 HappensBeforeEdge (org.apache.beam.runners.dataflow.worker.graph.Edges.HappensBeforeEdge)7 ParDoOperation (org.apache.beam.runners.dataflow.worker.util.common.worker.ParDoOperation)7