Search in sources :

Example 11 with Node

use of org.apache.beam.runners.dataflow.worker.graph.Nodes.Node in project beam by apache.

the class CloneAmbiguousFlattensFunctionTest method testConnectedFlattens.

/**
 * Verifies that a network containing several ambiguous flattens wired to one another has every
 * one of them cloned: after the function runs no flatten may still be marked {@code AMBIGUOUS},
 * and the number of root-to-leaf paths must equal the number counted before it ran.
 */
@Test
public void testConnectedFlattens() throws Exception {
    // sdk1+out ----\                       /-> sdk3+out
    //               -> ambig_flatten1+out -
    // runner1+out -/                       \                        /-> sdk4+out
    //                                       -> ambig_flatten3+out -
    // sdk2+out ----\                       /                        \-> runner4+out
    //               -> ambig_flatten2+out -
    // runner2+out -/                       \-> runner3+out
    MutableNetwork<Node, Edge> network = createEmptyNetwork();
    Node sdk1 = createSdkNode("sdk1");
    Node sdk2 = createSdkNode("sdk2");
    Node sdk3 = createSdkNode("sdk3");
    Node sdk4 = createSdkNode("sdk4");
    Node sdk1Out = createPCollection("sdk1.out");
    Node sdk2Out = createPCollection("sdk2.out");
    Node sdk3Out = createPCollection("sdk3.out");
    Node sdk4Out = createPCollection("sdk4.out");
    Node runner1 = createRunnerNode("runner1");
    Node runner2 = createRunnerNode("runner2");
    Node runner3 = createRunnerNode("runner3");
    Node runner4 = createRunnerNode("runner4");
    Node runner1Out = createPCollection("runner1.out");
    Node runner2Out = createPCollection("runner2.out");
    Node runner3Out = createPCollection("runner3.out");
    Node runner4Out = createPCollection("runner4.out");
    Node flatten1 = createFlatten("ambiguous_flatten1", ExecutionLocation.AMBIGUOUS);
    Node flatten2 = createFlatten("ambiguous_flatten2", ExecutionLocation.AMBIGUOUS);
    Node flatten3 = createFlatten("ambiguous_flatten3", ExecutionLocation.AMBIGUOUS);
    Node flatten1Out = createPCollection("ambiguous_flatten1.out");
    Node flatten2Out = createPCollection("ambiguous_flatten2.out");
    Node flatten3Out = createPCollection("ambiguous_flatten3.out");

    // Register every node, in a fixed order, before wiring any edges.
    Node[] nodesInInsertionOrder = {
        sdk1, sdk2, sdk3, sdk4,
        sdk1Out, sdk2Out, sdk3Out, sdk4Out,
        runner1, runner2, runner3, runner4,
        runner1Out, runner2Out, runner3Out, runner4Out,
        flatten1, flatten2, flatten3,
        flatten1Out, flatten2Out, flatten3Out
    };
    for (Node vertex : nodesInInsertionOrder) {
        network.addNode(vertex);
    }

    // Each row is {source, target}; a fresh DefaultEdge is created per row.
    Node[][] edgesInInsertionOrder = {
        // Every instruction feeds its own output PCollection.
        {sdk1, sdk1Out},
        {sdk2, sdk2Out},
        {sdk3, sdk3Out},
        {sdk4, sdk4Out},
        {runner1, runner1Out},
        {runner2, runner2Out},
        {runner3, runner3Out},
        {runner4, runner4Out},
        {flatten1, flatten1Out},
        {flatten2, flatten2Out},
        {flatten3, flatten3Out},
        // Inputs of the first two flattens: one SDK and one runner output each.
        {sdk1Out, flatten1},
        {runner1Out, flatten1},
        {sdk2Out, flatten2},
        {runner2Out, flatten2},
        // Consumers of the first two flattens, including the shared third flatten.
        {flatten1Out, sdk3},
        {flatten1Out, flatten3},
        {flatten2Out, flatten3},
        {flatten2Out, runner3},
        // Consumers of the third flatten.
        {flatten3Out, sdk4},
        {flatten3Out, runner4}
    };
    for (Node[] endpoints : edgesInInsertionOrder) {
        network.addEdge(endpoints[0], endpoints[1], DefaultEdge.create());
    }

    // Count the paths before applying the function so the two counts can be compared afterwards.
    int pathCountBefore = Networks.allPathsFromRootsToLeaves(network).size();
    network = new CloneAmbiguousFlattensFunction().apply(network);

    for (Node candidate : network.nodes()) {
        if (!(candidate instanceof ParallelInstructionNode)) {
            continue;
        }
        ParallelInstructionNode instruction = (ParallelInstructionNode) candidate;
        if (instruction.getParallelInstruction().getFlatten() == null) {
            // Not a flatten instruction; its location is irrelevant to this test.
            continue;
        }
        assertTrue("Ambiguous flatten not removed from network.", instruction.getExecutionLocation() != ExecutionLocation.AMBIGUOUS);
    }

    int pathCountAfter = Networks.allPathsFromRootsToLeaves(network).size();
    assertEquals(pathCountBefore, pathCountAfter);
}
Also used : Node(org.apache.beam.runners.dataflow.worker.graph.Nodes.Node) InstructionOutputNode(org.apache.beam.runners.dataflow.worker.graph.Nodes.InstructionOutputNode) ParallelInstructionNode(org.apache.beam.runners.dataflow.worker.graph.Nodes.ParallelInstructionNode) List(java.util.List) Edge(org.apache.beam.runners.dataflow.worker.graph.Edges.Edge) DefaultEdge(org.apache.beam.runners.dataflow.worker.graph.Edges.DefaultEdge) Test(org.junit.Test)

Example 12 with Node

use of org.apache.beam.runners.dataflow.worker.graph.Nodes.Node in project beam by apache.

the class DeduceFlattenLocationsFunctionTest method assertSingleFlattenLocationDeduction.

/**
 * Helper for single-flatten deduction tests. Builds a single-flatten network from the given
 * aggregated predecessor and successor locations, applies {@code DeduceFlattenLocationsFunction}
 * to it, and asserts that the node named "flatten" ends up with the expected {@code
 * ExecutionLocation}.
 */
private static void assertSingleFlattenLocationDeduction(ExecutionLocation predecessorLocations, ExecutionLocation successorLocations, ExecutionLocation expectedLocation) throws Exception {
    MutableNetwork<Node, Edge> deduced = new DeduceFlattenLocationsFunction().apply(createSingleFlattenNetwork(predecessorLocations, successorLocations));
    assertEquals(expectedLocation, getExecutionLocationOf("flatten", deduced));
}
Also used : Node(org.apache.beam.runners.dataflow.worker.graph.Nodes.Node) InstructionOutputNode(org.apache.beam.runners.dataflow.worker.graph.Nodes.InstructionOutputNode) ParallelInstructionNode(org.apache.beam.runners.dataflow.worker.graph.Nodes.ParallelInstructionNode) ExecutionLocation(org.apache.beam.runners.dataflow.worker.graph.Nodes.ExecutionLocation) Edge(org.apache.beam.runners.dataflow.worker.graph.Edges.Edge) DefaultEdge(org.apache.beam.runners.dataflow.worker.graph.Edges.DefaultEdge)

Example 13 with Node

use of org.apache.beam.runners.dataflow.worker.graph.Nodes.Node in project beam by apache.

the class DeduceFlattenLocationsFunctionTest method testDeductionOfChainedFlattens.

/**
 * Tests that when multiple flattens (each with its own output PCollection) are chained together,
 * every one of them gets a deduced location: a flatten fed only by SDK nodes is expected to
 * deduce to {@code SDK_HARNESS}, one fed only by runner nodes to {@code RUNNER_HARNESS}, and the
 * downstream flatten that merges both and feeds both kinds of consumers to {@code AMBIGUOUS}.
 */
@Test
public void testDeductionOfChainedFlattens() throws Exception {
    // sdk_node1 --> out -----\
    //                         -> flatten1 --> out --\                    /-> sdk_node3 --> out
    // sdk_node2 --> out -----/                       \                  /
    //                                                 -> flatten3 --> out
    // runner_node1 --> out --\                       /                  \
    //                         -> flatten2 --> out --/                    \-> runner_node3 --> out
    // runner_node2 --> out --/
    MutableNetwork<Node, Edge> network = createEmptyNetwork();
    Node sdkNode1 = createSdkNode("sdk_node1");
    Node sdkNode1Output = createPCollection("sdk_node1.out");
    Node sdkNode2 = createSdkNode("sdk_node2");
    Node sdkNode2Output = createPCollection("sdk_node2.out");
    Node sdkNode3 = createSdkNode("sdk_node3");
    Node sdkNode3Output = createPCollection("sdk_node3.out");
    Node runnerNode1 = createRunnerNode("runner_node1");
    Node runnerNode1Output = createPCollection("runner_node1.out");
    Node runnerNode2 = createRunnerNode("runner_node2");
    Node runnerNode2Output = createPCollection("runner_node2.out");
    Node runnerNode3 = createRunnerNode("runner_node3");
    Node runnerNode3Output = createPCollection("runner_node3.out");
    Node flatten1 = createFlatten("flatten1");
    Node flatten1Output = createPCollection("flatten1.out");
    Node flatten2 = createFlatten("flatten2");
    Node flatten2Output = createPCollection("flatten2.out");
    Node flatten3 = createFlatten("flatten3");
    Node flatten3Output = createPCollection("flatten3.out");
    // NOTE(review): the SDK/runner "*.out" PCollections are never passed to addNode; this relies
    // on addEdge adding missing endpoints (documented Guava MutableNetwork behavior) - confirm
    // createEmptyNetwork() returns such a network.
    network.addNode(sdkNode1);
    network.addNode(sdkNode2);
    network.addNode(sdkNode3);
    network.addNode(runnerNode1);
    network.addNode(runnerNode2);
    network.addNode(runnerNode3);
    network.addNode(flatten1);
    network.addNode(flatten1Output);
    network.addNode(flatten2);
    network.addNode(flatten2Output);
    network.addNode(flatten3);
    network.addNode(flatten3Output);
    network.addEdge(sdkNode1, sdkNode1Output, DefaultEdge.create());
    network.addEdge(sdkNode2, sdkNode2Output, DefaultEdge.create());
    network.addEdge(runnerNode1, runnerNode1Output, DefaultEdge.create());
    network.addEdge(runnerNode2, runnerNode2Output, DefaultEdge.create());
    network.addEdge(sdkNode1Output, flatten1, DefaultEdge.create());
    network.addEdge(sdkNode2Output, flatten1, DefaultEdge.create());
    network.addEdge(runnerNode1Output, flatten2, DefaultEdge.create());
    network.addEdge(runnerNode2Output, flatten2, DefaultEdge.create());
    network.addEdge(flatten1, flatten1Output, DefaultEdge.create());
    network.addEdge(flatten2, flatten2Output, DefaultEdge.create());
    network.addEdge(flatten1Output, flatten3, DefaultEdge.create());
    network.addEdge(flatten2Output, flatten3, DefaultEdge.create());
    network.addEdge(flatten3, flatten3Output, DefaultEdge.create());
    network.addEdge(flatten3Output, sdkNode3, DefaultEdge.create());
    network.addEdge(flatten3Output, runnerNode3, DefaultEdge.create());
    network.addEdge(sdkNode3, sdkNode3Output, DefaultEdge.create());
    network.addEdge(runnerNode3, runnerNode3Output, DefaultEdge.create());
    network = new DeduceFlattenLocationsFunction().apply(network);
    // assertEquals takes (expected, actual); keeping that order makes a failure message report
    // the deduced location as the actual value rather than as the expectation.
    ExecutionLocation flatten1Location = getExecutionLocationOf("flatten1", network);
    assertEquals(ExecutionLocation.SDK_HARNESS, flatten1Location);
    ExecutionLocation flatten2Location = getExecutionLocationOf("flatten2", network);
    assertEquals(ExecutionLocation.RUNNER_HARNESS, flatten2Location);
    ExecutionLocation flatten3Location = getExecutionLocationOf("flatten3", network);
    assertEquals(ExecutionLocation.AMBIGUOUS, flatten3Location);
}
Also used : Node(org.apache.beam.runners.dataflow.worker.graph.Nodes.Node) InstructionOutputNode(org.apache.beam.runners.dataflow.worker.graph.Nodes.InstructionOutputNode) ParallelInstructionNode(org.apache.beam.runners.dataflow.worker.graph.Nodes.ParallelInstructionNode) ExecutionLocation(org.apache.beam.runners.dataflow.worker.graph.Nodes.ExecutionLocation) Edge(org.apache.beam.runners.dataflow.worker.graph.Edges.Edge) DefaultEdge(org.apache.beam.runners.dataflow.worker.graph.Edges.DefaultEdge) Test(org.junit.Test)

Example 14 with Node

use of org.apache.beam.runners.dataflow.worker.graph.Nodes.Node in project beam by apache.

the class DeduceFlattenLocationsFunctionTest method createSingleFlattenNetwork.

/**
 * In order to test the result of deducing a single flatten's result, this returns a network of a
 * single flatten with a PCollection, with predecessors and successors with specified {@link
 * ExecutionLocation}s. A location of {@code AMBIGUOUS} passed as a parameter for this function
 * indicates to include both predecessors/successors, while any value other than {@code
 * SDK_HARNESS}, {@code RUNNER_HARNESS} or {@code AMBIGUOUS} (e.g. {@code UNKNOWN}) indicates to
 * include no predecessors/successors.
 *
 * <p>This function promises that the single flatten node will be named "flatten" and that the
 * network will be structured as follows:
 *
 * <pre>{@code
 * sdk_predecessor --> out -----\                          /--> sdk_successor --> out
 *                               -> flatten --> pcollection
 * runner_predecessor --> out --/                          \--> runner_successor --> out
 * }</pre>
 *
 * <p>With the possibility of one or both predecessor/successor being omitted depending on the
 * parameters.
 *
 * @param predecessorLocations which kinds of predecessor nodes feed the flatten
 * @param successorLocations which kinds of successor nodes consume the flatten's PCollection
 * @return the newly built network
 */
private static MutableNetwork<Node, Edge> createSingleFlattenNetwork(ExecutionLocation predecessorLocations, ExecutionLocation successorLocations) throws Exception {
    MutableNetwork<Node, Edge> network = createEmptyNetwork();
    Node flatten = createFlatten("flatten");
    Node flattenOutput = createPCollection("pcollection");
    network.addNode(flatten);
    network.addNode(flattenOutput);
    network.addEdge(flatten, flattenOutput, DefaultEdge.create());
    // Predecessors: instruction --> its output PCollection --> flatten.
    if (predecessorLocations == ExecutionLocation.SDK_HARNESS || predecessorLocations == ExecutionLocation.AMBIGUOUS) {
        Node node = createSdkNode("sdk_predecessor");
        Node out = createPCollection("sdk_predecessor.out");
        network.addNode(node);
        network.addNode(out);
        network.addEdge(node, out, DefaultEdge.create());
        network.addEdge(out, flatten, DefaultEdge.create());
    }
    if (predecessorLocations == ExecutionLocation.RUNNER_HARNESS || predecessorLocations == ExecutionLocation.AMBIGUOUS) {
        Node node = createRunnerNode("runner_predecessor");
        Node out = createPCollection("runner_predecessor.out");
        network.addNode(node);
        network.addNode(out);
        network.addEdge(node, out, DefaultEdge.create());
        network.addEdge(out, flatten, DefaultEdge.create());
    }
    // Successors consume the flatten's output PCollection, not the flatten instruction itself,
    // so that the network matches the documented shape: flatten --> pcollection --> successor.
    if (successorLocations == ExecutionLocation.SDK_HARNESS || successorLocations == ExecutionLocation.AMBIGUOUS) {
        Node node = createSdkNode("sdk_successor");
        Node out = createPCollection("sdk_successor.out");
        network.addNode(node);
        network.addNode(out);
        network.addEdge(flattenOutput, node, DefaultEdge.create());
        network.addEdge(node, out, DefaultEdge.create());
    }
    if (successorLocations == ExecutionLocation.RUNNER_HARNESS || successorLocations == ExecutionLocation.AMBIGUOUS) {
        Node node = createRunnerNode("runner_successor");
        Node out = createPCollection("runner_successor.out");
        network.addNode(node);
        network.addNode(out);
        network.addEdge(flattenOutput, node, DefaultEdge.create());
        network.addEdge(node, out, DefaultEdge.create());
    }
    return network;
}
Also used : Node(org.apache.beam.runners.dataflow.worker.graph.Nodes.Node) InstructionOutputNode(org.apache.beam.runners.dataflow.worker.graph.Nodes.InstructionOutputNode) ParallelInstructionNode(org.apache.beam.runners.dataflow.worker.graph.Nodes.ParallelInstructionNode) Edge(org.apache.beam.runners.dataflow.worker.graph.Edges.Edge) DefaultEdge(org.apache.beam.runners.dataflow.worker.graph.Edges.DefaultEdge)

Example 15 with Node

use of org.apache.beam.runners.dataflow.worker.graph.Nodes.Node in project beam by apache.

the class DeduceNodeLocationsFunctionTest method testMultipleNodesDeduced.

/**
 * Applies {@code DeduceNodeLocationsFunction} to a small fan-in/fan-out network of read and ParDo
 * nodes, then checks (via {@code assertThatNetworksAreIdentical}) the result against a snapshot
 * taken beforehand and (via {@code assertAllNodesDeducedExceptFlattens}) the nodes' deduction.
 */
@Test
public void testMultipleNodesDeduced() throws Exception {
    // A --\      /--> C
    //      -> E -
    // B --/      \--> D
    Node readA = createReadNode("A", CUSTOM_SOURCE);
    Node readB = createReadNode("B", RUNNER_SOURCE);
    Node parDoC = createParDoNode("C", "RunnerDoFn");
    Node parDoD = createParDoNode("D", DO_FN);
    Node parDoE = createParDoNode("E", DO_FN);

    MutableNetwork<Node, Edge> network = createEmptyNetwork();
    for (Node vertex : new Node[] {readA, readB, parDoC, parDoD, parDoE}) {
        network.addNode(vertex);
    }
    // Both reads fan in to E ...
    network.addEdge(readA, parDoE, DefaultEdge.create());
    network.addEdge(readB, parDoE, DefaultEdge.create());
    // ... and E fans out to C and D.
    network.addEdge(parDoE, parDoC, DefaultEdge.create());
    network.addEdge(parDoE, parDoD, DefaultEdge.create());

    // Immutable copy taken before the function runs, used as the comparison baseline.
    Network<Node, Edge> snapshotBeforeDeduction = ImmutableNetwork.copyOf(network);
    network = new DeduceNodeLocationsFunction().apply(network);

    assertThatNetworksAreIdentical(snapshotBeforeDeduction, network);
    assertAllNodesDeducedExceptFlattens(network);
}
Also used : InstructionOutputNode(org.apache.beam.runners.dataflow.worker.graph.Nodes.InstructionOutputNode) ParallelInstructionNode(org.apache.beam.runners.dataflow.worker.graph.Nodes.ParallelInstructionNode) Node(org.apache.beam.runners.dataflow.worker.graph.Nodes.Node) Edge(org.apache.beam.runners.dataflow.worker.graph.Edges.Edge) DefaultEdge(org.apache.beam.runners.dataflow.worker.graph.Edges.DefaultEdge) Test(org.junit.Test)

Aggregations

Node (org.apache.beam.runners.dataflow.worker.graph.Nodes.Node)65 ParallelInstructionNode (org.apache.beam.runners.dataflow.worker.graph.Nodes.ParallelInstructionNode)64 InstructionOutputNode (org.apache.beam.runners.dataflow.worker.graph.Nodes.InstructionOutputNode)59 Edge (org.apache.beam.runners.dataflow.worker.graph.Edges.Edge)50 DefaultEdge (org.apache.beam.runners.dataflow.worker.graph.Edges.DefaultEdge)41 Test (org.junit.Test)40 ParallelInstruction (com.google.api.services.dataflow.model.ParallelInstruction)22 MultiOutputInfoEdge (org.apache.beam.runners.dataflow.worker.graph.Edges.MultiOutputInfoEdge)21 InstructionOutput (com.google.api.services.dataflow.model.InstructionOutput)17 List (java.util.List)10 OperationNode (org.apache.beam.runners.dataflow.worker.graph.Nodes.OperationNode)10 MapTask (com.google.api.services.dataflow.model.MapTask)9 ParDoInstruction (com.google.api.services.dataflow.model.ParDoInstruction)9 ReadInstruction (com.google.api.services.dataflow.model.ReadInstruction)9 RemoteGrpcPortNode (org.apache.beam.runners.dataflow.worker.graph.Nodes.RemoteGrpcPortNode)8 FlattenInstruction (com.google.api.services.dataflow.model.FlattenInstruction)7 MultiOutputInfo (com.google.api.services.dataflow.model.MultiOutputInfo)7 ArrayList (java.util.ArrayList)7 HappensBeforeEdge (org.apache.beam.runners.dataflow.worker.graph.Edges.HappensBeforeEdge)7 ParDoOperation (org.apache.beam.runners.dataflow.worker.util.common.worker.ParDoOperation)7