Search in sources:

Example 36 with ParallelInstructionNode

use of org.apache.beam.runners.dataflow.worker.graph.Nodes.ParallelInstructionNode in project beam by apache.

the class ReplacePgbkWithPrecombineFunctionTest method testPrecombinePgbkIsReplaced.

@Test
public void testPrecombinePgbkIsReplaced() throws Exception {
    // Network:
    // out1 --> precombine_pgbk --> out2
    Map<String, Object> valueCombiningFn = new HashMap<>();
    Node out1 = createInstructionOutputNode("out1");
    String pgbkName = "precombine_pgbk";
    Node precombinePgbk = createPrecombinePgbkNode(pgbkName, valueCombiningFn);
    Node out2 = createInstructionOutputNode("out2");
    MutableNetwork<Node, Edge> network = createEmptyNetwork();
    network.addNode(out1);
    network.addNode(precombinePgbk);
    network.addNode(out2);
    network.addEdge(out1, precombinePgbk, DefaultEdge.create());
    network.addEdge(precombinePgbk, out2, DefaultEdge.create());
    Network<Node, Edge> inputNetwork = ImmutableNetwork.copyOf(network);
    network = new ReplacePgbkWithPrecombineFunction().apply(network);
    // Assert that network has same structure (same number of nodes and paths).
    assertEquals(inputNetwork.nodes().size(), network.nodes().size());
    assertEquals(inputNetwork.edges().size(), network.edges().size());
    List<List<Node>> oldPaths = Networks.allPathsFromRootsToLeaves(inputNetwork);
    List<List<Node>> newPaths = Networks.allPathsFromRootsToLeaves(network);
    assertEquals(oldPaths.size(), newPaths.size());
    // Assert that the pgbk node has been replaced.
    for (Node node : network.nodes()) {
        if (node instanceof ParallelInstructionNode) {
            ParallelInstructionNode createdCombineNode = (ParallelInstructionNode) node;
            ParallelInstruction parallelInstruction = createdCombineNode.getParallelInstruction();
            assertEquals(parallelInstruction.getName(), pgbkName);
            assertNull(parallelInstruction.getPartialGroupByKey());
            assertNotNull(parallelInstruction.getParDo());
            ParDoInstruction parDoInstruction = parallelInstruction.getParDo();
            assertEquals(parDoInstruction.getUserFn(), valueCombiningFn);
            break;
        }
    }
}
Also used : HashMap(java.util.HashMap) InstructionOutputNode(org.apache.beam.runners.dataflow.worker.graph.Nodes.InstructionOutputNode) ParallelInstructionNode(org.apache.beam.runners.dataflow.worker.graph.Nodes.ParallelInstructionNode) Node(org.apache.beam.runners.dataflow.worker.graph.Nodes.Node) ParallelInstructionNode(org.apache.beam.runners.dataflow.worker.graph.Nodes.ParallelInstructionNode) ParallelInstruction(com.google.api.services.dataflow.model.ParallelInstruction) ParDoInstruction(com.google.api.services.dataflow.model.ParDoInstruction) List(java.util.List) Edge(org.apache.beam.runners.dataflow.worker.graph.Edges.Edge) DefaultEdge(org.apache.beam.runners.dataflow.worker.graph.Edges.DefaultEdge) Test(org.junit.Test)

Example 37 with ParallelInstructionNode

use of org.apache.beam.runners.dataflow.worker.graph.Nodes.ParallelInstructionNode in project beam by apache.

the class CloneAmbiguousFlattensFunctionTest method testNonAmbiguousFlattens.

/**
 * Builds a network holding one ambiguous flatten alongside an SDK-harness flatten and a
 * runner-harness flatten, applies {@code CloneAmbiguousFlattensFunction}, and checks that no
 * flatten is left ambiguous, that the two non-ambiguous flatten nodes are still the very same
 * instances, and that the number of root-to-leaf paths is unchanged.
 */
@Test
public void testNonAmbiguousFlattens() throws Exception {
    // Topology under test:
    //
    //                  sdk2+out -\
    //                             sdk_flatten+out --> sdk3+out
    // sdk1+out ----\             /
    //               ambig_flatten+out
    // runner1+out -/             \-> runner2+out -\
    //                                              runner_flatten+out --> runner4+out
    //                                runner3+out -/
    MutableNetwork<Node, Edge> graph = createEmptyNetwork();

    // Instruction nodes and their output collections.
    Node sdkOp1 = createSdkNode("sdk1");
    Node sdkOp2 = createSdkNode("sdk2");
    Node sdkOp3 = createSdkNode("sdk3");
    Node sdkOp1Out = createPCollection("sdk1.out");
    Node sdkOp2Out = createPCollection("sdk2.out");
    Node sdkOp3Out = createPCollection("sdk3.out");
    Node runnerOp1 = createRunnerNode("runner1");
    Node runnerOp2 = createRunnerNode("runner2");
    Node runnerOp3 = createRunnerNode("runner3");
    Node runnerOp4 = createRunnerNode("runner4");
    Node runnerOp1Out = createPCollection("runner1.out");
    Node runnerOp2Out = createPCollection("runner2.out");
    Node runnerOp3Out = createPCollection("runner3.out");
    Node runnerOp4Out = createPCollection("runner4.out");

    // The three flattens: one ambiguous, one pinned to each harness.
    Node ambigFlatten = createFlatten("ambiguous_flatten", ExecutionLocation.AMBIGUOUS);
    Node ambigFlattenOut = createPCollection("ambiguous_flatten.out");
    Node sdkSideFlatten = createFlatten("sdk_flatten", ExecutionLocation.SDK_HARNESS);
    Node sdkSideFlattenOut = createPCollection("sdk_flatten.out");
    Node runnerSideFlatten = createFlatten("runner_flatten", ExecutionLocation.RUNNER_HARNESS);
    Node runnerSideFlattenOut = createPCollection("runner_flatten.out");

    // Register every node with the network.
    graph.addNode(sdkOp1);
    graph.addNode(sdkOp2);
    graph.addNode(sdkOp3);
    graph.addNode(sdkOp1Out);
    graph.addNode(sdkOp2Out);
    graph.addNode(sdkOp3Out);
    graph.addNode(runnerOp1);
    graph.addNode(runnerOp2);
    graph.addNode(runnerOp3);
    graph.addNode(runnerOp4);
    graph.addNode(runnerOp1Out);
    graph.addNode(runnerOp2Out);
    graph.addNode(runnerOp3Out);
    graph.addNode(runnerOp4Out);
    graph.addNode(ambigFlatten);
    graph.addNode(ambigFlattenOut);
    graph.addNode(sdkSideFlatten);
    graph.addNode(sdkSideFlattenOut);
    graph.addNode(runnerSideFlatten);
    graph.addNode(runnerSideFlattenOut);

    // Each instruction feeds its own output collection.
    graph.addEdge(sdkOp1, sdkOp1Out, DefaultEdge.create());
    graph.addEdge(sdkOp2, sdkOp2Out, DefaultEdge.create());
    graph.addEdge(sdkOp3, sdkOp3Out, DefaultEdge.create());
    graph.addEdge(runnerOp1, runnerOp1Out, DefaultEdge.create());
    graph.addEdge(runnerOp2, runnerOp2Out, DefaultEdge.create());
    graph.addEdge(runnerOp3, runnerOp3Out, DefaultEdge.create());
    graph.addEdge(runnerOp4, runnerOp4Out, DefaultEdge.create());
    graph.addEdge(ambigFlatten, ambigFlattenOut, DefaultEdge.create());
    graph.addEdge(sdkSideFlatten, sdkSideFlattenOut, DefaultEdge.create());
    graph.addEdge(runnerSideFlatten, runnerSideFlattenOut, DefaultEdge.create());

    // Wiring between the stages, following the diagram above.
    graph.addEdge(sdkOp1Out, ambigFlatten, DefaultEdge.create());
    graph.addEdge(runnerOp1Out, ambigFlatten, DefaultEdge.create());
    graph.addEdge(ambigFlattenOut, sdkSideFlatten, DefaultEdge.create());
    graph.addEdge(sdkOp2Out, sdkSideFlatten, DefaultEdge.create());
    graph.addEdge(sdkSideFlattenOut, sdkOp3, DefaultEdge.create());
    graph.addEdge(ambigFlattenOut, runnerOp2, DefaultEdge.create());
    graph.addEdge(runnerOp2Out, runnerSideFlatten, DefaultEdge.create());
    graph.addEdge(runnerOp3Out, runnerSideFlatten, DefaultEdge.create());
    graph.addEdge(runnerSideFlattenOut, runnerOp4, DefaultEdge.create());

    // Record the path count, run the function, then inspect every flatten that remains.
    List<List<Node>> pathsBefore = Networks.allPathsFromRootsToLeaves(graph);
    graph = new CloneAmbiguousFlattensFunction().apply(graph);
    for (Node candidate : graph.nodes()) {
        if (!(candidate instanceof ParallelInstructionNode)) {
            continue;
        }
        ParallelInstructionNode instructionNode = (ParallelInstructionNode) candidate;
        if (instructionNode.getParallelInstruction().getFlatten() == null) {
            continue;
        }
        assertTrue("Ambiguous flatten not removed from network.", instructionNode.getExecutionLocation() != ExecutionLocation.AMBIGUOUS);
        String instructionName = instructionNode.getParallelInstruction().getName();
        if ("sdk_flatten".equals(instructionName)) {
            assertSame("SDK flatten has been incorrectly modified.", sdkSideFlatten, instructionNode);
        } else if ("runner_flatten".equals(instructionName)) {
            assertSame("Runner flatten has been incorrectly modified.", runnerSideFlatten, instructionNode);
        }
    }
    List<List<Node>> pathsAfter = Networks.allPathsFromRootsToLeaves(graph);
    assertEquals(pathsBefore.size(), pathsAfter.size());
}
Also used : Node(org.apache.beam.runners.dataflow.worker.graph.Nodes.Node) InstructionOutputNode(org.apache.beam.runners.dataflow.worker.graph.Nodes.InstructionOutputNode) ParallelInstructionNode(org.apache.beam.runners.dataflow.worker.graph.Nodes.ParallelInstructionNode) ParallelInstructionNode(org.apache.beam.runners.dataflow.worker.graph.Nodes.ParallelInstructionNode) List(java.util.List) Edge(org.apache.beam.runners.dataflow.worker.graph.Edges.Edge) DefaultEdge(org.apache.beam.runners.dataflow.worker.graph.Edges.DefaultEdge) Test(org.junit.Test)

Example 38 with ParallelInstructionNode

use of org.apache.beam.runners.dataflow.worker.graph.Nodes.ParallelInstructionNode in project beam by apache.

the class CloneAmbiguousFlattensFunctionTest method testSingleFlatten.

/**
 * Applies {@code CloneAmbiguousFlattensFunction} to a network with a single ambiguous flatten and
 * checks that an SDK-harness flatten and a runner-harness flatten both exist afterwards, that
 * each has two predecessors, that their outputs lead to the expected successors, and that the
 * number of root-to-leaf paths did not change.
 */
@Test
public void testSingleFlatten() throws Exception {
    // Before:
    // sdk_predecessor -----> out -\                          /-> sdk_successor --> out
    //                              ambiguous_flatten --> out --> no_location_successor --> out
    // runner_predecessor --> out -/                          \-> runner_successor --> out
    MutableNetwork<Node, Edge> graph = createEmptyNetwork();
    Node sdkUpstream = createSdkNode("sdk_predecessor");
    Node runnerUpstream = createRunnerNode("runner_predecessor");
    Node sdkUpstreamOut = createPCollection("sdk_predecessor.out");
    Node runnerUpstreamOut = createPCollection("runner_predecessor.out");
    Node ambigFlatten = createFlatten("ambiguous_flatten", ExecutionLocation.AMBIGUOUS);
    Node ambigFlattenOut = createPCollection("ambiguous_flatten.out");
    Node sdkDownstream = createSdkNode("sdk_successor");
    Node runnerDownstream = createRunnerNode("runner_successor");
    Node unplacedDownstream = createNoLocationNode();
    Node sdkDownstreamOut = createPCollection("sdk_successor.out");
    Node runnerDownstreamOut = createPCollection("runner_successor.out");
    Node unplacedDownstreamOut = createPCollection("no_location_successor.out");

    // Register every node with the network.
    graph.addNode(sdkUpstream);
    graph.addNode(runnerUpstream);
    graph.addNode(sdkUpstreamOut);
    graph.addNode(runnerUpstreamOut);
    graph.addNode(ambigFlatten);
    graph.addNode(ambigFlattenOut);
    graph.addNode(sdkDownstream);
    graph.addNode(runnerDownstream);
    graph.addNode(unplacedDownstream);
    graph.addNode(sdkDownstreamOut);
    graph.addNode(runnerDownstreamOut);
    graph.addNode(unplacedDownstreamOut);

    // Predecessors feed the flatten; the flatten output fans out to the three successors.
    graph.addEdge(sdkUpstream, sdkUpstreamOut, DefaultEdge.create());
    graph.addEdge(runnerUpstream, runnerUpstreamOut, DefaultEdge.create());
    graph.addEdge(sdkUpstreamOut, ambigFlatten, DefaultEdge.create());
    graph.addEdge(runnerUpstreamOut, ambigFlatten, DefaultEdge.create());
    graph.addEdge(ambigFlatten, ambigFlattenOut, DefaultEdge.create());
    graph.addEdge(ambigFlattenOut, sdkDownstream, DefaultEdge.create());
    graph.addEdge(ambigFlattenOut, runnerDownstream, DefaultEdge.create());
    graph.addEdge(ambigFlattenOut, unplacedDownstream, DefaultEdge.create());
    graph.addEdge(sdkDownstream, sdkDownstreamOut, DefaultEdge.create());
    graph.addEdge(runnerDownstream, runnerDownstreamOut, DefaultEdge.create());
    graph.addEdge(unplacedDownstream, unplacedDownstreamOut, DefaultEdge.create());

    // After:
    // SdkPredecessor -----> out --> SdkFlatten  --> out --> SdkSuccessor --> out
    //                           X
    // RunnerPredecessor --> out --> RunnerFlatten --> out --> RunnerSuccessor --> out
    //                                                    \-> NoLocationSuccessor --> out
    List<List<Node>> pathsBefore = Networks.allPathsFromRootsToLeaves(graph);
    graph = new CloneAmbiguousFlattensFunction().apply(graph);

    // Pick out the SDK-side and runner-side flattens from whatever flattens remain.
    ParallelInstructionNode sdkClone = null;
    ParallelInstructionNode runnerClone = null;
    for (Node candidate : graph.nodes()) {
        if (!(candidate instanceof ParallelInstructionNode)) {
            continue;
        }
        ParallelInstructionNode flattenNode = (ParallelInstructionNode) candidate;
        if (flattenNode.getParallelInstruction().getFlatten() == null) {
            continue;
        }
        ExecutionLocation location = flattenNode.getExecutionLocation();
        if (location == ExecutionLocation.SDK_HARNESS) {
            sdkClone = flattenNode;
        } else if (location == ExecutionLocation.RUNNER_HARNESS) {
            runnerClone = flattenNode;
        } else {
            assertTrue("Ambiguous flatten not removed from network.", location != ExecutionLocation.AMBIGUOUS);
        }
    }
    assertNotNull("Ambiguous flatten was not cloned into sdk flatten.", sdkClone);
    assertNotNull("Ambiguous flatten was not cloned into runner flatten.", runnerClone);

    // Each clone must have exactly one output node.
    Node sdkCloneOut = Iterables.getOnlyElement(graph.successors(sdkClone));
    Node runnerCloneOut = Iterables.getOnlyElement(graph.successors(runnerClone));

    // Both clones keep both predecessors; successors are split between the two outputs.
    assertEquals(2, graph.predecessors(sdkClone).size());
    assertEquals(2, graph.predecessors(runnerClone).size());
    assertEquals(1, graph.successors(sdkCloneOut).size());
    assertEquals(2, graph.successors(runnerCloneOut).size());
    assertSame(sdkDownstream, Iterables.getOnlyElement(graph.successors(sdkCloneOut)));
    assertThat(graph.successors(runnerCloneOut), hasItems(runnerDownstream, unplacedDownstream));

    List<List<Node>> pathsAfter = Networks.allPathsFromRootsToLeaves(graph);
    assertEquals(pathsBefore.size(), pathsAfter.size());
}
Also used : Node(org.apache.beam.runners.dataflow.worker.graph.Nodes.Node) InstructionOutputNode(org.apache.beam.runners.dataflow.worker.graph.Nodes.InstructionOutputNode) ParallelInstructionNode(org.apache.beam.runners.dataflow.worker.graph.Nodes.ParallelInstructionNode) ParallelInstructionNode(org.apache.beam.runners.dataflow.worker.graph.Nodes.ParallelInstructionNode) List(java.util.List) Edge(org.apache.beam.runners.dataflow.worker.graph.Edges.Edge) DefaultEdge(org.apache.beam.runners.dataflow.worker.graph.Edges.DefaultEdge) Test(org.junit.Test)

Example 39 with ParallelInstructionNode

use of org.apache.beam.runners.dataflow.worker.graph.Nodes.ParallelInstructionNode in project beam by apache.

the class LengthPrefixUnknownCodersTest method testLengthPrefixAndReplaceForRunnerNetwork.

/**
 * Runs {@code andReplaceForRunnerNetwork} over a two-node network (a read instruction and its
 * output) and checks that the instructions and outputs of the resulting network match, as JSON,
 * those of nodes built directly with {@code prefixedAndReplacedWindowedValueCoder}.
 */
@Test
public void testLengthPrefixAndReplaceForRunnerNetwork() throws Exception {
    // Input network: Read --> Read.out, both using the plain windowed value coder.
    Node sourceRead = createReadNode("Read", "Source", windowedValueCoder);
    Edge readToOutput = DefaultEdge.create();
    Node sourceReadOut = createInstructionOutputNode("Read.out", windowedValueCoder);
    MutableNetwork<Node, Edge> inputGraph = createEmptyNetwork();
    inputGraph.addNode(sourceRead);
    inputGraph.addNode(sourceReadOut);
    inputGraph.addEdge(sourceRead, sourceReadOut, readToOutput);

    // Expected counterparts, built with the already prefixed-and-replaced coder.
    ParallelInstructionNode expectedRead = createReadNode("Read", "Source", prefixedAndReplacedWindowedValueCoder);
    InstructionOutputNode expectedReadOut = createInstructionOutputNode("Read.out", prefixedAndReplacedWindowedValueCoder);

    MutableNetwork<Node, Edge> rewrittenGraph = andReplaceForRunnerNetwork(inputGraph);

    // Gather the JSON model object carried by each node of the rewritten network.
    ImmutableSet.Builder<GenericJson> collected = ImmutableSet.builder();
    for (Node candidate : rewrittenGraph.nodes()) {
        if (candidate instanceof ParallelInstructionNode) {
            ParallelInstructionNode instructionNode = (ParallelInstructionNode) candidate;
            collected.add(instructionNode.getParallelInstruction());
            continue;
        }
        if (candidate instanceof InstructionOutputNode) {
            InstructionOutputNode outputNode = (InstructionOutputNode) candidate;
            collected.add(outputNode.getInstructionOutput());
        }
    }
    assertThat(collected.build(), containsInAnyOrder(jsonOf(expectedReadOut.getInstructionOutput()), jsonOf(expectedRead.getParallelInstruction())));
}
Also used : GenericJson(com.google.api.client.json.GenericJson) LengthPrefixUnknownCoders.forInstructionOutputNode(org.apache.beam.runners.dataflow.worker.graph.LengthPrefixUnknownCoders.forInstructionOutputNode) InstructionOutputNode(org.apache.beam.runners.dataflow.worker.graph.Nodes.InstructionOutputNode) ImmutableSet(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableSet) LengthPrefixUnknownCoders.forInstructionOutputNode(org.apache.beam.runners.dataflow.worker.graph.LengthPrefixUnknownCoders.forInstructionOutputNode) InstructionOutputNode(org.apache.beam.runners.dataflow.worker.graph.Nodes.InstructionOutputNode) ParallelInstructionNode(org.apache.beam.runners.dataflow.worker.graph.Nodes.ParallelInstructionNode) Node(org.apache.beam.runners.dataflow.worker.graph.Nodes.Node) RemoteGrpcPortNode(org.apache.beam.runners.dataflow.worker.graph.Nodes.RemoteGrpcPortNode) ParallelInstructionNode(org.apache.beam.runners.dataflow.worker.graph.Nodes.ParallelInstructionNode) Edge(org.apache.beam.runners.dataflow.worker.graph.Edges.Edge) DefaultEdge(org.apache.beam.runners.dataflow.worker.graph.Edges.DefaultEdge) Test(org.junit.Test)

Example 40 with ParallelInstructionNode

use of org.apache.beam.runners.dataflow.worker.graph.Nodes.ParallelInstructionNode in project beam by apache.

the class LengthPrefixUnknownCodersTest method testLengthPrefixForInstructionOutputNodeWithNonGrpcNodeNeighbor.

/**
 * Connects a read instruction node to {@code instructionOutputNode} and checks that the codec of
 * the node returned by {@code forInstructionOutputNode(network)} equals, as JSON, the cloud
 * object form of {@code windowedValueCoder}.
 */
@Test
public void testLengthPrefixForInstructionOutputNodeWithNonGrpcNodeNeighbor() {
    MutableNetwork<Node, Edge> graph = createEmptyNetwork();
    ParallelInstructionNode reader = createReadNode("read", "source", windowedValueCoder);
    graph.addNode(instructionOutputNode);
    graph.addNode(reader);
    graph.addEdge(reader, instructionOutputNode, DefaultEdge.create());

    // Apply the function to the output node, whose only neighbor is the read instruction.
    InstructionOutputNode processedNode = (InstructionOutputNode) forInstructionOutputNode(graph).apply(instructionOutputNode);
    assertEqualsAsJson(
        CloudObjects.asCloudObject(windowedValueCoder, /*sdkComponents=*/ null),
        processedNode.getInstructionOutput().getCodec());
}
Also used : LengthPrefixUnknownCoders.forInstructionOutputNode(org.apache.beam.runners.dataflow.worker.graph.LengthPrefixUnknownCoders.forInstructionOutputNode) InstructionOutputNode(org.apache.beam.runners.dataflow.worker.graph.Nodes.InstructionOutputNode) LengthPrefixUnknownCoders.forInstructionOutputNode(org.apache.beam.runners.dataflow.worker.graph.LengthPrefixUnknownCoders.forInstructionOutputNode) InstructionOutputNode(org.apache.beam.runners.dataflow.worker.graph.Nodes.InstructionOutputNode) ParallelInstructionNode(org.apache.beam.runners.dataflow.worker.graph.Nodes.ParallelInstructionNode) Node(org.apache.beam.runners.dataflow.worker.graph.Nodes.Node) RemoteGrpcPortNode(org.apache.beam.runners.dataflow.worker.graph.Nodes.RemoteGrpcPortNode) ParallelInstructionNode(org.apache.beam.runners.dataflow.worker.graph.Nodes.ParallelInstructionNode) Edge(org.apache.beam.runners.dataflow.worker.graph.Edges.Edge) DefaultEdge(org.apache.beam.runners.dataflow.worker.graph.Edges.DefaultEdge) Test(org.junit.Test)

Aggregations

ParallelInstructionNode (org.apache.beam.runners.dataflow.worker.graph.Nodes.ParallelInstructionNode)40 Node (org.apache.beam.runners.dataflow.worker.graph.Nodes.Node)35 InstructionOutputNode (org.apache.beam.runners.dataflow.worker.graph.Nodes.InstructionOutputNode)31 Edge (org.apache.beam.runners.dataflow.worker.graph.Edges.Edge)24 ParallelInstruction (com.google.api.services.dataflow.model.ParallelInstruction)21 Test (org.junit.Test)20 DefaultEdge (org.apache.beam.runners.dataflow.worker.graph.Edges.DefaultEdge)18 MultiOutputInfoEdge (org.apache.beam.runners.dataflow.worker.graph.Edges.MultiOutputInfoEdge)14 InstructionOutput (com.google.api.services.dataflow.model.InstructionOutput)10 ParDoInstruction (com.google.api.services.dataflow.model.ParDoInstruction)9 ReadInstruction (com.google.api.services.dataflow.model.ReadInstruction)9 MapTask (com.google.api.services.dataflow.model.MapTask)8 OperationNode (org.apache.beam.runners.dataflow.worker.graph.Nodes.OperationNode)8 List (java.util.List)6 RemoteGrpcPortNode (org.apache.beam.runners.dataflow.worker.graph.Nodes.RemoteGrpcPortNode)6 ImmutableMap (org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableMap)6 HashMap (java.util.HashMap)5 CloudObject (org.apache.beam.runners.dataflow.util.CloudObject)5 NameContext (org.apache.beam.runners.dataflow.worker.counters.NameContext)5 ParDoOperation (org.apache.beam.runners.dataflow.worker.util.common.worker.ParDoOperation)5