Search in sources :

Example 21 with ParDoInstruction

Use of com.google.api.services.dataflow.model.ParDoInstruction in the Apache Beam project.

Class ReplacePgbkWithPrecombineFunctionTest, method testPrecombinePgbkIsReplaced.

/**
 * Applies {@code ReplacePgbkWithPrecombineFunction} to the three-node network
 * {@code out1 --> precombine_pgbk --> out2} and checks two things:
 *
 * <ul>
 *   <li>the resulting network has the same number of nodes, edges and root-to-leaf paths as the
 *       input network;
 *   <li>the {@code ParallelInstructionNode} in the result keeps the original instruction name,
 *       has no partial-group-by-key instruction, and carries a ParDo instruction whose user
 *       function equals the value combining function the pgbk node was created with.
 * </ul>
 */
@Test
public void testPrecombinePgbkIsReplaced() throws Exception {
    // Network:
    // out1 --> precombine_pgbk --> out2
    Map<String, Object> valueCombiningFn = new HashMap<>();
    Node out1 = createInstructionOutputNode("out1");
    String pgbkName = "precombine_pgbk";
    Node precombinePgbk = createPrecombinePgbkNode(pgbkName, valueCombiningFn);
    Node out2 = createInstructionOutputNode("out2");
    MutableNetwork<Node, Edge> network = createEmptyNetwork();
    network.addNode(out1);
    network.addNode(precombinePgbk);
    network.addNode(out2);
    network.addEdge(out1, precombinePgbk, DefaultEdge.create());
    network.addEdge(precombinePgbk, out2, DefaultEdge.create());
    // Snapshot the input before applying the function so it can be compared afterwards.
    Network<Node, Edge> inputNetwork = ImmutableNetwork.copyOf(network);
    network = new ReplacePgbkWithPrecombineFunction().apply(network);
    // Assert that network has same structure (same number of nodes and paths).
    assertEquals(inputNetwork.nodes().size(), network.nodes().size());
    assertEquals(inputNetwork.edges().size(), network.edges().size());
    List<List<Node>> oldPaths = Networks.allPathsFromRootsToLeaves(inputNetwork);
    List<List<Node>> newPaths = Networks.allPathsFromRootsToLeaves(network);
    assertEquals(oldPaths.size(), newPaths.size());
    // Locate the instruction node in the transformed network.
    ParallelInstructionNode createdCombineNode = null;
    for (Node node : network.nodes()) {
        if (node instanceof ParallelInstructionNode) {
            createdCombineNode = (ParallelInstructionNode) node;
            break;
        }
    }
    // Fail loudly if no instruction node exists; otherwise the checks below would be skipped
    // and the test would pass without verifying anything.
    assertNotNull(createdCombineNode);
    // Assert that the pgbk node has been replaced.
    ParallelInstruction parallelInstruction = createdCombineNode.getParallelInstruction();
    assertEquals(pgbkName, parallelInstruction.getName());
    assertNull(parallelInstruction.getPartialGroupByKey());
    ParDoInstruction parDoInstruction = parallelInstruction.getParDo();
    assertNotNull(parDoInstruction);
    assertEquals(valueCombiningFn, parDoInstruction.getUserFn());
}
Also used : HashMap(java.util.HashMap) InstructionOutputNode(org.apache.beam.runners.dataflow.worker.graph.Nodes.InstructionOutputNode) ParallelInstructionNode(org.apache.beam.runners.dataflow.worker.graph.Nodes.ParallelInstructionNode) Node(org.apache.beam.runners.dataflow.worker.graph.Nodes.Node) ParallelInstructionNode(org.apache.beam.runners.dataflow.worker.graph.Nodes.ParallelInstructionNode) ParallelInstruction(com.google.api.services.dataflow.model.ParallelInstruction) ParDoInstruction(com.google.api.services.dataflow.model.ParDoInstruction) List(java.util.List) Edge(org.apache.beam.runners.dataflow.worker.graph.Edges.Edge) DefaultEdge(org.apache.beam.runners.dataflow.worker.graph.Edges.DefaultEdge) Test(org.junit.Test)

Aggregations

ParDoInstruction (com.google.api.services.dataflow.model.ParDoInstruction)21 ParallelInstruction (com.google.api.services.dataflow.model.ParallelInstruction)18 CloudObject (org.apache.beam.runners.dataflow.util.CloudObject)13 InstructionOutput (com.google.api.services.dataflow.model.InstructionOutput)10 Node (org.apache.beam.runners.dataflow.worker.graph.Nodes.Node)9 ParallelInstructionNode (org.apache.beam.runners.dataflow.worker.graph.Nodes.ParallelInstructionNode)9 MultiOutputInfo (com.google.api.services.dataflow.model.MultiOutputInfo)8 Edge (org.apache.beam.runners.dataflow.worker.graph.Edges.Edge)8 InstructionOutputNode (org.apache.beam.runners.dataflow.worker.graph.Nodes.InstructionOutputNode)8 HashMap (java.util.HashMap)7 Test (org.junit.Test)7 InstructionInput (com.google.api.services.dataflow.model.InstructionInput)6 SdkComponents (org.apache.beam.runners.core.construction.SdkComponents)6 MultiOutputInfoEdge (org.apache.beam.runners.dataflow.worker.graph.Edges.MultiOutputInfoEdge)6 ReadInstruction (com.google.api.services.dataflow.model.ReadInstruction)5 ArrayList (java.util.ArrayList)5 Structs.addString (org.apache.beam.runners.dataflow.util.Structs.addString)5 DefaultEdge (org.apache.beam.runners.dataflow.worker.graph.Edges.DefaultEdge)5 ByteString (org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString)5 ImmutableList (org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableList)5