Search in sources :

Example 46 with Edge

use of org.apache.beam.runners.dataflow.worker.graph.Edges.Edge in project beam by apache.

the class CloneAmbiguousFlattensFunctionTest method testSingleFlatten.

/**
 * Verifies that cloning a single ambiguous flatten yields one SDK-harness flatten and one
 * runner-harness flatten, each wired to the expected predecessors and successors, and that the
 * number of root-to-leaf paths stays the same.
 */
@Test
public void testSingleFlatten() throws Exception {
    // Before cloning: the outputs of an SDK predecessor and a runner predecessor both feed one
    // ambiguous flatten, whose output fans out to an SDK successor, a runner successor and a
    // successor without a location. Every successor has its own output.
    MutableNetwork<Node, Edge> graph = createEmptyNetwork();
    Node sdkPred = createSdkNode("sdk_predecessor");
    Node runnerPred = createRunnerNode("runner_predecessor");
    Node sdkPredOut = createPCollection("sdk_predecessor.out");
    Node runnerPredOut = createPCollection("runner_predecessor.out");
    Node flatten = createFlatten("ambiguous_flatten", ExecutionLocation.AMBIGUOUS);
    Node flattenOut = createPCollection("ambiguous_flatten.out");
    Node sdkSucc = createSdkNode("sdk_successor");
    Node runnerSucc = createRunnerNode("runner_successor");
    Node noLocationSucc = createNoLocationNode();
    Node sdkSuccOut = createPCollection("sdk_successor.out");
    Node runnerSuccOut = createPCollection("runner_successor.out");
    Node noLocationSuccOut = createPCollection("no_location_successor.out");

    // Register the nodes in a fixed order before wiring any edges.
    Node[] members = {
        sdkPred,
        runnerPred,
        sdkPredOut,
        runnerPredOut,
        flatten,
        flattenOut,
        sdkSucc,
        runnerSucc,
        noLocationSucc,
        sdkSuccOut,
        runnerSuccOut,
        noLocationSuccOut
    };
    for (Node member : members) {
        graph.addNode(member);
    }

    // Each pair is {source, target}; every link gets its own freshly created default edge.
    Node[][] links = {
        {sdkPred, sdkPredOut},
        {runnerPred, runnerPredOut},
        {sdkPredOut, flatten},
        {runnerPredOut, flatten},
        {flatten, flattenOut},
        {flattenOut, sdkSucc},
        {flattenOut, runnerSucc},
        {flattenOut, noLocationSucc},
        {sdkSucc, sdkSuccOut},
        {runnerSucc, runnerSuccOut},
        {noLocationSucc, noLocationSuccOut}
    };
    for (Node[] link : links) {
        graph.addEdge(link[0], link[1], DefaultEdge.create());
    }

    // Expected after cloning: both predecessor outputs feed an SDK flatten and a runner flatten.
    // The SDK flatten's output leads only to the SDK successor, while the runner flatten's
    // output leads to the runner successor and the successor without a location.
    List<List<Node>> pathsBeforeCloning = Networks.allPathsFromRootsToLeaves(graph);
    graph = new CloneAmbiguousFlattensFunction().apply(graph);

    // Locate the two flatten clones by their execution location.
    ParallelInstructionNode sdkClone = null;
    ParallelInstructionNode runnerClone = null;
    for (Node candidate : graph.nodes()) {
        if (!(candidate instanceof ParallelInstructionNode)) {
            continue;
        }
        ParallelInstructionNode instruction = (ParallelInstructionNode) candidate;
        if (instruction.getParallelInstruction().getFlatten() == null) {
            continue;
        }
        if (instruction.getExecutionLocation() == ExecutionLocation.SDK_HARNESS) {
            sdkClone = instruction;
        } else if (instruction.getExecutionLocation() == ExecutionLocation.RUNNER_HARNESS) {
            runnerClone = instruction;
        } else {
            // Any other flatten that remains must not be the ambiguous one.
            assertTrue("Ambiguous flatten not removed from network.", instruction.getExecutionLocation() != ExecutionLocation.AMBIGUOUS);
        }
    }
    assertNotNull("Ambiguous flatten was not cloned into sdk flatten.", sdkClone);
    assertNotNull("Ambiguous flatten was not cloned into runner flatten.", runnerClone);

    Node sdkCloneOut = Iterables.getOnlyElement(graph.successors(sdkClone));
    Node runnerCloneOut = Iterables.getOnlyElement(graph.successors(runnerClone));

    // Both clones keep both predecessors.
    assertEquals(2, graph.predecessors(sdkClone).size());
    assertEquals(2, graph.predecessors(runnerClone).size());
    // Successors are split between the two clones.
    assertEquals(1, graph.successors(sdkCloneOut).size());
    assertEquals(2, graph.successors(runnerCloneOut).size());
    assertSame(sdkSucc, Iterables.getOnlyElement(graph.successors(sdkCloneOut)));
    assertThat(graph.successors(runnerCloneOut), hasItems(runnerSucc, noLocationSucc));
    // Cloning must not change the number of root-to-leaf paths.
    assertEquals(pathsBeforeCloning.size(), Networks.allPathsFromRootsToLeaves(graph).size());
}
Also used : Node(org.apache.beam.runners.dataflow.worker.graph.Nodes.Node) InstructionOutputNode(org.apache.beam.runners.dataflow.worker.graph.Nodes.InstructionOutputNode) ParallelInstructionNode(org.apache.beam.runners.dataflow.worker.graph.Nodes.ParallelInstructionNode) ParallelInstructionNode(org.apache.beam.runners.dataflow.worker.graph.Nodes.ParallelInstructionNode) List(java.util.List) Edge(org.apache.beam.runners.dataflow.worker.graph.Edges.Edge) DefaultEdge(org.apache.beam.runners.dataflow.worker.graph.Edges.DefaultEdge) Test(org.junit.Test)

Example 47 with Edge

use of org.apache.beam.runners.dataflow.worker.graph.Edges.Edge in project beam by apache.

the class LengthPrefixUnknownCodersTest method testLengthPrefixAndReplaceForRunnerNetwork.

/**
 * Runs {@code andReplaceForRunnerNetwork} over a read node connected to its output and checks
 * that the resulting instructions and outputs match, in any order, a read node and output built
 * with {@code prefixedAndReplacedWindowedValueCoder}.
 */
@Test
public void testLengthPrefixAndReplaceForRunnerNetwork() throws Exception {
    // Input network: Read --> Read.out, both using the plain windowed value coder.
    Node source = createReadNode("Read", "Source", windowedValueCoder);
    Edge sourceToOutput = DefaultEdge.create();
    Node sourceOutput = createInstructionOutputNode("Read.out", windowedValueCoder);
    MutableNetwork<Node, Edge> inputNetwork = createEmptyNetwork();
    inputNetwork.addNode(source);
    inputNetwork.addNode(sourceOutput);
    inputNetwork.addEdge(source, sourceOutput, sourceToOutput);

    // Expected counterparts, built directly with the prefixed-and-replaced coder.
    ParallelInstructionNode expectedSource = createReadNode("Read", "Source", prefixedAndReplacedWindowedValueCoder);
    InstructionOutputNode expectedOutput = createInstructionOutputNode("Read.out", prefixedAndReplacedWindowedValueCoder);

    MutableNetwork<Node, Edge> rewrittenNetwork = andReplaceForRunnerNetwork(inputNetwork);

    // Collect the JSON payload of every instruction and instruction-output node.
    ImmutableSet.Builder<GenericJson> collected = ImmutableSet.builder();
    for (Node candidate : rewrittenNetwork.nodes()) {
        if (candidate instanceof ParallelInstructionNode) {
            collected.add(((ParallelInstructionNode) candidate).getParallelInstruction());
            continue;
        }
        if (candidate instanceof InstructionOutputNode) {
            collected.add(((InstructionOutputNode) candidate).getInstructionOutput());
        }
    }

    assertThat(
        collected.build(),
        containsInAnyOrder(
            jsonOf(expectedOutput.getInstructionOutput()),
            jsonOf(expectedSource.getParallelInstruction())));
}
Also used : GenericJson(com.google.api.client.json.GenericJson) LengthPrefixUnknownCoders.forInstructionOutputNode(org.apache.beam.runners.dataflow.worker.graph.LengthPrefixUnknownCoders.forInstructionOutputNode) InstructionOutputNode(org.apache.beam.runners.dataflow.worker.graph.Nodes.InstructionOutputNode) ImmutableSet(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableSet) LengthPrefixUnknownCoders.forInstructionOutputNode(org.apache.beam.runners.dataflow.worker.graph.LengthPrefixUnknownCoders.forInstructionOutputNode) InstructionOutputNode(org.apache.beam.runners.dataflow.worker.graph.Nodes.InstructionOutputNode) ParallelInstructionNode(org.apache.beam.runners.dataflow.worker.graph.Nodes.ParallelInstructionNode) Node(org.apache.beam.runners.dataflow.worker.graph.Nodes.Node) RemoteGrpcPortNode(org.apache.beam.runners.dataflow.worker.graph.Nodes.RemoteGrpcPortNode) ParallelInstructionNode(org.apache.beam.runners.dataflow.worker.graph.Nodes.ParallelInstructionNode) Edge(org.apache.beam.runners.dataflow.worker.graph.Edges.Edge) DefaultEdge(org.apache.beam.runners.dataflow.worker.graph.Edges.DefaultEdge) Test(org.junit.Test)

Example 48 with Edge

use of org.apache.beam.runners.dataflow.worker.graph.Edges.Edge in project beam by apache.

the class LengthPrefixUnknownCodersTest method testLengthPrefixForInstructionOutputNodeWithNonGrpcNodeNeighbor.

/**
 * Connects a read node to {@code instructionOutputNode} and checks that the node returned by
 * {@code forInstructionOutputNode} carries a codec that is JSON-equal to the cloud object built
 * from {@code windowedValueCoder}.
 */
@Test
public void testLengthPrefixForInstructionOutputNodeWithNonGrpcNodeNeighbor() {
    MutableNetwork<Node, Edge> graph = createEmptyNetwork();
    ParallelInstructionNode reader = createReadNode("read", "source", windowedValueCoder);

    // The output node is registered first, then its sole neighbor, the read node.
    for (Node member : new Node[] {instructionOutputNode, reader}) {
        graph.addNode(member);
    }
    graph.addEdge(reader, instructionOutputNode, DefaultEdge.create());

    assertEqualsAsJson(
        CloudObjects.asCloudObject(windowedValueCoder, /*sdkComponents=*/ null),
        ((InstructionOutputNode) forInstructionOutputNode(graph).apply(instructionOutputNode))
            .getInstructionOutput()
            .getCodec());
}
Also used : LengthPrefixUnknownCoders.forInstructionOutputNode(org.apache.beam.runners.dataflow.worker.graph.LengthPrefixUnknownCoders.forInstructionOutputNode) InstructionOutputNode(org.apache.beam.runners.dataflow.worker.graph.Nodes.InstructionOutputNode) LengthPrefixUnknownCoders.forInstructionOutputNode(org.apache.beam.runners.dataflow.worker.graph.LengthPrefixUnknownCoders.forInstructionOutputNode) InstructionOutputNode(org.apache.beam.runners.dataflow.worker.graph.Nodes.InstructionOutputNode) ParallelInstructionNode(org.apache.beam.runners.dataflow.worker.graph.Nodes.ParallelInstructionNode) Node(org.apache.beam.runners.dataflow.worker.graph.Nodes.Node) RemoteGrpcPortNode(org.apache.beam.runners.dataflow.worker.graph.Nodes.RemoteGrpcPortNode) ParallelInstructionNode(org.apache.beam.runners.dataflow.worker.graph.Nodes.ParallelInstructionNode) Edge(org.apache.beam.runners.dataflow.worker.graph.Edges.Edge) DefaultEdge(org.apache.beam.runners.dataflow.worker.graph.Edges.DefaultEdge) Test(org.junit.Test)

Example 49 with Edge

use of org.apache.beam.runners.dataflow.worker.graph.Edges.Edge in project beam by apache.

the class RemoveFlattenInstructionsFunctionTest method testMultiLevelFlattenResultingInParallelEdges.

/**
 * Builds a network with two chained flattens where instruction B reaches the second flatten both
 * directly (via its "out2" output) and indirectly (via its "out1" output and the first flatten),
 * then delegates to {@code assertThatFlattenIsProperlyRemoved} to check flatten removal.
 */
@Test
public void testMultiLevelFlattenResultingInParallelEdges() {
    Node a = ParallelInstructionNode.create(new ParallelInstruction().setName("A"), Nodes.ExecutionLocation.UNKNOWN);
    Node aPCollection = InstructionOutputNode.create(new InstructionOutput().setName("A.out"), PCOLLECTION_ID);
    Edge aOutput = DefaultEdge.create();
    Node b = ParallelInstructionNode.create(new ParallelInstruction().setName("B"), Nodes.ExecutionLocation.UNKNOWN);
    Node bOut1PCollection = InstructionOutputNode.create(new InstructionOutput().setName("B.out1"), PCOLLECTION_ID);
    // Named "B.out2" to match the "out2" tagged edge below; it previously duplicated "B.out1".
    Node bOut2PCollection = InstructionOutputNode.create(new InstructionOutput().setName("B.out2"), PCOLLECTION_ID);
    Edge bOut1 = MultiOutputInfoEdge.create(new MultiOutputInfo().setTag("out1"));
    Edge bOut2 = MultiOutputInfoEdge.create(new MultiOutputInfo().setTag("out2"));
    Node flatten1 = ParallelInstructionNode.create(new ParallelInstruction().setName("Flatten1").setFlatten(new FlattenInstruction()), Nodes.ExecutionLocation.UNKNOWN);
    Node flatten1PCollection = InstructionOutputNode.create(new InstructionOutput().setName("Flatten1.out"), PCOLLECTION_ID);
    Node flatten2 = ParallelInstructionNode.create(new ParallelInstruction().setName("Flatten2").setFlatten(new FlattenInstruction()), Nodes.ExecutionLocation.UNKNOWN);
    Node flatten2PCollection = InstructionOutputNode.create(new InstructionOutput().setName("Flatten2.out"), PCOLLECTION_ID);
    Node c = ParallelInstructionNode.create(new ParallelInstruction().setName("C"), Nodes.ExecutionLocation.UNKNOWN);
    Edge cOutput = DefaultEdge.create();
    Node cPCollection = InstructionOutputNode.create(new InstructionOutput().setName("C.out"), PCOLLECTION_ID);
    // Input network (PCollection nodes omitted from the drawing):
    //
    //  A ------\
    //           Flatten1 --\
    //  B -out1-/            Flatten2 --> C
    //   \-out2-------------/
    MutableNetwork<Node, Edge> network = createEmptyNetwork();
    network.addNode(a);
    network.addNode(aPCollection);
    network.addNode(b);
    network.addNode(bOut1PCollection);
    network.addNode(bOut2PCollection);
    network.addNode(flatten1);
    network.addNode(flatten1PCollection);
    network.addNode(flatten2);
    network.addNode(flatten2PCollection);
    network.addNode(c);
    network.addNode(cPCollection);
    network.addEdge(a, aPCollection, aOutput);
    network.addEdge(aPCollection, flatten1, DefaultEdge.create());
    network.addEdge(b, bOut1PCollection, bOut1);
    network.addEdge(b, bOut2PCollection, bOut2);
    network.addEdge(bOut1PCollection, flatten1, DefaultEdge.create());
    network.addEdge(bOut2PCollection, flatten2, DefaultEdge.create());
    network.addEdge(flatten1, flatten1PCollection, DefaultEdge.create());
    network.addEdge(flatten1PCollection, flatten2, DefaultEdge.create());
    network.addEdge(flatten2, flatten2PCollection, DefaultEdge.create());
    network.addEdge(flatten2PCollection, c, DefaultEdge.create());
    network.addEdge(c, cPCollection, cOutput);
    // Intended shape once both flattens are removed; B reaches C over two parallel routes:
    //
    //  A ------\
    //  B -out1--> C
    //   \-out2-/
    assertThatFlattenIsProperlyRemoved(network);
}
Also used : ParallelInstruction(com.google.api.services.dataflow.model.ParallelInstruction) MultiOutputInfo(com.google.api.services.dataflow.model.MultiOutputInfo) Node(org.apache.beam.runners.dataflow.worker.graph.Nodes.Node) InstructionOutputNode(org.apache.beam.runners.dataflow.worker.graph.Nodes.InstructionOutputNode) ParallelInstructionNode(org.apache.beam.runners.dataflow.worker.graph.Nodes.ParallelInstructionNode) InstructionOutput(com.google.api.services.dataflow.model.InstructionOutput) Edge(org.apache.beam.runners.dataflow.worker.graph.Edges.Edge) MultiOutputInfoEdge(org.apache.beam.runners.dataflow.worker.graph.Edges.MultiOutputInfoEdge) DefaultEdge(org.apache.beam.runners.dataflow.worker.graph.Edges.DefaultEdge) FlattenInstruction(com.google.api.services.dataflow.model.FlattenInstruction) Test(org.junit.Test)

Example 50 with Edge

use of org.apache.beam.runners.dataflow.worker.graph.Edges.Edge in project beam by apache.

the class RemoveFlattenInstructionsFunctionTest method testRemoveFlatten.

/**
 * Builds a network in which instructions A and B feed a single flatten whose output is consumed
 * by C, then delegates to {@code assertThatFlattenIsProperlyRemoved}.
 */
@Test
public void testRemoveFlatten() {
    Node firstInput = ParallelInstructionNode.create(new ParallelInstruction().setName("A"), Nodes.ExecutionLocation.UNKNOWN);
    Node firstInputOut = InstructionOutputNode.create(new InstructionOutput().setName("A.out"), PCOLLECTION_ID);
    Edge firstInputEdge = DefaultEdge.create();
    Node secondInput = ParallelInstructionNode.create(new ParallelInstruction().setName("B"), Nodes.ExecutionLocation.UNKNOWN);
    Edge secondInputEdge = DefaultEdge.create();
    Node secondInputOut = InstructionOutputNode.create(new InstructionOutput().setName("B.out"), PCOLLECTION_ID);
    Node merge = ParallelInstructionNode.create(
        new ParallelInstruction().setName("Flatten").setFlatten(new FlattenInstruction()),
        Nodes.ExecutionLocation.UNKNOWN);
    Node mergeOut = InstructionOutputNode.create(new InstructionOutput().setName("Flatten.out"), PCOLLECTION_ID);
    Node consumer = ParallelInstructionNode.create(new ParallelInstruction().setName("C"), Nodes.ExecutionLocation.UNKNOWN);
    Edge consumerEdge = DefaultEdge.create();
    Node consumerOut = InstructionOutputNode.create(new InstructionOutput().setName("C.out"), PCOLLECTION_ID);

    // Input network (PCollection nodes omitted from the drawing):
    //
    //  A --\
    //       Flatten --> C
    //  B --/
    MutableNetwork<Node, Edge> graph = createEmptyNetwork();
    Node[] members = {
        firstInput,
        firstInputOut,
        secondInput,
        secondInputOut,
        merge,
        mergeOut,
        consumer,
        consumerOut
    };
    for (Node member : members) {
        graph.addNode(member);
    }
    graph.addEdge(firstInput, firstInputOut, firstInputEdge);
    graph.addEdge(firstInputOut, merge, DefaultEdge.create());
    graph.addEdge(secondInput, secondInputOut, secondInputEdge);
    graph.addEdge(secondInputOut, merge, DefaultEdge.create());
    graph.addEdge(merge, mergeOut, DefaultEdge.create());
    graph.addEdge(mergeOut, consumer, DefaultEdge.create());
    graph.addEdge(consumer, consumerOut, consumerEdge);

    // Intended shape once the flatten is removed:
    //
    //  A --\
    //       C
    //  B --/
    assertThatFlattenIsProperlyRemoved(graph);
}
Also used : ParallelInstruction(com.google.api.services.dataflow.model.ParallelInstruction) Node(org.apache.beam.runners.dataflow.worker.graph.Nodes.Node) InstructionOutputNode(org.apache.beam.runners.dataflow.worker.graph.Nodes.InstructionOutputNode) ParallelInstructionNode(org.apache.beam.runners.dataflow.worker.graph.Nodes.ParallelInstructionNode) InstructionOutput(com.google.api.services.dataflow.model.InstructionOutput) Edge(org.apache.beam.runners.dataflow.worker.graph.Edges.Edge) MultiOutputInfoEdge(org.apache.beam.runners.dataflow.worker.graph.Edges.MultiOutputInfoEdge) DefaultEdge(org.apache.beam.runners.dataflow.worker.graph.Edges.DefaultEdge) FlattenInstruction(com.google.api.services.dataflow.model.FlattenInstruction) Test(org.junit.Test)

Aggregations

Edge (org.apache.beam.runners.dataflow.worker.graph.Edges.Edge) 50, Node (org.apache.beam.runners.dataflow.worker.graph.Nodes.Node) 50, ParallelInstructionNode (org.apache.beam.runners.dataflow.worker.graph.Nodes.ParallelInstructionNode) 49, InstructionOutputNode (org.apache.beam.runners.dataflow.worker.graph.Nodes.InstructionOutputNode) 48, DefaultEdge (org.apache.beam.runners.dataflow.worker.graph.Edges.DefaultEdge) 41, Test (org.junit.Test) 34, MultiOutputInfoEdge (org.apache.beam.runners.dataflow.worker.graph.Edges.MultiOutputInfoEdge) 21, ParallelInstruction (com.google.api.services.dataflow.model.ParallelInstruction) 20, InstructionOutput (com.google.api.services.dataflow.model.InstructionOutput) 17, MapTask (com.google.api.services.dataflow.model.MapTask) 9, ReadInstruction (com.google.api.services.dataflow.model.ReadInstruction) 9, ParDoInstruction (com.google.api.services.dataflow.model.ParDoInstruction) 8, List (java.util.List) 8, FlattenInstruction (com.google.api.services.dataflow.model.FlattenInstruction) 7, MultiOutputInfo (com.google.api.services.dataflow.model.MultiOutputInfo) 7, HappensBeforeEdge (org.apache.beam.runners.dataflow.worker.graph.Edges.HappensBeforeEdge) 7, RemoteGrpcPortNode (org.apache.beam.runners.dataflow.worker.graph.Nodes.RemoteGrpcPortNode) 7, RunnerApi (org.apache.beam.model.pipeline.v1.RunnerApi) 6, ImmutableMap (org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableMap) 6, FetchAndFilterStreamingSideInputsNode (org.apache.beam.runners.dataflow.worker.graph.Nodes.FetchAndFilterStreamingSideInputsNode) 5