Search in sources :

Example 6 with FlattenInstruction

use of com.google.api.services.dataflow.model.FlattenInstruction in project beam by apache.

the class MapTaskToNetworkFunctionTest method testFlatten.

@Test
public void testFlatten() {
    // ReadA --\
    // |--> Flatten
    // ReadB --/
    InstructionOutput readOutputA = createInstructionOutput("ReadA.out");
    ParallelInstruction readA = createParallelInstruction("ReadA", readOutputA);
    readA.setRead(new ReadInstruction());
    InstructionOutput readOutputB = createInstructionOutput("ReadB.out");
    ParallelInstruction readB = createParallelInstruction("ReadB", readOutputB);
    readB.setRead(new ReadInstruction());
    FlattenInstruction flattenInstruction = new FlattenInstruction();
    flattenInstruction.setInputs(ImmutableList.of(// ReadA.out
    createInstructionInput(0, 0), // ReadB.out
    createInstructionInput(1, 0)));
    InstructionOutput flattenOutput = createInstructionOutput("Flatten.out");
    ParallelInstruction flatten = createParallelInstruction("Flatten", flattenOutput);
    flatten.setFlatten(flattenInstruction);
    MapTask mapTask = new MapTask();
    mapTask.setInstructions(ImmutableList.of(readA, readB, flatten));
    mapTask.setFactory(Transport.getJsonFactory());
    Network<Node, Edge> network = new MapTaskToNetworkFunction(IdGenerators.decrementingLongs()).apply(mapTask);
    assertNetworkProperties(network);
    assertEquals(6, network.nodes().size());
    assertEquals(5, network.edges().size());
    ParallelInstructionNode readANode = get(network, readA);
    InstructionOutputNode readOutputANode = getOnlySuccessor(network, readANode);
    assertEquals(readOutputA, readOutputANode.getInstructionOutput());
    ParallelInstructionNode readBNode = get(network, readB);
    InstructionOutputNode readOutputBNode = getOnlySuccessor(network, readBNode);
    assertEquals(readOutputB, readOutputBNode.getInstructionOutput());
    // Make sure the successors for both ReadA and ReadB output PCollections are the same
    assertEquals(network.successors(readOutputANode), network.successors(readOutputBNode));
    ParallelInstructionNode flattenNode = getOnlySuccessor(network, readOutputANode);
    InstructionOutputNode flattenOutputNode = getOnlySuccessor(network, flattenNode);
    assertEquals(flattenOutput, flattenOutputNode.getInstructionOutput());
}
Also used : ParallelInstruction(com.google.api.services.dataflow.model.ParallelInstruction) InstructionOutputNode(org.apache.beam.runners.dataflow.worker.graph.Nodes.InstructionOutputNode) MapTask(com.google.api.services.dataflow.model.MapTask) InstructionOutputNode(org.apache.beam.runners.dataflow.worker.graph.Nodes.InstructionOutputNode) ParallelInstructionNode(org.apache.beam.runners.dataflow.worker.graph.Nodes.ParallelInstructionNode) Node(org.apache.beam.runners.dataflow.worker.graph.Nodes.Node) InstructionOutput(com.google.api.services.dataflow.model.InstructionOutput) ParallelInstructionNode(org.apache.beam.runners.dataflow.worker.graph.Nodes.ParallelInstructionNode) ReadInstruction(com.google.api.services.dataflow.model.ReadInstruction) FlattenInstruction(com.google.api.services.dataflow.model.FlattenInstruction) Edge(org.apache.beam.runners.dataflow.worker.graph.Edges.Edge) DefaultEdge(org.apache.beam.runners.dataflow.worker.graph.Edges.DefaultEdge) MultiOutputInfoEdge(org.apache.beam.runners.dataflow.worker.graph.Edges.MultiOutputInfoEdge) Test(org.junit.Test)

Example 7 with FlattenInstruction

use of com.google.api.services.dataflow.model.FlattenInstruction in project beam by apache.

the class RemoveFlattenInstructionsFunctionTest method testMultiLevelFlattenResultingInParallelEdges.

@Test
public void testMultiLevelFlattenResultingInParallelEdges() {
    Node a = ParallelInstructionNode.create(new ParallelInstruction().setName("A"), Nodes.ExecutionLocation.UNKNOWN);
    Node aPCollection = InstructionOutputNode.create(new InstructionOutput().setName("A.out"), PCOLLECTION_ID);
    Edge aOutput = DefaultEdge.create();
    Node b = ParallelInstructionNode.create(new ParallelInstruction().setName("B"), Nodes.ExecutionLocation.UNKNOWN);
    Node bOut1PCollection = InstructionOutputNode.create(new InstructionOutput().setName("B.out1"), PCOLLECTION_ID);
    Node bOut2PCollection = InstructionOutputNode.create(new InstructionOutput().setName("B.out1"), PCOLLECTION_ID);
    Edge bOut1 = MultiOutputInfoEdge.create(new MultiOutputInfo().setTag("out1"));
    Edge bOut2 = MultiOutputInfoEdge.create(new MultiOutputInfo().setTag("out2"));
    Node flatten1 = ParallelInstructionNode.create(new ParallelInstruction().setName("Flatten1").setFlatten(new FlattenInstruction()), Nodes.ExecutionLocation.UNKNOWN);
    Node flatten1PCollection = InstructionOutputNode.create(new InstructionOutput().setName("Flatten1.out"), PCOLLECTION_ID);
    Node flatten2 = ParallelInstructionNode.create(new ParallelInstruction().setName("Flatten2").setFlatten(new FlattenInstruction()), Nodes.ExecutionLocation.UNKNOWN);
    Node flatten2PCollection = InstructionOutputNode.create(new InstructionOutput().setName("Flatten2.out"), PCOLLECTION_ID);
    Node c = ParallelInstructionNode.create(new ParallelInstruction().setName("C"), Nodes.ExecutionLocation.UNKNOWN);
    Edge cOutput = DefaultEdge.create();
    Node cPCollection = InstructionOutputNode.create(new InstructionOutput().setName("C.out"), PCOLLECTION_ID);
    // A ------\
    // Flatten1 --\
    // B -out1-/            Flatten2 --> C
    // \-out2-------------/
    MutableNetwork<Node, Edge> network = createEmptyNetwork();
    network.addNode(a);
    network.addNode(aPCollection);
    network.addNode(b);
    network.addNode(bOut1PCollection);
    network.addNode(bOut2PCollection);
    network.addNode(flatten1);
    network.addNode(flatten1PCollection);
    network.addNode(flatten2);
    network.addNode(flatten2PCollection);
    network.addNode(c);
    network.addNode(cPCollection);
    network.addEdge(a, aPCollection, aOutput);
    network.addEdge(aPCollection, flatten1, DefaultEdge.create());
    network.addEdge(b, bOut1PCollection, bOut1);
    network.addEdge(b, bOut2PCollection, bOut2);
    network.addEdge(bOut1PCollection, flatten1, DefaultEdge.create());
    network.addEdge(bOut2PCollection, flatten2, DefaultEdge.create());
    network.addEdge(flatten1, flatten1PCollection, DefaultEdge.create());
    network.addEdge(flatten1PCollection, flatten2, DefaultEdge.create());
    network.addEdge(flatten2, flatten2PCollection, DefaultEdge.create());
    network.addEdge(flatten2PCollection, c, DefaultEdge.create());
    network.addEdge(c, cPCollection, cOutput);
    // A ------\
    // B -out1--> C
    // \-out2-/
    assertThatFlattenIsProperlyRemoved(network);
}
Also used : ParallelInstruction(com.google.api.services.dataflow.model.ParallelInstruction) MultiOutputInfo(com.google.api.services.dataflow.model.MultiOutputInfo) Node(org.apache.beam.runners.dataflow.worker.graph.Nodes.Node) InstructionOutputNode(org.apache.beam.runners.dataflow.worker.graph.Nodes.InstructionOutputNode) ParallelInstructionNode(org.apache.beam.runners.dataflow.worker.graph.Nodes.ParallelInstructionNode) InstructionOutput(com.google.api.services.dataflow.model.InstructionOutput) Edge(org.apache.beam.runners.dataflow.worker.graph.Edges.Edge) MultiOutputInfoEdge(org.apache.beam.runners.dataflow.worker.graph.Edges.MultiOutputInfoEdge) DefaultEdge(org.apache.beam.runners.dataflow.worker.graph.Edges.DefaultEdge) FlattenInstruction(com.google.api.services.dataflow.model.FlattenInstruction) Test(org.junit.Test)

Example 8 with FlattenInstruction

use of com.google.api.services.dataflow.model.FlattenInstruction in project beam by apache.

the class RemoveFlattenInstructionsFunctionTest method testRemoveFlatten.

@Test
public void testRemoveFlatten() {
    Node a = ParallelInstructionNode.create(new ParallelInstruction().setName("A"), Nodes.ExecutionLocation.UNKNOWN);
    Node aPCollection = InstructionOutputNode.create(new InstructionOutput().setName("A.out"), PCOLLECTION_ID);
    Edge aOutput = DefaultEdge.create();
    Node b = ParallelInstructionNode.create(new ParallelInstruction().setName("B"), Nodes.ExecutionLocation.UNKNOWN);
    Edge bOutput = DefaultEdge.create();
    Node bPCollection = InstructionOutputNode.create(new InstructionOutput().setName("B.out"), PCOLLECTION_ID);
    Node flatten = ParallelInstructionNode.create(new ParallelInstruction().setName("Flatten").setFlatten(new FlattenInstruction()), Nodes.ExecutionLocation.UNKNOWN);
    Node flattenPCollection = InstructionOutputNode.create(new InstructionOutput().setName("Flatten.out"), PCOLLECTION_ID);
    Node c = ParallelInstructionNode.create(new ParallelInstruction().setName("C"), Nodes.ExecutionLocation.UNKNOWN);
    Edge cOutput = DefaultEdge.create();
    Node cPCollection = InstructionOutputNode.create(new InstructionOutput().setName("C.out"), PCOLLECTION_ID);
    // A --\
    // Flatten --> C
    // B --/
    MutableNetwork<Node, Edge> network = createEmptyNetwork();
    network.addNode(a);
    network.addNode(aPCollection);
    network.addNode(b);
    network.addNode(bPCollection);
    network.addNode(flatten);
    network.addNode(flattenPCollection);
    network.addNode(c);
    network.addNode(cPCollection);
    network.addEdge(a, aPCollection, aOutput);
    network.addEdge(aPCollection, flatten, DefaultEdge.create());
    network.addEdge(b, bPCollection, bOutput);
    network.addEdge(bPCollection, flatten, DefaultEdge.create());
    network.addEdge(flatten, flattenPCollection, DefaultEdge.create());
    network.addEdge(flattenPCollection, c, DefaultEdge.create());
    network.addEdge(c, cPCollection, cOutput);
    // A --\
    // C
    // B --/
    assertThatFlattenIsProperlyRemoved(network);
}
Also used : ParallelInstruction(com.google.api.services.dataflow.model.ParallelInstruction) Node(org.apache.beam.runners.dataflow.worker.graph.Nodes.Node) InstructionOutputNode(org.apache.beam.runners.dataflow.worker.graph.Nodes.InstructionOutputNode) ParallelInstructionNode(org.apache.beam.runners.dataflow.worker.graph.Nodes.ParallelInstructionNode) InstructionOutput(com.google.api.services.dataflow.model.InstructionOutput) Edge(org.apache.beam.runners.dataflow.worker.graph.Edges.Edge) MultiOutputInfoEdge(org.apache.beam.runners.dataflow.worker.graph.Edges.MultiOutputInfoEdge) DefaultEdge(org.apache.beam.runners.dataflow.worker.graph.Edges.DefaultEdge) FlattenInstruction(com.google.api.services.dataflow.model.FlattenInstruction) Test(org.junit.Test)

Aggregations

FlattenInstruction (com.google.api.services.dataflow.model.FlattenInstruction)8 InstructionOutput (com.google.api.services.dataflow.model.InstructionOutput)8 ParallelInstruction (com.google.api.services.dataflow.model.ParallelInstruction)8 DefaultEdge (org.apache.beam.runners.dataflow.worker.graph.Edges.DefaultEdge)7 Edge (org.apache.beam.runners.dataflow.worker.graph.Edges.Edge)7 InstructionOutputNode (org.apache.beam.runners.dataflow.worker.graph.Nodes.InstructionOutputNode)7 Node (org.apache.beam.runners.dataflow.worker.graph.Nodes.Node)7 ParallelInstructionNode (org.apache.beam.runners.dataflow.worker.graph.Nodes.ParallelInstructionNode)7 Test (org.junit.Test)7 MultiOutputInfoEdge (org.apache.beam.runners.dataflow.worker.graph.Edges.MultiOutputInfoEdge)6 MapTask (com.google.api.services.dataflow.model.MapTask)2 MultiOutputInfo (com.google.api.services.dataflow.model.MultiOutputInfo)2 ReadInstruction (com.google.api.services.dataflow.model.ReadInstruction)2 InstructionInput (com.google.api.services.dataflow.model.InstructionInput)1 ArrayList (java.util.ArrayList)1