use of com.google.api.services.dataflow.model.MultiOutputInfo in project beam by apache.
the class StreamingDataflowWorkerTest method makeDoFnInstruction.
private ParallelInstruction makeDoFnInstruction(DoFn<?, ?> doFn, int producerIndex, Coder<?> outputCoder, WindowingStrategy<?, ?> windowingStrategy) {
CloudObject spec = CloudObject.forClassName("DoFn");
addString(spec, PropertyNames.SERIALIZED_FN, StringUtils.byteArrayToJsonString(SerializableUtils.serializeToByteArray(DoFnInfo.forFn(doFn, windowingStrategy, /* windowing strategy */
null, /* side input views */
null, /* input coder */
new TupleTag<>(PropertyNames.OUTPUT), /* main output id */
DoFnSchemaInformation.create(), Collections.emptyMap()))));
return new ParallelInstruction().setSystemName(DEFAULT_PARDO_SYSTEM_NAME).setName(DEFAULT_PARDO_USER_NAME).setOriginalName(DEFAULT_PARDO_ORIGINAL_NAME).setParDo(new ParDoInstruction().setInput(new InstructionInput().setProducerInstructionIndex(producerIndex).setOutputNum(0)).setNumOutputs(1).setUserFn(spec).setMultiOutputInfos(Arrays.asList(new MultiOutputInfo().setTag(PropertyNames.OUTPUT)))).setOutputs(Arrays.asList(new InstructionOutput().setName(PropertyNames.OUTPUT).setOriginalName(DEFAULT_OUTPUT_ORIGINAL_NAME).setSystemName(DEFAULT_OUTPUT_SYSTEM_NAME).setCodec(CloudObjects.asCloudObject(WindowedValue.getFullCoder(outputCoder, windowingStrategy.getWindowFn().windowCoder()), /*sdkComponents=*/
null))));
}
use of com.google.api.services.dataflow.model.MultiOutputInfo in project beam by apache.
the class MapTaskToNetworkFunctionTest method testParDo.
@Test
public void testParDo() {
InstructionOutput readOutput = createInstructionOutput("Read.out");
ParallelInstruction read = createParallelInstruction("Read", readOutput);
read.setRead(new ReadInstruction());
MultiOutputInfo parDoMultiOutput = createMultiOutputInfo("output");
ParDoInstruction parDoInstruction = new ParDoInstruction();
// Read.out
parDoInstruction.setInput(createInstructionInput(0, 0));
parDoInstruction.setMultiOutputInfos(ImmutableList.of(parDoMultiOutput));
InstructionOutput parDoOutput = createInstructionOutput("ParDo.out");
ParallelInstruction parDo = createParallelInstruction("ParDo", parDoOutput);
parDo.setParDo(parDoInstruction);
MapTask mapTask = new MapTask();
mapTask.setInstructions(ImmutableList.of(read, parDo));
mapTask.setFactory(Transport.getJsonFactory());
Network<Node, Edge> network = new MapTaskToNetworkFunction(IdGenerators.decrementingLongs()).apply(mapTask);
assertNetworkProperties(network);
assertEquals(4, network.nodes().size());
assertEquals(3, network.edges().size());
ParallelInstructionNode readNode = get(network, read);
InstructionOutputNode readOutputNode = getOnlySuccessor(network, readNode);
assertEquals(readOutput, readOutputNode.getInstructionOutput());
ParallelInstructionNode parDoNode = getOnlySuccessor(network, readOutputNode);
InstructionOutputNode parDoOutputNode = getOnlySuccessor(network, parDoNode);
assertEquals(parDoOutput, parDoOutputNode.getInstructionOutput());
assertEquals(parDoMultiOutput, ((MultiOutputInfoEdge) Iterables.getOnlyElement(network.edgesConnecting(parDoNode, parDoOutputNode))).getMultiOutputInfo());
}
use of com.google.api.services.dataflow.model.MultiOutputInfo in project beam by apache.
the class RemoveFlattenInstructionsFunctionTest method testMultiLevelFlattenResultingInParallelEdges.
@Test
public void testMultiLevelFlattenResultingInParallelEdges() {
Node a = ParallelInstructionNode.create(new ParallelInstruction().setName("A"), Nodes.ExecutionLocation.UNKNOWN);
Node aPCollection = InstructionOutputNode.create(new InstructionOutput().setName("A.out"), PCOLLECTION_ID);
Edge aOutput = DefaultEdge.create();
Node b = ParallelInstructionNode.create(new ParallelInstruction().setName("B"), Nodes.ExecutionLocation.UNKNOWN);
Node bOut1PCollection = InstructionOutputNode.create(new InstructionOutput().setName("B.out1"), PCOLLECTION_ID);
Node bOut2PCollection = InstructionOutputNode.create(new InstructionOutput().setName("B.out1"), PCOLLECTION_ID);
Edge bOut1 = MultiOutputInfoEdge.create(new MultiOutputInfo().setTag("out1"));
Edge bOut2 = MultiOutputInfoEdge.create(new MultiOutputInfo().setTag("out2"));
Node flatten1 = ParallelInstructionNode.create(new ParallelInstruction().setName("Flatten1").setFlatten(new FlattenInstruction()), Nodes.ExecutionLocation.UNKNOWN);
Node flatten1PCollection = InstructionOutputNode.create(new InstructionOutput().setName("Flatten1.out"), PCOLLECTION_ID);
Node flatten2 = ParallelInstructionNode.create(new ParallelInstruction().setName("Flatten2").setFlatten(new FlattenInstruction()), Nodes.ExecutionLocation.UNKNOWN);
Node flatten2PCollection = InstructionOutputNode.create(new InstructionOutput().setName("Flatten2.out"), PCOLLECTION_ID);
Node c = ParallelInstructionNode.create(new ParallelInstruction().setName("C"), Nodes.ExecutionLocation.UNKNOWN);
Edge cOutput = DefaultEdge.create();
Node cPCollection = InstructionOutputNode.create(new InstructionOutput().setName("C.out"), PCOLLECTION_ID);
// A ------\
// Flatten1 --\
// B -out1-/ Flatten2 --> C
// \-out2-------------/
MutableNetwork<Node, Edge> network = createEmptyNetwork();
network.addNode(a);
network.addNode(aPCollection);
network.addNode(b);
network.addNode(bOut1PCollection);
network.addNode(bOut2PCollection);
network.addNode(flatten1);
network.addNode(flatten1PCollection);
network.addNode(flatten2);
network.addNode(flatten2PCollection);
network.addNode(c);
network.addNode(cPCollection);
network.addEdge(a, aPCollection, aOutput);
network.addEdge(aPCollection, flatten1, DefaultEdge.create());
network.addEdge(b, bOut1PCollection, bOut1);
network.addEdge(b, bOut2PCollection, bOut2);
network.addEdge(bOut1PCollection, flatten1, DefaultEdge.create());
network.addEdge(bOut2PCollection, flatten2, DefaultEdge.create());
network.addEdge(flatten1, flatten1PCollection, DefaultEdge.create());
network.addEdge(flatten1PCollection, flatten2, DefaultEdge.create());
network.addEdge(flatten2, flatten2PCollection, DefaultEdge.create());
network.addEdge(flatten2PCollection, c, DefaultEdge.create());
network.addEdge(c, cPCollection, cOutput);
// A ------\
// B -out1--> C
// \-out2-/
assertThatFlattenIsProperlyRemoved(network);
}
Aggregations