Example usage of com.google.api.services.dataflow.model.ParDoInstruction in the Apache Beam project:
class BeamFnMapTaskExecutorFactory, method createParDoOperation.
/**
 * Builds an {@link OperationNode} that wraps a {@link ParDoOperation} for the given ParDo
 * instruction node.
 *
 * <p>Each successor of {@code node} in the network is assigned a receiver index (in the
 * successor iteration order), and every tagged output edge leading to that successor is mapped
 * to that index. The first {@code MultiOutputInfo} of the instruction is treated as the main
 * output.
 *
 * @param network the instruction graph containing {@code node} and its output edges
 * @param node the ParDo instruction node to translate
 * @param options pipeline options forwarded to the {@code ParDoFn} factory
 * @param executionContext execution context forwarded to the {@code ParDoFn} factory
 * @param operationContext context attached to the created operation
 * @throws Exception if the {@code ParDoFn} cannot be created
 */
private OperationNode createParDoOperation(Network<Node, Edge> network, ParallelInstructionNode node, PipelineOptions options, DataflowExecutionContext<?> executionContext, DataflowOperationContext operationContext) throws Exception {
  ParDoInstruction parDoSpec = node.getParallelInstruction().getParDo();
  // By convention the first multi-output info identifies the main output.
  TupleTag<?> mainOutput = tupleTag(parDoSpec.getMultiOutputInfos().get(0));
  // Map each tagged output to the index of the downstream receiver it feeds.
  ImmutableMap.Builder<TupleTag<?>, Integer> tagToReceiverIndex = ImmutableMap.builder();
  int receiverIndex = 0;
  for (Node downstream : network.successors(node)) {
    for (Edge connectingEdge : network.edgesConnecting(node, downstream)) {
      MultiOutputInfo outputInfo = ((MultiOutputInfoEdge) connectingEdge).getMultiOutputInfo();
      tagToReceiverIndex.put(tupleTag(outputInfo), receiverIndex);
    }
    receiverIndex++;
  }
  ParDoFn doFn =
      parDoFnFactory.create(
          options,
          CloudObject.fromSpec(parDoSpec.getUserFn()),
          parDoSpec.getSideInputs(),
          mainOutput,
          tagToReceiverIndex.build(),
          executionContext,
          operationContext);
  OutputReceiver[] receivers = getOutputReceivers(network, node);
  return OperationNode.create(new ParDoOperation(doFn, receivers, operationContext));
}
Example usage of com.google.api.services.dataflow.model.ParDoInstruction in the Apache Beam project:
class LengthPrefixUnknownCodersTest, method testLengthPrefixParDoInstructionCoder.
@Test
public void testLengthPrefixParDoInstructionCoder() throws Exception {
  // Build a ParDo whose user fn carries the input coder as a property.
  CloudObject userFnSpec = CloudObject.forClassName(MERGE_BUCKETS_DO_FN);
  userFnSpec.put(
      WorkerPropertyNames.INPUT_CODER,
      CloudObjects.asCloudObject(windowedValueCoder, /*sdkComponents=*/ null));
  ParDoInstruction parDoInstruction = new ParDoInstruction();
  parDoInstruction.setUserFn(userFnSpec);
  instruction.setParDo(parDoInstruction);

  ParallelInstruction result = forParallelInstruction(instruction, false);

  // The rewritten instruction must carry the length-prefixed coder...
  assertEqualsAsJson(
      CloudObjects.asCloudObject(prefixedWindowedValueCoder, /*sdkComponents=*/ null),
      result.getParDo().getUserFn().get(WorkerPropertyNames.INPUT_CODER));
  // ...while the original ParDo instruction must not be mutated.
  assertEqualsAsJson(
      CloudObjects.asCloudObject(windowedValueCoder, /*sdkComponents=*/ null),
      parDoInstruction.getUserFn().get(WorkerPropertyNames.INPUT_CODER));
}
Example usage of com.google.api.services.dataflow.model.ParDoInstruction in the Apache Beam project:
class FixMultiOutputInfosOnParDoInstructionsTest, method createMapTaskWithParDo.
/**
 * Creates a single-instruction {@code MapTask} containing one ParDo with the given declared
 * output count and one {@code MultiOutputInfo} per supplied tag.
 *
 * @param numOutputs the value to set as the ParDo's number of outputs
 * @param tags one output tag per {@code MultiOutputInfo} to attach
 */
private static MapTask createMapTaskWithParDo(int numOutputs, String... tags) {
  List<MultiOutputInfo> outputInfos = new ArrayList<>(tags.length);
  for (String outputTag : tags) {
    MultiOutputInfo outputInfo = new MultiOutputInfo();
    outputInfo.setTag(outputTag);
    outputInfos.add(outputInfo);
  }
  ParDoInstruction parDo = new ParDoInstruction();
  parDo.setNumOutputs(numOutputs);
  parDo.setMultiOutputInfos(outputInfos);
  ParallelInstruction parallelInstruction = new ParallelInstruction();
  parallelInstruction.setParDo(parDo);
  MapTask task = new MapTask();
  task.setInstructions(ImmutableList.of(parallelInstruction));
  return task;
}
Example usage of com.google.api.services.dataflow.model.ParDoInstruction in the Apache Beam project:
class MapTaskToNetworkFunctionTest, method testMultipleOutput.
@Test
public void testMultipleOutput() {
  // Topology under test:
  //          /---> WriteA
  // Read ---> ParDo
  //          \---> WriteB
  InstructionOutput readOut = createInstructionOutput("Read.out");
  ParallelInstruction readInstruction = createParallelInstruction("Read", readOut);
  readInstruction.setRead(new ReadInstruction());

  // ParDo consuming Read.out and producing two tagged outputs.
  MultiOutputInfo taggedOutput1 = createMultiOutputInfo("output1");
  MultiOutputInfo taggedOutput2 = createMultiOutputInfo("output2");
  ParDoInstruction parDoSpec = new ParDoInstruction();
  // Read.out
  parDoSpec.setInput(createInstructionInput(0, 0));
  parDoSpec.setMultiOutputInfos(ImmutableList.of(taggedOutput1, taggedOutput2));
  InstructionOutput parDoOut1 = createInstructionOutput("ParDo.out1");
  InstructionOutput parDoOut2 = createInstructionOutput("ParDo.out2");
  ParallelInstruction parDoParallelInstruction =
      createParallelInstruction("ParDo", parDoOut1, parDoOut2);
  parDoParallelInstruction.setParDo(parDoSpec);

  // One writer per ParDo output.
  WriteInstruction writeASpec = new WriteInstruction();
  // ParDo.out1
  writeASpec.setInput(createInstructionInput(1, 0));
  ParallelInstruction writeAInstruction = createParallelInstruction("WriteA");
  writeAInstruction.setWrite(writeASpec);
  WriteInstruction writeBSpec = new WriteInstruction();
  // ParDo.out2
  writeBSpec.setInput(createInstructionInput(1, 1));
  ParallelInstruction writeBInstruction = createParallelInstruction("WriteB");
  writeBInstruction.setWrite(writeBSpec);

  MapTask task = new MapTask();
  task.setInstructions(
      ImmutableList.of(
          readInstruction, parDoParallelInstruction, writeAInstruction, writeBInstruction));
  task.setFactory(Transport.getJsonFactory());

  Network<Node, Edge> network =
      new MapTaskToNetworkFunction(IdGenerators.decrementingLongs()).apply(task);
  assertNetworkProperties(network);
  // 4 instruction nodes + 3 output nodes; 6 edges connecting them.
  assertEquals(7, network.nodes().size());
  assertEquals(6, network.edges().size());

  ParallelInstructionNode parDoNode = get(network, parDoParallelInstruction);
  ParallelInstructionNode writeANode = get(network, writeAInstruction);
  ParallelInstructionNode writeBNode = get(network, writeBInstruction);
  // Each writer is fed by exactly one of the ParDo's output nodes.
  InstructionOutputNode outputNode1 = getOnlyPredecessor(network, writeANode);
  assertEquals(parDoOut1, outputNode1.getInstructionOutput());
  InstructionOutputNode outputNode2 = getOnlyPredecessor(network, writeBNode);
  assertEquals(parDoOut2, outputNode2.getInstructionOutput());
  assertThat(
      network.successors(parDoNode),
      Matchers.<Node>containsInAnyOrder(outputNode1, outputNode2));
  // The edges out of the ParDo carry the matching MultiOutputInfo tags.
  assertEquals(
      taggedOutput1,
      ((MultiOutputInfoEdge)
              Iterables.getOnlyElement(network.edgesConnecting(parDoNode, outputNode1)))
          .getMultiOutputInfo());
  assertEquals(
      taggedOutput2,
      ((MultiOutputInfoEdge)
              Iterables.getOnlyElement(network.edgesConnecting(parDoNode, outputNode2)))
          .getMultiOutputInfo());
}
Example usage of com.google.api.services.dataflow.model.ParDoInstruction in the Apache Beam project:
class LengthPrefixUnknownCoders, method forParDoInstruction.
/**
 * Wrap unknown coders with a {@link LengthPrefixCoder} for the given {@link ParDoInstruction}.
 *
 * <p>Only specific well-known fn types are rewritten; which coder property is rewritten
 * depends on the fn type. When a rewrite occurs the input is cloned first, so the caller's
 * instruction is never mutated; for any other fn type the input is returned unchanged.
 *
 * @param input the instruction whose user fn coder properties may be length-prefixed
 * @param replaceWithByteArrayCoder whether unknown coders are replaced outright rather than
 *     wrapped (forwarded to {@code forCodec})
 * @throws Exception if cloning or coder rewriting fails
 */
private static ParDoInstruction forParDoInstruction(ParDoInstruction input, boolean replaceWithByteArrayCoder) throws Exception {
  String fnType = CloudObject.fromSpec(input.getUserFn()).getClassName();
  // NOTE(review): assumes MERGE_WINDOWS_DO_FN is a collection of fn type names, so this is a
  // membership test — confirm against its declaration elsewhere in this class.
  if (MERGE_WINDOWS_DO_FN.contains(fnType)) {
    ParDoInstruction updated = clone(input, ParDoInstruction.class);
    Map<String, Object> coderSpec =
        Structs.getObject(updated.getUserFn(), WorkerPropertyNames.INPUT_CODER);
    updated
        .getUserFn()
        .put(WorkerPropertyNames.INPUT_CODER, forCodec(coderSpec, replaceWithByteArrayCoder));
    return updated;
  }
  if ("CreateIsmShardKeyAndSortKeyDoFn".equals(fnType)
      || "ToIsmRecordForMultimapDoFn".equals(fnType)
      || "StreamingPCollectionViewWriterDoFn".equals(fnType)) {
    ParDoInstruction updated = clone(input, ParDoInstruction.class);
    Map<String, Object> coderSpec =
        Structs.getObject(updated.getUserFn(), PropertyNames.ENCODING);
    updated
        .getUserFn()
        .put(PropertyNames.ENCODING, forCodec(coderSpec, replaceWithByteArrayCoder));
    return updated;
  }
  // TODO: Handle other types of ParDos.
  return input;
}
End of aggregated ParDoInstruction usage examples.