Usage of org.apache.beam.runners.dataflow.worker.graph.Nodes.OperationNode in the Apache Beam project.
Source: class IntrinsicMapTaskExecutorFactory, method createParDoOperation.
/**
 * Wraps the given ParDo instruction node in an {@link OperationNode} holding a
 * {@link ParDoOperation}.
 *
 * <p>The receivers array produced by {@code getOutputReceivers} is ordered by successor; the
 * tag-to-index map built here records, for every multi-output tag, which receiver slot that
 * tag feeds so the {@link ParDoFn} can route outputs.
 *
 * @param network the instruction graph being converted to operations
 * @param node the ParDo instruction node to convert
 * @param options pipeline options forwarded to the {@link ParDoFn} factory
 * @param executionContext execution context for the current work item
 * @param operationContext per-operation context (counters, state sampling)
 * @return an {@link OperationNode} wrapping the constructed {@link ParDoOperation}
 * @throws Exception if the {@link ParDoFn} cannot be created from the cloud spec
 */
private OperationNode createParDoOperation(Network<Node, Edge> network, ParallelInstructionNode node, PipelineOptions options, DataflowExecutionContext<?> executionContext, DataflowOperationContext operationContext) throws Exception {
  ParDoInstruction parDoInstruction = node.getParallelInstruction().getParDo();
  // By convention the first multi-output info names the main output tag.
  TupleTag<?> mainOutput = tupleTag(parDoInstruction.getMultiOutputInfos().get(0));
  // Successors are numbered in the order network.successors() yields them; parallel edges
  // into the same successor share a single receiver index.
  ImmutableMap.Builder<TupleTag<?>, Integer> tagToReceiverIndex = ImmutableMap.builder();
  int receiverIndex = 0;
  for (Node successor : network.successors(node)) {
    for (Edge edge : network.edgesConnecting(node, successor)) {
      MultiOutputInfoEdge outputEdge = (MultiOutputInfoEdge) edge;
      tagToReceiverIndex.put(tupleTag(outputEdge.getMultiOutputInfo()), receiverIndex);
    }
    receiverIndex += 1;
  }
  ParDoFn parDoFn =
      parDoFnFactory.create(
          options,
          CloudObject.fromSpec(parDoInstruction.getUserFn()),
          parDoInstruction.getSideInputs(),
          mainOutput,
          tagToReceiverIndex.build(),
          executionContext,
          operationContext);
  return OperationNode.create(
      new ParDoOperation(parDoFn, getOutputReceivers(network, node), operationContext));
}
Usage of org.apache.beam.runners.dataflow.worker.graph.Nodes.OperationNode in the Apache Beam project.
Source: class IntrinsicMapTaskExecutorFactoryTest, method testCreatePartialGroupByKeyOperation.
/** Verifies that a partial-GBK instruction is converted into a ParDoOperation. */
@Test
public void testCreatePartialGroupByKeyOperation() throws Exception {
  int producerIndex = 1;
  int producerOutputNum = 2;
  // Node under test: a PGBK instruction whose single output feeds one receiver.
  ParallelInstructionNode pgbkNode =
      ParallelInstructionNode.create(
          createPartialGroupByKeyInstruction(producerIndex, producerOutputNum),
          ExecutionLocation.UNKNOWN);
  Node receiverNode =
      IntrinsicMapTaskExecutorFactory.createOutputReceiversTransform(STAGE, counterSet)
          .apply(
              InstructionOutputNode.create(
                  pgbkNode.getParallelInstruction().getOutputs().get(0), PCOLLECTION_ID));
  when(network.successors(pgbkNode)).thenReturn(ImmutableSet.<Node>of(receiverNode));
  when(network.outDegree(pgbkNode)).thenReturn(1);
  Node result =
      mapTaskExecutorFactory
          .createOperationTransformForParallelInstructionNodes(
              STAGE,
              network,
              PipelineOptionsFactory.create(),
              readerRegistry,
              sinkRegistry,
              BatchModeExecutionContext.forTesting(options, counterSet, "testStage"))
          .apply(pgbkNode);
  // PGBK runs as a ParDoOperation with exactly one, still-unstarted, receiver.
  assertThat(result, instanceOf(OperationNode.class));
  Operation operation = ((OperationNode) result).getOperation();
  assertThat(operation, instanceOf(ParDoOperation.class));
  ParDoOperation pgbkOperation = (ParDoOperation) operation;
  assertEquals(1, pgbkOperation.receivers.length);
  assertEquals(0, pgbkOperation.receivers[0].getReceiverCount());
  assertEquals(Operation.InitializationState.UNSTARTED, pgbkOperation.initializationState);
}
Usage of org.apache.beam.runners.dataflow.worker.graph.Nodes.OperationNode in the Apache Beam project.
Source: class IntrinsicMapTaskExecutorFactoryTest, method testCreateParDoOperation.
/** Verifies that a ParDo instruction is converted into a ParDoOperation wired to one receiver. */
@Test
public void testCreateParDoOperation() throws Exception {
  int producerIndex = 1;
  int producerOutputNum = 2;
  BatchModeExecutionContext context =
      BatchModeExecutionContext.forTesting(options, counterSet, "testStage");
  ParallelInstructionNode parDoNode =
      ParallelInstructionNode.create(
          createParDoInstruction(producerIndex, producerOutputNum, "DoFn"),
          ExecutionLocation.UNKNOWN);
  Node receiverNode =
      IntrinsicMapTaskExecutorFactory.createOutputReceiversTransform(STAGE, counterSet)
          .apply(
              InstructionOutputNode.create(
                  parDoNode.getParallelInstruction().getOutputs().get(0), PCOLLECTION_ID));
  // Stub the graph: one successor reached over a single multi-output edge.
  when(network.successors(parDoNode)).thenReturn(ImmutableSet.of(receiverNode));
  when(network.outDegree(parDoNode)).thenReturn(1);
  when(network.edgesConnecting(parDoNode, receiverNode))
      .thenReturn(
          ImmutableSet.<Edge>of(
              MultiOutputInfoEdge.create(
                  parDoNode.getParallelInstruction().getParDo().getMultiOutputInfos().get(0))));
  Node result =
      mapTaskExecutorFactory
          .createOperationTransformForParallelInstructionNodes(
              STAGE, network, options, readerRegistry, sinkRegistry, context)
          .apply(parDoNode);
  assertThat(result, instanceOf(OperationNode.class));
  Operation operation = ((OperationNode) result).getOperation();
  assertThat(operation, instanceOf(ParDoOperation.class));
  ParDoOperation parDoOperation = (ParDoOperation) operation;
  assertEquals(1, parDoOperation.receivers.length);
  assertEquals(0, parDoOperation.receivers[0].getReceiverCount());
  assertEquals(Operation.InitializationState.UNSTARTED, parDoOperation.initializationState);
}
Usage of org.apache.beam.runners.dataflow.worker.graph.Nodes.OperationNode in the Apache Beam project.
Source: class IntrinsicMapTaskExecutorFactoryTest, method testCreatePartialGroupByKeyOperationWithCombine.
/**
 * Verifies that a partial-GBK instruction carrying a serialized value-combining function is
 * still converted into a ParDoOperation.
 */
@Test
public void testCreatePartialGroupByKeyOperationWithCombine() throws Exception {
  int producerIndex = 1;
  int producerOutputNum = 2;
  ParallelInstruction pgbkInstruction =
      createPartialGroupByKeyInstruction(producerIndex, producerOutputNum);
  // Attach a Sum-of-integers CombineFn, serialized into the instruction's cloud spec.
  AppliedCombineFn<?, ?, ?, ?> combineFn =
      AppliedCombineFn.withInputCoder(
          Sum.ofIntegers(),
          CoderRegistry.createDefault(),
          KvCoder.of(StringUtf8Coder.of(), BigEndianIntegerCoder.of()));
  CloudObject cloudCombineFn = CloudObject.forClassName("CombineFn");
  addString(
      cloudCombineFn,
      PropertyNames.SERIALIZED_FN,
      byteArrayToJsonString(serializeToByteArray(combineFn)));
  pgbkInstruction.getPartialGroupByKey().setValueCombiningFn(cloudCombineFn);
  ParallelInstructionNode pgbkNode =
      ParallelInstructionNode.create(pgbkInstruction, ExecutionLocation.UNKNOWN);
  Node receiverNode =
      IntrinsicMapTaskExecutorFactory.createOutputReceiversTransform(STAGE, counterSet)
          .apply(
              InstructionOutputNode.create(
                  pgbkNode.getParallelInstruction().getOutputs().get(0), PCOLLECTION_ID));
  when(network.successors(pgbkNode)).thenReturn(ImmutableSet.<Node>of(receiverNode));
  when(network.outDegree(pgbkNode)).thenReturn(1);
  Node result =
      mapTaskExecutorFactory
          .createOperationTransformForParallelInstructionNodes(
              STAGE,
              network,
              options,
              readerRegistry,
              sinkRegistry,
              BatchModeExecutionContext.forTesting(options, counterSet, "testStage"))
          .apply(pgbkNode);
  assertThat(result, instanceOf(OperationNode.class));
  Operation operation = ((OperationNode) result).getOperation();
  assertThat(operation, instanceOf(ParDoOperation.class));
  ParDoOperation pgbkOperation = (ParDoOperation) operation;
  assertEquals(1, pgbkOperation.receivers.length);
  assertEquals(0, pgbkOperation.receivers[0].getReceiverCount());
  assertEquals(Operation.InitializationState.UNSTARTED, pgbkOperation.initializationState);
}
Usage of org.apache.beam.runners.dataflow.worker.graph.Nodes.OperationNode in the Apache Beam project.
Source: class IntrinsicMapTaskExecutorFactoryTest, method testCreateReadOperation.
/**
 * Verifies that a Read instruction becomes a ReadOperation using the test reader, and that
 * the expected output and byte-count counters are reported.
 */
@Test
public void testCreateReadOperation() throws Exception {
  ParallelInstructionNode readNode =
      ParallelInstructionNode.create(createReadInstruction("Read"), ExecutionLocation.UNKNOWN);
  Node receiverNode =
      IntrinsicMapTaskExecutorFactory.createOutputReceiversTransform(STAGE, counterSet)
          .apply(
              InstructionOutputNode.create(
                  readNode.getParallelInstruction().getOutputs().get(0), PCOLLECTION_ID));
  when(network.successors(readNode)).thenReturn(ImmutableSet.<Node>of(receiverNode));
  when(network.outDegree(readNode)).thenReturn(1);
  Node result =
      mapTaskExecutorFactory
          .createOperationTransformForParallelInstructionNodes(
              STAGE,
              network,
              PipelineOptionsFactory.create(),
              readerRegistry,
              sinkRegistry,
              BatchModeExecutionContext.forTesting(options, counterSet, "testStage"))
          .apply(readNode);
  assertThat(result, instanceOf(OperationNode.class));
  Operation operation = ((OperationNode) result).getOperation();
  assertThat(operation, instanceOf(ReadOperation.class));
  ReadOperation readOperation = (ReadOperation) operation;
  assertEquals(1, readOperation.receivers.length);
  assertEquals(0, readOperation.receivers[0].getReceiverCount());
  assertEquals(Operation.InitializationState.UNSTARTED, readOperation.initializationState);
  assertThat(readOperation.reader, instanceOf(ReaderFactoryTest.TestReader.class));
  // Counter checks: output counters plus the per-source byte count, and nothing else.
  counterSet.extractUpdates(false, updateExtractor);
  verifyOutputCounters(updateExtractor, "read_output_name");
  verify(updateExtractor).longSum(eq(named("Read-ByteCount")), anyBoolean(), anyLong());
  verifyNoMoreInteractions(updateExtractor);
}
Aggregations