use of org.apache.beam.runners.dataflow.worker.graph.Nodes.ParallelInstructionNode in project beam by apache.
the class BeamFnMapTaskExecutorFactory method createOperationTransformForParallelInstructionNodes.
/**
* Creates an {@link Operation} from the given {@link ParallelInstruction} definition using the
* provided {@link ReaderFactory}.
*/
Function<Node, Node> createOperationTransformForParallelInstructionNodes(final String stageName, final Network<Node, Edge> network, final PipelineOptions options, final ReaderFactory readerFactory, final SinkFactory sinkFactory, final DataflowExecutionContext<?> executionContext) {
return new TypeSafeNodeFunction<ParallelInstructionNode>(ParallelInstructionNode.class) {
@Override
public Node typedApply(ParallelInstructionNode node) {
ParallelInstruction instruction = node.getParallelInstruction();
NameContext nameContext = NameContext.create(stageName, instruction.getOriginalName(), instruction.getSystemName(), instruction.getName());
try {
DataflowOperationContext context = executionContext.createOperationContext(nameContext);
if (instruction.getRead() != null) {
return createReadOperation(network, node, options, readerFactory, executionContext, context);
} else if (instruction.getWrite() != null) {
return createWriteOperation(node, options, sinkFactory, executionContext, context);
} else if (instruction.getParDo() != null) {
return createParDoOperation(network, node, options, executionContext, context);
} else if (instruction.getPartialGroupByKey() != null) {
return createPartialGroupByKeyOperation(network, node, options, executionContext, context);
} else if (instruction.getFlatten() != null) {
return createFlattenOperation(network, node, context);
} else {
throw new IllegalArgumentException(String.format("Unexpected instruction: %s", instruction));
}
} catch (Exception e) {
throw new RuntimeException(e);
}
}
};
}
use of org.apache.beam.runners.dataflow.worker.graph.Nodes.ParallelInstructionNode in project beam by apache.
the class BeamFnMapTaskExecutorFactory method createParDoOperation.
private OperationNode createParDoOperation(Network<Node, Edge> network, ParallelInstructionNode node, PipelineOptions options, DataflowExecutionContext<?> executionContext, DataflowOperationContext operationContext) throws Exception {
ParallelInstruction instruction = node.getParallelInstruction();
ParDoInstruction parDo = instruction.getParDo();
TupleTag<?> mainOutputTag = tupleTag(parDo.getMultiOutputInfos().get(0));
ImmutableMap.Builder<TupleTag<?>, Integer> outputTagsToReceiverIndicesBuilder = ImmutableMap.builder();
int successorOffset = 0;
for (Node successor : network.successors(node)) {
for (Edge edge : network.edgesConnecting(node, successor)) {
outputTagsToReceiverIndicesBuilder.put(tupleTag(((MultiOutputInfoEdge) edge).getMultiOutputInfo()), successorOffset);
}
successorOffset += 1;
}
ParDoFn fn = parDoFnFactory.create(options, CloudObject.fromSpec(parDo.getUserFn()), parDo.getSideInputs(), mainOutputTag, outputTagsToReceiverIndicesBuilder.build(), executionContext, operationContext);
OutputReceiver[] receivers = getOutputReceivers(network, node);
return OperationNode.create(new ParDoOperation(fn, receivers, operationContext));
}
use of org.apache.beam.runners.dataflow.worker.graph.Nodes.ParallelInstructionNode in project beam by apache.
the class IntrinsicMapTaskExecutorFactoryTest method testCreateFlattenOperation.
@Test
public void testCreateFlattenOperation() throws Exception {
int producerIndex1 = 1;
int producerOutputNum1 = 2;
int producerIndex2 = 0;
int producerOutputNum2 = 1;
ParallelInstructionNode instructionNode = ParallelInstructionNode.create(createFlattenInstruction(producerIndex1, producerOutputNum1, producerIndex2, producerOutputNum2, "Flatten"), ExecutionLocation.UNKNOWN);
when(network.successors(instructionNode)).thenReturn(ImmutableSet.<Node>of(IntrinsicMapTaskExecutorFactory.createOutputReceiversTransform(STAGE, counterSet).apply(InstructionOutputNode.create(instructionNode.getParallelInstruction().getOutputs().get(0), PCOLLECTION_ID))));
when(network.outDegree(instructionNode)).thenReturn(1);
Node operationNode = mapTaskExecutorFactory.createOperationTransformForParallelInstructionNodes(STAGE, network, options, readerRegistry, sinkRegistry, BatchModeExecutionContext.forTesting(options, counterSet, "testStage")).apply(instructionNode);
assertThat(operationNode, instanceOf(OperationNode.class));
assertThat(((OperationNode) operationNode).getOperation(), instanceOf(FlattenOperation.class));
FlattenOperation flattenOperation = (FlattenOperation) ((OperationNode) operationNode).getOperation();
assertEquals(1, flattenOperation.receivers.length);
assertEquals(0, flattenOperation.receivers[0].getReceiverCount());
assertEquals(Operation.InitializationState.UNSTARTED, flattenOperation.initializationState);
}
use of org.apache.beam.runners.dataflow.worker.graph.Nodes.ParallelInstructionNode in project beam by apache.
the class IntrinsicMapTaskExecutorFactoryTest method testCreateWriteOperation.
@SuppressWarnings("unchecked")
@Test
public void testCreateWriteOperation() throws Exception {
int producerIndex = 1;
int producerOutputNum = 2;
ParallelInstructionNode instructionNode = ParallelInstructionNode.create(createWriteInstruction(producerIndex, producerOutputNum, "WriteOperation"), ExecutionLocation.UNKNOWN);
Node operationNode = mapTaskExecutorFactory.createOperationTransformForParallelInstructionNodes(STAGE, network, options, readerRegistry, sinkRegistry, BatchModeExecutionContext.forTesting(options, counterSet, "testStage")).apply(instructionNode);
assertThat(operationNode, instanceOf(OperationNode.class));
assertThat(((OperationNode) operationNode).getOperation(), instanceOf(WriteOperation.class));
WriteOperation writeOperation = (WriteOperation) ((OperationNode) operationNode).getOperation();
assertEquals(0, writeOperation.receivers.length);
assertEquals(Operation.InitializationState.UNSTARTED, writeOperation.initializationState);
assertThat(writeOperation.sink, instanceOf(SizeReportingSinkWrapper.class));
assertThat(((SizeReportingSinkWrapper<?>) writeOperation.sink).getUnderlyingSink(), instanceOf(TestSink.class));
counterSet.extractUpdates(false, updateExtractor);
verify(updateExtractor).longSum(eq(named("WriteOperation-ByteCount")), anyBoolean(), anyLong());
verifyNoMoreInteractions(updateExtractor);
}
use of org.apache.beam.runners.dataflow.worker.graph.Nodes.ParallelInstructionNode in project beam by apache.
the class IntrinsicMapTaskExecutorFactory method createOperationTransformForParallelInstructionNodes.
/**
* Creates an {@link Operation} from the given {@link ParallelInstruction} definition using the
* provided {@link ReaderFactory}.
*/
Function<Node, Node> createOperationTransformForParallelInstructionNodes(final String stageName, final Network<Node, Edge> network, final PipelineOptions options, final ReaderFactory readerFactory, final SinkFactory sinkFactory, final DataflowExecutionContext<?> executionContext) {
return new TypeSafeNodeFunction<ParallelInstructionNode>(ParallelInstructionNode.class) {
@Override
public Node typedApply(ParallelInstructionNode node) {
ParallelInstruction instruction = node.getParallelInstruction();
NameContext nameContext = NameContext.create(stageName, instruction.getOriginalName(), instruction.getSystemName(), instruction.getName());
try {
DataflowOperationContext context = executionContext.createOperationContext(nameContext);
if (instruction.getRead() != null) {
return createReadOperation(network, node, options, readerFactory, executionContext, context);
} else if (instruction.getWrite() != null) {
return createWriteOperation(node, options, sinkFactory, executionContext, context);
} else if (instruction.getParDo() != null) {
return createParDoOperation(network, node, options, executionContext, context);
} else if (instruction.getPartialGroupByKey() != null) {
return createPartialGroupByKeyOperation(network, node, options, executionContext, context);
} else if (instruction.getFlatten() != null) {
return createFlattenOperation(network, node, context);
} else {
throw new IllegalArgumentException(String.format("Unexpected instruction: %s", instruction));
}
} catch (Exception e) {
throw new RuntimeException(e);
}
}
};
}
Aggregations