Use of com.google.api.services.dataflow.model.ParallelInstruction in the Apache Beam project.
Class IntrinsicMapTaskExecutorFactoryTest, method testCreateMapTaskExecutor.
/**
 * Builds a map task made of a read, two ParDos, a flatten and a write instruction, creates an
 * executor for it, and checks the created operations, how their receivers are wired together,
 * and which counter updates are reported.
 */
@Test
public void testCreateMapTaskExecutor() throws Exception {
  List<ParallelInstruction> parallelInstructions =
      Arrays.asList(
          createReadInstruction("Read"),
          createParDoInstruction(0, 0, "DoFn1"),
          createParDoInstruction(0, 0, "DoFnWithContext"),
          createFlattenInstruction(1, 0, 2, 0, "Flatten"),
          createWriteInstruction(3, 0, "Write"));

  MapTask task = new MapTask();
  task.setStageName(STAGE);
  task.setSystemName("systemName");
  task.setInstructions(parallelInstructions);
  task.setFactory(Transport.getJsonFactory());

  try (DataflowMapTaskExecutor mapTaskExecutor =
      mapTaskExecutorFactory.create(
          null /* beamFnControlClientHandler */,
          null /* GrpcFnServer<GrpcDataService> */,
          null /* ApiServiceDescriptor */,
          null /* GrpcFnServer<GrpcStateService> */,
          mapTaskToNetwork.apply(task),
          options,
          STAGE,
          readerRegistry,
          sinkRegistry,
          BatchModeExecutionContext.forTesting(options, counterSet, "testStage"),
          counterSet,
          idGenerator)) {
    // Safe covariant cast not expressible without rawtypes.
    // TODO(https://issues.apache.org/jira/browse/BEAM-10556)
    @SuppressWarnings({"rawtypes", "unchecked"})
    List<Object> createdOperations = (List) mapTaskExecutor.operations;

    // One operation of the matching kind must exist for every instruction.
    assertThat(
        createdOperations,
        hasItems(
            instanceOf(ReadOperation.class),
            instanceOf(ParDoOperation.class),
            instanceOf(ParDoOperation.class),
            instanceOf(FlattenOperation.class),
            instanceOf(WriteOperation.class)));

    // Verify that the inputs are attached: the read feeds both ParDos ...
    Iterable<ReadOperation> reads = Iterables.filter(createdOperations, ReadOperation.class);
    ReadOperation read = Iterables.getOnlyElement(reads);
    assertEquals(2, read.receivers[0].getReceiverCount());

    // ... each ParDo feeds the single flatten ...
    Iterable<FlattenOperation> flattens =
        Iterables.filter(createdOperations, FlattenOperation.class);
    FlattenOperation flatten = Iterables.getOnlyElement(flattens);
    Iterable<ParDoOperation> parDos = Iterables.filter(createdOperations, ParDoOperation.class);
    for (ParDoOperation parDo : parDos) {
      assertSame(flatten, parDo.receivers[0].getOnlyReceiver());
    }

    // ... and the flatten feeds the single write.
    Iterable<WriteOperation> writes = Iterables.filter(createdOperations, WriteOperation.class);
    WriteOperation write = Iterables.getOnlyElement(writes);
    assertSame(write, flatten.receivers[0].getOnlyReceiver());
  }

  @SuppressWarnings("unchecked")
  Counter<Long, ?> otherStateMsecs =
      (Counter<Long, ?>) counterSet.getExistingCounter("test-other-msecs");
  // "other" state only got created upon MapTaskExecutor.execute().
  assertNull(otherStateMsecs);

  counterSet.extractUpdates(false, updateExtractor);
  verifyOutputCounters(
      updateExtractor,
      "read_output_name",
      "DoFn1_output",
      "DoFnWithContext_output",
      "flatten_output_name");
  verify(updateExtractor).longSum(eq(named("Read-ByteCount")), anyBoolean(), anyLong());
  verify(updateExtractor).longSum(eq(named("Write-ByteCount")), anyBoolean(), anyLong());
  verifyNoMoreInteractions(updateExtractor);
}
Use of com.google.api.services.dataflow.model.ParallelInstruction in the Apache Beam project.
Class IntrinsicMapTaskExecutorFactory, method createOperationTransformForParallelInstructionNodes.
/**
 * Returns a function that, applied to a {@link ParallelInstructionNode}, builds the node for the
 * operation described by that node's {@link ParallelInstruction}.
 *
 * <p>The instruction is probed for a read, write, ParDo, partial group-by-key and flatten
 * payload, in that order, and the first one present selects the {@code create*Operation} helper
 * that is called. Every exception raised while doing so, including the {@link
 * IllegalArgumentException} thrown for an instruction carrying none of these payloads, is
 * rethrown wrapped in a {@link RuntimeException}.
 *
 * @param stageName stage name used when building each operation's {@link NameContext}
 * @param network network handed to the read, ParDo, partial group-by-key and flatten helpers
 * @param options pipeline options handed to the operation helpers
 * @param readerFactory factory handed to the read helper
 * @param sinkFactory factory handed to the write helper
 * @param executionContext context used to create one operation context per instruction
 */
Function<Node, Node> createOperationTransformForParallelInstructionNodes(final String stageName, final Network<Node, Edge> network, final PipelineOptions options, final ReaderFactory readerFactory, final SinkFactory sinkFactory, final DataflowExecutionContext<?> executionContext) {
  return new TypeSafeNodeFunction<ParallelInstructionNode>(ParallelInstructionNode.class) {
    @Override
    public Node typedApply(ParallelInstructionNode node) {
      ParallelInstruction parallelInstruction = node.getParallelInstruction();
      NameContext names =
          NameContext.create(
              stageName,
              parallelInstruction.getOriginalName(),
              parallelInstruction.getSystemName(),
              parallelInstruction.getName());
      try {
        // The operation context is created up front, before the instruction kind is known.
        DataflowOperationContext operationContext =
            executionContext.createOperationContext(names);

        if (parallelInstruction.getRead() != null) {
          return createReadOperation(
              network, node, options, readerFactory, executionContext, operationContext);
        }
        if (parallelInstruction.getWrite() != null) {
          return createWriteOperation(
              node, options, sinkFactory, executionContext, operationContext);
        }
        if (parallelInstruction.getParDo() != null) {
          return createParDoOperation(network, node, options, executionContext, operationContext);
        }
        if (parallelInstruction.getPartialGroupByKey() != null) {
          return createPartialGroupByKeyOperation(
              network, node, options, executionContext, operationContext);
        }
        if (parallelInstruction.getFlatten() != null) {
          return createFlattenOperation(network, node, operationContext);
        }
        throw new IllegalArgumentException(
            String.format("Unexpected instruction: %s", parallelInstruction));
      } catch (Exception e) {
        // Also wraps the IllegalArgumentException thrown just above.
        throw new RuntimeException(e);
      }
    }
  };
}
Use of com.google.api.services.dataflow.model.ParallelInstruction in the Apache Beam project.
Class IntrinsicMapTaskExecutorFactory, method createWriteOperation.
/**
 * Builds the {@link OperationNode} wrapping a {@link WriteOperation} for the write instruction
 * held by {@code node}.
 *
 * <p>The coder is decoded from the sink's codec, the sink itself is created by {@code
 * sinkFactory} from the sink's spec, and the resulting write operation is given {@code
 * EMPTY_OUTPUT_RECEIVER_ARRAY} as its receivers.
 *
 * @param node node whose parallel instruction carries the write instruction
 * @param options pipeline options passed to {@code sinkFactory}
 * @param sinkFactory factory used to create the sink
 * @param executionContext execution context passed to {@code sinkFactory}
 * @param context operation context passed to {@code sinkFactory} and to the write operation
 * @throws Exception propagated from decoding the coder or creating the sink
 */
OperationNode createWriteOperation(ParallelInstructionNode node, PipelineOptions options, SinkFactory sinkFactory, DataflowExecutionContext executionContext, DataflowOperationContext context) throws Exception {
  WriteInstruction writeInstruction = node.getParallelInstruction().getWrite();

  // Decode the element coder first, then the sink specification, as separate steps.
  CloudObject codecSpec = CloudObject.fromSpec(writeInstruction.getSink().getCodec());
  Coder<?> elementCoder = CloudObjects.coderFromCloudObject(codecSpec);
  CloudObject sinkSpec = CloudObject.fromSpec(writeInstruction.getSink().getSpec());

  Sink<?> createdSink =
      sinkFactory.create(sinkSpec, elementCoder, options, executionContext, context);
  WriteOperation writeOperation =
      WriteOperation.create(createdSink, EMPTY_OUTPUT_RECEIVER_ARRAY, context);
  return OperationNode.create(writeOperation);
}
Use of com.google.api.services.dataflow.model.ParallelInstruction in the Apache Beam project.
Class FixMultiOutputInfosOnParDoInstructionsTest, method createMapTaskWithParDo.
/**
 * Creates a {@link MapTask} holding a single ParDo instruction.
 *
 * @param numOutputs value set as the ParDo's number of outputs
 * @param tags tags turned, in order, into the ParDo's {@link MultiOutputInfo} entries
 * @return a map task whose only instruction is the configured ParDo
 */
private static MapTask createMapTaskWithParDo(int numOutputs, String... tags) {
  // One MultiOutputInfo per tag, in the order the tags were given.
  List<MultiOutputInfo> outputInfos = new ArrayList<>(tags.length);
  for (int i = 0; i < tags.length; i++) {
    MultiOutputInfo info = new MultiOutputInfo();
    info.setTag(tags[i]);
    outputInfos.add(info);
  }

  ParDoInstruction parDo = new ParDoInstruction();
  parDo.setNumOutputs(numOutputs);
  parDo.setMultiOutputInfos(outputInfos);

  ParallelInstruction parallelInstruction = new ParallelInstruction();
  parallelInstruction.setParDo(parDo);

  MapTask result = new MapTask();
  result.setInstructions(ImmutableList.of(parallelInstruction));
  return result;
}
Use of com.google.api.services.dataflow.model.ParallelInstruction in the Apache Beam project.
Class DeduceNodeLocationsFunctionTest, method assertNodesIdenticalExceptForExecutionLocation.
/**
 * Asserts that both nodes are {@link ParallelInstructionNode}s wrapping equal {@link
 * ParallelInstruction}s. Only the wrapped instructions are compared, so the nodes may still
 * differ in ExecutionLocation.
 */
private void assertNodesIdenticalExceptForExecutionLocation(Node expected, Node actual) {
  // Check both types before casting so a mismatch fails as an assertion, not a ClassCastException.
  assertThat(expected, instanceOf(ParallelInstructionNode.class));
  assertThat(actual, instanceOf(ParallelInstructionNode.class));

  ParallelInstructionNode expectedNode = (ParallelInstructionNode) expected;
  ParallelInstructionNode actualNode = (ParallelInstructionNode) actual;
  assertEquals(expectedNode.getParallelInstruction(), actualNode.getParallelInstruction());
}
Aggregations