use of com.google.api.services.dataflow.model.ParallelInstruction in project beam by apache.
the class IntrinsicMapTaskExecutorFactory method createReadOperation.
/**
 * Builds the {@link OperationNode} wrapping a {@link ReadOperation} for the given instruction node.
 *
 * <p>The read instruction's source is first passed through
 * {@code CloudSourceUtils.flattenBaseSpecs}; its spec and codec are then converted into a
 * {@link CloudObject} and a {@link Coder} respectively, and handed to {@code readerFactory} to
 * obtain the reader that the operation will drive.
 *
 * @param network graph used to look up the output receivers of {@code node}
 * @param node node carrying the {@link ParallelInstruction} whose read part is materialized
 * @param options pipeline options forwarded to the reader factory
 * @param readerFactory factory that creates the {@link NativeReader} from the source spec
 * @param executionContext execution context forwarded to the reader factory
 * @param operationContext operation context shared by the reader and the created operation
 * @return a node wrapping the newly created read operation
 * @throws Exception if any of the delegated calls fails
 */
OperationNode createReadOperation(Network<Node, Edge> network, ParallelInstructionNode node, PipelineOptions options, ReaderFactory readerFactory, DataflowExecutionContext<?> executionContext, DataflowOperationContext operationContext) throws Exception {
  ReadInstruction readInstruction = node.getParallelInstruction().getRead();
  Source flattenedSource = CloudSourceUtils.flattenBaseSpecs(readInstruction.getSource());

  // Translate the cloud-side description into the spec/coder pair the reader factory expects.
  CloudObject readerSpec = CloudObject.fromSpec(flattenedSource.getSpec());
  CloudObject codecSpec = CloudObject.fromSpec(flattenedSource.getCodec());
  Coder<?> elementCoder = CloudObjects.coderFromCloudObject(codecSpec);

  NativeReader<?> nativeReader =
      readerFactory.create(readerSpec, elementCoder, options, executionContext, operationContext);
  OutputReceiver[] outputs = getOutputReceivers(network, node);

  ReadOperation readOperation = ReadOperation.create(nativeReader, outputs, operationContext);
  return OperationNode.create(readOperation);
}
use of com.google.api.services.dataflow.model.ParallelInstruction in project beam by apache.
the class IntrinsicMapTaskExecutorFactory method createPartialGroupByKeyOperation.
/**
 * Builds the {@link OperationNode} wrapping a {@link ParDoOperation} that performs a partial
 * group-by-key for the given instruction node.
 *
 * <p>The instruction's input element codec must decode to a {@link WindowedValueCoder} whose
 * value coder is a {@link KvCoder}; otherwise an {@link IllegalArgumentException} is thrown.
 * The value combining function is optional and is passed on as {@code null} when the
 * instruction does not define one.
 *
 * @param network graph used to look up the output receivers of {@code node}
 * @param node node carrying the {@link ParallelInstruction} with the partial group-by-key spec
 * @param options pipeline options forwarded to {@code PartialGroupByKeyParDoFns.create}
 * @param executionContext execution context forwarded to {@code PartialGroupByKeyParDoFns.create}
 * @param operationContext operation context shared by the created fn and operation
 * @param <K> key type of the keyed input elements
 * @return a node wrapping the newly created operation
 * @throws IllegalArgumentException if the input coder is not a windowed coder of key-value pairs
 * @throws Exception if any of the delegated calls fails
 */
<K> OperationNode createPartialGroupByKeyOperation(Network<Node, Edge> network, ParallelInstructionNode node, PipelineOptions options, DataflowExecutionContext<?> executionContext, DataflowOperationContext operationContext) throws Exception {
  PartialGroupByKeyInstruction pgbkSpec = node.getParallelInstruction().getPartialGroupByKey();
  OutputReceiver[] outputs = getOutputReceivers(network, node);

  // Outer layer: the input codec has to describe windowed values.
  CloudObject inputCodecSpec = CloudObject.fromSpec(pgbkSpec.getInputElementCodec());
  Coder<?> inputCoder = CloudObjects.coderFromCloudObject(inputCodecSpec);
  if (!(inputCoder instanceof WindowedValueCoder)) {
    String message = String.format("unexpected kind of input coder for PartialGroupByKeyOperation: %s", inputCoder);
    throw new IllegalArgumentException(message);
  }

  // Inner layer: the windowed values themselves have to be key-value pairs.
  Coder<?> valueCoder = ((WindowedValueCoder<?>) inputCoder).getValueCoder();
  if (!(valueCoder instanceof KvCoder)) {
    String message = String.format("unexpected kind of input element coder for PartialGroupByKeyOperation: %s", valueCoder);
    throw new IllegalArgumentException(message);
  }
  @SuppressWarnings("unchecked")
  KvCoder<K, ?> kvCoder = (KvCoder<K, ?>) valueCoder;

  // The combining function is optional; leave the spec null when the instruction has none.
  CloudObject combineFnSpec = null;
  if (pgbkSpec.getValueCombiningFn() != null) {
    combineFnSpec = CloudObject.fromSpec(pgbkSpec.getValueCombiningFn());
  }

  ParDoFn pgbkFn =
      PartialGroupByKeyParDoFns.create(
          options,
          kvCoder,
          combineFnSpec,
          pgbkSpec.getSideInputs(),
          Arrays.<Receiver>asList(outputs),
          executionContext,
          operationContext);
  return OperationNode.create(new ParDoOperation(pgbkFn, outputs, operationContext));
}
use of com.google.api.services.dataflow.model.ParallelInstruction in project beam by apache.
the class IntrinsicMapTaskExecutorFactoryTest method createPartialGroupByKeyInstruction.
/**
 * Creates a {@link ParallelInstruction} fixture holding a partial group-by-key step.
 *
 * <p>The step consumes output {@code producerOutputNum} of the instruction at
 * {@code producerIndex}. Its input codec is a full windowed-value coder over
 * {@code KV<String, Integer>} with interval windows, and its single output is coded as
 * {@code KV<String, Iterable<Integer>>}.
 *
 * @param producerIndex index of the instruction producing this step's input
 * @param producerOutputNum output number of the producer to read from
 * @return the populated instruction
 */
static ParallelInstruction createPartialGroupByKeyInstruction(int producerIndex, int producerOutputNum) {
  // Input side: where the elements come from and how they are encoded.
  InstructionInput input = new InstructionInput();
  input.setProducerInstructionIndex(producerIndex);
  input.setOutputNum(producerOutputNum);

  FullWindowedValueCoder<?> inputCoder =
      FullWindowedValueCoder.of(
          KvCoder.of(StringUtf8Coder.of(), BigEndianIntegerCoder.of()), IntervalWindowCoder.of());
  PartialGroupByKeyInstruction pgbk = new PartialGroupByKeyInstruction();
  pgbk.setInput(input);
  pgbk.setInputElementCodec(CloudObjects.asCloudObject(inputCoder, /*sdkComponents=*/ null));

  // Output side: a single named output carrying grouped values per key.
  KvCoder<?, ?> outputCoder =
      KvCoder.of(StringUtf8Coder.of(), IterableCoder.of(BigEndianIntegerCoder.of()));
  InstructionOutput pgbkOutput = new InstructionOutput();
  pgbkOutput.setName("pgbk_output_name");
  pgbkOutput.setCodec(CloudObjects.asCloudObject(outputCoder, /*sdkComponents=*/ null));
  pgbkOutput.setOriginalName("originalName");
  pgbkOutput.setSystemName("systemName");

  ParallelInstruction result = new ParallelInstruction();
  result.setOriginalName("pgbk_original_name");
  result.setSystemName("pgbk_system_name");
  result.setPartialGroupByKey(pgbk);
  result.setOutputs(Arrays.asList(pgbkOutput));
  return result;
}
use of com.google.api.services.dataflow.model.ParallelInstruction in project beam by apache.
the class IntrinsicMapTaskExecutorFactoryTest method createFlattenInstruction.
/**
 * Creates a {@link ParallelInstruction} fixture holding a flatten step with two inputs.
 *
 * <p>The instruction's system name is {@code systemName}, and its original name is
 * {@code systemName} with {@code "OriginalName"} appended. The single output is coded with
 * {@code StringUtf8Coder}.
 *
 * @param producerIndex1 index of the instruction producing the first input
 * @param producerOutputNum1 output number of the first producer to read from
 * @param producerIndex2 index of the instruction producing the second input
 * @param producerOutputNum2 output number of the second producer to read from
 * @param systemName system name assigned to the returned instruction
 * @return the populated instruction
 */
static ParallelInstruction createFlattenInstruction(int producerIndex1, int producerOutputNum1, int producerIndex2, int producerOutputNum2, String systemName) {
  InstructionInput firstInput = new InstructionInput();
  firstInput.setProducerInstructionIndex(producerIndex1);
  firstInput.setOutputNum(producerOutputNum1);

  InstructionInput secondInput = new InstructionInput();
  secondInput.setProducerInstructionIndex(producerIndex2);
  secondInput.setOutputNum(producerOutputNum2);

  // Inputs are registered in declaration order: first, then second.
  List<InstructionInput> flattenInputs = new ArrayList<>();
  flattenInputs.add(firstInput);
  flattenInputs.add(secondInput);

  FlattenInstruction flatten = new FlattenInstruction();
  flatten.setInputs(flattenInputs);

  InstructionOutput flattenOutput = new InstructionOutput();
  flattenOutput.setName("flatten_output_name");
  flattenOutput.setCodec(CloudObjects.asCloudObject(StringUtf8Coder.of(), /*sdkComponents=*/ null));
  flattenOutput.setOriginalName("originalName");
  flattenOutput.setSystemName("systemName");

  ParallelInstruction result = new ParallelInstruction();
  result.setFlatten(flatten);
  result.setOutputs(Arrays.asList(flattenOutput));
  result.setSystemName(systemName);
  result.setOriginalName(systemName + "OriginalName");
  return result;
}
use of com.google.api.services.dataflow.model.ParallelInstruction in project beam by apache.
the class IntrinsicMapTaskExecutorFactoryTest method testExecutionContextPlumbing.
/**
 * Runs a map task consisting of a read instruction and two ParDo instructions, then checks
 * that the execution context exposes step contexts whose system names include
 * {@code "DoFn1"} and {@code "DoFnWithContext"}.
 */
@Test
public void testExecutionContextPlumbing() throws Exception {
  ParallelInstruction readStep =
      createReadInstruction("Read", ReaderFactoryTest.SingletonTestReaderFactory.class);
  ParallelInstruction firstParDo = createParDoInstruction(0, 0, "DoFn1", "DoFnUserName");
  ParallelInstruction secondParDo =
      createParDoInstruction(1, 0, "DoFnWithContext", "DoFnWithContextUserName");

  MapTask task = new MapTask();
  task.setStageName(STAGE);
  task.setInstructions(Arrays.asList(readStep, firstParDo, secondParDo));
  task.setFactory(Transport.getJsonFactory());

  BatchModeExecutionContext executionContext =
      BatchModeExecutionContext.forTesting(options, counterSet, "testStage");

  // The executor is closed automatically once execution finishes or fails.
  try (DataflowMapTaskExecutor mapTaskExecutor =
      mapTaskExecutorFactory.create(
          null, /* beamFnControlClientHandler */
          null, /* beamFnDataService */
          null, /* beamFnStateService */
          null,
          mapTaskToNetwork.apply(task),
          options,
          STAGE,
          readerRegistry,
          sinkRegistry,
          executionContext,
          counterSet,
          idGenerator)) {
    mapTaskExecutor.execute();
  }

  // Collect the system name of every step context created during execution.
  List<String> observedSystemNames = new ArrayList<>();
  for (BatchModeExecutionContext.StepContext step : executionContext.getAllStepContexts()) {
    String systemName = step.getNameContext().systemName();
    observedSystemNames.add(systemName);
  }
  assertThat(observedSystemNames, hasItems("DoFn1", "DoFnWithContext"));
}
Aggregations