Use of com.google.api.services.dataflow.model.PartialGroupByKeyInstruction in the Apache Beam project.
Class LengthPrefixUnknownCoders, method forParallelInstruction.
/**
 * Produces a copy of the given {@link ParallelInstruction} whose codecs have been passed through
 * {@code forCodec} so that unknown coders end up wrapped with a {@link LengthPrefixCoder}.
 *
 * <p>All rewriting is applied to the result of {@code clone(input, ParallelInstruction.class)}.
 * Exactly one kind of instruction is handled, tested in this order: read, write, ParDo, partial
 * group-by-key, flatten.
 *
 * @param input the instruction to copy and rewrite
 * @param replaceWithByteArrayCoder forwarded unchanged to {@code forCodec} and {@code
 *     forParDoInstruction}
 * @return the rewritten copy
 * @throws RuntimeException if the instruction is none of the kinds listed above, or wrapping an
 *     {@link IOException} raised while copying or rewriting
 */
@VisibleForTesting
static ParallelInstruction forParallelInstruction(ParallelInstruction input, boolean replaceWithByteArrayCoder) throws Exception {
  try {
    ParallelInstruction copy = clone(input, ParallelInstruction.class);

    if (copy.getRead() != null) {
      Source readSource = copy.getRead().getSource();
      readSource.setCodec(forCodec(readSource.getCodec(), replaceWithByteArrayCoder));
      return copy;
    }

    if (copy.getWrite() != null) {
      com.google.api.services.dataflow.model.Sink writeSink = copy.getWrite().getSink();
      writeSink.setCodec(forCodec(writeSink.getCodec(), replaceWithByteArrayCoder));
      return copy;
    }

    if (copy.getParDo() != null) {
      copy.setParDo(forParDoInstruction(copy.getParDo(), replaceWithByteArrayCoder));
      return copy;
    }

    if (copy.getPartialGroupByKey() != null) {
      PartialGroupByKeyInstruction partialGbk = copy.getPartialGroupByKey();
      partialGbk.setInputElementCodec(
          forCodec(partialGbk.getInputElementCodec(), replaceWithByteArrayCoder));
      return copy;
    }

    if (copy.getFlatten() != null) {
      // A flatten instruction carries no codec, so the copy is returned untouched.
      return copy;
    }

    throw new RuntimeException("Unknown parallel instruction: " + input);
  } catch (IOException e) {
    String message =
        String.format(
            "Failed to replace unknown coder with " + "LengthPrefixCoder for : {%s}", input);
    throw new RuntimeException(message, e);
  }
}
Use of com.google.api.services.dataflow.model.PartialGroupByKeyInstruction in the Apache Beam project.
Class BeamFnMapTaskExecutorFactory, method createPartialGroupByKeyOperation.
/**
 * Creates the {@link OperationNode} that executes the partial group-by-key instruction held by
 * {@code node}.
 *
 * <p>The instruction's input element codec must decode to a {@link WindowedValueCoder} whose
 * value coder is a {@link KvCoder}. The optional value combining function is converted to a
 * {@link CloudObject} only when the instruction provides one.
 *
 * @throws IllegalArgumentException if the input coder, or its value coder, is not of the expected
 *     kind
 */
<K> OperationNode createPartialGroupByKeyOperation(Network<Node, Edge> network, ParallelInstructionNode node, PipelineOptions options, DataflowExecutionContext<?> executionContext, DataflowOperationContext operationContext) throws Exception {
  PartialGroupByKeyInstruction pgbkSpec = node.getParallelInstruction().getPartialGroupByKey();
  OutputReceiver[] outputs = getOutputReceivers(network, node);

  // Decode the declared input codec and check it is a windowed coder.
  Coder<?> inputCoder =
      CloudObjects.coderFromCloudObject(CloudObject.fromSpec(pgbkSpec.getInputElementCodec()));
  if (!(inputCoder instanceof WindowedValueCoder)) {
    String message =
        String.format(
            "unexpected kind of input coder for PartialGroupByKeyOperation: %s", inputCoder);
    throw new IllegalArgumentException(message);
  }

  // The windowed coder must in turn carry key/value elements.
  Coder<?> valueCoder = ((WindowedValueCoder<?>) inputCoder).getValueCoder();
  if (!(valueCoder instanceof KvCoder)) {
    String message =
        String.format(
            "unexpected kind of input element coder for PartialGroupByKeyOperation: %s",
            valueCoder);
    throw new IllegalArgumentException(message);
  }
  @SuppressWarnings("unchecked")
  KvCoder<K, ?> kvCoder = (KvCoder<K, ?>) valueCoder;

  // The combining function is optional; null is passed on when it is absent.
  CloudObject combineFnSpec = null;
  if (pgbkSpec.getValueCombiningFn() != null) {
    combineFnSpec = CloudObject.fromSpec(pgbkSpec.getValueCombiningFn());
  }

  ParDoFn pgbkFn =
      PartialGroupByKeyParDoFns.create(
          options,
          kvCoder,
          combineFnSpec,
          pgbkSpec.getSideInputs(),
          Arrays.<Receiver>asList(outputs),
          executionContext,
          operationContext);
  return OperationNode.create(new ParDoOperation(pgbkFn, outputs, operationContext));
}
Use of com.google.api.services.dataflow.model.PartialGroupByKeyInstruction in the Apache Beam project.
Class IntrinsicMapTaskExecutorFactory, method createPartialGroupByKeyOperation.
/**
 * Translates the partial group-by-key instruction wrapped by {@code node} into an {@link
 * OperationNode} backed by a {@link ParDoOperation}.
 *
 * <p>The input element codec of the instruction is decoded and validated: it has to be a {@link
 * WindowedValueCoder} over a {@link KvCoder}. A value combining function, when present on the
 * instruction, is handed to {@code PartialGroupByKeyParDoFns.create} as a {@link CloudObject};
 * otherwise {@code null} is handed over.
 *
 * @throws IllegalArgumentException if either the decoded coder or its value coder has an
 *     unexpected type
 */
<K> OperationNode createPartialGroupByKeyOperation(Network<Node, Edge> network, ParallelInstructionNode node, PipelineOptions options, DataflowExecutionContext<?> executionContext, DataflowOperationContext operationContext) throws Exception {
  ParallelInstruction parallelInstruction = node.getParallelInstruction();
  PartialGroupByKeyInstruction partialGbk = parallelInstruction.getPartialGroupByKey();
  OutputReceiver[] outputReceivers = getOutputReceivers(network, node);

  CloudObject inputCodecSpec = CloudObject.fromSpec(partialGbk.getInputElementCodec());
  Coder<?> decodedCoder = CloudObjects.coderFromCloudObject(inputCodecSpec);
  if (!(decodedCoder instanceof WindowedValueCoder)) {
    throw new IllegalArgumentException(
        String.format(
            "unexpected kind of input coder for PartialGroupByKeyOperation: %s", decodedCoder));
  }

  Coder<?> elementCoder = ((WindowedValueCoder<?>) decodedCoder).getValueCoder();
  if (!(elementCoder instanceof KvCoder)) {
    throw new IllegalArgumentException(
        String.format(
            "unexpected kind of input element coder for PartialGroupByKeyOperation: %s",
            elementCoder));
  }
  @SuppressWarnings("unchecked")
  KvCoder<K, ?> kvElementCoder = (KvCoder<K, ?>) elementCoder;

  // No combining function on the instruction means null is forwarded to the factory.
  CloudObject combiningFn =
      partialGbk.getValueCombiningFn() == null
          ? null
          : CloudObject.fromSpec(partialGbk.getValueCombiningFn());

  ParDoFn parDoFn =
      PartialGroupByKeyParDoFns.create(
          options,
          kvElementCoder,
          combiningFn,
          partialGbk.getSideInputs(),
          Arrays.<Receiver>asList(outputReceivers),
          executionContext,
          operationContext);
  ParDoOperation operation = new ParDoOperation(parDoFn, outputReceivers, operationContext);
  return OperationNode.create(operation);
}
Use of com.google.api.services.dataflow.model.PartialGroupByKeyInstruction in the Apache Beam project.
Class IntrinsicMapTaskExecutorFactoryTest, method createPartialGroupByKeyInstruction.
/**
 * Builds a {@link ParallelInstruction} fixture that wraps a {@link PartialGroupByKeyInstruction}.
 *
 * <p>The PGBK input points at output {@code producerOutputNum} of instruction {@code
 * producerIndex}. Its input element codec is a full windowed-value coder over {@code KV<String,
 * Integer>} with interval windows, and the single declared output uses a {@code KV<String,
 * Iterable<Integer>>} codec.
 *
 * @param producerIndex index of the instruction producing the PGBK input
 * @param producerOutputNum output number of that producer to consume
 * @return the assembled instruction
 */
static ParallelInstruction createPartialGroupByKeyInstruction(int producerIndex, int producerOutputNum) {
  // Instruction envelope; the payload and outputs are attached once they have been built.
  ParallelInstruction result = new ParallelInstruction();
  result.setOriginalName("pgbk_original_name");
  result.setSystemName("pgbk_system_name");

  // Reference to the upstream producer's output.
  InstructionInput producerRef = new InstructionInput();
  producerRef.setProducerInstructionIndex(producerIndex);
  producerRef.setOutputNum(producerOutputNum);

  // The PGBK payload itself.
  PartialGroupByKeyInstruction pgbk = new PartialGroupByKeyInstruction();
  pgbk.setInput(producerRef);
  pgbk.setInputElementCodec(
      CloudObjects.asCloudObject(
          FullWindowedValueCoder.of(
              KvCoder.of(StringUtf8Coder.of(), BigEndianIntegerCoder.of()),
              IntervalWindowCoder.of()),
          /*sdkComponents=*/ null));

  // The single output produced by the PGBK.
  InstructionOutput pgbkOutput = new InstructionOutput();
  pgbkOutput.setName("pgbk_output_name");
  pgbkOutput.setCodec(
      CloudObjects.asCloudObject(
          KvCoder.of(StringUtf8Coder.of(), IterableCoder.of(BigEndianIntegerCoder.of())),
          /*sdkComponents=*/ null));
  pgbkOutput.setOriginalName("originalName");
  pgbkOutput.setSystemName("systemName");

  result.setPartialGroupByKey(pgbk);
  result.setOutputs(Arrays.asList(pgbkOutput));
  return result;
}
Use of com.google.api.services.dataflow.model.PartialGroupByKeyInstruction in the Apache Beam project.
Class MapTaskToNetworkFunctionTest, method testPartialGroupByKey.
/**
 * Checks that a {@code Read --> PGBK --> Write} map task is converted into a network with the
 * expected nodes and edges, and that walking the successors from the read node reaches the PGBK
 * node and finally the write node.
 */
@Test
public void testPartialGroupByKey() {
  // Read --> PGBK --> Write
  InstructionOutput readOutput = createInstructionOutput("Read.out");
  ParallelInstruction read = createParallelInstruction("Read", readOutput);
  read.setRead(new ReadInstruction());

  PartialGroupByKeyInstruction pgbkInstruction = new PartialGroupByKeyInstruction();
  // Read.out
  pgbkInstruction.setInput(createInstructionInput(0, 0));
  InstructionOutput pgbkOutput = createInstructionOutput("PGBK.out");
  ParallelInstruction pgbk = createParallelInstruction("PGBK", pgbkOutput);
  pgbk.setPartialGroupByKey(pgbkInstruction);

  WriteInstruction writeInstruction = new WriteInstruction();
  // PGBK.out
  writeInstruction.setInput(createInstructionInput(1, 0));
  ParallelInstruction write = createParallelInstruction("Write");
  write.setWrite(writeInstruction);

  MapTask mapTask = new MapTask();
  mapTask.setInstructions(ImmutableList.of(read, pgbk, write));
  mapTask.setFactory(Transport.getJsonFactory());

  Network<Node, Edge> network = new MapTaskToNetworkFunction(IdGenerators.decrementingLongs()).apply(mapTask);
  assertNetworkProperties(network);
  // Three instruction nodes plus the two declared outputs (Read.out, PGBK.out).
  assertEquals(5, network.nodes().size());
  // Read -> Read.out -> PGBK -> PGBK.out -> Write.
  assertEquals(4, network.edges().size());

  ParallelInstructionNode readNode = get(network, read);
  InstructionOutputNode readOutputNode = getOnlySuccessor(network, readNode);
  assertEquals(readOutput, readOutputNode.getInstructionOutput());

  ParallelInstructionNode pgbkNode = getOnlySuccessor(network, readOutputNode);
  InstructionOutputNode pgbkOutputNode = getOnlySuccessor(network, pgbkNode);
  assertEquals(pgbkOutput, pgbkOutputNode.getInstructionOutput());

  // Verify the node reached from PGBK.out rather than the local fixture: asserting that the
  // locally constructed `write` is non-null can never fail and checks nothing about the network.
  ParallelInstructionNode writeNode = getOnlySuccessor(network, pgbkOutputNode);
  assertEquals(write, writeNode.getParallelInstruction());
  assertNotNull(writeNode.getParallelInstruction().getWrite());
}
Aggregations